@@ -430,6 +430,103 @@ service ContentAddressableStorage {
  rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
  }
+
+  // Split a blob into chunks.
+  //
+  // This splitting API aims to reduce download traffic between client and
+  // server, e.g., if a client needs to fetch a large blob that has just been
+  // modified slightly since the last build. In this case, there is no need
+  // to fetch the entire blob data, but just the binary differences between
+  // the two blob versions, which are typically determined by deduplication
+  // techniques such as content-defined chunking.
+  //
+  // Clients can use this API before downloading a blob to determine which
+  // parts of the blob are already present locally and do not need to be
+  // downloaded again. The server splits the blob into chunks according to a
+  // specified content-defined chunking algorithm and returns a list of the
+  // chunk digests in the order in which the chunks have to be concatenated
+  // to assemble the requested blob.
+  //
+  // A client can expect the following guarantees from the server if a split
+  // request is answered successfully:
+  //  1. The blob chunks are stored in CAS.
+  //  2. Concatenating the blob chunks in the order of the digest list
+  //     returned by the server results in the original blob.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // download only the missing parts of a large blob instead of the entire
+  // blob data, which in turn can considerably reduce download network
+  // traffic.
+  //
+  // Since the generated chunks are stored as blobs, they are subject to the
+  // same lifetimes as other blobs. However, their lifetime is extended if
+  // they are part of the result of a split blob request.
+  //
+  // Clients are recommended to verify that the digest of the blob assembled
+  // from the fetched chunks matches the requested blob digest.
+  //
+  // If several clients use blob splitting, it is recommended that they
+  // request the same splitting algorithm to benefit from each other's
+  // chunking data. In combination with blob splicing, an agreement about
+  // the chunking algorithm is recommended since both the client and the
+  // server side can benefit from each other's chunking data.
+  //
+  // Servers are free to implement this functionality, but they need to
+  // declare whether they support it or not by setting the
+  // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: The requested blob is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   blob chunks.
+  rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+  }
+
+  // Splice a blob from chunks.
+  //
+  // This is the complementary operation to the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // function and handles the chunked upload of large blobs to save upload
+  // traffic.
+  //
+  // If a client needs to upload a large blob and is able to split it into
+  // chunks locally according to some content-defined chunking algorithm, it
+  // can first determine which parts of the blob are already available in
+  // the remote CAS and upload the missing chunks, and then use this API to
+  // instruct the server to splice the original blob from the remotely
+  // available blob chunks.
+  //
+  // In order to ensure data consistency of the CAS, the server will verify
+  // that the digest of the spliced result matches the digest provided in
+  // the request and will reject a splice request if this check fails.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // upload only the missing parts of a large blob instead of the entire
+  // blob data, which in turn can considerably reduce upload network
+  // traffic.
+  //
+  // In order to split a blob into chunks, it is recommended for the client
+  // to use one of the chunking algorithms advertised by the server in the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field, to benefit from each other's chunking data. If several clients
+  // use blob splicing, it is recommended that they use the same splitting
+  // algorithm to split their blobs into chunks.
+  //
+  // Servers are free to implement this functionality, but they need to
+  // declare whether they support it or not by setting the
+  // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: At least one of the blob chunks is not present in the
+  //   CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   spliced blob.
+  // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from
+  //   the provided expected digest.
+  rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
+    option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
+  }
}

// The Capabilities service may be used by remote execution clients to query
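
For orientation, here is a hypothetical client-side download flow built on the SplitBlob rpc above (a Python sketch, not part of the .proto; `cas`, `read_blob`, and `local_cache` are assumed helpers rather than names defined by this protocol):

    import hashlib

    def fetch_blob_via_split(cas, instance_name, blob_digest, local_cache):
        # Ask the server to split the blob with an agreed-upon algorithm.
        response = cas.SplitBlob(SplitBlobRequest(
            instance_name=instance_name,
            blob_digest=blob_digest,
            chunking_algorithm=ChunkingAlgorithm.FASTCDC,
        ))

        # Download only the chunks that are not already cached locally.
        for chunk_digest in response.chunk_digests:
            if chunk_digest.hash not in local_cache:
                local_cache[chunk_digest.hash] = read_blob(
                    cas, instance_name, chunk_digest)

        # Concatenate the chunks in the order returned by the server.
        blob = b"".join(local_cache[d.hash] for d in response.chunk_digests)

        # Recommended: verify the assembled blob against the requested
        # digest (assuming SHA-256 is the negotiated digest function).
        if hashlib.sha256(blob).hexdigest() != blob_digest.hash:
            raise ValueError("assembled blob does not match requested digest")
        return blob
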
@@ -1814,6 +1911,60 @@ message GetTreeResponse {
  string next_page_token = 2;
}

+// A request message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own
+  // workers, storage, caches, etc.). The server MAY require use of this
+  // field to select between them in an implementation-defined fashion,
+  // otherwise it can be omitted.
+  string instance_name = 1;
+
+  // The digest of the blob to be split.
+  Digest blob_digest = 2;
+
+  // The chunking algorithm to be used. Must be IDENTITY (no chunking) or
+  // one of the algorithms advertised by the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field.
+  ChunkingAlgorithm.Value chunking_algorithm = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobResponse {
+  // The ordered list of digests of the chunks into which the blob was
+  // split. The original blob is assembled by concatenating the chunk data
+  // according to the order of the digests given by this list.
+  repeated Digest chunk_digests = 1;
+}
+
+// A request message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own
+  // workers, storage, caches, etc.). The server MAY require use of this
+  // field to select between them in an implementation-defined fashion,
+  // otherwise it can be omitted.
+  string instance_name = 1;
+
+  // Expected digest of the spliced blob.
+  Digest blob_digest = 2;
+
+  // The ordered list of digests of the chunks which need to be concatenated
+  // to assemble the original blob.
+  repeated Digest chunk_digests = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobResponse {
+  // Computed digest of the spliced blob.
+  Digest blob_digest = 1;
+}
+
// A request message for
// [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
message GetCapabilitiesRequest {
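
The upload-side counterpart, as a hypothetical Python sketch using the messages above (`cas` is again a ContentAddressableStorage stub; `split_into_chunks` and `upload_blob` stand in for the client's local chunker and its chunk-upload helper, and SHA-256 is assumed as the digest function):

    import hashlib

    def digest_of(data):
        # Digest message as defined elsewhere in this file.
        return Digest(hash=hashlib.sha256(data).hexdigest(),
                      size_bytes=len(data))

    def upload_blob_via_splice(cas, instance_name, blob):
        chunks = split_into_chunks(blob)  # local content-defined chunking
        chunk_digests = [digest_of(c) for c in chunks]

        # Upload only the chunks the server is missing.
        missing = cas.FindMissingBlobs(FindMissingBlobsRequest(
            instance_name=instance_name, blob_digests=chunk_digests))
        for chunk, d in zip(chunks, chunk_digests):
            if d in missing.missing_blob_digests:
                upload_blob(cas, instance_name, d, chunk)

        # Ask the server to splice the full blob; per the spec it verifies
        # the digest itself and rejects the request on mismatch.
        expected = digest_of(blob)
        response = cas.SpliceBlob(SpliceBlobRequest(
            instance_name=instance_name,
            blob_digest=expected,
            chunk_digests=chunk_digests,
        ))
        assert response.blob_digest == expected
        return expected
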
@@ -2000,6 +2151,40 @@ message Compressor {
  }
}

+// Content-defined chunking algorithms used for splitting blobs into chunks.
+message ChunkingAlgorithm {
+  enum Value {
+    // No chunking. Servers MUST always support this, and do not need to
+    // advertise it.
+    IDENTITY = 0;
+
+    // Content-defined chunking using Rabin fingerprints. An implementation
+    // of this scheme is presented in this paper:
+    // https://link.springer.com/chapter/10.1007/978-1-4613-9323-8_11. The
+    // final implementation of this algorithm should be configured to have
+    // the following properties for the resulting chunk sizes:
+    // - Minimum chunk size: 2 KB
+    // - Average chunk size: 8 KB (0x00000000007FFFFF bit mask)
+    // - Maximum chunk size: 64 KB
+    // The irreducible polynomial to be used for the modulo divisions is the
+    // following 64-bit polynomial of degree 53: 0x003DA3358B4DC173. The
+    // window size to be used is 64 bits.
+    RABINCDC = 1;
+
+    // Content-defined chunking using the FastCDC algorithm. The algorithm
+    // is described in this paper: https://ieeexplore.ieee.org/document/9055082
+    // (Algorithm 2, FastCDC8KB). The algorithm is configured to have the
+    // following properties for the resulting chunk sizes:
+    // - Minimum chunk size: 2 KB
+    // - Average chunk size: 8 KB
+    // - Maximum chunk size: 64 KB
+    // The 256 64-bit random numbers in the Gear table are to be created
+    // with the Mersenne Twister pseudo-random number generator for 64-bit
+    // numbers, with a state size of 19937 bits and a seed of 0.
+    FASTCDC = 2;
+  }
+}
+
// Capabilities of the remote cache system.
message CacheCapabilities {
  // All the digest functions supported by the remote cache.
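
To illustrate the shape of such an algorithm, here is a deliberately simplified gear-hash chunker in Python with the 2 KB / 8 KB / 64 KB bounds and average-size mask listed above. It is a sketch only: real FastCDC8KB additionally uses normalized chunking with two masks, and the Gear table below comes from Python's 32-bit MT19937 via getrandbits(), which does not reproduce the mt19937_64 table the spec requires for interoperability.

    import random

    _rng = random.Random(0)                       # illustrative seed only
    GEAR = [_rng.getrandbits(64) for _ in range(256)]

    MIN_SIZE = 2 * 1024
    AVG_MASK = 0x00000000007FFFFF                 # ~8 KB average chunk size
    MAX_SIZE = 64 * 1024

    def chunk_boundaries(data):
        """Yield (start, end) offsets of content-defined chunks."""
        start, fp = 0, 0
        for i, byte in enumerate(data):
            # Rolling gear hash over the bytes since the last cut point.
            fp = ((fp << 1) + GEAR[byte]) & 0xFFFFFFFFFFFFFFFF
            size = i - start + 1
            # Cut when the fingerprint matches the mask (after the minimum
            # size) or when the maximum chunk size is reached.
            if (size >= MIN_SIZE and fp & AVG_MASK == 0) or size >= MAX_SIZE:
                yield start, i + 1
                start, fp = i + 1, 0
        if start < len(data):
            yield start, len(data)
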
@@ -2033,6 +2218,25 @@ message CacheCapabilities {
  // [BatchUpdateBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.BatchUpdateBlobs]
  // requests.
  repeated Compressor.Value supported_batch_update_compressors = 7;
+
+  // All the chunking algorithms supported by the remote cache. A remote
+  // cache may support multiple chunking algorithms simultaneously. Servers
+  // MUST support IDENTITY (no chunking), even if it is not listed here.
+  repeated ChunkingAlgorithm.Value supported_chunking_algorithms = 8;
+
+  // Whether blob splitting is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splitting and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // operation.
+  bool blob_split_support = 9;
+
+  // Whether blob splicing is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splicing and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
+  // operation.
+  bool blob_splice_support = 10;
}

// Capabilities of the remote execution system.
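
Finally, a short hypothetical sketch of how a client might gate its use of these features on the new capability fields (`capabilities_stub` is an assumed Capabilities service stub):

    def negotiate_chunking(capabilities_stub, instance_name):
        caps = capabilities_stub.GetCapabilities(
            GetCapabilitiesRequest(instance_name=instance_name))
        cache = caps.cache_capabilities

        # Prefer an algorithm both sides implement; IDENTITY (no chunking)
        # is always available as a fallback.
        algorithm = ChunkingAlgorithm.IDENTITY
        if ChunkingAlgorithm.FASTCDC in cache.supported_chunking_algorithms:
            algorithm = ChunkingAlgorithm.FASTCDC

        return cache.blob_split_support, cache.blob_splice_support, algorithm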