@@ -430,6 +430,103 @@ service ContentAddressableStorage {
  rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
  }
+
+  // Split a blob into chunks.
+  //
+  // This splitting API aims to reduce download traffic between client and
+  // server, e.g., when a client needs to fetch a large blob that has just
+  // been modified slightly since the last build. In this case, there is no
+  // need to fetch the entire blob data, but just the binary differences
+  // between the two blob versions, which are typically determined by
+  // deduplication techniques such as content-defined chunking.
+  //
+  // Clients can use this API before downloading a blob to determine which
+  // parts of the blob are already present locally and do not need to be
+  // downloaded again. The server splits the blob into chunks according to a
+  // specified content-defined chunking algorithm and returns a list of the
+  // chunk digests in the order in which the chunks have to be concatenated to
+  // assemble the requested blob.
+  //
+  // A client can expect the following guarantees from the server if a split
+  // request is answered successfully:
+  //  1. The blob chunks are stored in CAS.
+  //  2. Concatenating the blob chunks in the order of the digest list
+  //     returned by the server results in the original blob.
+  //
+  // Using this API is optional for clients, but it allows them to download
+  // only the missing parts of a large blob instead of the entire blob data,
+  // which in turn can considerably reduce download network traffic.
+  //
+  // Since the generated chunks are stored as blobs, they are subject to the
+  // same lifetimes as other blobs. However, their lifetime is extended if
+  // they are part of the result of a split blob request.
+  //
+  // Clients are recommended to verify that the digest of the blob assembled
+  // from the fetched chunks matches the requested blob digest.
+  //
+  // If several clients use blob splitting, it is recommended that they
+  // request the same splitting algorithm so that they benefit from each
+  // other's chunking data. In combination with blob splicing, an agreement
+  // about the chunking algorithm is recommended, since then both the client
+  // and the server side can benefit from each other's chunking data.
+  //
+  // Servers are free to implement this functionality, but they need to
+  // declare whether they support it or not by setting the
+  // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: The requested blob is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   blob chunks.
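+  //
+  // Example (non-normative): a minimal client-side download flow, sketched
+  // in Python. It assumes `cas` is a generated ContentAddressableStorage
+  // gRPC stub and that blobs are SHA-256 addressed; `fetch_blob_data`
+  // (e.g., backed by the ByteStream API) and the `local_chunks` cache are
+  // hypothetical helpers, not part of this protocol.
+  //
+  //   import hashlib
+  //
+  //   def download_blob(cas, instance_name, blob_digest, local_chunks):
+  //       resp = cas.SplitBlob(SplitBlobRequest(
+  //           instance_name=instance_name,
+  //           blob_digest=blob_digest,
+  //           chunking_algorithm=ChunkingAlgorithm.FASTCDC))
+  //       parts = []
+  //       for d in resp.chunk_digests:
+  //           if d.hash not in local_chunks:   # fetch only missing chunks
+  //               local_chunks[d.hash] = fetch_blob_data(d)
+  //           parts.append(local_chunks[d.hash])
+  //       blob = b"".join(parts)               # guarantee 2: keep the order
+  //       # Recommended client-side verification of the assembled blob:
+  //       assert hashlib.sha256(blob).hexdigest() == blob_digest.hash
+  //       return blob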
+  rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+  }
+
+  // Splice a blob from chunks.
+  //
+  // This is the complementary operation to the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // function, used to handle the split upload of large blobs and save upload
+  // traffic.
+  //
+  // If a client needs to upload a large blob and is able to split it into
+  // chunks locally according to some content-defined chunking algorithm, it
+  // can first determine which parts of the blob are already available in the
+  // remote CAS and upload the missing chunks, and then use this API to
+  // instruct the server to splice the original blob from the remotely
+  // available blob chunks.
+  //
+  // In order to ensure data consistency of the CAS, the server will verify
+  // that the digest of the spliced result matches the digest provided in the
+  // request, and will reject the splice request if this check fails.
+  //
+  // Using this API is optional for clients, but it allows them to upload only
+  // the missing parts of a large blob instead of the entire blob data, which
+  // in turn can considerably reduce upload network traffic.
+  //
+  // In order to split a blob into chunks, it is recommended that the client
+  // use one of the chunking algorithms advertised by the server in
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms],
+  // so that client and server benefit from each other's chunking data. If
+  // several clients use blob splicing, it is recommended that they use the
+  // same splitting algorithm to split their blobs into chunks.
+  //
+  // Servers are free to implement this functionality, but they need to
+  // declare whether they support it or not by setting the
+  // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: At least one of the blob chunks is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   spliced blob.
+  // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from
+  //   the provided expected digest.
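+  //
+  // Example (non-normative): a minimal client-side upload flow, sketched in
+  // Python under the same assumptions as the SplitBlob example above;
+  // `chunks` is the ordered list of (Digest, bytes) pairs produced by a
+  // local implementation of an advertised chunking algorithm, and
+  // `upload_blob_data` (e.g., backed by the ByteStream API) is a
+  // hypothetical helper.
+  //
+  //   def upload_blob(cas, instance_name, blob_digest, chunks):
+  //       missing = cas.FindMissingBlobs(FindMissingBlobsRequest(
+  //           instance_name=instance_name,
+  //           blob_digests=[d for d, _ in chunks])).missing_blob_digests
+  //       missing_hashes = {d.hash for d in missing}
+  //       for d, data in chunks:
+  //           if d.hash in missing_hashes:     # upload only missing chunks
+  //               upload_blob_data(d, data)
+  //       # The server verifies the spliced result against blob_digest:
+  //       cas.SpliceBlob(SpliceBlobRequest(
+  //           instance_name=instance_name,
+  //           blob_digest=blob_digest,
+  //           chunk_digests=[d for d, _ in chunks]))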
+  rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
+    option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
+  }
}

// The Capabilities service may be used by remote execution clients to query
@@ -1814,6 +1911,97 @@ message GetTreeResponse {
  string next_page_token = 2;
}

+// A request message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own workers,
+  // storage, caches, etc.). The server MAY require use of this field to select
+  // between them in an implementation-defined fashion, otherwise it can be
+  // omitted.
+  string instance_name = 1;
+
+  // The digest of the blob to be split.
+  Digest blob_digest = 2;
+
+  // The chunking algorithm to be used. Must be IDENTITY (no chunking) or one
+  // of the algorithms advertised by the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field.
+  ChunkingAlgorithm.Value chunking_algorithm = 3;
+
+  // The digest function of the blob to be split.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
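+  //
+  // Non-normative sketch (Python) of this inference, keyed on hex-digest
+  // lengths; note that two announced functions whose hashes have equal
+  // length (e.g., MD5 and 128-bit MURMUR3) cannot be disambiguated this way:
+  //
+  //   _HASH_LEN = {32: "MD5", 40: "SHA1", 64: "SHA256",
+  //                96: "SHA384", 128: "SHA512"}
+  //
+  //   def infer_digest_function(blob_digest, announced):
+  //       name = _HASH_LEN.get(len(blob_digest.hash))
+  //       if name is None or name not in announced:
+  //           raise ValueError("cannot infer digest function")
+  //       return name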
+  DigestFunction.Value digest_function = 4;
+}
+
+// A response message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobResponse {
+  // The ordered list of digests of the chunks into which the blob was split.
+  // The original blob is assembled by concatenating the chunk data according
+  // to the order of the digests given by this list.
+  repeated Digest chunk_digests = 1;
+
+  // The digest function of the chunks.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
+  DigestFunction.Value digest_function = 2;
+}
+
+// A request message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own workers,
+  // storage, caches, etc.). The server MAY require use of this field to select
+  // between them in an implementation-defined fashion, otherwise it can be
+  // omitted.
+  string instance_name = 1;
+
+  // Expected digest of the spliced blob.
+  Digest blob_digest = 2;
+
+  // The ordered list of digests of the chunks which need to be concatenated
+  // to assemble the original blob.
+  repeated Digest chunk_digests = 3;
+
+  // The digest function of the blob to be spliced as well as of the chunks
+  // to be concatenated.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
+  DigestFunction.Value digest_function = 4;
+}
+
+// A response message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobResponse {
+  // Computed digest of the spliced blob.
+  Digest blob_digest = 1;
+
+  // The digest function of the spliced blob.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
+  DigestFunction.Value digest_function = 2;
+}
+
// A request message for
// [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
message GetCapabilitiesRequest {
@@ -2000,6 +2188,40 @@ message Compressor {
  }
}

+// Content-defined chunking algorithms used for splitting blobs into chunks.
+message ChunkingAlgorithm {
+  enum Value {
+    // No chunking. Servers MUST always support this, and do not need to
+    // advertise it.
+    IDENTITY = 0;
+
+    // Content-defined chunking using Rabin fingerprints. An implementation of
+    // this scheme is presented in the paper
+    // https://link.springer.com/chapter/10.1007/978-1-4613-9323-8_11. An
+    // implementation of this algorithm should be configured so that the
+    // resulting chunk sizes have the following properties:
+    // - Minimum chunk size: 128 KB
+    // - Average chunk size: 512 KB (0x000000000007FFFF bit mask)
+    // - Maximum chunk size: 2048 KB
+    // The irreducible polynomial to be used for the modulo divisions is the
+    // following 64-bit polynomial of degree 53: 0x003DA3358B4DC173. The window
+    // size to be used is 64 bits.
+    RABINCDC = 1;
+
+    // Content-defined chunking using the FastCDC algorithm. The algorithm is
+    // described in the paper https://ieeexplore.ieee.org/document/9055082
+    // (Algorithm 2, FastCDC8KB). It should be configured so that the
+    // resulting chunk sizes have the following properties:
+    // - Minimum chunk size: 128 KB
+    // - Average chunk size: 512 KB
+    // - Maximum chunk size: 2048 KB
+    // The 256 64-bit random numbers in the Gear table are to be created with
+    // the Mersenne Twister pseudo-random number generator for 64-bit numbers
+    // with a state size of 19937 bits and a seed of 0.
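+    //
+    // Non-normative sketch (Python) of plain gear-based content-defined
+    // chunking with the size parameters above; a conformant implementation
+    // must follow Algorithm 2 of the paper (including normalized chunking)
+    // and derive `gear` exactly as described above:
+    //
+    //   MASK64 = (1 << 64) - 1
+    //   MIN_SIZE, MAX_SIZE = 128 * 1024, 2048 * 1024
+    //   AVG_MASK = 0x7FFFF                        # 19 bits -> ~512 KB avg
+    //
+    //   def chunk_boundaries(data, gear):         # gear: 256 64-bit ints
+    //       cuts, h, start = [], 0, 0
+    //       for i, b in enumerate(data):
+    //           h = ((h << 1) + gear[b]) & MASK64     # rolling gear hash
+    //           size = i - start + 1
+    //           at_avg = size >= MIN_SIZE and (h & AVG_MASK) == 0
+    //           if at_avg or size >= MAX_SIZE:
+    //               cuts.append(i + 1)            # cut after byte i
+    //               h, start = 0, i + 1
+    //       if start < len(data):
+    //           cuts.append(len(data))            # final, possibly short
+    //       return cuts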
+    FASTCDC = 2;
+  }
+}
+
// Capabilities of the remote cache system.
message CacheCapabilities {
  // All the digest functions supported by the remote cache.
@@ -2033,6 +2255,25 @@ message CacheCapabilities {
  // [BatchUpdateBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.BatchUpdateBlobs]
  // requests.
  repeated Compressor.Value supported_batch_update_compressors = 7;
+
+  // All the chunking algorithms supported by the remote cache. The remote
+  // cache may support multiple chunking algorithms simultaneously. Servers
+  // MUST support IDENTITY (no chunking), even if it is not listed here.
+  repeated ChunkingAlgorithm.Value supported_chunking_algorithms = 8;
+
+  // Whether blob splitting is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splitting and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // operation.
+  bool blob_split_support = 9;
+
+  // Whether blob splicing is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splicing and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
+  // operation.
+  bool blob_splice_support = 10;
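+
+  // Non-normative sketch (Python) of how a client might gate its transfer
+  // strategy on the fields above, where `caps` is the CacheCapabilities
+  // message taken from a GetCapabilities response:
+  //
+  //   def plan_transfers(caps, preferred_algorithm):
+  //       algorithm = ChunkingAlgorithm.IDENTITY    # always supported
+  //       if preferred_algorithm in caps.supported_chunking_algorithms:
+  //           algorithm = preferred_algorithm
+  //       return caps.blob_split_support, caps.blob_splice_support, algorithm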
}

// Capabilities of the remote execution system.