@@ -430,6 +430,103 @@ service ContentAddressableStorage {
  rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
  }
+
+  // Split a blob into chunks.
+  //
+  // This splitting API aims to reduce download traffic between client and
+  // server, e.g., if a client needs to fetch a large blob that has been
+  // modified only slightly since the last build. In this case, the client
+  // does not need to fetch the entire blob data, but only the binary
+  // differences between the two blob versions, which are typically determined
+  // by deduplication techniques such as content-defined chunking.
+  //
+  // Clients can use this API before downloading a blob to determine which
+  // parts of the blob are already present locally and do not need to be
+  // downloaded again. The server splits the blob into chunks according to a
+  // specified content-defined chunking algorithm and returns a list of the
+  // chunk digests in the order in which the chunks have to be concatenated to
+  // assemble the requested blob.
+  //
+  // A client can expect the following guarantees from the server if a split
+  // request is answered successfully:
+  // 1. The blob chunks are stored in CAS.
+  // 2. Concatenating the blob chunks in the order of the digest list returned
+  //    by the server results in the original blob.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // download only the missing parts of a large blob instead of the entire
+  // blob data, which in turn can considerably reduce download network traffic.
+  //
+  // Since the generated chunks are stored as blobs, they are subject to the
+  // same lifetimes as other blobs. However, their lifetime is extended if
+  // they are part of the result of a split blob request.
+  //
+  // It is recommended that the client verify that the blob assembled from
+  // the fetched chunks hashes to the requested blob digest.
+  //
+  // If several clients use blob splitting, it is recommended that they
+  // request the same splitting algorithm to benefit from each other's
+  // chunking data. In combination with blob splicing, an agreement about the
+  // chunking algorithm is recommended since both the client side and the
+  // server side can benefit from each other's chunking data.
+  //
+  // Servers are free to implement this functionality, but they need to
+  // declare whether they support it or not by setting the
+  // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: The requested blob is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   blob chunks.
+  rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+  }
+
+  // Splice a blob from chunks.
+  //
+  // This is the complementary operation to the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // function and handles the chunked upload of large blobs to save upload
+  // traffic.
+  //
+  // If a client needs to upload a large blob and is able to split it into
+  // chunks locally according to some content-defined chunking algorithm, it
+  // can first determine which parts of the blob are already available in the
+  // remote CAS and upload the missing chunks, and then use this API to
+  // instruct the server to splice the original blob from the remotely
+  // available blob chunks.
+  //
+  // To ensure data consistency of the CAS, the server verifies that the
+  // digest of the spliced result matches the digest provided in the request
+  // and rejects the splice request if this check fails.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // upload only the missing parts of a large blob instead of the entire blob
+  // data, which in turn can considerably reduce upload network traffic.
+  //
+  // To split a blob into chunks, it is recommended that the client use one
+  // of the chunking algorithms advertised by the server in
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // so that client and server can benefit from each other's chunking data.
+  // If several clients use blob splicing, it is recommended that they use the
+  // same splitting algorithm to split their blobs into chunks.
+  //
+  // Servers are free to implement this functionality, but they need to
+  // declare whether they support it or not by setting the
+  // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: At least one of the blob chunks is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   spliced blob.
+  // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from
+  //   the provided expected digest.
+  rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
+    option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
+  }
}

// The Capabilities service may be used by remote execution clients to query
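To make the intended download flow concrete, here is a minimal client-side sketch in Python. It assumes gRPC stubs generated from this proto with protoc (the module path is illustrative), a SHA-256 digest function, a hypothetical local chunk cache keyed by chunk hash, and a caller-supplied `bytestream_read` helper wrapping the ByteStream Read method; none of these names are part of the API itself.

```python
import hashlib

# Assumes stubs generated from remote_execution.proto; path is illustrative.
from build.bazel.remote.execution.v2 import remote_execution_pb2 as re_pb2


def fetch_blob_via_split(cas_stub, bytestream_read, local_chunks,
                         instance_name, blob_digest):
    """Download a blob by fetching only the chunks missing locally.

    cas_stub        -- ContentAddressableStorage gRPC stub
    bytestream_read -- hypothetical callable (Digest) -> bytes
    local_chunks    -- dict hash -> bytes, acting as the local chunk cache
    """
    response = cas_stub.SplitBlob(re_pb2.SplitBlobRequest(
        instance_name=instance_name,
        blob_digest=blob_digest,
        chunking_algorithm=re_pb2.ChunkingAlgorithm.FASTCDC,
    ))

    parts = []
    for chunk_digest in response.chunk_digests:
        data = local_chunks.get(chunk_digest.hash)
        if data is None:
            # Only chunks absent from the local cache cause network traffic.
            data = bytestream_read(chunk_digest)
            local_chunks[chunk_digest.hash] = data
        parts.append(data)

    # Guarantee 2: concatenating in the returned order yields the blob.
    blob = b"".join(parts)
    # Verify the assembled blob, as recommended above (SHA-256 assumed).
    if hashlib.sha256(blob).hexdigest() != blob_digest.hash:
        raise ValueError("assembled blob does not match requested digest")
    return blob
```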
@@ -1837,6 +1934,97 @@ message GetTreeResponse {
  string next_page_token = 2;
}

+// A request message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own
+  // workers, storage, caches, etc.). The server MAY require use of this field
+  // to select between them in an implementation-defined fashion, otherwise it
+  // can be omitted.
+  string instance_name = 1;
+
+  // The digest of the blob to be split.
+  Digest blob_digest = 2;
+
+  // The chunking algorithm to be used. Must be DEFAULT or one of the
+  // algorithms advertised by the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field.
+  ChunkingAlgorithm.Value chunking_algorithm = 3;
+
+  // The digest function of the blob to be split.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
+  DigestFunction.Value digest_function = 4;
+}
+
+// A response message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobResponse {
+  // The ordered list of digests of the chunks into which the blob was split.
+  // The original blob is assembled by concatenating the chunk data according
+  // to the order of the digests given by this list.
+  repeated Digest chunk_digests = 1;
+
+  // The digest function of the chunks.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
+  DigestFunction.Value digest_function = 2;
+}
+
+// A request message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own
+  // workers, storage, caches, etc.). The server MAY require use of this field
+  // to select between them in an implementation-defined fashion, otherwise it
+  // can be omitted.
+  string instance_name = 1;
+
+  // Expected digest of the spliced blob.
+  Digest blob_digest = 2;
+
+  // The ordered list of digests of the chunks which need to be concatenated
+  // to assemble the original blob.
+  repeated Digest chunk_digests = 3;
+
+  // The digest function of the blob to be spliced as well as of the chunks
+  // to be concatenated.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
+  DigestFunction.Value digest_function = 4;
+}
+
+// A response message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobResponse {
+  // Computed digest of the spliced blob.
+  Digest blob_digest = 1;
+
+  // The digest function of the spliced blob.
+  //
+  // If the digest function used is one of MD5, MURMUR3, SHA1, SHA256,
+  // SHA384, SHA512, or VSO, the client MAY leave this field unset. In
+  // that case the server SHOULD infer the digest function using the
+  // length of the blob digest hashes and the digest functions announced
+  // in the server's capabilities.
+  DigestFunction.Value digest_function = 2;
+}
+
// A request message for
// [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
message GetCapabilitiesRequest {
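The complementary upload flow through these messages might look as follows. This sketch makes the same assumptions as the download sketch above (generated stubs with an illustrative module path, SHA-256 digests) and takes a caller-supplied `chunk_fn` content-defined chunker, for example the gear-hash sketch shown later in this change. `FindMissingBlobs` and `BatchUpdateBlobs` are existing CAS methods.

```python
import hashlib

# Assumes stubs generated from remote_execution.proto; path is illustrative.
from build.bazel.remote.execution.v2 import remote_execution_pb2 as re_pb2


def upload_blob_via_splice(cas_stub, instance_name, blob, chunk_fn):
    """Upload a blob by transferring only the chunks the server lacks.

    chunk_fn -- content-defined chunker, bytes -> list of bytes.
    """
    chunks = chunk_fn(blob)
    digests = [
        re_pb2.Digest(hash=hashlib.sha256(c).hexdigest(), size_bytes=len(c))
        for c in chunks
    ]

    # Ask the server which chunks are missing remotely.
    missing = cas_stub.FindMissingBlobs(re_pb2.FindMissingBlobsRequest(
        instance_name=instance_name,
        blob_digests=digests,
    )).missing_blob_digests
    missing_hashes = {d.hash for d in missing}

    # Upload only the missing chunks (batched here for brevity; very large
    # chunks would go through the ByteStream API instead).
    requests = [
        re_pb2.BatchUpdateBlobsRequest.Request(digest=d, data=c)
        for d, c in zip(digests, chunks) if d.hash in missing_hashes
    ]
    if requests:
        cas_stub.BatchUpdateBlobs(re_pb2.BatchUpdateBlobsRequest(
            instance_name=instance_name,
            requests=requests,
        ))

    # Let the server splice and verify the result against the expected
    # digest; a mismatch is rejected with INVALID_ARGUMENT.
    blob_digest = re_pb2.Digest(
        hash=hashlib.sha256(blob).hexdigest(), size_bytes=len(blob))
    return cas_stub.SpliceBlob(re_pb2.SpliceBlobRequest(
        instance_name=instance_name,
        blob_digest=blob_digest,
        chunk_digests=digests,
    )).blob_digest
```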
@@ -2023,6 +2211,42 @@ message Compressor {
  }
}

+// Content-defined chunking algorithms used for splitting blobs into chunks.
+message ChunkingAlgorithm {
+  enum Value {
+    // No special constraints on the chunking algorithm. If a client selects
+    // this value, it does not care which exact chunking algorithm is used,
+    // and the server is free to use any chunking algorithm at its disposal.
+    // A server does not need to advertise this value.
+    DEFAULT = 0;
+
+    // Content-defined chunking using Rabin fingerprints. An implementation
+    // of this scheme is presented in this paper
+    // https://www.researchgate.net/publication/2688260_Some_applications_of_Rabin's_fingerprinting_method.
+    // The implementation of this algorithm should be configured so that the
+    // resulting chunk sizes have the following properties:
+    // - Minimum chunk size: 128 KB
+    // - Average chunk size: 512 KB (0x000000000007FFFF bit mask)
+    // - Maximum chunk size: 2048 KB
+    // The irreducible polynomial to be used for the modulo divisions is the
+    // following 64-bit polynomial of degree 53: 0x003DA3358B4DC173. The
+    // window size to be used is 64 bytes.
+    RABINCDC = 1;
+
+    // Content-defined chunking using the FastCDC algorithm. The algorithm is
+    // described in this paper https://ieeexplore.ieee.org/document/9055082
+    // (Algorithm 2, FastCDC8KB). The algorithm should be configured so that
+    // the resulting chunk sizes have the following properties:
+    // - Minimum chunk size: 128 KB
+    // - Average chunk size: 512 KB
+    // - Maximum chunk size: 2048 KB
+    // The 256 64-bit random numbers in the Gear table are to be created with
+    // the Mersenne Twister pseudo-random number generator for 64-bit numbers
+    // with a state size of 19937 bits and a seed of 0.
+    FASTCDC = 2;
+  }
+}
+
// Capabilities of the remote cache system.
message CacheCapabilities {
  // All the digest functions supported by the remote cache.
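To illustrate the content-defined chunking these enum values describe, here is a simplified gear-hash chunker in Python. This is an illustration, not a conformant FastCDC8KB implementation: it omits FastCDC's normalized chunking (two masks around the average size), and it fills the Gear table with Python's 32-bit MT19937 via `random`, whereas the comment above calls for the 64-bit Mersenne Twister. The size parameters follow the stated 128 KB / 512 KB / 2048 KB configuration.

```python
import random

MIN_SIZE = 128 * 1024    # minimum chunk size from the comment above
AVG_SIZE = 512 * 1024    # average chunk size
MAX_SIZE = 2048 * 1024   # maximum chunk size
MASK = AVG_SIZE - 1      # a cut fires with probability ~1/AVG_SIZE per byte

# Gear table: 256 pseudo-random 64-bit values, seed 0. NOTE: Python's
# `random` is the 32-bit MT19937, so this table is illustrative rather
# than conformant with the 64-bit variant required above.
_rng = random.Random(0)
GEAR = [_rng.getrandbits(64) for _ in range(256)]


def gear_cdc_chunks(data: bytes) -> list[bytes]:
    """Split `data` with a simplified gear-hash content-defined chunker."""
    chunks, start, n = [], 0, len(data)
    while start < n:
        fp = 0
        end = min(start + MAX_SIZE, n)
        cut = end  # fall back to the maximum chunk size
        # No cut point may occur before the minimum chunk size.
        for i in range(start + MIN_SIZE, end):
            fp = ((fp << 1) + GEAR[data[i]]) & 0xFFFFFFFFFFFFFFFF
            if (fp & MASK) == 0:
                cut = i + 1
                break
        chunks.append(data[start:cut])
        start = cut
    return chunks
```

Because the cut points depend only on local content, inserting bytes near the front of a blob shifts the data but leaves most downstream chunk boundaries, and hence most chunk digests, unchanged; this is what lets split and splice deduplicate between blob versions.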
@@ -2056,6 +2280,28 @@ message CacheCapabilities {
  // [BatchUpdateBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.BatchUpdateBlobs]
  // requests.
  repeated Compressor.Value supported_batch_update_compressors = 7;
+
+  // All specifically defined chunking algorithms supported by the remote
+  // cache in addition to a DEFAULT implementation. A remote cache may support
+  // multiple chunking algorithms simultaneously. A server always has to
+  // provide a DEFAULT implementation, but does not need to advertise it here.
+  // This field exists mainly for negotiation purposes between client and
+  // server to agree on an exact chunking algorithm.
+  repeated ChunkingAlgorithm.Value supported_chunking_algorithms = 8;
+
+  // Whether blob splitting is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splitting and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // operation.
+  bool blob_split_support = 9;
+
+  // Whether blob splicing is supported for the particular server/instance.
+  // If yes, the server/instance implements the specified behavior for blob
+  // splicing and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
+  // operation.
+  bool blob_splice_support = 10;
}

// Capabilities of the remote execution system.
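Taken together, a client might inspect these capability fields once and derive its transfer strategy from them. A sketch under the same stub assumptions as the earlier examples; the preference order between the algorithms is a local policy choice, not something mandated by the API.

```python
# Assumes stubs generated from remote_execution.proto; path is illustrative.
from build.bazel.remote.execution.v2 import remote_execution_pb2 as re_pb2


def negotiate_chunking(capabilities_stub, instance_name):
    """Pick a chunking algorithm the server explicitly advertises.

    Returns None if neither SplitBlob nor SpliceBlob is usable, otherwise
    an advertised algorithm, falling back to DEFAULT (which every
    supporting server must implement but need not advertise).
    """
    caps = capabilities_stub.GetCapabilities(
        re_pb2.GetCapabilitiesRequest(instance_name=instance_name))
    cache_caps = caps.cache_capabilities

    if not (cache_caps.blob_split_support or cache_caps.blob_splice_support):
        return None

    for algo in (re_pb2.ChunkingAlgorithm.FASTCDC,
                 re_pb2.ChunkingAlgorithm.RABINCDC):
        if algo in cache_caps.supported_chunking_algorithms:
            return algo
    return re_pb2.ChunkingAlgorithm.DEFAULT
```

Agreeing on one advertised algorithm, rather than falling back to DEFAULT, is what allows chunks produced by the client-side chunker for SpliceBlob to match chunks produced by the server for SplitBlob, so both directions deduplicate against the same chunk set.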