@@ -430,6 +430,93 @@ service ContentAddressableStorage {
  rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
  }
+
+  // Split a blob into chunks.
+  //
+  // This splitting API aims to reduce download traffic between client and
+  // server, e.g., when a client needs to fetch a large blob that has just
+  // been modified slightly since the last build. In this case, there is no
+  // need to fetch the entire blob data, only the binary differences between
+  // the two blob versions, which are typically determined by deduplication
+  // techniques such as content-defined chunking.
+  //
+  // Clients can use this API before downloading a blob to determine which
+  // parts of the blob are already present locally and do not need to be
+  // downloaded again. The server splits the blob into chunks according to a
+  // specified content-defined chunking algorithm and returns a list of the
+  // chunk digests in the order in which the chunks have to be concatenated to
+  // assemble the requested blob.
+  //
+  // A client can expect the following guarantees from the server if a split
+  // request is answered successfully:
+  //  1. The blob chunks are stored in CAS.
+  //  2. Concatenating the blob chunks in the order of the digest list
+  //     returned by the server results in the original blob.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // download only the missing parts of a large blob instead of the entire
+  // blob data, which in turn can considerably reduce download network
+  // traffic.
+  //
+  // Since the generated chunks are stored as blobs, they are subject to the
+  // same lifetimes as other blobs. However, their lifetime is extended if
+  // they are part of the result of a split blob request.
+  //
+  // Clients are recommended to verify that the blob assembled from the
+  // fetched chunks hashes to the requested blob digest.
+  //
+  // If several clients use blob splitting, it is recommended that they
+  // request the same splitting algorithm so that they can benefit from each
+  // other's chunking data. In combination with blob splicing, agreeing on a
+  // chunking algorithm is recommended since then both the client and the
+  // server side can benefit from each other's chunking data.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: The requested blob is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   blob chunks.
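+  //
+  // Illustrative download flow (a sketch only, not part of the protocol;
+  // BatchReadBlobs and ByteStream.Read are the existing CAS/ByteStream
+  // methods):
+  //
+  //   resp = SplitBlob(blob_digest, chunking_algorithm)
+  //   for each d in resp.chunk_digests: fetch d via BatchReadBlobs or
+  //     ByteStream.Read unless it is already present in the local store
+  //   blob = concatenation of the chunk data in resp.chunk_digests order
+  //   verify that the digest of blob equals blob_digest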
+  rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+  }
+
+  // Splice a blob from chunks.
+  //
+  // This is the complementary operation to the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // function, handling the chunked upload of large blobs to save upload
+  // traffic.
+  //
+  // If a client needs to upload a large blob and is able to split it into
+  // chunks locally according to some content-defined chunking algorithm, it
+  // can first determine which parts of the blob are already available in the
+  // remote CAS and upload the missing chunks, and then use this API to
+  // instruct the server to splice the original blob from the remotely
+  // available blob chunks.
+  //
+  // In order to ensure data consistency of the CAS, the server will verify
+  // that the digest of the spliced result matches the digest provided in the
+  // request and will reject the splice request if this check fails.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // upload only the missing parts of a large blob instead of the entire blob
+  // data, which in turn can considerably reduce upload network traffic.
+  //
+  // In order to split a blob into chunks, it is recommended for the client to
+  // use one of the chunking algorithms advertised by the server in
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // so that client and server can benefit from each other's chunking data. If
+  // several clients use blob splicing, it is recommended that they use the
+  // same splitting algorithm to split their blobs into chunks.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: At least one of the blob chunks is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   spliced blob.
+  // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from
+  //   the provided expected digest.
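+  //
+  // Illustrative upload flow (a sketch only, not part of the protocol;
+  // FindMissingBlobs, BatchUpdateBlobs, and ByteStream.Write are the existing
+  // CAS/ByteStream methods):
+  //
+  //   chunks = split the blob locally with an advertised algorithm
+  //   missing = FindMissingBlobs(digests of chunks)
+  //   upload the chunks in missing via BatchUpdateBlobs or ByteStream.Write
+  //   SpliceBlob(blob_digest, digests of all chunks in order)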
+  rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
+    option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
+  }
}

// The Capabilities service may be used by remote execution clients to query
@@ -1778,6 +1865,59 @@ message GetTreeResponse {
  string next_page_token = 2;
}

+// A request message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own
+  // workers, storage, caches, etc.). The server MAY require use of this field
+  // to select between them in an implementation-defined fashion, otherwise it
+  // can be omitted.
+  string instance_name = 1;
+
+  // The digest of the blob to be split.
+  Digest blob_digest = 2;
+
+  // The chunking algorithm to be used. Must be IDENTITY (no chunking) or one
+  // of the algorithms advertised by the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field.
+  ChunkingAlgorithm.Value chunking_algorithm = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobResponse {
+  // The ordered list of digests of the chunks into which the blob was split.
+  // The original blob is assembled by concatenating the chunk data according
+  // to the order of the digests given by this list.
+  repeated Digest chunk_digests = 1;
+}
+
+// A request message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
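+//
+// An illustrative request in text-proto form (all values are made up; the
+// two chunk sizes add up to the spliced blob's size):
+//
+//   instance_name: "main"
+//   blob_digest { hash: "5e2f..." size_bytes: 18732 }
+//   chunk_digests { hash: "a91b..." size_bytes: 2048 }
+//   chunk_digests { hash: "77c0..." size_bytes: 16684 }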
+message SpliceBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own
+  // workers, storage, caches, etc.). The server MAY require use of this field
+  // to select between them in an implementation-defined fashion, otherwise it
+  // can be omitted.
+  string instance_name = 1;
+
+  // The expected digest of the spliced blob.
+  Digest blob_digest = 2;
+
+  // The ordered list of digests of the chunks which need to be concatenated
+  // to assemble the original blob.
+  repeated Digest chunk_digests = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobResponse {
+  // Empty for now, but it might be extended in the future.
+}
+
// A request message for
// [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
message GetCapabilitiesRequest {
@@ -1964,6 +2104,36 @@ message Compressor {
  }
}

+// Content-defined chunking algorithms used for splitting blobs into chunks.
+message ChunkingAlgorithm {
+  enum Value {
+    // No chunking. Servers MUST always support this, and do not need to
+    // advertise it.
+    IDENTITY = 0;
+
+    // Content-defined chunking algorithm based on Rabin fingerprinting.
+    // Details about the implementation can be found in algorithm 3
+    // (RabinCDC8KB) of https://ieeexplore.ieee.org/document/9055082. This
+    // algorithm has the following properties:
+    //  - minimum chunk size: 2 KB
+    //  - maximum chunk size: 64 KB
+    //  - average chunk size: 8 KB
+    RABINCDC_8KB = 1;
+
+    // Content-defined chunking algorithm based on Gear hashing. Details about
+    // the implementation can be found in algorithm 2 (FastCDC8KB) of
+    // https://ieeexplore.ieee.org/document/9055082. This algorithm has the
+    // following properties:
+    //  - minimum chunk size: 2 KB
+    //  - maximum chunk size: 64 KB
+    //  - average chunk size: 8 KB
+    // The 256 64-bit random numbers in the Gear table are created with the
+    // Mersenne Twister pseudo-random generator for 64-bit numbers with a
+    // state size of 19937 bits and a seed of 0.
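+    // For illustration, the table could be generated as in this C++ sketch
+    // (std::mt19937_64 is exactly such a generator; requires <random>,
+    // <array>, and <cstdint>; the variable names are made up):
+    //
+    //   std::mt19937_64 gen(0);  // 19937-bit state, seeded with 0
+    //   std::array<uint64_t, 256> gear_table;
+    //   for (auto& entry : gear_table) entry = gen();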
+    FASTCDC_MT0_8KB = 2;
+  }
+}
+
// Capabilities of the remote cache system.
message CacheCapabilities {
  // All the digest functions supported by the remote cache.
@@ -1997,6 +2167,11 @@ message CacheCapabilities {
  // [BatchUpdateBlobs][build.bazel.remote.execution.v2.ContentAddressableStorage.BatchUpdateBlobs]
  // requests.
  repeated Compressor.Value supported_batch_update_compressors = 7;
+
+  // All the chunking algorithms supported by the remote cache. The remote
+  // cache may support multiple chunking algorithms simultaneously. Servers
+  // MUST support IDENTITY (no chunking), even if it is not listed here.
+  repeated ChunkingAlgorithm.Value supported_chunking_algorithms = 8;
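+  //
+  // For illustration (not part of the protocol), a client might select an
+  // algorithm like this: use FASTCDC_MT0_8KB if it appears in
+  // supported_chunking_algorithms, otherwise fall back to IDENTITY, which is
+  // always supported.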
}
// Capabilities of the remote execution system.