From 33a71e57e154b3d9c4b708ff862d784ee920fa34 Mon Sep 17 00:00:00 2001 From: Tyler Williams Date: Wed, 14 Jun 2023 11:57:26 -0700 Subject: [PATCH 1/8] Add BLAKE3 digest function to remote_execution.proto (cherry picked from commit e382abf33f1aedc18f19e9bf2bb2e407035f2c92) --- .../build/bazel/remote/execution/v2/remote_execution.proto | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/third_party/remoteapis/build/bazel/remote/execution/v2/remote_execution.proto b/third_party/remoteapis/build/bazel/remote/execution/v2/remote_execution.proto index 8e5fb4b81cc163..71affb0ca788ce 100644 --- a/third_party/remoteapis/build/bazel/remote/execution/v2/remote_execution.proto +++ b/third_party/remoteapis/build/bazel/remote/execution/v2/remote_execution.proto @@ -1885,6 +1885,10 @@ message DigestFunction { // Test vectors of this digest function can be found in the // accompanying sha256tree_test_vectors.txt file. SHA256TREE = 8; + + // The BLAKE3 hash function. + // See https://github.com/BLAKE3-team/BLAKE3. + BLAKE3 = 9; } } From 67faa67938fd68531b211b2821380b5528b3be39 Mon Sep 17 00:00:00 2001 From: Tyler Williams Date: Fri, 16 Jun 2023 10:53:30 -0700 Subject: [PATCH 2/8] Add BLAKE3 source code to third_party (cherry picked from commit 0ff4f6dfa298c875f793e86c5d634f7c9186bb9e) --- third_party/blake3/blake3.BUILD | 83 +++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 third_party/blake3/blake3.BUILD diff --git a/third_party/blake3/blake3.BUILD b/third_party/blake3/blake3.BUILD new file mode 100644 index 00000000000000..6e96f051f1412e --- /dev/null +++ b/third_party/blake3/blake3.BUILD @@ -0,0 +1,83 @@ +load("@rules_license//rules:license.bzl", "license") +load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") + +licenses(["notice"]) # BSD/MIT-like license + +exports_files(["LICENSE"]) + +license( + name = "license", + package_name = "blake3", + license_kinds = [ + "@rules_license//licenses/spdx:Apache-2.0", + ], + license_text = "LICENSE", + package_version = "1.3.3", +) + +filegroup( + name = "srcs", + srcs = glob(["**"]), + visibility = ["//third_party:__pkg__"], +) + +cc_library( + name = "blake3", + srcs = [ + "c/blake3.c", + "c/blake3_dispatch.c", + "c/blake3_portable.c", + ] + select({ + "@bazel_tools//src/conditions:linux_x86_64": [ + "c/blake3_avx2_x86-64_unix.S", + "c/blake3_avx512_x86-64_unix.S", + "c/blake3_sse2_x86-64_unix.S", + "c/blake3_sse41_x86-64_unix.S", + ], + "@bazel_tools//src/conditions:windows_x64": [ + "c/blake3_avx2_x86-64_windows_msvc.asm", + "c/blake3_avx512_x86-64_windows_msvc.asm", + "c/blake3_sse2_x86-64_windows_msvc.asm", + "c/blake3_sse41_x86-64_windows_msvc.asm", + ], + "@bazel_tools//src/conditions:darwin_arm64": [ + "c/blake3_neon.c", + ], + "//conditions:default": [], + }), + hdrs = [ + "c/blake3.h", + "c/blake3_impl.h", + ], + copts = select({ + "@bazel_tools//src/conditions:linux_x86_64": [], + "@bazel_tools//src/conditions:windows_x64": [], + "@bazel_tools//src/conditions:darwin_arm64": [ + "-DBLAKE3_USE_NEON=1", + ], + "//conditions:default": [ + "-DBLAKE3_NO_SSE2", + "-DBLAKE3_NO_SSE41", + "-DBLAKE3_NO_AVX2", + "-DBLAKE3_NO_AVX512", + ], + }), + includes = ["."], + visibility = ["//visibility:public"], +) + +cc_binary( + name = "example", + srcs = [ + "c/example.c", + ], + copts = [ + "-w", + "-O3", + ], + includes = ["."], + visibility = ["//visibility:public"], + deps = [ + ":blake3", + ], +) From a5329b3443ab48c92b67435b5f5c8b47305be203 Mon Sep 17 00:00:00 2001 From: Tyler Williams Date: Mon, 19 Jun 2023 00:28:01 -0700 Subject: [PATCH 3/8] Add BLAKE3 source code to third_party This PR adds the BLAKE3 C and asm sources to third_party, and includes a BUILD file to build them. PiperOrigin-RevId: 541539341 Change-Id: I49b1edce20a7d0f986e29712e6050e4e0b9c1d44 (cherry picked from commit a3a569ea8b25192a8603f6599d69799ed9aaf233) --- WORKSPACE | 8 ++++++++ distdir_deps.bzl | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/WORKSPACE b/WORKSPACE index f6e0a3352d06f6..b7f17ad5fdf7a2 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -292,6 +292,14 @@ dist_http_archive( strip_prefix = "zstd-jni-1.5.2-3", ) +dist_http_archive( + name = "blake3", + build_file = "//third_party:blake3/blake3.BUILD", + patch_cmds = EXPORT_WORKSPACE_IN_BUILD_BAZEL_FILE, + patch_cmds_win = EXPORT_WORKSPACE_IN_BUILD_BAZEL_FILE_WIN, + strip_prefix = "BLAKE3-1.3.3", +) + http_archive( name = "org_snakeyaml", build_file_content = """ diff --git a/distdir_deps.bzl b/distdir_deps.bzl index b896f1b24eaf14..adb7ea22bf173f 100644 --- a/distdir_deps.bzl +++ b/distdir_deps.bzl @@ -256,6 +256,21 @@ DIST_DEPS = { "additional_distfiles", ], }, + "blake3": { + "archive": "v1.3.3.zip", + "sha256": "bb529ba133c0256df49139bd403c17835edbf60d2ecd6463549c6a5fe279364d", + "urls": [ + "https://github.com/BLAKE3-team/BLAKE3/archive/refs/tags/1.3.3.zip", + ], + "used_in": [ + "additional_distfiles", + ], + "license_kinds": [ + "@rules_license//licenses/spdx:Apache-2.0", + ], + "license_text": "LICENSE", + "package_version": "1.3.3", + }, ################################################### # # Build time dependencies for testing and packaging From 62463fbfd1da7c2aab403c2f7e75921a8ce7ab80 Mon Sep 17 00:00:00 2001 From: Tyler Williams Date: Tue, 27 Jun 2023 03:15:05 -0700 Subject: [PATCH 4/8] Support new-style digest functions Support new-style digest functions. This PR adds support for new-style digest functions to the remote execution library code. The remote-apis spec says: ``` // * `digest_function` is a lowercase string form of a `DigestFunction.Value` // enum, indicating which digest function was used to compute `hash`. If the // digest function used is one of MD5, MURMUR3, SHA1, SHA256, SHA384, SHA512, // or VSO, this component MUST be omitted. In that case the server SHOULD // infer the digest function using the length of the `hash` and the digest // functions announced in the server's capabilities. ``` PiperOrigin-RevId: 543691155 Change-Id: If8c386d923db1b24dff6054c8ab3f783409b7f13 (cherry picked from commit 88412ce8ef94dfc7b27c0331d625e14056fc0bc8) --- .../build/lib/remote/ByteStreamUploader.java | 37 +++++- .../build/lib/remote/GrpcCacheClient.java | 24 +++- .../lib/remote/ByteStreamUploaderTest.java | 117 ++++++++++++++---- ...SpawnRunnerWithGrpcRemoteExecutorTest.java | 12 +- 4 files changed, 155 insertions(+), 35 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java index 3ea76bf0fd3fce..2db0f9b955e96a 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java +++ b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java @@ -21,6 +21,7 @@ import static java.util.concurrent.TimeUnit.SECONDS; import build.bazel.remote.execution.v2.Digest; +import build.bazel.remote.execution.v2.DigestFunction; import com.google.bytestream.ByteStreamGrpc; import com.google.bytestream.ByteStreamGrpc.ByteStreamFutureStub; import com.google.bytestream.ByteStreamGrpc.ByteStreamStub; @@ -69,6 +70,7 @@ class ByteStreamUploader { private final CallCredentialsProvider callCredentialsProvider; private final long callTimeoutSecs; private final RemoteRetrier retrier; + private final DigestFunction.Value digestFunction; @Nullable private final Semaphore openedFilePermits; @@ -89,7 +91,8 @@ class ByteStreamUploader { CallCredentialsProvider callCredentialsProvider, long callTimeoutSecs, RemoteRetrier retrier, - int maximumOpenFiles) { + int maximumOpenFiles, + DigestFunction.Value digestFunction) { checkArgument(callTimeoutSecs > 0, "callTimeoutSecs must be gt 0."); this.instanceName = instanceName; this.channel = channel; @@ -97,6 +100,7 @@ class ByteStreamUploader { this.callTimeoutSecs = callTimeoutSecs; this.retrier = retrier; this.openedFilePermits = maximumOpenFiles != -1 ? new Semaphore(maximumOpenFiles) : null; + this.digestFunction = digestFunction; } @VisibleForTesting @@ -175,11 +179,34 @@ public ListenableFuture uploadBlobAsync( MoreExecutors.directExecutor()); } - private static String buildUploadResourceName( + private boolean isOldStyleDigestFunction() { + // Old-style digest functions (SHA256, etc) are distinguishable by the length + // of their hash alone and do not require extra specification, but newer + // digest functions (which may have the same length hashes as the older + // functions!) must be explicitly specified in the upload resource name. + return digestFunction.getNumber() <= 7; + } + + private String buildUploadResourceName( String instanceName, UUID uuid, Digest digest, boolean compressed) { - String template = - compressed ? "uploads/%s/compressed-blobs/zstd/%s/%d" : "uploads/%s/blobs/%s/%d"; - String resourceName = format(template, uuid, digest.getHash(), digest.getSizeBytes()); + + String resourceName; + + if (isOldStyleDigestFunction()) { + String template = + compressed ? "uploads/%s/compressed-blobs/zstd/%s/%d" : "uploads/%s/blobs/%s/%d"; + resourceName = format(template, uuid, digest.getHash(), digest.getSizeBytes()); + } else { + String template = + compressed ? "uploads/%s/compressed-blobs/zstd/%s/%s/%d" : "uploads/%s/blobs/%s/%s/%d"; + resourceName = + format( + template, + uuid, + Ascii.toLowerCase(digestFunction.getValueDescriptor().getName()), + digest.getHash(), + digest.getSizeBytes()); + } if (!Strings.isNullOrEmpty(instanceName)) { resourceName = instanceName + "/" + resourceName; } diff --git a/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java b/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java index 470d3845dc3273..8c86a31d9bafa8 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java +++ b/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java @@ -22,6 +22,7 @@ import build.bazel.remote.execution.v2.ContentAddressableStorageGrpc; import build.bazel.remote.execution.v2.ContentAddressableStorageGrpc.ContentAddressableStorageFutureStub; import build.bazel.remote.execution.v2.Digest; +import build.bazel.remote.execution.v2.DigestFunction; import build.bazel.remote.execution.v2.FindMissingBlobsRequest; import build.bazel.remote.execution.v2.FindMissingBlobsResponse; import build.bazel.remote.execution.v2.GetActionResultRequest; @@ -107,7 +108,8 @@ public GrpcCacheClient( callCredentialsProvider, options.remoteTimeout.getSeconds(), retrier, - options.maximumOpenFiles); + options.maximumOpenFiles, + digestUtil.getDigestFunction()); maxMissingBlobsDigestsPerMessage = computeMaxMissingBlobsDigestsPerMessage(); Preconditions.checkState( maxMissingBlobsDigestsPerMessage > 0, "Error: gRPC message size too small."); @@ -352,12 +354,24 @@ private ListenableFuture downloadBlob( MoreExecutors.directExecutor()); } - public static String getResourceName(String instanceName, Digest digest, boolean compressed) { + private static boolean isOldStyleDigestFunction(DigestFunction.Value digestFunction) { + // Old-style digest functions (SHA256, etc) are distinguishable by the length + // of their hash alone and do not require extra specification, but newer + // digest functions (which may have the same length hashes as the older + // functions!) must be explicitly specified in the upload resource name. + return digestFunction.getNumber() <= 7; + } + + public static String getResourceName( + String instanceName, Digest digest, boolean compressed, DigestFunction.Value digestFunction) { String resourceName = ""; if (!instanceName.isEmpty()) { resourceName += instanceName + "/"; } resourceName += compressed ? "compressed-blobs/zstd/" : "blobs/"; + if (!isOldStyleDigestFunction(digestFunction)) { + resourceName += Ascii.toLowerCase(digestFunction.getValueDescriptor().getName()) + "/"; + } return resourceName + DigestUtil.toString(digest); } @@ -369,7 +383,11 @@ private ListenableFuture requestRead( @Nullable Supplier digestSupplier, Channel channel) { String resourceName = - getResourceName(options.remoteInstanceName, digest, options.cacheCompression); + getResourceName( + options.remoteInstanceName, + digest, + options.cacheCompression, + digestUtil.getDigestFunction()); SettableFuture future = SettableFuture.create(); OutputStream out; try { diff --git a/src/test/java/com/google/devtools/build/lib/remote/ByteStreamUploaderTest.java b/src/test/java/com/google/devtools/build/lib/remote/ByteStreamUploaderTest.java index 326232af5427f1..41fe6404440f65 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/ByteStreamUploaderTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/ByteStreamUploaderTest.java @@ -21,6 +21,7 @@ import static org.mockito.ArgumentMatchers.any; import build.bazel.remote.execution.v2.Digest; +import build.bazel.remote.execution.v2.DigestFunction; import build.bazel.remote.execution.v2.RequestMetadata; import com.github.luben.zstd.Zstd; import com.github.luben.zstd.ZstdInputStream; @@ -165,7 +166,8 @@ public void singleBlobUploadShouldWork() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -192,7 +194,8 @@ public void singleChunkCompressedUploadAlreadyExists() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /* maximumOpenFiles= */ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = {'A'}; @@ -257,7 +260,8 @@ public void progressiveUploadShouldWork() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, 3, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -373,7 +377,8 @@ public void progressiveCompressedUploadShouldWork() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, 300, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); int chunkSize = 1024; int skipSize = chunkSize + 1; @@ -492,7 +497,8 @@ public void progressiveCompressedUploadSeesAlreadyExistsAtTheEnd() throws Except CallCredentialsProvider.NO_CREDENTIALS, 300, retrier, - /* maximumOpenFiles= */ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); int chunkSize = 1024; byte[] blob = new byte[chunkSize * 2 + 1]; @@ -550,7 +556,8 @@ public void concurrentlyCompletedUploadIsNotRetried() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, 1, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -608,7 +615,8 @@ public void unimplementedQueryShouldRestartUpload() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, 3, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -677,7 +685,8 @@ public void earlyWriteResponseShouldCompleteUpload() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, 3, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -714,7 +723,8 @@ public void incorrectCommittedSizeFailsCompletedUpload() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, 3, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -767,7 +777,8 @@ public void incorrectCommittedSizeDoesNotFailIncompleteUpload() throws Exception CallCredentialsProvider.NO_CREDENTIALS, 300, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -799,7 +810,8 @@ public void multipleBlobsUploadShouldWork() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); int numUploads = 10; Map blobsByHash = Maps.newHashMap(); @@ -831,7 +843,8 @@ public void tooManyFilesIOException_adviseMaximumOpenFilesFlag() throws Exceptio CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE]; Chunker chunker = Mockito.mock(Chunker.class); Digest digest = DIGEST_UTIL.compute(blob); @@ -863,7 +876,8 @@ public void availablePermitsOpenFileSemaphore_fewerPermitsThanUploads_endWithAll CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - maximumOpenFiles); + maximumOpenFiles, + /* digestFunction= */ DigestFunction.Value.SHA256); assertThat(uploader.getOpenedFilePermits().availablePermits()).isEqualTo(999); @@ -901,7 +915,8 @@ public void noMaximumOpenFilesFlags_nullSemaphore() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); assertThat(uploader.getOpenedFilePermits()).isNull(); int numUploads = 10; @@ -937,7 +952,8 @@ public void contextShouldBePreservedUponRetries() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); List toUpload = ImmutableList.of("aaaaaaaaaa", "bbbbbbbbbb", "cccccccccc"); Map chunkers = Maps.newHashMapWithExpectedSize(toUpload.size()); @@ -1067,7 +1083,8 @@ public int maxConcurrency() { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE]; Chunker chunker = Chunker.builder().setInput(blob).setChunkSize(CHUNK_SIZE).build(); @@ -1128,7 +1145,8 @@ public void errorsShouldBeReported() throws IOException, InterruptedException { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE]; Chunker chunker = Chunker.builder().setInput(blob).setChunkSize(CHUNK_SIZE).build(); @@ -1164,7 +1182,8 @@ public void failureInRetryExecutorShouldBeHandled() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); serviceRegistry.addService( new ByteStreamImplBase() { @@ -1203,7 +1222,8 @@ public void resourceNameWithoutInstanceName() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); serviceRegistry.addService( new ByteStreamImplBase() { @@ -1235,6 +1255,50 @@ public void onCompleted() { uploader.uploadBlob(context, digest, chunker); } + @Test + public void resourceWithNewStyleDigestFunction() throws Exception { + RemoteRetrier retrier = + TestUtils.newRemoteRetrier(() -> mockBackoff, (e) -> true, retryService); + ByteStreamUploader uploader = + new ByteStreamUploader( + /* instanceName= */ null, + referenceCountedChannel, + CallCredentialsProvider.NO_CREDENTIALS, + /* callTimeoutSecs= */ 60, + retrier, + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.BLAKE3); + + serviceRegistry.addService( + new ByteStreamImplBase() { + @Override + public StreamObserver write(StreamObserver response) { + return new StreamObserver() { + @Override + public void onNext(WriteRequest writeRequest) { + // Test that the resource name contains the digest function. + assertThat(writeRequest.getResourceName()).contains("blobs/blake3/"); + } + + @Override + public void onError(Throwable throwable) {} + + @Override + public void onCompleted() { + response.onNext(WriteResponse.newBuilder().setCommittedSize(1).build()); + response.onCompleted(); + } + }; + } + }); + + byte[] blob = new byte[1]; + Chunker chunker = Chunker.builder().setInput(blob).setChunkSize(CHUNK_SIZE).build(); + Digest digest = DIGEST_UTIL.compute(blob); + + uploader.uploadBlob(context, digest, chunker); + } + @Test public void nonRetryableStatusShouldNotBeRetried() throws Exception { RemoteRetrier retrier = @@ -1247,7 +1311,8 @@ public void nonRetryableStatusShouldNotBeRetried() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); AtomicInteger numCalls = new AtomicInteger(); @@ -1300,7 +1365,8 @@ public void refresh() throws IOException { callCredentialsProvider, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -1356,7 +1422,8 @@ public void refresh() throws IOException { callCredentialsProvider, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); @@ -1426,7 +1493,8 @@ public void failureAfterUploadCompletes() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - -1); + -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE - 1]; new Random().nextBytes(blob); @@ -1485,7 +1553,8 @@ public void testCompressedUploads() throws Exception { CallCredentialsProvider.NO_CREDENTIALS, /* callTimeoutSecs= */ 60, retrier, - /*maximumOpenFiles=*/ -1); + /* maximumOpenFiles= */ -1, + /* digestFunction= */ DigestFunction.Value.SHA256); byte[] blob = new byte[CHUNK_SIZE * 2 + 1]; new Random().nextBytes(blob); diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerWithGrpcRemoteExecutorTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerWithGrpcRemoteExecutorTest.java index 7ec09f6218da8b..ee128fe43cfdb0 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerWithGrpcRemoteExecutorTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerWithGrpcRemoteExecutorTest.java @@ -31,6 +31,7 @@ import build.bazel.remote.execution.v2.Command; import build.bazel.remote.execution.v2.ContentAddressableStorageGrpc.ContentAddressableStorageImplBase; import build.bazel.remote.execution.v2.Digest; +import build.bazel.remote.execution.v2.DigestFunction; import build.bazel.remote.execution.v2.Directory; import build.bazel.remote.execution.v2.ExecuteRequest; import build.bazel.remote.execution.v2.ExecuteResponse; @@ -1090,7 +1091,8 @@ public void findMissingBlobs( } }); String stdOutResourceName = - getResourceName(remoteOptions.remoteInstanceName, stdOutDigest, false); + getResourceName( + remoteOptions.remoteInstanceName, stdOutDigest, false, DigestFunction.Value.SHA256); serviceRegistry.addService( new ByteStreamImplBase() { @Override @@ -1152,7 +1154,8 @@ public void findMissingBlobs( } }); String stdOutResourceName = - getResourceName(remoteOptions.remoteInstanceName, stdOutDigest, false); + getResourceName( + remoteOptions.remoteInstanceName, stdOutDigest, false, DigestFunction.Value.SHA256); serviceRegistry.addService( new ByteStreamImplBase() { @Override @@ -1279,7 +1282,10 @@ public void getActionResult( }); String dummyTreeResourceName = getResourceName( - remoteOptions.remoteInstanceName, DUMMY_OUTPUT_DIRECTORY.getTreeDigest(), false); + remoteOptions.remoteInstanceName, + DUMMY_OUTPUT_DIRECTORY.getTreeDigest(), + false, + DigestFunction.Value.SHA256); serviceRegistry.addService( new ByteStreamImplBase() { private boolean first = true; From 9a5f88a032222005a50366269f704941212e50b5 Mon Sep 17 00:00:00 2001 From: Tyler Williams Date: Mon, 24 Jul 2023 05:27:12 -0700 Subject: [PATCH 5/8] Add BLAKE3 hasher to vfs This PR adds the Blake3Hasher and Blake3HashFunction classes to vfs and makes them available under the flag --digest_function=BLAKE3. PiperOrigin-RevId: 550525978 Change-Id: Iedc0886c51755585d56b4d8f47676d3be5bbedba (cherry picked from commit cc49d68644521d3e0577d27ae1f2bed8ab385117) --- BUILD | 1 + MODULE.bazel | 1 + distdir_deps.bzl | 2 +- .../java/com/google/devtools/build/lib/BUILD | 2 + .../com/google/devtools/build/lib/bazel/BUILD | 1 + .../lib/bazel/BazelFileSystemModule.java | 5 + .../google/devtools/build/lib/vfs/bazel/BUILD | 28 ++++ .../lib/vfs/bazel/BazelHashFunctions.java | 45 ++++++ .../lib/vfs/bazel/Blake3HashFunction.java | 88 +++++++++++ .../build/lib/vfs/bazel/Blake3Hasher.java | 146 ++++++++++++++++++ .../lib/vfs/bazel/Blake3MessageDigest.java | 139 +++++++++++++++++ .../build/lib/vfs/bazel/Blake3Provider.java | 24 +++ src/main/native/BUILD | 16 ++ src/main/native/blake3_jni.cc | 73 +++++++++ src/main/native/windows/BUILD | 1 + .../com/google/devtools/build/lib/vfs/BUILD | 1 + .../google/devtools/build/lib/vfs/bazel/BUILD | 42 +++++ .../build/lib/vfs/bazel/Blake3HasherTest.java | 48 ++++++ third_party/blake3/blake3.BUILD | 15 +- 19 files changed, 673 insertions(+), 5 deletions(-) create mode 100644 src/main/java/com/google/devtools/build/lib/vfs/bazel/BUILD create mode 100644 src/main/java/com/google/devtools/build/lib/vfs/bazel/BazelHashFunctions.java create mode 100644 src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3HashFunction.java create mode 100644 src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3Hasher.java create mode 100644 src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3MessageDigest.java create mode 100644 src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3Provider.java create mode 100644 src/main/native/blake3_jni.cc create mode 100644 src/test/java/com/google/devtools/build/lib/vfs/bazel/BUILD create mode 100644 src/test/java/com/google/devtools/build/lib/vfs/bazel/Blake3HasherTest.java diff --git a/BUILD b/BUILD index 0f8fdb64c05317..b87ebc94316f26 100644 --- a/BUILD +++ b/BUILD @@ -93,6 +93,7 @@ genrule( pkg_tar( name = "bootstrap-jars", srcs = [ + "@blake3", "@com_google_protobuf//:protobuf_java", "@com_google_protobuf//:protobuf_java_util", "@com_google_protobuf//:protobuf_javalite", diff --git a/MODULE.bazel b/MODULE.bazel index 8f2782ae47315c..dd3094f00effb9 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -16,6 +16,7 @@ bazel_dep(name = "platforms", version = "0.0.5") bazel_dep(name = "rules_pkg", version = "0.7.0") bazel_dep(name = "stardoc", version = "0.5.0", repo_name = "io_bazel_skydoc") bazel_dep(name = "zstd-jni", version = "1.5.2-3") +bazel_dep(name = "blake3", version = "1.3.3") bazel_dep(name = "zlib", version = "1.2.13") # The following are required when building without WORKSPACE SUFFIX diff --git a/distdir_deps.bzl b/distdir_deps.bzl index adb7ea22bf173f..712d75cffa8acd 100644 --- a/distdir_deps.bzl +++ b/distdir_deps.bzl @@ -257,7 +257,7 @@ DIST_DEPS = { ], }, "blake3": { - "archive": "v1.3.3.zip", + "archive": "1.3.3.zip", "sha256": "bb529ba133c0256df49139bd403c17835edbf60d2ecd6463549c6a5fe279364d", "urls": [ "https://github.com/BLAKE3-team/BLAKE3/archive/refs/tags/1.3.3.zip", diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD index 9b52063095ae47..eb051d03f4c0ab 100644 --- a/src/main/java/com/google/devtools/build/lib/BUILD +++ b/src/main/java/com/google/devtools/build/lib/BUILD @@ -102,6 +102,7 @@ filegroup( "//src/main/java/com/google/devtools/build/lib/versioning:srcs", "//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs:srcs", "//src/main/java/com/google/devtools/build/lib/vfs:srcs", + "//src/main/java/com/google/devtools/build/lib/vfs/bazel:srcs", "//src/main/java/com/google/devtools/build/lib/windows:srcs", "//src/main/java/com/google/devtools/build/lib/worker:srcs", "//src/main/java/com/google/devtools/build/skyframe:srcs", @@ -403,6 +404,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:output_service", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", + "//src/main/java/com/google/devtools/build/lib/vfs/bazel", "//src/main/java/com/google/devtools/build/lib/windows", "//src/main/java/com/google/devtools/build/lib/worker:worker_metric", "//src/main/java/com/google/devtools/build/skyframe", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/BUILD index a131ab63a6d3c4..4a6db5b7eac4e1 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/BUILD @@ -143,6 +143,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", + "//src/main/java/com/google/devtools/build/lib/vfs/bazel", "//src/main/java/com/google/devtools/build/lib/windows", "//src/main/java/com/google/devtools/common/options", "//src/main/protobuf:failure_details_java_proto", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BazelFileSystemModule.java b/src/main/java/com/google/devtools/build/lib/bazel/BazelFileSystemModule.java index 1027f3a6de3e4c..f5af64e7ab9f66 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/BazelFileSystemModule.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/BazelFileSystemModule.java @@ -31,6 +31,7 @@ import com.google.devtools.build.lib.vfs.FileSystem; import com.google.devtools.build.lib.vfs.JavaIoFileSystem; import com.google.devtools.build.lib.vfs.PathFragment; +import com.google.devtools.build.lib.vfs.bazel.BazelHashFunctions; import com.google.devtools.build.lib.windows.WindowsFileSystem; import com.google.devtools.common.options.OptionsParsingException; import com.google.devtools.common.options.OptionsParsingResult; @@ -44,6 +45,10 @@ * com.google.devtools.build.lib.vfs.FileSystem} class use {@code SHA256} by default. */ public class BazelFileSystemModule extends BlazeModule { + static { + BazelHashFunctions.ensureRegistered(); + } + @Override public ModuleFileSystem getFileSystem( OptionsParsingResult startupOptions, PathFragment realExecRootBase) diff --git a/src/main/java/com/google/devtools/build/lib/vfs/bazel/BUILD b/src/main/java/com/google/devtools/build/lib/vfs/bazel/BUILD new file mode 100644 index 00000000000000..fde90bc571f660 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/vfs/bazel/BUILD @@ -0,0 +1,28 @@ +load("@rules_java//java:defs.bzl", "java_library") + +package( + default_applicable_licenses = ["//:license"], + default_visibility = ["//src:__subpackages__"], +) + +filegroup( + name = "srcs", + srcs = glob(["**"]), + visibility = ["//src:__subpackages__"], +) + +java_library( + name = "bazel", + srcs = glob( + [ + "*.java", + ], + ), + deps = [ + "//src/main/java/com/google/devtools/build/lib/jni", + "//src/main/java/com/google/devtools/build/lib/vfs", + "//third_party:error_prone_annotations", + "//third_party:guava", + "//third_party:jsr305", + ], +) diff --git a/src/main/java/com/google/devtools/build/lib/vfs/bazel/BazelHashFunctions.java b/src/main/java/com/google/devtools/build/lib/vfs/bazel/BazelHashFunctions.java new file mode 100644 index 00000000000000..c917441a5be460 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/vfs/bazel/BazelHashFunctions.java @@ -0,0 +1,45 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.vfs.bazel; + +import com.google.devtools.build.lib.jni.JniLoader; +import com.google.devtools.build.lib.vfs.DigestHashFunction; +import java.security.Security; +import javax.annotation.Nullable; + +/** Bazel specific {@link DigestHashFunction}s. */ +public final class BazelHashFunctions { + @Nullable public static final DigestHashFunction BLAKE3; + + static { + DigestHashFunction hashFunction = null; + + if (JniLoader.isJniAvailable()) { + try { + Security.addProvider(new Blake3Provider()); + hashFunction = DigestHashFunction.register(new Blake3HashFunction(), "BLAKE3"); + } catch (UnsatisfiedLinkError ignored) { + // This can happen if bazel was compiled manually (with compile.sh), + // on windows. In that case jni is available, but missing the blake3 + // symbols necessary to register the hasher. + } + } + + BLAKE3 = hashFunction; + } + + public static void ensureRegistered() {} + + private BazelHashFunctions() {} +} diff --git a/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3HashFunction.java b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3HashFunction.java new file mode 100644 index 00000000000000..0b203ba62eea5b --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3HashFunction.java @@ -0,0 +1,88 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.vfs.bazel; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkPositionIndexes; + +import com.google.common.hash.Funnel; +import com.google.common.hash.HashCode; +import com.google.common.hash.HashFunction; +import com.google.common.hash.Hasher; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; + +/** A {@link HashFunction} for BLAKE3. */ +public final class Blake3HashFunction implements HashFunction { + @Override + public int bits() { + return 256; + } + + @Override + public Hasher newHasher() { + return new Blake3Hasher(new Blake3MessageDigest()); + } + + @Override + public Hasher newHasher(int expectedInputSize) { + checkArgument( + expectedInputSize >= 0, "expectedInputSize must be >= 0 but was %s", expectedInputSize); + return newHasher(); + } + + /* The following methods implement the {HashFunction} interface. */ + + @Override + public HashCode hashObject(T instance, Funnel funnel) { + return newHasher().putObject(instance, funnel).hash(); + } + + @Override + public HashCode hashUnencodedChars(CharSequence input) { + int len = input.length(); + return newHasher(len * 2).putUnencodedChars(input).hash(); + } + + @Override + public HashCode hashString(CharSequence input, Charset charset) { + return newHasher().putString(input, charset).hash(); + } + + @Override + public HashCode hashInt(int input) { + return newHasher(4).putInt(input).hash(); + } + + @Override + public HashCode hashLong(long input) { + return newHasher(8).putLong(input).hash(); + } + + @Override + public HashCode hashBytes(byte[] input) { + return hashBytes(input, 0, input.length); + } + + @Override + public HashCode hashBytes(byte[] input, int off, int len) { + checkPositionIndexes(off, off + len, input.length); + return newHasher(len).putBytes(input, off, len).hash(); + } + + @Override + public HashCode hashBytes(ByteBuffer input) { + return newHasher(input.remaining()).putBytes(input).hash(); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3Hasher.java b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3Hasher.java new file mode 100644 index 00000000000000..180d62bdd6fc40 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3Hasher.java @@ -0,0 +1,146 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.vfs.bazel; + +import static com.google.common.base.Preconditions.checkState; + +import com.google.common.hash.Funnel; +import com.google.common.hash.HashCode; +import com.google.common.hash.Hasher; +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; + +/** A {@link Hasher} for BLAKE3. */ +public final class Blake3Hasher implements Hasher { + private final Blake3MessageDigest messageDigest; + private boolean isDone = false; + + public Blake3Hasher(Blake3MessageDigest blake3MessageDigest) { + messageDigest = blake3MessageDigest; + } + + /* The following methods implement the {Hasher} interface. */ + + @Override + @CanIgnoreReturnValue + public Hasher putBytes(ByteBuffer b) { + messageDigest.engineUpdate(b); + return this; + } + + @Override + @CanIgnoreReturnValue + public Hasher putBytes(byte[] bytes, int off, int len) { + messageDigest.engineUpdate(bytes, off, len); + return this; + } + + @Override + @CanIgnoreReturnValue + public Hasher putBytes(byte[] bytes) { + messageDigest.engineUpdate(bytes, 0, bytes.length); + return this; + } + + @Override + @CanIgnoreReturnValue + public Hasher putByte(byte b) { + messageDigest.engineUpdate(b); + return this; + } + + @Override + public HashCode hash() { + checkState(!isDone); + isDone = true; + + return HashCode.fromBytes(messageDigest.engineDigest()); + } + + @Override + @CanIgnoreReturnValue + public final Hasher putBoolean(boolean b) { + return putByte(b ? (byte) 1 : (byte) 0); + } + + @Override + @CanIgnoreReturnValue + public final Hasher putDouble(double d) { + return putLong(Double.doubleToRawLongBits(d)); + } + + @Override + @CanIgnoreReturnValue + public final Hasher putFloat(float f) { + return putInt(Float.floatToRawIntBits(f)); + } + + @Override + @CanIgnoreReturnValue + public Hasher putUnencodedChars(CharSequence charSequence) { + for (int i = 0, len = charSequence.length(); i < len; i++) { + putChar(charSequence.charAt(i)); + } + return this; + } + + @Override + @CanIgnoreReturnValue + public Hasher putString(CharSequence charSequence, Charset charset) { + return putBytes(charSequence.toString().getBytes(charset)); + } + + @Override + @CanIgnoreReturnValue + public Hasher putShort(short s) { + putByte((byte) s); + putByte((byte) (s >>> 8)); + return this; + } + + @Override + @CanIgnoreReturnValue + public Hasher putInt(int i) { + putByte((byte) i); + putByte((byte) (i >>> 8)); + putByte((byte) (i >>> 16)); + putByte((byte) (i >>> 24)); + return this; + } + + @Override + @CanIgnoreReturnValue + public Hasher putLong(long l) { + for (int i = 0; i < 64; i += 8) { + putByte((byte) (l >>> i)); + } + return this; + } + + @Override + @CanIgnoreReturnValue + public Hasher putChar(char c) { + putByte((byte) c); + putByte((byte) (c >>> 8)); + return this; + } + + @Override + @CanIgnoreReturnValue + public Hasher putObject(T instance, Funnel funnel) { + funnel.funnel(instance, this); + return this; + } +} diff --git a/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3MessageDigest.java b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3MessageDigest.java new file mode 100644 index 00000000000000..50d2ace80f51a8 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3MessageDigest.java @@ -0,0 +1,139 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.vfs.bazel; + +import static java.lang.Math.min; + +import com.google.devtools.build.lib.jni.JniLoader; +import java.nio.ByteBuffer; +import java.security.DigestException; +import java.security.MessageDigest; + +/** A {@link MessageDigest} for BLAKE3. */ +public final class Blake3MessageDigest extends MessageDigest { + // These constants match the native definitions in: + // https://github.com/BLAKE3-team/BLAKE3/blob/master/c/blake3.h + public static final int KEY_LEN = 32; + public static final int OUT_LEN = 32; + + static { + JniLoader.loadJni(); + } + + private static final int STATE_SIZE = hasher_size(); + private static final byte[] INITIAL_STATE = new byte[STATE_SIZE]; + + static { + initialize_hasher(INITIAL_STATE); + } + + // To reduce the number of calls made via JNI, buffer up to this many bytes + // before updating the hasher. + public static final int ONESHOT_THRESHOLD = 8 * 1024; + + private final ByteBuffer buffer = ByteBuffer.allocate(ONESHOT_THRESHOLD); + private final byte[] hasher = new byte[STATE_SIZE]; + + public Blake3MessageDigest() { + super("BLAKE3"); + System.arraycopy(INITIAL_STATE, 0, hasher, 0, STATE_SIZE); + } + + private void flush() { + if (buffer.position() > 0) { + blake3_hasher_update(hasher, buffer.array(), buffer.position()); + buffer.clear(); + } + } + + @Override + public void engineUpdate(byte[] data, int offset, int length) { + while (length > 0) { + int numToCopy = min(length, buffer.remaining()); + buffer.put(data, offset, numToCopy); + length -= numToCopy; + offset += numToCopy; + + if (buffer.remaining() == 0) { + flush(); + } + } + } + + @Override + public void engineUpdate(byte b) { + if (buffer.remaining() == 0) { + flush(); + } + buffer.put(b); + } + + @Override + public void engineUpdate(ByteBuffer input) { + super.engineUpdate(input); + } + + private byte[] getOutput(int outputLength) { + flush(); + + byte[] retByteArray = new byte[outputLength]; + blake3_hasher_finalize(hasher, retByteArray, outputLength); + + engineReset(); + return retByteArray; + } + + @Override + public Object clone() throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } + + @Override + public void engineReset() { + buffer.clear(); + System.arraycopy(INITIAL_STATE, 0, hasher, 0, STATE_SIZE); + } + + @Override + public int engineGetDigestLength() { + return OUT_LEN; + } + + @Override + public byte[] engineDigest() { + return getOutput(OUT_LEN); + } + + @Override + public int engineDigest(byte[] buf, int off, int len) throws DigestException { + if (len < OUT_LEN) { + throw new DigestException("partial digests not returned"); + } + if (buf.length - off < OUT_LEN) { + throw new DigestException("insufficient space in the output buffer to store the digest"); + } + + byte[] digestBytes = getOutput(OUT_LEN); + System.arraycopy(digestBytes, 0, buf, off, digestBytes.length); + return digestBytes.length; + } + + public static final native int hasher_size(); + + public static final native void initialize_hasher(byte[] hasher); + + public static final native void blake3_hasher_update(byte[] hasher, byte[] input, int inputLen); + + public static final native void blake3_hasher_finalize(byte[] hasher, byte[] out, int outLen); +} diff --git a/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3Provider.java b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3Provider.java new file mode 100644 index 00000000000000..0c868147dc4572 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3Provider.java @@ -0,0 +1,24 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.vfs.bazel; + +import java.security.Provider; + +/** A {@link Provider} for BLAKE3. */ +public final class Blake3Provider extends Provider { + public Blake3Provider() { + super("BLAKE3Provider", "1.0", "A BLAKE3 digest provider"); + put("MessageDigest.BLAKE3", Blake3MessageDigest.class.getName()); + } +} diff --git a/src/main/native/BUILD b/src/main/native/BUILD index 3af2db53e3d6d2..13f0bfca683fd3 100644 --- a/src/main/native/BUILD +++ b/src/main/native/BUILD @@ -54,6 +54,21 @@ cc_library( includes = ["."], # For jni headers. ) +cc_library( + name = "blake3_jni", + srcs = [ + "blake3_jni.cc", + ":jni.h", + ":jni_md.h", + ], + includes = ["."], # For jni headers. + visibility = ["//src/main/native:__subpackages__"], + deps = [ + "@blake3", + ], + alwayslink = 1, +) + cc_binary( name = "libunix_jni.so", srcs = [ @@ -77,6 +92,7 @@ cc_binary( linkshared = 1, visibility = ["//src/main/java/com/google/devtools/build/lib/jni:__pkg__"], deps = [ + ":blake3_jni", ":latin1_jni_path", "//src/main/cpp/util:logging", "//src/main/cpp/util:md5", diff --git a/src/main/native/blake3_jni.cc b/src/main/native/blake3_jni.cc new file mode 100644 index 00000000000000..56201888e91a56 --- /dev/null +++ b/src/main/native/blake3_jni.cc @@ -0,0 +1,73 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "c/blake3.h" + +namespace blaze_jni { + +jbyte *get_byte_array(JNIEnv *env, jbyteArray java_array) { + return (jbyte *)env->GetPrimitiveArrayCritical(java_array, nullptr); +} + +void release_byte_array(JNIEnv *env, jbyteArray array, jbyte *addr) { + env->ReleasePrimitiveArrayCritical(array, addr, 0); +} + +extern "C" JNIEXPORT int JNICALL +Java_com_google_devtools_build_lib_vfs_bazel_Blake3MessageDigest_hasher_1size( + JNIEnv *env, jobject obj) { + return (int)sizeof(blake3_hasher); +} + +extern "C" JNIEXPORT void JNICALL +Java_com_google_devtools_build_lib_vfs_bazel_Blake3MessageDigest_initialize_1hasher( + JNIEnv *env, jobject obj, jbyteArray jhasher) { + blake3_hasher *hasher = (blake3_hasher *)get_byte_array(env, jhasher); + if (hasher) { + blake3_hasher_init(hasher); + release_byte_array(env, jhasher, (jbyte *)hasher); + } +} + +extern "C" JNIEXPORT void JNICALL +Java_com_google_devtools_build_lib_vfs_bazel_Blake3MessageDigest_blake3_1hasher_1update( + JNIEnv *env, jobject obj, jbyteArray jhasher, jbyteArray input, + jint input_len) { + blake3_hasher *hasher = (blake3_hasher *)get_byte_array(env, jhasher); + if (hasher) { + jbyte *input_addr = get_byte_array(env, input); + blake3_hasher_update(hasher, input_addr, input_len); + release_byte_array(env, input, input_addr); + release_byte_array(env, jhasher, (jbyte *)hasher); + } +} + +extern "C" JNIEXPORT void JNICALL +Java_com_google_devtools_build_lib_vfs_bazel_Blake3MessageDigest_blake3_1hasher_1finalize( + JNIEnv *env, jobject obj, jbyteArray jhasher, jbyteArray out, + jint out_len) { + blake3_hasher *hasher = (blake3_hasher *)get_byte_array(env, jhasher); + if (hasher) { + jbyte *out_addr = get_byte_array(env, out); + blake3_hasher_finalize(hasher, (uint8_t *)out_addr, out_len); + release_byte_array(env, out, out_addr); + release_byte_array(env, jhasher, (jbyte *)hasher); + } +} + +} // namespace blaze_jni diff --git a/src/main/native/windows/BUILD b/src/main/native/windows/BUILD index 73eaeed150544b..b595e7553353b2 100644 --- a/src/main/native/windows/BUILD +++ b/src/main/native/windows/BUILD @@ -76,6 +76,7 @@ cc_binary( deps = [ ":lib-file", ":lib-process", + "//src/main/native:blake3_jni", ], ) diff --git a/src/test/java/com/google/devtools/build/lib/vfs/BUILD b/src/test/java/com/google/devtools/build/lib/vfs/BUILD index f7647603a7357f..b19160f870bf94 100644 --- a/src/test/java/com/google/devtools/build/lib/vfs/BUILD +++ b/src/test/java/com/google/devtools/build/lib/vfs/BUILD @@ -9,6 +9,7 @@ filegroup( name = "srcs", testonly = 0, srcs = glob(["**"]) + [ + "//src/test/java/com/google/devtools/build/lib/vfs/bazel:srcs", "//src/test/java/com/google/devtools/build/lib/vfs/util:srcs", ], visibility = ["//src:__subpackages__"], diff --git a/src/test/java/com/google/devtools/build/lib/vfs/bazel/BUILD b/src/test/java/com/google/devtools/build/lib/vfs/bazel/BUILD new file mode 100644 index 00000000000000..f1d34cef6afb5f --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/vfs/bazel/BUILD @@ -0,0 +1,42 @@ +load("@rules_java//java:defs.bzl", "java_library", "java_test") + +package( + default_applicable_licenses = ["//:license"], + default_testonly = 1, + default_visibility = ["//src:__subpackages__"], +) + +filegroup( + name = "srcs", + testonly = 0, + srcs = glob(["**"]), + visibility = ["//src:__subpackages__"], +) + +java_library( + name = "BazelTests_lib", + srcs = glob( + [ + "*.java", + ], + ), + deps = [ + "//src/main/java/com/google/devtools/build/lib/vfs/bazel", + "//src/test/java/com/google/devtools/build/lib/testutil", + "//third_party:guava", + "//third_party:guava-testlib", + "//third_party:junit4", + "//third_party:truth", + "//third_party/protobuf:protobuf_java", + ], +) + +java_test( + name = "BazelTests", + size = "small", + test_class = "com.google.devtools.build.lib.AllTests", + runtime_deps = [ + ":BazelTests_lib", + "//src/test/java/com/google/devtools/build/lib:test_runner", + ], +) diff --git a/src/test/java/com/google/devtools/build/lib/vfs/bazel/Blake3HasherTest.java b/src/test/java/com/google/devtools/build/lib/vfs/bazel/Blake3HasherTest.java new file mode 100644 index 00000000000000..9fa0cb3929d5c0 --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/vfs/bazel/Blake3HasherTest.java @@ -0,0 +1,48 @@ +// Copyright 2023 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.vfs.bazel; + +import static org.junit.Assert.assertEquals; + +import java.nio.charset.StandardCharsets; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link Blake3MessageDigest}. */ +@RunWith(JUnit4.class) +public class Blake3HasherTest { + @Test + public void emptyHash() { + Blake3Hasher h = new Blake3Hasher(new Blake3MessageDigest()); + + byte[] data = new byte[0]; + h.putBytes(data); + + assertEquals( + "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", h.hash().toString()); + } + + @Test + public void helloWorld() { + Blake3Hasher h = new Blake3Hasher(new Blake3MessageDigest()); + + byte[] data = "hello world".getBytes(StandardCharsets.US_ASCII); + h.putBytes(data); + + assertEquals( + "d74981efa70a0c880b8d8c1985d075dbcbf679b99a5f9914e5aaf96b831a9e24", h.hash().toString()); + } +} diff --git a/third_party/blake3/blake3.BUILD b/third_party/blake3/blake3.BUILD index 6e96f051f1412e..867e33f67c6cec 100644 --- a/third_party/blake3/blake3.BUILD +++ b/third_party/blake3/blake3.BUILD @@ -30,7 +30,9 @@ cc_library( ] + select({ "@bazel_tools//src/conditions:linux_x86_64": [ "c/blake3_avx2_x86-64_unix.S", - "c/blake3_avx512_x86-64_unix.S", + # Disable to appease bazel-ci which uses ubuntu-18 (EOL) and GCC 7 + # lacking the headers to compile AVX512. + # "c/blake3_avx512_x86-64_unix.S", "c/blake3_sse2_x86-64_unix.S", "c/blake3_sse41_x86-64_unix.S", ], @@ -50,16 +52,21 @@ cc_library( "c/blake3_impl.h", ], copts = select({ - "@bazel_tools//src/conditions:linux_x86_64": [], + "@bazel_tools//src/conditions:linux_x86_64": [ + # Disable to appease bazel-ci which uses ubuntu-18 (EOL) and GCC 7 + # lacking the headers to compile AVX512. + "-DBLAKE3_NO_AVX512", + ], "@bazel_tools//src/conditions:windows_x64": [], "@bazel_tools//src/conditions:darwin_arm64": [ "-DBLAKE3_USE_NEON=1", ], "//conditions:default": [ - "-DBLAKE3_NO_SSE2", - "-DBLAKE3_NO_SSE41", "-DBLAKE3_NO_AVX2", "-DBLAKE3_NO_AVX512", + "-DBLAKE3_NO_NEON", + "-DBLAKE3_NO_SSE2", + "-DBLAKE3_NO_SSE41", ], }), includes = ["."], From ee243f2c2c01472cbcdfbfbf5ab122987b29f1fa Mon Sep 17 00:00:00 2001 From: Tyler Williams Date: Wed, 26 Jul 2023 09:11:50 -0700 Subject: [PATCH 6/8] Simplify blake3 hasher and improve performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve BLAKE3 performance by reducing copies. Rather than buffering bytes, it's faster to just update the blake3 hasher as bytes are hashed. Performance with buffering: ``` Benchmark (inputSize) Mode Cnt Score Error Units Blake3Benchmark.blake3Hash 1 avgt 10 1766.697 ± 709.515 ns/op Blake3Benchmark.blake3Hash 10 avgt 10 1466.253 ± 19.474 ns/op Blake3Benchmark.blake3Hash 100 avgt 10 1522.845 ± 15.480 ns/op Blake3Benchmark.blake3Hash 1000 avgt 10 2254.156 ± 8.588 ns/op Blake3Benchmark.blake3Hash 10000 avgt 10 4660.881 ± 28.637 ns/op Blake3Benchmark.blake3Hash 100000 avgt 10 24283.191 ± 32.754 ns/op Blake3Benchmark.sha256Hash 1 avgt 10 742.091 ± 6.307 ns/op Blake3Benchmark.sha256Hash 10 avgt 10 757.844 ± 12.042 ns/op Blake3Benchmark.sha256Hash 100 avgt 10 942.902 ± 555.874 ns/op Blake3Benchmark.sha256Hash 1000 avgt 10 1208.336 ± 508.392 ns/op Blake3Benchmark.sha256Hash 10000 avgt 10 4871.231 ± 494.507 ns/op Blake3Benchmark.sha256Hash 100000 avgt 10 40686.231 ± 63.814 ns/op ``` Performance without buffering (after this CL): ``` Benchmark (inputSize) Mode Cnt Score Error Units Blake3Benchmark.blake3Hash 1 avgt 10 1021.075 ± 11.640 ns/op Blake3Benchmark.blake3Hash 10 avgt 10 1029.561 ± 19.850 ns/op Blake3Benchmark.blake3Hash 100 avgt 10 1070.509 ± 12.140 ns/op Blake3Benchmark.blake3Hash 1000 avgt 10 1685.043 ± 13.963 ns/op Blake3Benchmark.blake3Hash 10000 avgt 10 3939.516 ± 13.212 ns/op Blake3Benchmark.blake3Hash 100000 avgt 10 21730.550 ± 22.976 ns/op Blake3Benchmark.sha256Hash 1 avgt 10 745.943 ± 9.853 ns/op Blake3Benchmark.sha256Hash 10 avgt 10 747.649 ± 17.381 ns/op Blake3Benchmark.sha256Hash 100 avgt 10 962.802 ± 590.879 ns/op Blake3Benchmark.sha256Hash 1000 avgt 10 1189.069 ± 412.327 ns/op Blake3Benchmark.sha256Hash 10000 avgt 10 4594.978 ± 21.833 ns/op Blake3Benchmark.sha256Hash 100000 avgt 10 39224.248 ± 229.265 ns/op ``` PiperOrigin-RevId: 551225043 Change-Id: I57dc0700b2f44d6faf75ffbd29f1607544e54f29 (cherry picked from commit d922ab30b5bef4c03fac6069f5cdce9e4995f8b0) --- .../lib/vfs/bazel/Blake3MessageDigest.java | 38 +++---------------- src/main/native/blake3_jni.cc | 4 +- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3MessageDigest.java b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3MessageDigest.java index 50d2ace80f51a8..a8499afd69b2ae 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3MessageDigest.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/bazel/Blake3MessageDigest.java @@ -13,8 +13,6 @@ // limitations under the License. package com.google.devtools.build.lib.vfs.bazel; -import static java.lang.Math.min; - import com.google.devtools.build.lib.jni.JniLoader; import java.nio.ByteBuffer; import java.security.DigestException; @@ -38,45 +36,23 @@ public final class Blake3MessageDigest extends MessageDigest { initialize_hasher(INITIAL_STATE); } - // To reduce the number of calls made via JNI, buffer up to this many bytes - // before updating the hasher. - public static final int ONESHOT_THRESHOLD = 8 * 1024; - - private final ByteBuffer buffer = ByteBuffer.allocate(ONESHOT_THRESHOLD); private final byte[] hasher = new byte[STATE_SIZE]; + private final byte[] oneByteArray = new byte[1]; public Blake3MessageDigest() { super("BLAKE3"); System.arraycopy(INITIAL_STATE, 0, hasher, 0, STATE_SIZE); } - private void flush() { - if (buffer.position() > 0) { - blake3_hasher_update(hasher, buffer.array(), buffer.position()); - buffer.clear(); - } - } - @Override public void engineUpdate(byte[] data, int offset, int length) { - while (length > 0) { - int numToCopy = min(length, buffer.remaining()); - buffer.put(data, offset, numToCopy); - length -= numToCopy; - offset += numToCopy; - - if (buffer.remaining() == 0) { - flush(); - } - } + blake3_hasher_update(hasher, data, offset, length); } @Override public void engineUpdate(byte b) { - if (buffer.remaining() == 0) { - flush(); - } - buffer.put(b); + oneByteArray[0] = b; + engineUpdate(oneByteArray, 0, 1); } @Override @@ -85,8 +61,6 @@ public void engineUpdate(ByteBuffer input) { } private byte[] getOutput(int outputLength) { - flush(); - byte[] retByteArray = new byte[outputLength]; blake3_hasher_finalize(hasher, retByteArray, outputLength); @@ -101,7 +75,6 @@ public Object clone() throws CloneNotSupportedException { @Override public void engineReset() { - buffer.clear(); System.arraycopy(INITIAL_STATE, 0, hasher, 0, STATE_SIZE); } @@ -133,7 +106,8 @@ public int engineDigest(byte[] buf, int off, int len) throws DigestException { public static final native void initialize_hasher(byte[] hasher); - public static final native void blake3_hasher_update(byte[] hasher, byte[] input, int inputLen); + public static final native void blake3_hasher_update( + byte[] hasher, byte[] input, int offset, int inputLen); public static final native void blake3_hasher_finalize(byte[] hasher, byte[] out, int outLen); } diff --git a/src/main/native/blake3_jni.cc b/src/main/native/blake3_jni.cc index 56201888e91a56..62f151dc27059a 100644 --- a/src/main/native/blake3_jni.cc +++ b/src/main/native/blake3_jni.cc @@ -46,12 +46,12 @@ Java_com_google_devtools_build_lib_vfs_bazel_Blake3MessageDigest_initialize_1has extern "C" JNIEXPORT void JNICALL Java_com_google_devtools_build_lib_vfs_bazel_Blake3MessageDigest_blake3_1hasher_1update( - JNIEnv *env, jobject obj, jbyteArray jhasher, jbyteArray input, + JNIEnv *env, jobject obj, jbyteArray jhasher, jbyteArray input, jint offset, jint input_len) { blake3_hasher *hasher = (blake3_hasher *)get_byte_array(env, jhasher); if (hasher) { jbyte *input_addr = get_byte_array(env, input); - blake3_hasher_update(hasher, input_addr, input_len); + blake3_hasher_update(hasher, input_addr + offset, input_len); release_byte_array(env, input, input_addr); release_byte_array(env, jhasher, (jbyte *)hasher); } From 80c1df20513beea83a060ed0ce297cf8b3c81539 Mon Sep 17 00:00:00 2001 From: Tyler Williams Date: Thu, 3 Aug 2023 02:00:46 -0700 Subject: [PATCH 7/8] Ensure namedSetOfFiles URIs specify blob type correctly I noticed when testing the BLAKE3 digest function that uploaded files were being referenced incorrectly in the BES because they were missing the digest type. This CL fixes that. Before: https://github.com/bazelbuild/bazel/assets/141737/52781f1b-b897-48f0-8956-f63c57b59436 After: https://github.com/bazelbuild/bazel/assets/141737/01ebc61b-3512-4ca5-8e2d-f47ad5f086b7 PiperOrigin-RevId: 553405394 Change-Id: Ic976e5a58f80ab8b5270b4aedc6204c22562533a (cherry picked from commit 19293673be14e68598f032b1c61fa991884305ad) Signed-off-by: Brentley Jones --- .../ByteStreamBuildEventArtifactUploader.java | 91 ++++++++++++++----- .../google/devtools/build/lib/remote/BUILD | 1 + ...eStreamBuildEventArtifactUploaderTest.java | 55 +++++++++-- 3 files changed, 116 insertions(+), 31 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java index 31b3ba6bf5cf7b..78a53b7f4d5825 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java +++ b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java @@ -19,7 +19,9 @@ import static com.google.devtools.build.lib.remote.util.Utils.grpcAwareErrorMessage; import build.bazel.remote.execution.v2.Digest; +import build.bazel.remote.execution.v2.DigestFunction; import build.bazel.remote.execution.v2.RequestMetadata; +import com.google.common.base.Ascii; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; @@ -133,13 +135,21 @@ private static final class PathMetadata { private final boolean directory; private final boolean remote; private final boolean omitted; - - PathMetadata(Path path, Digest digest, boolean directory, boolean remote, boolean omitted) { + private final DigestFunction.Value digestFunction; + + PathMetadata( + Path path, + Digest digest, + boolean directory, + boolean remote, + boolean omitted, + DigestFunction.Value digestFunction) { this.path = path; this.digest = digest; this.directory = directory; this.remote = remote; this.omitted = omitted; + this.digestFunction = digestFunction; } public Path getPath() { @@ -161,6 +171,10 @@ public boolean isRemote() { public boolean isOmitted() { return omitted; } + + public DigestFunction.Value getDigestFunction() { + return digestFunction; + } } /** @@ -168,13 +182,16 @@ public boolean isOmitted() { * might do I/O. */ private PathMetadata readPathMetadata(Path file) throws IOException { + DigestUtil digestUtil = new DigestUtil(xattrProvider, file.getFileSystem().getDigestFunction()); + if (file.isDirectory()) { return new PathMetadata( file, /* digest= */ null, /* directory= */ true, /* remote= */ false, - /* omitted= */ false); + /* omitted= */ false, + /* digestFunction= */ digestUtil.getDigestFunction()); } PathFragment filePathFragment = file.asFragment(); @@ -190,9 +207,14 @@ private PathMetadata readPathMetadata(Path file) throws IOException { } } - DigestUtil digestUtil = new DigestUtil(xattrProvider, file.getFileSystem().getDigestFunction()); Digest digest = digestUtil.compute(file); - return new PathMetadata(file, digest, /* directory= */ false, isRemoteFile(file), omitted); + return new PathMetadata( + file, + digest, + /* directory= */ false, + isRemoteFile(file), + omitted, + digestUtil.getDigestFunction()); } private static void processQueryResult( @@ -209,7 +231,8 @@ private static void processQueryResult( file.getDigest(), file.isDirectory(), /* remote= */ true, - file.isOmitted()); + file.isOmitted(), + file.getDigestFunction()); knownRemotePaths.add(remotePathMetadata); } } @@ -305,7 +328,8 @@ private Single> uploadLocalFiles( // set remote to true so the PathConverter will use bytestream:// // scheme to convert the URI for this file /* remote= */ true, - path.isOmitted())) + path.isOmitted(), + path.getDigestFunction())) .onErrorResumeNext( error -> { reportUploadError(error, path.getPath(), path.getDigest()); @@ -341,7 +365,8 @@ private Single upload(Set files) { /* digest= */ null, /* directory= */ false, /* remote= */ false, - /* omitted= */ false); + /* omitted= */ false, + DigestFunction.Value.SHA256); } }) .collect(Collectors.toList()) @@ -385,7 +410,7 @@ public ReferenceCounted touch(Object o) { private static class PathConverterImpl implements PathConverter { private final String remoteServerInstanceName; - private final Map pathToDigest; + private final Map pathToMetadata; private final Set skippedPaths; private final Set localPaths; @@ -395,18 +420,18 @@ private static class PathConverterImpl implements PathConverter { RemoteBuildEventUploadMode remoteBuildEventUploadMode) { Preconditions.checkNotNull(uploads); this.remoteServerInstanceName = remoteServerInstanceName; - pathToDigest = Maps.newHashMapWithExpectedSize(uploads.size()); + pathToMetadata = Maps.newHashMapWithExpectedSize(uploads.size()); ImmutableSet.Builder skippedPaths = ImmutableSet.builder(); ImmutableSet.Builder localPaths = ImmutableSet.builder(); - for (PathMetadata pair : uploads) { - Path path = pair.getPath(); - Digest digest = pair.getDigest(); + for (PathMetadata metadata : uploads) { + Path path = metadata.getPath(); + Digest digest = metadata.getDigest(); if (digest != null) { // Always use bytestream:// in MINIMAL mode if (remoteBuildEventUploadMode == RemoteBuildEventUploadMode.MINIMAL) { - pathToDigest.put(path, digest); - } else if (pair.isRemote()) { - pathToDigest.put(path, digest); + pathToMetadata.put(path, metadata); + } else if (metadata.isRemote()) { + pathToMetadata.put(path, metadata); } else { localPaths.add(path); } @@ -418,6 +443,14 @@ private static class PathConverterImpl implements PathConverter { this.localPaths = localPaths.build(); } + private static boolean isOldStyleDigestFunction(DigestFunction.Value digestFunction) { + // Old-style digest functions (SHA256, etc) are distinguishable by the length + // of their hash alone and do not require extra specification, but newer + // digest functions (which may have the same length hashes as the older + // functions!) must be explicitly specified in the upload resource name. + return digestFunction.getNumber() <= 7; + } + @Override @Nullable public String apply(Path path) { @@ -427,8 +460,8 @@ public String apply(Path path) { return String.format("file://%s", path.getPathString()); } - Digest digest = pathToDigest.get(path); - if (digest == null) { + PathMetadata metadata = pathToMetadata.get(path); + if (metadata == null) { if (skippedPaths.contains(path)) { return null; } @@ -436,9 +469,25 @@ public String apply(Path path) { throw new IllegalStateException( String.format("Illegal file reference: '%s'", path.getPathString())); } - return String.format( - "bytestream://%s/blobs/%s/%d", - remoteServerInstanceName, digest.getHash(), digest.getSizeBytes()); + + Digest digest = metadata.getDigest(); + DigestFunction.Value digestFunction = metadata.getDigestFunction(); + String out; + if (isOldStyleDigestFunction(digestFunction)) { + out = + String.format( + "bytestream://%s/blobs/%s/%d", + remoteServerInstanceName, digest.getHash(), digest.getSizeBytes()); + } else { + out = + String.format( + "bytestream://%s/blobs/%s/%s/%d", + remoteServerInstanceName, + Ascii.toLowerCase(digestFunction.getValueDescriptor().getName()), + digest.getHash(), + digest.getSizeBytes()); + } + return out; } } } diff --git a/src/test/java/com/google/devtools/build/lib/remote/BUILD b/src/test/java/com/google/devtools/build/lib/remote/BUILD index bb054fa51e0f18..7c83743208ff34 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/BUILD +++ b/src/test/java/com/google/devtools/build/lib/remote/BUILD @@ -93,6 +93,7 @@ java_test( "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", + "//src/main/java/com/google/devtools/build/lib/vfs/bazel", "//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs", "//src/main/java/com/google/devtools/common/options", "//src/main/protobuf:failure_details_java_proto", diff --git a/src/test/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploaderTest.java b/src/test/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploaderTest.java index 31176ef61beee7..74bdda1b73a29c 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploaderTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploaderTest.java @@ -14,6 +14,7 @@ package com.google.devtools.build.lib.remote; import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assume.assumeNotNull; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doAnswer; @@ -65,6 +66,7 @@ import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.SyscallCache; +import com.google.devtools.build.lib.vfs.bazel.BazelHashFunctions; import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem; import com.google.devtools.common.options.Options; import io.grpc.Server; @@ -176,7 +178,7 @@ public void uploadsShouldWork() throws Exception { filesToUpload.put( file, new LocalFile( - file, LocalFileType.OUTPUT, /*artifact=*/ null, /*artifactMetadata=*/ null)); + file, LocalFileType.OUTPUT, /* artifact= */ null, /* artifactMetadata= */ null)); } serviceRegistry.addService(new MaybeFailOnceUploadService(blobsByHash)); @@ -219,7 +221,7 @@ public void uploadsShouldWork_fewerPermitsThanUploads() throws Exception { filesToUpload.put( file, new LocalFile( - file, LocalFileType.OUTPUT, /*artifact=*/ null, /*artifactMetadata=*/ null)); + file, LocalFileType.OUTPUT, /* artifact= */ null, /* artifactMetadata= */ null)); } serviceRegistry.addService(new MaybeFailOnceUploadService(blobsByHash)); @@ -245,6 +247,33 @@ public void uploadsShouldWork_fewerPermitsThanUploads() throws Exception { assertThat(refCntChannel.isShutdown()).isTrue(); } + @Test + public void testUnknown_uploadedIfFileBlake3() throws Exception { + assumeNotNull(BazelHashFunctions.BLAKE3); + + FileSystem fs = new InMemoryFileSystem(new JavaClock(), BazelHashFunctions.BLAKE3); + Path file = fs.getPath("/file"); + file.getOutputStream().close(); + Map filesToUpload = new HashMap<>(); + filesToUpload.put( + file, + new LocalFile( + file, LocalFileType.OUTPUT, /* artifact= */ null, /* artifactMetadata= */ null)); + RemoteRetrier retrier = + TestUtils.newRemoteRetrier(() -> new FixedBackoff(1, 0), (e) -> true, retryService); + ReferenceCountedChannel refCntChannel = new ReferenceCountedChannel(channelConnectionFactory); + RemoteCache remoteCache = newRemoteCache(refCntChannel, retrier); + ByteStreamBuildEventArtifactUploader artifactUploader = newArtifactUploader(remoteCache); + + PathConverter pathConverter = artifactUploader.upload(filesToUpload).get(); + String hash = BaseEncoding.base16().lowerCase().encode(file.getDigest()); + long size = file.getFileSize(); + String conversion = pathConverter.apply(file); + assertThat(conversion) + .isEqualTo("bytestream://localhost/instance/blobs/blake3/" + hash + "/" + size); + artifactUploader.release(); + } + @Test public void testUploadDirectoryDoesNotCrash() throws Exception { Path dir = fs.getPath("/dir"); @@ -286,7 +315,7 @@ public void someUploadsFail_succeedsWithWarningMessages() throws Exception { filesToUpload.put( file, new LocalFile( - file, LocalFileType.OUTPUT, /*artifact=*/ null, /*artifactMetadata=*/ null)); + file, LocalFileType.OUTPUT, /* artifact= */ null, /* artifactMetadata= */ null)); } String hashOfBlobThatShouldFail = blobsByHash.keySet().iterator().next().toString(); serviceRegistry.addService( @@ -370,7 +399,7 @@ public void remoteFileShouldNotBeUploaded_actionFs() throws Exception { assertThat(remotePath.getFileSystem()).isEqualTo(remoteFs); LocalFile file = new LocalFile( - remotePath, LocalFileType.OUTPUT, /*artifact=*/ null, /*artifactMetadata=*/ null); + remotePath, LocalFileType.OUTPUT, /* artifact= */ null, /* artifactMetadata= */ null); // act @@ -421,10 +450,16 @@ public void remoteFileShouldNotBeUploaded_findMissingDigests() throws Exception ImmutableMap.of( remoteFile, new LocalFile( - remoteFile, LocalFileType.OUTPUT, /*artifact=*/ null, /*artifactMetadata=*/ null), + remoteFile, + LocalFileType.OUTPUT, + /* artifact= */ null, + /* artifactMetadata= */ null), localFile, new LocalFile( - localFile, LocalFileType.OUTPUT, /*artifact=*/ null, /*artifactMetadata=*/ null)); + localFile, + LocalFileType.OUTPUT, + /* artifact= */ null, + /* artifactMetadata= */ null)); PathConverter pathConverter = artifactUploader.upload(files).get(); // assert @@ -481,11 +516,11 @@ private ByteStreamBuildEventArtifactUploader newArtifactUploader(RemoteCache rem return new ByteStreamBuildEventArtifactUploader( MoreExecutors.directExecutor(), reporter, - /*verboseFailures=*/ true, + /* verboseFailures= */ true, remoteCache, - /*remoteServerInstanceName=*/ "localhost/instance", - /*buildRequestId=*/ "none", - /*commandId=*/ "none", + /* remoteServerInstanceName= */ "localhost/instance", + /* buildRequestId= */ "none", + /* commandId= */ "none", SyscallCache.NO_CACHE, RemoteBuildEventUploadMode.ALL); } From 12865388271b18ee769254cfa753f8263e051a31 Mon Sep 17 00:00:00 2001 From: Tyler Williams Date: Mon, 7 Aug 2023 08:02:10 -0700 Subject: [PATCH 8/8] Separate new-style-hash content in DiskCache DiskCache always stores files in /root/{cas/ac}/digestHash. This change keeps the current behavior, but for new style digest functions inserts a directory between /root/ and {cas/ac} to disambiguate the digest function type. This prevents issues that could theoretically happen if there were hash collisions between two digest functions sharing the same cache directory. PiperOrigin-RevId: 554477775 Change-Id: Ibef994e432764c260a3cab821ca6583c231c5b50 (cherry picked from commit f17b280b2c22b51af29953451a64bf9e0f50c19c) Signed-off-by: Brentley Jones --- .../remote/ByteStreamBuildEventArtifactUploader.java | 9 +-------- .../build/lib/remote/ByteStreamUploader.java | 11 ++--------- .../devtools/build/lib/remote/GrpcCacheClient.java | 9 +-------- .../build/lib/remote/disk/DiskCacheClient.java | 12 +++++++++++- .../devtools/build/lib/remote/util/DigestUtil.java | 8 ++++++++ 5 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java index 78a53b7f4d5825..3a4d7a2b081b7d 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java +++ b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamBuildEventArtifactUploader.java @@ -13,6 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.remote; +import static com.google.devtools.build.lib.remote.util.DigestUtil.isOldStyleDigestFunction; import static com.google.devtools.build.lib.remote.util.RxFutures.toCompletable; import static com.google.devtools.build.lib.remote.util.RxFutures.toListenableFuture; import static com.google.devtools.build.lib.remote.util.RxFutures.toSingle; @@ -443,14 +444,6 @@ private static class PathConverterImpl implements PathConverter { this.localPaths = localPaths.build(); } - private static boolean isOldStyleDigestFunction(DigestFunction.Value digestFunction) { - // Old-style digest functions (SHA256, etc) are distinguishable by the length - // of their hash alone and do not require extra specification, but newer - // digest functions (which may have the same length hashes as the older - // functions!) must be explicitly specified in the upload resource name. - return digestFunction.getNumber() <= 7; - } - @Override @Nullable public String apply(Path path) { diff --git a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java index 2db0f9b955e96a..0dea65107c670d 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java +++ b/src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java @@ -15,6 +15,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.util.concurrent.Futures.immediateVoidFuture; +import static com.google.devtools.build.lib.remote.util.DigestUtil.isOldStyleDigestFunction; import static com.google.devtools.build.lib.remote.util.Utils.getFromFuture; import static com.google.devtools.build.lib.remote.util.Utils.waitForBulkTransfer; import static java.lang.String.format; @@ -179,20 +180,12 @@ public ListenableFuture uploadBlobAsync( MoreExecutors.directExecutor()); } - private boolean isOldStyleDigestFunction() { - // Old-style digest functions (SHA256, etc) are distinguishable by the length - // of their hash alone and do not require extra specification, but newer - // digest functions (which may have the same length hashes as the older - // functions!) must be explicitly specified in the upload resource name. - return digestFunction.getNumber() <= 7; - } - private String buildUploadResourceName( String instanceName, UUID uuid, Digest digest, boolean compressed) { String resourceName; - if (isOldStyleDigestFunction()) { + if (isOldStyleDigestFunction(digestFunction)) { String template = compressed ? "uploads/%s/compressed-blobs/zstd/%s/%d" : "uploads/%s/blobs/%s/%d"; resourceName = format(template, uuid, digest.getHash(), digest.getSizeBytes()); diff --git a/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java b/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java index 8c86a31d9bafa8..401c1fedc53518 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java +++ b/src/main/java/com/google/devtools/build/lib/remote/GrpcCacheClient.java @@ -15,6 +15,7 @@ package com.google.devtools.build.lib.remote; import static com.google.common.base.Strings.isNullOrEmpty; +import static com.google.devtools.build.lib.remote.util.DigestUtil.isOldStyleDigestFunction; import build.bazel.remote.execution.v2.ActionCacheGrpc; import build.bazel.remote.execution.v2.ActionCacheGrpc.ActionCacheFutureStub; @@ -354,14 +355,6 @@ private ListenableFuture downloadBlob( MoreExecutors.directExecutor()); } - private static boolean isOldStyleDigestFunction(DigestFunction.Value digestFunction) { - // Old-style digest functions (SHA256, etc) are distinguishable by the length - // of their hash alone and do not require extra specification, but newer - // digest functions (which may have the same length hashes as the older - // functions!) must be explicitly specified in the upload resource name. - return digestFunction.getNumber() <= 7; - } - public static String getResourceName( String instanceName, Digest digest, boolean compressed, DigestFunction.Value digestFunction) { String resourceName = ""; diff --git a/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheClient.java b/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheClient.java index 55d6b43581f8dd..1c0777f77bc0de 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheClient.java +++ b/src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheClient.java @@ -13,10 +13,13 @@ // limitations under the License. package com.google.devtools.build.lib.remote.disk; +import static com.google.devtools.build.lib.remote.util.DigestUtil.isOldStyleDigestFunction; + import build.bazel.remote.execution.v2.ActionResult; import build.bazel.remote.execution.v2.Digest; import build.bazel.remote.execution.v2.Directory; import build.bazel.remote.execution.v2.Tree; +import com.google.common.base.Ascii; import com.google.common.collect.ImmutableSet; import com.google.common.io.ByteStreams; import com.google.common.util.concurrent.Futures; @@ -57,10 +60,17 @@ public class DiskCacheClient implements RemoteCacheClient { */ public DiskCacheClient( Path root, boolean verifyDownloads, boolean checkActionResult, DigestUtil digestUtil) { - this.root = root; this.verifyDownloads = verifyDownloads; this.checkActionResult = checkActionResult; this.digestUtil = digestUtil; + + if (isOldStyleDigestFunction(digestUtil.getDigestFunction())) { + this.root = root; + } else { + this.root = + root.getChild( + Ascii.toLowerCase(digestUtil.getDigestFunction().getValueDescriptor().getName())); + } } /** Returns {@code true} if the provided {@code key} is stored in the CAS. */ diff --git a/src/main/java/com/google/devtools/build/lib/remote/util/DigestUtil.java b/src/main/java/com/google/devtools/build/lib/remote/util/DigestUtil.java index 76b258443890ca..1e876d424ce6ff 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/util/DigestUtil.java +++ b/src/main/java/com/google/devtools/build/lib/remote/util/DigestUtil.java @@ -131,4 +131,12 @@ public static String toString(Digest digest) { public static byte[] toBinaryDigest(Digest digest) { return HashCode.fromString(digest.getHash()).asBytes(); } + + public static boolean isOldStyleDigestFunction(DigestFunction.Value digestFunction) { + // Old-style digest functions (SHA256, etc) are distinguishable by the length + // of their hash alone and do not require extra specification, but newer + // digest functions (which may have the same length hashes as the older + // functions!) must be explicitly specified in the upload resource name. + return digestFunction.getNumber() <= 7; + } }