Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BLAKE3 hasher to vfs #18784

Closed
wants to merge 7 commits into from
Closed
1 change: 1 addition & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ pkg_tar(
"@com_google_protobuf//:protobuf_java_util",
"@com_google_protobuf//:protobuf_javalite",
"@zstd-jni//:zstd-jni",
"@blake3//:blake3",
],
package_dir = "derived/jars",
strip_prefix = "external",
Expand Down
1 change: 1 addition & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ bazel_dep(name = "platforms", version = "0.0.6")
bazel_dep(name = "rules_pkg", version = "0.7.0")
bazel_dep(name = "stardoc", version = "0.5.3", repo_name = "io_bazel_skydoc")
bazel_dep(name = "zstd-jni", version = "1.5.2-3")
bazel_dep(name = "blake3", version = "1.3.3")
bazel_dep(name = "zlib", version = "1.2.13")
bazel_dep(name = "rules_cc", version = "0.0.6")
bazel_dep(name = "rules_java", version = "6.1.1")
Expand Down
2 changes: 1 addition & 1 deletion distdir_deps.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ DIST_DEPS = {
"package_version": "1.5.2-3",
},
"blake3": {
"archive": "v1.3.3.zip",
"archive": "1.3.3.zip",
"sha256": "bb529ba133c0256df49139bd403c17835edbf60d2ecd6463549c6a5fe279364d",
"urls": [
"https://github.com/BLAKE3-team/BLAKE3/archive/refs/tags/1.3.3.zip",
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/com/google/devtools/build/lib/vfs/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ java_library(
":pathfragment",
"//src/main/java/com/google/devtools/build/lib/clock",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/jni",
"//src/main/java/com/google/devtools/build/lib/io:file_symlink_exception",
"//src/main/java/com/google/devtools/build/lib/profiler",
"//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec",
Expand All @@ -77,6 +78,7 @@ java_library(
"//third_party:guava",
"//third_party:jsr305",
"//third_party/protobuf:protobuf_java",
"@maven//:com_google_errorprone_error_prone_annotations",
],
)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package com.google.devtools.build.lib.vfs;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkPositionIndexes;

import com.google.common.hash.Funnel;
import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.errorprone.annotations.Immutable;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;

@Immutable
public final class Blake3HashFunction implements HashFunction {
public int bits() {
return 256;
}

public Hasher newHasher() {
return new Blake3MessageDigest();
}

/* The following methods implement the {HashFunction} interface. */

public <T extends Object> HashCode hashObject(T instance, Funnel<? super T> funnel) {
return newHasher().putObject(instance, funnel).hash();
}

public HashCode hashUnencodedChars(CharSequence input) {
int len = input.length();
return newHasher(len * 2).putUnencodedChars(input).hash();
}

public HashCode hashString(CharSequence input, Charset charset) {
return newHasher().putString(input, charset).hash();
}

public HashCode hashInt(int input) {
return newHasher(4).putInt(input).hash();
}

public HashCode hashLong(long input) {
return newHasher(8).putLong(input).hash();
}

public HashCode hashBytes(byte[] input) {
return hashBytes(input, 0, input.length);
}

public HashCode hashBytes(byte[] input, int off, int len) {
checkPositionIndexes(off, off + len, input.length);
return newHasher(len).putBytes(input, off, len).hash();
}

public HashCode hashBytes(ByteBuffer input) {
return newHasher(input.remaining()).putBytes(input).hash();
}

public Hasher newHasher(int expectedInputSize) {
checkArgument(
expectedInputSize >= 0, "expectedInputSize must be >= 0 but was %s", expectedInputSize);
return newHasher();
}
}
36 changes: 36 additions & 0 deletions src/main/java/com/google/devtools/build/lib/vfs/Blake3JNI.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Copyright 2023 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.vfs;

import com.google.devtools.build.lib.jni.JniLoader;

final class Blake3JNI {
coeuvre marked this conversation as resolved.
Show resolved Hide resolved
// Not instantiable: this class only carries static native entry points.
private Blake3JNI() {}

// Runs once when the class is initialized.
// NOTE(review): presumably this loads the native library that implements the methods
// below -- confirm against JniLoader.
static {
JniLoader.loadJni();
}

// Returns a long that Blake3MessageDigest stores and later passes back as `self`.
// NOTE(review): presumably a handle/pointer to a natively allocated hasher -- confirm
// against the native implementation.
public static final native long allocate_and_initialize_hasher();

// Called by Blake3MessageDigest.engineReset() on a previously returned `self` value.
public static final native void blake3_hasher_reset(long self);

// Called with a backing array and the number of leading bytes of it to consume.
public static final native void blake3_hasher_update(long self, byte[] input, int input_len);

// Called with an output array and its length; the caller discards `self` afterwards.
// NOTE(review): the name suggests the native hasher is released here -- confirm.
public static final native void blake3_hasher_finalize_and_close(
long self, byte[] out, int out_len);

// Single call taking input bytes and an output array, with no `self` value involved.
public static final native void oneshot(byte[] input, int input_len, byte[] out, int out_len);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
package com.google.devtools.build.lib.vfs;

import static com.google.common.base.Preconditions.checkState;

import com.google.common.hash.Funnel;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hasher;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.security.DigestException;
import java.security.MessageDigest;

public final class Blake3MessageDigest extends MessageDigest implements Hasher {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you use MessageDigestHashFunction as suggested below, you can remove implements Hasher.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MessageDigestHashFunction is not public, and copying it in is prohibitive because it brings like 5 other classes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I missed that. Can we then create a separate class to implement Hasher? I find it confusing to combine the two, especially since we have both update and engineUpdate and they share the underlying buffer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

// These constants match the native definitions in:
// https://github.com/BLAKE3-team/BLAKE3/blob/master/c/blake3.h
public static final int KEY_LEN = 32;
// Digest length in bytes: reported by engineGetDigestLength() and used as the output size
// for every digest this class produces.
public static final int OUT_LEN = 32;

// To reduce the number of calls made via JNI, buffer up to this many bytes.
// If a call to "hash()" is made and less than this much data has been
// written, a single JNI call will be made that initializes, hashes, and
// cleans up the hasher, rather than making separate calls for each operation.
public static final int ONESHOT_THRESHOLD = 8 * 1024;
// Holds at most one ByteBuffer per thread.
// NOTE(review): not declared final although nothing visible reassigns it.
private static ThreadLocal<ByteBuffer> threadLocalBuffer = new ThreadLocal<ByteBuffer>();
coeuvre marked this conversation as resolved.
Show resolved Hide resolved
// Staging area for input bytes that have not been handed to the native hasher yet;
// flush() passes its backing array up to the current position.
private ByteBuffer buffer = null;

// Value returned by Blake3JNI.allocate_and_initialize_hasher(), or -1 while no native
// hasher has been allocated.
private long hasher = -1;
// Set by getOutput() once a digest has been produced; getOutput() refuses to run while set.
private boolean isDone;

/**
 * Creates a digest in its initial state with its own staging buffer of {@link
 * #ONESHOT_THRESHOLD} bytes.
 */
public Blake3MessageDigest() {
  super("BLAKE3");

  // Each instance owns its staging buffer. Taking the buffer from a per-thread cache makes
  // every digest created on one thread alias the same bytes: two digests that are alive at the
  // same time (e.g. one created inside a Funnel of another) would mix their pending input, and
  // finishing one would discard what the other had buffered.
  buffer = ByteBuffer.allocate(ONESHOT_THRESHOLD);
  isDone = false;
}

/**
 * Hands all buffered bytes to the native hasher, allocating that hasher first if none exists
 * yet, and empties the staging buffer.
 */
private void flush() {
  final boolean needsNativeHasher = hasher == -1;
  if (needsNativeHasher) {
    hasher = Blake3JNI.allocate_and_initialize_hasher();
  }

  final int pending = buffer.position();
  if (pending <= 0) {
    // Nothing buffered; the native hasher is left untouched.
    return;
  }
  Blake3JNI.blake3_hasher_update(hasher, buffer.array(), pending);
  buffer.clear();
}

/**
 * Appends {@code length} bytes of {@code data}, starting at {@code offset}, to the staging
 * buffer, flushing to the native hasher each time the buffer fills up.
 */
public void engineUpdate(byte[] data, int offset, int length) {
  int cursor = offset;
  int outstanding = length;
  while (outstanding > 0) {
    // Copy as much as still fits into the staging buffer.
    final int chunk = Math.min(outstanding, buffer.remaining());
    buffer.put(data, cursor, chunk);
    cursor += chunk;
    outstanding -= chunk;

    if (!buffer.hasRemaining()) {
      flush();
    }
  }
}

/** Appends the whole of {@code data} to the digest input. */
public void engineUpdate(byte[] data) {
  final int start = 0;
  final int count = data.length;
  engineUpdate(data, start, count);
}

/** Appends the single byte {@code b} to the digest input. */
public void engineUpdate(byte b) {
  final byte[] single = {b};
  engineUpdate(single, 0, single.length);
}

/**
 * Finishes the hash and returns {@code outputLength} bytes of output.
 *
 * <p>If nothing has been flushed to a native hasher yet, the buffered bytes are hashed with a
 * single one-shot JNI call; otherwise the remaining bytes are flushed and the native hasher is
 * finalized and forgotten.
 *
 * @throws IllegalStateException if output has already been produced
 */
private byte[] getOutput(int outputLength) throws IllegalArgumentException {
  final byte[] digest = new byte[outputLength];

  checkState(!isDone);
  isDone = true;

  if (hasher != -1) {
    // Streaming path: push the tail of the input, then finalize the native hasher.
    flush();
    Blake3JNI.blake3_hasher_finalize_and_close(hasher, digest, outputLength);
    hasher = -1;
    return digest;
  }

  // No flush has happened yet, so everything is still in the staging buffer.
  Blake3JNI.oneshot(buffer.array(), buffer.position(), digest, outputLength);
  buffer.clear();
  return digest;
}

/**
 * Cloning is not supported.
 *
 * @throws CloneNotSupportedException always
 */
public Object clone() throws CloneNotSupportedException {
  final CloneNotSupportedException unsupported = new CloneNotSupportedException();
  throw unsupported;
}

/**
 * Returns the digest to its initial state so it can be used for a new computation: resets the
 * native hasher if one is allocated, discards buffered input and clears the "done" latch.
 */
public void engineReset() {
  if (hasher != -1) {
    Blake3JNI.blake3_hasher_reset(hasher);
  }
  buffer.clear();
  // getOutput() latches isDone and rejects further calls while it is set. Without clearing it
  // here, reset() after a completed digest would leave the object permanently unusable.
  isDone = false;
}

/**
 * Appends the remaining bytes of {@code input} (from its position up to its limit) to the
 * digest input and leaves {@code input} fully consumed.
 */
public void engineUpdate(ByteBuffer input) {
  final int length = input.remaining();
  if (length == 0) {
    return;
  }

  if (input.hasArray()) {
    // Only the window [position, limit) belongs to the caller's data; the backing array may be
    // larger and may start at a non-zero arrayOffset (slices, wrapped sub-ranges).
    engineUpdate(input.array(), input.arrayOffset() + input.position(), length);
    // Reading through array() does not move the position, so consume the bytes explicitly.
    input.position(input.limit());
  } else {
    // Direct and read-only buffers expose no array: copy out exactly the remaining bytes.
    byte[] copy = new byte[length];
    input.get(copy);
    engineUpdate(copy, 0, length);
  }
}

/** Returns the digest length in bytes, which is always {@link #OUT_LEN}. */
public int engineGetDigestLength() {
  return Blake3MessageDigest.OUT_LEN;
}

/** Finishes the hash and returns the {@link #OUT_LEN}-byte digest in a new array. */
public byte[] engineDigest() {
  return getOutput(OUT_LEN);
}

/**
 * Finishes the hash and writes the digest into {@code buf} starting at {@code off}.
 *
 * @return the number of bytes written
 * @throws DigestException if {@code len} is smaller than {@link #OUT_LEN}, or if fewer than
 *     {@link #OUT_LEN} bytes are available in {@code buf} after {@code off}
 */
public int engineDigest(byte[] buf, int off, int len) throws DigestException {
  final boolean partialRequested = len < OUT_LEN;
  if (partialRequested) {
    throw new DigestException("partial digests not returned");
  }
  final int available = buf.length - off;
  if (available < OUT_LEN) {
    throw new DigestException("insufficient space in the output buffer to store the digest");
  }

  final byte[] digest = getOutput(OUT_LEN);
  final int written = digest.length;
  System.arraycopy(digest, 0, buf, off, written);
  return written;
}

/* The following methods implement the {Hasher} interface. */

/**
 * Feeds the remaining bytes of {@code b} (from its position up to its limit) into the hash and
 * leaves {@code b} fully consumed.
 *
 * <p>The caller's buffer is only read from. It must never replace the internal staging buffer:
 * doing so would drop any input buffered so far and tie this hasher's state to an object owned
 * by the caller.
 */
@CanIgnoreReturnValue
public Hasher putBytes(ByteBuffer b) {
  final int length = b.remaining();
  if (b.hasArray()) {
    // Hash exactly the window [position, limit) of the backing array.
    update(b.array(), b.arrayOffset() + b.position(), length);
    // Reading through array() does not move the position, so consume the bytes explicitly.
    b.position(b.limit());
  } else {
    // Direct and read-only buffers expose no array: copy out the remaining bytes.
    byte[] copy = new byte[length];
    b.get(copy);
    update(copy, 0, length);
  }
  return this;
}

/** Feeds {@code len} bytes of {@code bytes}, starting at {@code off}, into the hash. */
@CanIgnoreReturnValue
public Hasher putBytes(byte[] bytes, int off, int len) {
  final Hasher self = this;
  update(bytes, off, len);
  return self;
}

/** Feeds the whole of {@code bytes} into the hash. */
@CanIgnoreReturnValue
public Hasher putBytes(byte[] bytes) {
  final int count = bytes.length;
  update(bytes, 0, count);
  return this;
}

/** Feeds the single byte {@code b} into the hash. */
@CanIgnoreReturnValue
public Hasher putByte(byte b) {
  final byte[] single = {b};
  update(single);
  return this;
}

/**
 * Finishes the hash and returns it as a {@link HashCode} of {@link #OUT_LEN} bytes.
 *
 * @throws IllegalStateException if output has already been produced
 */
public HashCode hash() {
  final byte[] digest = getOutput(OUT_LEN);
  return HashCode.fromBytes(digest);
}

/** Feeds {@code b} into the hash as one byte: 1 for {@code true}, 0 for {@code false}. */
@CanIgnoreReturnValue
public final Hasher putBoolean(boolean b) {
  final byte encoded;
  if (b) {
    encoded = (byte) 1;
  } else {
    encoded = (byte) 0;
  }
  return putByte(encoded);
}

/** Feeds the raw 64-bit representation of {@code d} into the hash. */
@CanIgnoreReturnValue
public final Hasher putDouble(double d) {
  final long rawBits = Double.doubleToRawLongBits(d);
  return putLong(rawBits);
}

/** Feeds the raw 32-bit representation of {@code f} into the hash. */
@CanIgnoreReturnValue
public final Hasher putFloat(float f) {
  final int rawBits = Float.floatToRawIntBits(f);
  return putInt(rawBits);
}

/** Feeds every char of {@code charSequence} into the hash, in order, without encoding. */
@CanIgnoreReturnValue
public Hasher putUnencodedChars(CharSequence charSequence) {
  // The length is read once up front, as the sequence is walked by index.
  final int count = charSequence.length();
  int index = 0;
  while (index < count) {
    putChar(charSequence.charAt(index));
    index++;
  }
  return this;
}

/** Encodes {@code charSequence} with {@code charset} and feeds the resulting bytes in. */
@CanIgnoreReturnValue
public Hasher putString(CharSequence charSequence, Charset charset) {
  final String text = charSequence.toString();
  final byte[] encoded = text.getBytes(charset);
  return putBytes(encoded);
}

/** Feeds the two bytes of {@code s} into the hash, least significant byte first. */
@CanIgnoreReturnValue
public Hasher putShort(short s) {
  for (int shift = 0; shift < Short.SIZE; shift += Byte.SIZE) {
    putByte((byte) (s >>> shift));
  }
  return this;
}

/** Feeds the four bytes of {@code i} into the hash, least significant byte first. */
@CanIgnoreReturnValue
public Hasher putInt(int i) {
  for (int shift = 0; shift < Integer.SIZE; shift += Byte.SIZE) {
    putByte((byte) (i >>> shift));
  }
  return this;
}

/** Feeds the eight bytes of {@code l} into the hash, least significant byte first. */
@CanIgnoreReturnValue
public Hasher putLong(long l) {
  int shift = 0;
  while (shift < Long.SIZE) {
    putByte((byte) (l >>> shift));
    shift += Byte.SIZE;
  }
  return this;
}

/** Feeds the two bytes of {@code c} into the hash, least significant byte first. */
@CanIgnoreReturnValue
public Hasher putChar(char c) {
  for (int shift = 0; shift < Character.SIZE; shift += Byte.SIZE) {
    putByte((byte) (c >>> shift));
  }
  return this;
}

/** Lets {@code funnel} write {@code instance} into this hasher. */
@CanIgnoreReturnValue
public <T extends Object> Hasher putObject(T instance, Funnel<? super T> funnel) {
  final Hasher sink = this;
  funnel.funnel(instance, sink);
  return sink;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package com.google.devtools.build.lib.vfs;

import java.security.Provider;

public final class Blake3Provider extends Provider {
public Blake3Provider() {
super("BLAKE3Provider", "1.0", "A BLAKE3 digest provider");
put("MessageDigest.BLAKE3", "com.google.devtools.build.lib.vfs.Blake3MessageDigest");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Use Blake3MessageDigest.class.getName() to make refactor easier in the future.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

}
}
Loading