diff --git a/.github/workflows/check.yaml b/.github/workflows/check.yaml index b7eb186..eac7435 100644 --- a/.github/workflows/check.yaml +++ b/.github/workflows/check.yaml @@ -10,6 +10,7 @@ on: env: PYTHON_VERSION: 3.x + JAVA_VERSION: '11' permissions: contents: read @@ -57,6 +58,21 @@ jobs: - name: test run: cd go && make mod deps linter test GOPATH=$(go env GOPATH) + java: + runs-on: ubuntu-latest + steps: + - name: checkout repository + uses: actions/checkout@v3 + - name: setup java build environment + uses: actions/setup-java@v2 + with: + distribution: 'adopt' + java-version: ${{ env.JAVA_VERSION }} + - name: set RELEASE number + run: echo ${GITHUB_RUN_NUMBER} > RELEASE + - name: test + run: cd java && make build test + python: runs-on: ubuntu-latest steps: diff --git a/LICENSE b/LICENSE index b6ce10c..73ac008 100644 --- a/LICENSE +++ b/LICENSE @@ -2,11 +2,13 @@ The code in this project is a C port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). -This code has been ported/translated by Nicola Asuni to header-only C code. +This code has been ported/translated by Nicola Asuni to multiple languages. -The original code is released under the MIT License: +MIT License: -Copyright (c) 2014 Google, Inc. + - Copyright (c) 2014 Google, Inc. 
+ - Copyright (c) 2014 Damian Gryski (original GO version) + - Copyright (c) 2016-2024 Nicola Asuni (versions in CGO, GO, Java, Python, Rust) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index 41df975..a83faa0 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,7 @@ help: @echo " make c : Build and test the C version" @echo " make cgo : Build and test the GO C-wrapper version" @echo " make go : Build and test the GO version" + @echo " make java : Build and test the Java version" @echo " make python : Build and test the Python version" @echo " make rust : Build and test the Rust version" @echo " make clean : Remove any build artifact" @@ -58,7 +59,7 @@ help: @echo " make tag : Tag the Git repository" @echo "" -all: clean c cgo go python rust +all: clean c cgo go java python rust # Build and test the C version .PHONY: c @@ -75,6 +76,11 @@ cgo: go: cd go && make all +# Build and test the Java version +.PHONY: java +java: + cd java && make all + # Build and test the Python version .PHONY: python python: @@ -92,6 +98,7 @@ clean: cd c && make clean cd cgo && make clean cd go && make clean + cd java && make clean cd python && make clean cd rust && make clean @mkdir -p $(TARGETDIR) diff --git a/README.md b/README.md index a80545f..06211ed 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # FarmHash64 -*Provides farmhash64, a portable C 64-bit hash function* +*Provides farmhash64 and farmhash32 hash functions in multiple languages* [![Donate via PayPal](https://img.shields.io/badge/donate-paypal-87ceeb.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&currency_code=GBP&business=paypal@tecnick.com&item_name=donation%20for%20farmhash64%20project) *Please consider supporting this project by making a donation via 
[PayPal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&currency_code=GBP&business=paypal@tecnick.com&item_name=donation%20for%20farmhash64%20project)* @@ -19,17 +19,22 @@ FarmHash is a family of hash functions. -This is a C translation of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash -(https://github.com/google/farmhash). +FarmHash64 is a 64-bit fingerprint hash function that produces a hash value for a given string. +It is designed to be fast and provide good hash distribution but is not suitable for cryptography applications. -FarmHash64 provides a portable 64-bit hash function for strings (byte array). -The function mix the input bits thoroughly but is not suitable for cryptography. +The FarmHash32 function is also provided, which returns a 32-bit fingerprint hash for a string. All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. +This is a port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). -For more information please consult https://github.com/google/farmhash - +This code has been ported/translated by Nicola Asuni (Tecnick.com) to multiple languages: +- C (header-only) +- CGO +- GO +- Java +- Python +- Rust ## Getting Started @@ -44,19 +49,3 @@ make help ``` use the command ```make all``` to build and test all the implementations. 
- - -### Python Usage Example - -``` -# copy this code in the same directory of farmhash64 library - -import farmhash64 as vh - -print('\nUSAGE EXAMPLE:\n') - -vhash = vh.farmhash64("Lorem ipsum dolor sit amet") -print('vh.farmhash64("Lorem ipsum dolor sit amet")') -print("Variant Hash (DEC): %d" % vhash) -print("Variant Hash (HEX): %x\n" % vhash) -``` diff --git a/VERSION b/VERSION index 26ca594..dc1e644 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.5.1 +1.6.0 diff --git a/c/doc/Doxyfile b/c/doc/Doxyfile index c63ac82..193fa40 100644 --- a/c/doc/Doxyfile +++ b/c/doc/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = FarmHash64 # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 1.5.1 +PROJECT_NUMBER = 1.6.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/c/src/farmhash64.h b/c/src/farmhash64.h index b6a1116..68614b2 100644 --- a/c/src/farmhash64.h +++ b/c/src/farmhash64.h @@ -4,44 +4,15 @@ * * FarmHash is a family of hash functions. * - * FarmHash64 provides a portable 64-bit hash function for strings (byte array). - * The function mix the input bits thoroughly but is not suitable for cryptography. + * FarmHash64 is a 64-bit fingerprint hash function that produces a hash value for a given string. + * It is designed to be fast and provide good hash distribution but is not suitable for cryptography applications. * - * All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. - * For more information please consult https://github.com/google/farmhash - * - * This is a C port of the Fingerprint64 (farmhashna::Hash64) code - * from Google's FarmHash (https://github.com/google/farmhash). - * - * This code has been ported/translated by Nicola Asuni to header-only C code. 
- * - * The public functions are: - * - farmhash64: Returns a 64-bit fingerprint hash for a byte array. - * - farmhash32: Returns a 32-bit fingerprint hash for a byte array. - * - * The original C++ code is released under the MIT License: - * - * The MIT License (MIT) - * - * Copyright (c) 2014 Google, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: + * The FarmHash32 function is also provided, which returns a 32-bit fingerprint hash for a string. * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. + * All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. + * This is a C port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * This code has been ported/translated by Nicola Asuni (Tecnick.com) to header-only C code. 
*/ #ifndef FARMHASH64_H @@ -445,34 +416,6 @@ static const uint64_t k2 = 0x9ae16a3b2f90404fULL; static const uint32_t c1 = 0xcc9e2d51; static const uint32_t c2 = 0x1b873593; -/** - * @brief Get the low 64 bits of a uint128_t value. - * - * @param x uint128_t value - * - * @return The low 64 bits of x - * - * @private - */ -STATIC_INLINE uint64_t uint128_t_low64(const uint128_t x) -{ - return x.lo; -} - -/** - * @brief Get the high 64 bits of a uint128_t value. - * - * @param x uint128_t value - * - * @return The high 64 bits of x - * - * @private - */ -STATIC_INLINE uint64_t uint128_t_high64(const uint128_t x) -{ - return x.hi; -} - /** * @brief Create a uint128_t value from two 64-bit integers. * @@ -506,9 +449,9 @@ STATIC_INLINE uint64_t farmhash128_to_64(uint128_t x) { // Murmur-inspired hashing. const uint64_t k_mul = 0x9ddfea08eb382d69ULL; - uint64_t a = (uint128_t_low64(x) ^ uint128_t_high64(x)) * k_mul; + uint64_t a = (x.lo ^ x.hi) * k_mul; a ^= (a >> 47); - uint64_t b = (uint128_t_high64(x) ^ a) * k_mul; + uint64_t b = (x.hi ^ a) * k_mul; b ^= (b >> 47); b *= k_mul; return b; @@ -539,11 +482,11 @@ STATIC_INLINE uint64_t fetch64(const char* p) * * @private */ -STATIC_INLINE uint32_t fetch32(const char* p) +STATIC_INLINE uint64_t fetch32(const char* p) { uint32_t result; memcpy(&result, p, sizeof(result)); - return uint32_t_in_expected_order(result); + return (uint64_t)uint32_t_in_expected_order(result); /* fix byte order at 32-bit width first, then zero-extend; a 64-bit reorder of a 4-byte load would misplace the bytes */ } /** diff --git a/cgo/src/farmhash64.go b/cgo/src/farmhash64.go index 9eb4c5e..ebe9fa2 100644 --- a/cgo/src/farmhash64.go +++ b/cgo/src/farmhash64.go @@ -1,21 +1,17 @@ /* -Package farmhash64 implements the FarmHash64 hash functions for strings. +Package farmhash64 implements the FarmHash64 and FarmHash32 hash functions for strings. + +FarmHash is a family of hash functions. FarmHash64 is a 64-bit fingerprint hash function that produces a hash value for a given string. -It is designed to be fast and provide good hash distribution. 
+It is designed to be fast and provide good hash distribution but is not suitable for cryptography applications. The FarmHash32 function is also provided, which returns a 32-bit fingerprint hash for a string. -Usage: - -To use the FarmHash64 function, pass a byte slice representing the string to be hashed. -The function returns a uint64 value representing the hash. - -Note: -The package uses cgo to interface with the C implementation of FarmHash64. +All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. +This is a CGO port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). -For more information about FarmHash64, refer to the original C implementation: -https://github.com/google/farmhash +This code has been ported/translated by Nicola Asuni (Tecnick.com) to CGO code. */ package farmhash64 diff --git a/go/src/farmhash64.go b/go/src/farmhash64.go index 467c7c0..d46b3de 100644 --- a/go/src/farmhash64.go +++ b/go/src/farmhash64.go @@ -1,33 +1,17 @@ /* -Package farmhash64 implements the FarmHash64 hash function. - -The code in this file is an extract from: -https://github.com/dgryski/go-farm/commits/master - -That is a golang translation of the Google's C++ code: -https://github.com/google/farmhash - - - Copyright (c) 2014 Google, Inc. 
- - Copyright (c) 2014 Damian Gryski - - Copyright (c) 2016-2024 Nicola Asuni - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +Package farmhash64 implements the FarmHash64 and FarmHash32 hash functions for strings. + +FarmHash is a family of hash functions. + +FarmHash64 is a 64-bit fingerprint hash function that produces a hash value for a given string. +It is designed to be fast and provide good hash distribution but is not suitable for cryptography applications. + +The FarmHash32 function is also provided, which returns a 32-bit fingerprint hash for a string. + +All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. +This is a GO port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). + +This code has been ported/translated by Nicola Asuni (Tecnick.com) to GO code. 
*/ package farmhash64 @@ -54,15 +38,23 @@ type uint128 struct { // PLATFORM func rotate32(val uint32, shift uint) uint32 { + if shift == 0 { + return val + } + return ((val >> shift) | (val << (32 - shift))) } func rotate64(val uint64, shift uint) uint64 { + if shift == 0 { + return val + } + return ((val >> shift) | (val << (64 - shift))) } -func fetch32(s []byte, idx int) uint32 { - return uint32(s[idx+0]) | uint32(s[idx+1])<<8 | uint32(s[idx+2])<<16 | uint32(s[idx+3])<<24 +func fetch32(s []byte, idx int) uint64 { + return uint64(s[idx+0]) | uint64(s[idx+1])<<8 | uint64(s[idx+2])<<16 | uint64(s[idx+3])<<24 } func fetch64(s []byte, idx int) uint64 { @@ -120,7 +112,7 @@ func hashLen0to16(s []byte) uint64 { mul := k2 + slen*2 a := fetch32(s, 0) - return hashLen16Mul(slen+(uint64(a)<<3), uint64(fetch32(s, int(slen-4))), mul) + return hashLen16Mul(slen+(a<<3), fetch32(s, int(slen-4)), mul) } if slen > 0 { @@ -196,8 +188,6 @@ func hashLen33to64(s []byte) uint64 { func FarmHash64(s []byte) uint64 { slen := len(s) - var seed uint64 = 81 - if slen <= 16 { return hashLen0to16(s) } @@ -210,8 +200,11 @@ func FarmHash64(s []byte) uint64 { return hashLen33to64(s) } + var seed uint64 = 81 + // For strings over 64 bytes we loop. // Internal state consists of 56 bytes: v, w, x, y, and z. 
+ v := uint128{0, 0} w := uint128{0, 0} x := seed*k2 + fetch64(s, 0) diff --git a/java/.gitattributes b/java/.gitattributes new file mode 100644 index 0000000..00a51af --- /dev/null +++ b/java/.gitattributes @@ -0,0 +1,6 @@ +# +# https://help.github.com/articles/dealing-with-line-endings/ +# +# These are explicitly windows files and should use crlf +*.bat text eol=crlf + diff --git a/java/.gitignore b/java/.gitignore new file mode 100644 index 0000000..05320a0 --- /dev/null +++ b/java/.gitignore @@ -0,0 +1,5 @@ +.gradle +.idea +bin +build +gradle diff --git a/java/Makefile b/java/Makefile new file mode 100644 index 0000000..7d2d48e --- /dev/null +++ b/java/Makefile @@ -0,0 +1,43 @@ +# MAKEFILE +# +# @author Nicola Asuni +# @link https://github.com/tecnickcom/farmhash64 +# ------------------------------------------------------------------------------ + +SHELL=/bin/bash +.SHELLFLAGS=-o pipefail -c + +# Display general help about this command +.PHONY: help +help: + @echo "" + @echo "FarmHash64 Java Makefile." + @echo "The following commands are available:" + @echo "" + @echo " make build : Build the library" + @echo " make clean : Remove any build artifact" + @echo " make test : Run the unit tests against source code" + @echo " make updategradle : Update gradle wrapper to the latest version" + @echo "" + +all: clean build test + +# Build the library +.PHONY: build +build: + ./gradlew jar + +# Remove any build artifact +.PHONY: clean +clean: + ./gradlew clean + +# Run the unit tests +.PHONY: test +test: + ./gradlew test + +# Update gradle wrapper to the latest version +.PHONY: updategradle +updategradle: + ./gradlew wrapper --gradle-version latest diff --git a/java/build.gradle b/java/build.gradle new file mode 100644 index 0000000..b1eefa6 --- /dev/null +++ b/java/build.gradle @@ -0,0 +1,26 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * This generated file contains a sample Java Library project to get you started. 
+ * For more details take a look at the Java Libraries chapter in the Gradle + * User Manual available at https://docs.gradle.org/6.0.1/userguide/java_library_plugin.html + */ + +plugins { + // Apply the java-library plugin to add support for Java Library + id 'java-library' +} + +repositories { + mavenCentral() + google() +} + +dependencies { + testImplementation 'org.junit.jupiter:junit-jupiter:5.10.2' + testRuntimeOnly 'org.junit.platform:junit-platform-launcher' +} + +tasks.named("test", Test.class).configure { + useJUnitPlatform() +} diff --git a/java/gradle.properties b/java/gradle.properties new file mode 100644 index 0000000..55a902b --- /dev/null +++ b/java/gradle.properties @@ -0,0 +1 @@ +org.gradle.warning.mode=all diff --git a/java/gradlew b/java/gradlew new file mode 100755 index 0000000..1aa94a4 --- /dev/null +++ b/java/gradlew @@ -0,0 +1,249 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. 
+while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 
+ # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/java/gradlew.bat b/java/gradlew.bat new file mode 100644 index 0000000..25da30d --- /dev/null +++ b/java/gradlew.bat @@ -0,0 +1,92 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 
1>&2 + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/java/settings.gradle b/java/settings.gradle new file mode 100644 index 0000000..5f409cd --- /dev/null +++ b/java/settings.gradle @@ -0,0 +1,10 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * The settings file is used to specify which projects to include in your build. + * + * Detailed information about configuring a multi-project build in Gradle can be found + * in the user manual at https://docs.gradle.org/6.0.1/userguide/multi_project_builds.html + */ + +rootProject.name = 'com.tecnickcom.farmhash64' diff --git a/java/src/main/java/com/tecnick/farmhash64/FarmHash64.java b/java/src/main/java/com/tecnick/farmhash64/FarmHash64.java new file mode 100644 index 0000000..d62d876 --- /dev/null +++ b/java/src/main/java/com/tecnick/farmhash64/FarmHash64.java @@ -0,0 +1,252 @@ +/* +Package farmhash64 implements the FarmHash64 and FarmHash32 hash functions for strings. + +FarmHash is a family of hash functions. + +FarmHash64 is a 64-bit fingerprint hash function that produces a hash value for a given string. +It is designed to be fast and provide good hash distribution but is not suitable for cryptography applications. 
+ +The FarmHash32 function is also provided, which returns a 32-bit fingerprint hash for a string. + +All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. +This is a Java port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). + +This code has been ported/translated by Nicola Asuni (Tecnick.com) to Java code. +*/ +package com.tecnick.farmhash64; + +import java.util.Arrays; + +public class FarmHash64 { + + private static final int c1 = 0xcc9e2d51; + private static final int c2 = 0x1b873593; + + private static final long k0 = 0xc3a5c85c97cb3127L; + private static final long k1 = 0xb492b66fbe98f273L; + private static final long k2 = 0x9ae16a3b2f90404fL; + + private static class UInt128 { + public long lo; + public long hi; + } + + private static int rotate32(int val, int shift) { + return shift == 0 ? val : (val >>> shift) | (val << (32 - shift)); + } + + private static long rotate64(long val, int shift) { + return shift == 0 ? val : (val >>> shift) | (val << (64 - shift)); + } + + private static long fetch32(byte[] s, int idx) { + return (s[idx + 0] & 0xFFL) + | ((s[idx + 1] & 0xFFL) << 8) + | ((s[idx + 2] & 0xFFL) << 16) + | ((s[idx + 3] & 0xFFL) << 24); + } + + private static long fetch64(byte[] s, int idx) { + return (s[idx + 0] & 0xFFL) + | ((s[idx + 1] & 0xFFL) << 8) + | ((s[idx + 2] & 0xFFL) << 16) + | ((s[idx + 3] & 0xFFL) << 24) + | ((s[idx + 4] & 0xFFL) << 32) + | ((s[idx + 5] & 0xFFL) << 40) + | ((s[idx + 6] & 0xFFL) << 48) + | ((s[idx + 7] & 0xFFL) << 56); + } + + private static long shiftMix(long val) { + return val ^ (val >>> 47); + } + + private static int mur(int a, int h) { + // Helper from Murmur3 for combining two 32-bit values. 
+ a *= c1; + a = rotate32(a, 17); + a *= c2; + h ^= a; + h = rotate32(h, 19); + + return ((h * 5) + 0xe6546b64); + } + + private static int mix64To32(long x) { + return mur((int) (x >>> 32), (int) ((x << 32) >>> 32)); + } + + private static long hashLen16Mul(long u, long v, long mul) { + // Murmur-inspired hashing. + long a = (u ^ v) * mul; + a ^= (a >>> 47); + long b = (v ^ a) * mul; + b ^= (b >>> 47); + b *= mul; + + return b; + } + + private static long hashLen0to16(byte[] s) { + long slen = s.length; + + if (slen >= 8) { + long mul = k2 + slen * 2; + long a = fetch64(s, 0) + k2; + long b = fetch64(s, (int) (slen - 8)); + long c = rotate64(b, 37) * mul + a; + long d = (rotate64(a, 25) + b) * mul; + + return hashLen16Mul(c, d, mul); + } + + if (slen >= 4) { + long mul = k2 + slen * 2; + long a = fetch32(s, 0); + + return hashLen16Mul(slen + (a << 3), fetch32(s, (int) (slen - 4)), mul); + } + + if (slen > 0) { + long a = s[0] & 0xFFL; + long b = s[(int) (slen >> 1)] & 0xFFL; + long c = s[(int) (slen - 1)] & 0xFFL; + long y = a + (b << 8); + long z = slen + (c << 2); + + return shiftMix(y * k2 ^ z * k0) * k2; + } + + return k2; + } + + private static long hashLen17to32(byte[] s) { + int slen = s.length; + long mul = k2 + (long) slen * 2; + long a = fetch64(s, 0) * k1; + long b = fetch64(s, 8); + long c = fetch64(s, slen - 8) * mul; + long d = fetch64(s, slen - 16) * k2; + + return hashLen16Mul(rotate64(a + b, 43) + rotate64(c, 30) + d, a + rotate64(b + k2, 18) + c, mul); + } + + private static UInt128 weakHashLen32WithSeedsWords(long w, long x, long y, long z, long a, long b) { + a += w; + b = rotate64(b + a + z, 21); + long c = a; + a += x; + a += y; + b += rotate64(a, 44); + + UInt128 result = new UInt128(); + result.lo = a + z; + result.hi = b + c; + + return result; + } + + private static UInt128 weakHashLen32WithSeeds(byte[] s, long a, long b) { + return weakHashLen32WithSeedsWords( + fetch64(s, 0), + fetch64(s, 8), + fetch64(s, 16), + fetch64(s, 24), + a, + 
b); + } + + private static long hashLen33to64(byte[] s) { + int slen = s.length; + long mul = k2 + (long) slen * 2; + long a = fetch64(s, 0) * k2; + long b = fetch64(s, 8); + long c = fetch64(s, slen - 8) * mul; + long d = fetch64(s, slen - 16) * k2; + long y = rotate64(a + b, 43) + rotate64(c, 30) + d; + long z = hashLen16Mul(y, a + rotate64(b + k2, 18) + c, mul); + long e = fetch64(s, 16) * mul; + long f = fetch64(s, 24); + long g = (y + fetch64(s, slen - 32)) * mul; + long h = (z + fetch64(s, slen - 24)) * mul; + + return hashLen16Mul( + rotate64(e + f, 43) + rotate64(g, 30) + h, e + rotate64(f + a, 18) + g, + mul + ); + } + + public static long farmhash64(byte[] s) { + int slen = s.length; + + long seed = 81; + + if (slen <= 16) { + return hashLen0to16(s); + } + + if (slen <= 32) { + return hashLen17to32(s); + } + + if (slen <= 64) { + return hashLen33to64(s); + } + + // For strings over 64 bytes we loop. + // Internal state consists of 56 bytes: v, w, x, y, and z. + UInt128 v = new UInt128(); + UInt128 w = new UInt128(); + long x = seed * k2 + fetch64(s, 0); + long y = seed * k1 + 113; + long z = shiftMix(y * k2 + 113) * k2; + long tmp = 0; + + // Set end so that after the loop we have 1 to 64 bytes left to process. + int endIdx = ((slen - 1) / 64) * 64; + int last64Idx = endIdx + ((slen - 1) & 63) - 63; + byte[] last64 = Arrays.copyOfRange(s, last64Idx, slen); + + while (s.length > 64) { + x = rotate64(x + y + v.lo + fetch64(s, 8), 37) * k1; + y = rotate64(y + v.hi + fetch64(s, 48), 42) * k1; + x ^= w.hi; + y += v.lo + fetch64(s, 40); + z = rotate64(z + w.lo, 33) * k1; + v = weakHashLen32WithSeeds(s, v.hi * k1, x + w.lo); + w = weakHashLen32WithSeeds(Arrays.copyOfRange(s, 32, s.length), z + w.hi, y + fetch64(s, 16)); + tmp = x; + x = z; + z = tmp; + s = Arrays.copyOfRange(s, 64, s.length); + } + + long mul = k1 + ((z & 0xFFL) << 1); + // Make s point to the last 64 bytes of input. 
+ s = last64; + w.lo += ((long) slen - 1) & 63; + v.lo += w.lo; + w.lo += v.lo; + x = rotate64(x + y + v.lo + fetch64(s, 8), 37) * mul; + y = rotate64(y + v.hi + fetch64(s, 48), 42) * mul; + x ^= w.hi * 9; + y += v.lo * 9 + fetch64(s, 40); + z = rotate64(z + w.lo, 33) * mul; + v = weakHashLen32WithSeeds(s, v.hi * mul, x + w.lo); + w = weakHashLen32WithSeeds(Arrays.copyOfRange(s, 32, s.length), z + w.hi, y + fetch64(s, 16)); + tmp = x; + x = z; + z = tmp; + + return hashLen16Mul( + hashLen16Mul(v.lo, w.lo, mul) + shiftMix(y) * k0 + z, + hashLen16Mul(v.hi, w.hi, mul) + x, + mul + ); + } + + public static int farmhash32(byte[] s) { + return mix64To32(farmhash64(s)); + } + +} \ No newline at end of file diff --git a/java/src/test/java/com/tecnick/farmhash64/FarmHash64Test.java b/java/src/test/java/com/tecnick/farmhash64/FarmHash64Test.java new file mode 100644 index 0000000..af2bf5f --- /dev/null +++ b/java/src/test/java/com/tecnick/farmhash64/FarmHash64Test.java @@ -0,0 +1,232 @@ +package com.tecnick.farmhash64; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.Arrays; +import java.util.stream.Stream; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class FarmHash64Test { + + private static final int testSize = 300; + private static final int dataSize = 1048576; // 1 << 20 + + private static Stream hashTestData_Parameters() throws Throwable { + // Parameters: int oh32=(0), long oh64=(1), String in=(2) + return Stream.of( + Arguments.of(0xfe0061e9, 0x9ae16a3b2f90404fL, ""), + Arguments.of(0xd824662a, 0xb3454265b6df75e3L, "a"), + Arguments.of(0x15eb5ed6, 0xaa8d6e5242ada51eL, "ab"), + Arguments.of(0xcaf25fe2, 0x24a5b3a074e7f369L, "abc"), + Arguments.of(0xcf297808, 0x1a5502de4a1f8101L, "abcd"), + Arguments.of(0x5f8d48db, 0xc22f4663e54e04d4L, "abcde"), + Arguments.of(0x16b8a2fd, 
0xc329379e6a03c2cdL, "abcdef"), + Arguments.of(0xcfc5f43d, 0x3c40c92b1ccb7355L, "abcdefg"), + Arguments.of(0x8d1b642, 0xfee9d22990c82909L, "abcdefgh"), + Arguments.of(0xb382832e, 0x332c8ed4dae5ba42L, "abcdefghi"), + Arguments.of(0x3f19a3cb, 0xad052244b781c4ebL, "0123456789"), + Arguments.of(0xee83c5c, 0x3ef4c03514208c77L, "0123456789 "), + Arguments.of(0x6fca023f, 0x496841e83a33cc91L, "0123456789-0"), + Arguments.of(0x6b2c02bd, 0xd81bcb9f3679ac0cL, "0123456789~01"), + Arguments.of(0xb8e8fba, 0x5da5a6a117c606f6L, "0123456789#012"), + Arguments.of(0xe6946835, 0x5361eae17c1ff6bcL, "0123456789@0123"), + Arguments.of(0xfa44df74, 0x4283d4ef43627f64L, "0123456789'01234"), + Arguments.of(0x2a1ed264, 0x46a7416ed4861e3bL, "0123456789=012345"), + Arguments.of(0xbcd3277f, 0xa4abb4e0da2c594cL, "0123456789+0123456"), + Arguments.of(0x26bf5a67, 0xcf1c7d3ad54f9215L, "0123456789*01234567"), + Arguments.of(0x8eedb634, 0x07adf50b2ac764fcL, "0123456789&012345678"), + Arguments.of(0xa329652e, 0xdebcba8e6f3eabd1L, "0123456789^0123456789"), + Arguments.of(0x4ba9b4ed, 0x4dbd128af51d77e8L, "0123456789%0123456789£"), + Arguments.of(0x1b9ea72f, 0xd78d5f852d522e6aL, "0123456789$0123456789!0"), + Arguments.of(0x819d77a5, 0x80d73b843ba57db8L, "size: a.out: bad magic"), + Arguments.of(0x8b72761e, 0x8eb3808d1ccfc779L, "Nepal premier won't resign."), + Arguments.of(0x5f21fe43, 0xb944f8a16261e414L, "C is as portable as Stonehedge!!"), + Arguments.of(0xa15ead04, 0xe8f89ab6df9bdd25L, "Discard medicine more than two years old."), + Arguments.of(0xe3763baf, 0xa9961670ce2a46d9L, "I wouldn't marry him with a ten foot pole."), + Arguments.of(0x50a48aaa, 0xbdd69b798d6ba37aL, "If the enemy is within range, then so are you."), + Arguments.of(0x517e346c, 0xc2f8db8624fefc0eL, "The major problem is with sendmail. -Mark Horton"), + Arguments.of(0x8a4b0b6c, 0x5a0a6efd52e84e2aL, "How can you write a big system without C++? 
-Paul Glick"), + Arguments.of(0xb360937b, 0x786d7e1987023ca9L, "He who has a shady past knows that nice guys finish last."), + Arguments.of(0x2e5713b3, 0x5d14f96c18fe3d5eL, "Free! Free!/A trip/to Mars/for 900/empty jars/Burma Shave"), + Arguments.of(0xec6d1e0e, 0xec8848fd3b266c10L, "His money is twice tainted: 'taint yours and 'taint mine."), + Arguments.of(0x7175f31d, 0x2a578b80bb82147cL, "The days of the digital watch are numbered. -Tom Stoppard"), + Arguments.of(0xdf4c5297, 0x55182f8859eca4ceL, "For every action there is an equal and opposite government program."), + Arguments.of(0x62359aca, 0xabcdb319fcf2826cL, "You remind me of a TV show, but that's all right: I watch it anyway."), + Arguments.of(0x398c0b7c, 0x1d85702503ac7eb4L, "It's well we cannot hear the screams/That we create in others' dreams."), + Arguments.of(0x47f9c, 0xa2b8bf3032021993L, "Give me a rock, paper and scissors and I will move the world. CCFestoon"), + Arguments.of(0xe56239a7, 0x38aa3175b37f305cL, "It's a tiny change to the code and not completely disgusting. - Bob Manchek"), + Arguments.of(0xb556f325, 0x7e85d7b050ed2967L, "There is no reason for any individual to have a computer in their home. -Ken Olsen, 1977"), + Arguments.of(0x75cc5362, 0x5a05644eb66e435eL, "Even if I could be Shakespeare, I think I should still choose to be Faraday. - A. Huxley"), + Arguments.of(0xc401a0bf, 0x98eff6958c5e91aL, "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"), + Arguments.of(0x4e56b7e9, 0xc3f02c4ffd5d71e6L, "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.") + ); + } + + private static long[] expectedFarmHash64() { + return new long[] { + 2598464059L, 797982799L, 1410420968L, 2134990486L, 255297188L, 2992121793L, 4019337850L, 452431531L, + 299850021L, 2532580744L, 2199864459L, 3696623795L, 1053458494L, 1882212500L, 458884671L, 3033004529L, + 2700149065L, 2699376854L, 4220361840L, 1712034059L, 594028478L, 2921867846L, 3280331829L, 326029180L, + 3824583307L, 1612122221L, 2233466664L, 1432476832L, 1628777059L, 1499109081L, 1145519619L, 3190844552L, + 65721842L, 489963606L, 1790705123L, 2128624475L, 155445229L, 1672724608L, 3610042449L, 1911523866L, + 1099072299L, 1389770549L, 3603803785L, 629449419L, 1552847036L, 645684964L, 3151491850L, 3272648435L, + 916494250L, 1230085527L, 231181488L, 851743255L, 1142264800L, 3667013118L, 732137533L, 1909203251L, + 4072067757L, 4165088768L, 956300927L, 914413116L, 3074915312L, 3117299654L, 1438494951L, 507436733L, + 126024219L, 146044391L, 165589978L, 1578546616L, 249776086L, 1207522198L, 46987739L, 1157614300L, + 3614377032L, 586863115L, 1164298657L, 4140791139L, 3725511003L, 232064808L, 512845449L, 3748861010L, + 22638523L, 648000590L, 1024246061L, 4027776454L, 411505255L, 1973395102L, 3474970689L, 1029055034L, + 589567754L, 325737734L, 257578986L, 3698087965L, 2305332220L, 191910725L, 3315355162L, 2135941665L, + 23075771L, 3252374102L, 663013031L, 3444053918L, 2115441882L, 4081398201L, 1379288194L, 4225182569L, + 3667516477L, 1709989541L, 2725013602L, 3639843023L, 2470483982L, 877580602L, 3981838403L, 3762572073L, + 1129162571L, 732225574L, 3232041815L, 1652884780L, 2227121257L, 1426140634L, 1386256573L, 24035717L, + 1598686658L, 3146815575L, 739944537L, 579625482L, 3903349120L, 389846205L, 2834153464L, 1481069623L, + 3740748788L, 3388062747L, 1020177209L, 734239551L, 2610427744L, 49703572L, 1416453607L, 2815915291L, + 937074653L, 3035635454L, 3711259084L, 2627383582L, 
3669691805L, 263366740L, 3565059103L, 1190977418L, + 2747519432L, 4129538640L, 2271095827L, 2993032712L, 795918276L, 1116991810L, 937372240L, 1343017609L, + 1166522068L, 1623631848L, 2721658101L, 1937681565L, 114616703L, 954762543L, 1756889687L, 2936126607L, + 2483004780L, 1927385370L, 1672737098L, 2148675559L, 2636210123L, 1338083267L, 1335160250L, 2084630531L, + 2746885618L, 636616055L, 2076016059L, 408721884L, 2301682622L, 2691859523L, 2614088922L, 1975527044L, + 3529473373L, 1490330107L, 4271796078L, 1910401882L, 3738454258L, 2554452696L, 2237827073L, 2803250686L, + 1996680960L, 839529273L, 3544595875L, 3909443124L, 3656063205L, 837475154L, 438095290L, 484603494L, + 308425103L, 268427550L, 4243643405L, 2849988118L, 2948254999L, 2102063419L, 1735616066L, 1539151988L, + 95237878L, 2005032160L, 1433635018L, 116647396L, 881378302L, 2159170082L, 336034862L, 2017579106L, + 944743644L, 1694443528L, 260177668L, 505662155L, 3722741628L, 1511077569L, 1103819072L, 2089123665L, + 2475035432L, 1120017626L, 2842141483L, 4029205195L, 3873078673L, 136118734L, 1699452298L, 1403506686L, + 1805475756L, 2562064338L, 4271866024L, 3071338162L, 459509140L, 771592405L, 185232757L, 4032960199L, + 3512945009L, 308584855L, 4250142168L, 2565680167L, 38924274L, 3770488806L, 3099963860L, 1255084262L, + 2363435042L, 54945052L, 2534883189L, 2432427547L, 2741583197L, 1280920000L, 1281043691L, 1121403845L, + 2127558730L, 713121337L, 2108187161L, 927011680L, 4134691985L, 1958963937L, 2567532373L, 4075249328L, + 4104757832L, 3026358429L, 3573008472L, 3615577014L, 1541946015L, 3087190425L, 857839960L, 2515339233L, + 2809830736L, 460237542L, 1950698961L, 2069753399L, 1106466069L, 356742959L, 3662626864L, 1750561299L, + 992181339L, 3384018814L, 100741310L, 451656820L, 3650357479L, 2390172694L, 2088767754L, 164402616L, + 2751052984L, 1767810825L, 3441135892L, 3323383489L, 2756998822L, 207428029L, 2648427775L, 2360400900L, + 1396468647L, 1377764574L, 1435134775L, 1099809675L, 3374512975L, 3542220540L, 
4081637863L, 337070226L, + 644850146L, 1306761320L, 1242645122L, 4109252858L, 3377483696L, 1788337208L, 1658628529L, 2911512007L, + 367022558L, 3071359622L, 4273132307L, 3898950547L, 1858986613L, 2040551642L, 4077477194L, 3565689036L, + 265993036L, 1864569342L, 923017956L, 490608221L, 3833372385L, 3287246572L, 2649450292L, 500120236L, + 2810524030L, 1561519055L, 3224066062L, 2774151984L, 2107011431L, 96459446L, 1235983679L, 4237425634L, + 276949224L, 4100839753L, 427484362L, 4246879223L, 1858777639L, 3476334357L, 358032121L, 2511026735L, + 1535473864L, 556796152L, 1476438092L, 2913077464L, 3051522276L, 4046477658L, 1802040304L, 990407433L, + 4052924496L, 2926590471L, 4265214507L, 82077489L, 464407878L, 4190838199L, 733509243L, 1583801700L, + 1877837196L, 3912423882L, 8759461L, 2540185277L, 2019419351L, 4051584612L, 700836153L, 1675560450L, + 3130433948L, 405251683L, 2224044848L, 4071581802L, 2272418128L, 803575837L, 4019147902L, 3841480082L, + 3424361375L, 779434428L, 3057021940L, 2285701422L, 1783152480L, 823305654L, 3032187389L, 4159715581L, + 3420960112L, 3198900547L, 3006227299L, 4194096960L, 1775955687L, 1719108984L, 684087286L, 531310503L, + 3105682208L, 3382290593L, 777173623L, 3241407531L, 2649684057L, 1397502982L, 3193669211L, 811750340L, + 3403136990L, 2540585554L, 784952939L, 943914610L, 3985088434L, 1911188923L, 519948041L, 3181425568L, + 1089679033L, 240953857L, 3017658263L, 3828377737L, 308018483L, 4262383425L, 3188015819L, 4051263539L, + 4074952232L, 1683612329L, 206775997L, 2283918569L, 2217060665L, 350160869L, 140980L, 1891558063L, + 422986366L, 330624974L, 918718096L, 376390582L, 3424344721L, 3187805406L, 3855037968L, 1928519266L, + 3059200728L, 2108753646L, 1343511943L, 2247006571L, 622521957L, 917121602L, 3299763344L, 2864033668L, + 2661022773L, 2006922227L, 1237256330L, 3449066284L, 3285899651L, 786322314L, 1244759631L, 3263135197L, + 987586766L, 3206261120L, 1827135136L, 1781944746L, 2482286699L, 1109175923L, 4190721328L, 1129462471L, + 
1623777358L, 3389003793L, 1646071378L, 1164309901L, 989577914L, 3626554867L, 1516846461L, 3656006011L, + 3698796465L, 3155218919L, 1237411891L, 1854985978L, 3939149151L, 878608872L, 2437686324L, 3163786257L, + 1235300371L, 1256485167L, 1883344352L, 2083771672L, 3066325351L, 2770847216L, 601221482L, 3992583643L, + 2557027816L, 900741486L, 90375300L, 300318232L, 3253901179L, 542270815L, 1273768482L, 1216399252L, + 325675502L, 3652676161L, 1097584090L, 3262252593L, 3704419305L, 411263051L, 3460621305L, 1967599860L, + 901109753L, 2682611693L, 797089608L, 3286110054L, 2219863904L, 3623364733L, 3061255808L, 1615375832L, + 2701956286L, 4145497671L, 449740816L, 2686506989L, 1235084019L, 2151665147L, 2091754612L, 1178454681L, + 3213794286L, 2601416506L, 4004834921L, 238887261L, 186020771L, 2367569534L, 1962659444L, 3539886328L, + 2144472852L, 1390394371L, 3597555910L, 3188438773L, 3371014971L, 2058751609L, 1169588594L, 857915866L, + 923161569L, 4068653043L, 3808667664L, 581227317L, 2077539039L, 851579036L, 2794103714L, 2094375930L, + 3122317317L, 2365436865L, 2023960931L, 2312244996L, 612094988L, 1555465129L, 3306195841L, 1702313921L, + 1171351291L, 2043136409L, 3744621107L, 1028502697L, 6114625L, 3359104346L, 1024572712L, 1927582962L, + 3392622118L, 1347167673L, 2075035198L, 4202817168L, 701024148L, 1481965992L, 1334816273L, 2870251538L, + 1010064531L, 713520765L, 4089081247L, 3231042924L, 2452539325L, 1343734533L, 587001593L, 1917607088L, + 3498936874L, 246692543L, 2836854664L, 2317249321L, 774652981L, 1285694082L, 397012087L, 1717527689L, + 2904461070L, 3893453420L, 1565179401L, 600903026L, 1134342772L, 3234226304L, 345572299L, 2274770442L, + 1079209397L, 2122849632L, 1242840526L, 3987000643L, 3065138774L, 3111336863L, 1023721001L, 3763083325L, + 2196937373L, 2643841788L, 4201389782L, 4223278891L, 292733931L, 1424229089L, 2927147928L, 1048291071L, + 2490333812L, 4098360768L, 3948800722L, 335456628L, 540133451L, 3313113759L, 3430536378L, 2514123129L, + 2418881542L, 
487365389L, 1136054817L, 3004241477L, 4109233936L, 3679809321L, 3527024461L, 1147434678L, + 3308746763L, 1875093248L, 4217929592L, 400784472L, 160353261L, 2413172925L, 1853298225L, 3201741245L, + 3680311316L, 4274382900L, 1131020455L, 194781179L, 3440090658L, 2165746386L, 3106421434L, 880320527L, + 1429837716L, 252230074L, 3623657004L, 3869801679L, 2507199021L, 1659221866L, 3121647246L, 3884308578L, + 2610217849L, 641564641L, 329123979L, 121860586L, 947795261L, 1992594155L, 3050771207L, 2767035539L, + 627269409L, 1806905031L, 584050483L, 4142579188L, 3259749808L, 644172091L, 3053081915L, 2840648309L, + 2244943480L, 4057483496L, 873421687L, 2447660175L, 1233635843L, 2163464207L, 2515400215L, 3100476924L, + 470325051L, 2598261204L, 850667549L, 3622479237L, 2781907007L, 943739431L, 1901484772L, 939810041L, + 3261383939L, 2212130277L, 3349254805L, 2796552902L, 3372846298L, 3835884044L, 2764936304L, 1338171648L, + 2525665319L, 4196233786L, 2290169528L, 1793910997L, 1554419340L, 1733094688L, 1084699349L, 3233936866L, + 1428704144L, 3269904970L, 3347011944L, 1892898231L, 1072588531L, 3547435717L, 1593338562L, 919414554L, + 3953006207L, 877438080L, 224271045L, 2914958001L, 2920583824L, 1251814062L, 385182008L, 640855184L, + 4263183176L, 3041193150L, 3505072908L, 2830570613L, 1949847968L, 2999344380L, 1714496583L, 15918244L, + 2605688266L, 3253705097L, 4152736859L, 2097020806L, 2122199776L, 1069285218L, 670591796L, 768977505L, + 379861934L, 1557579480L, 547346027L, 388559045L, 1495176194L, 4093461535L, 1911655402L, 1053371241L, + 3717104621L, 1144474110L, 4166253320L, 2747410691L, + }; + } + + @ParameterizedTest + @MethodSource("hashTestData_Parameters") + public void farmhash64String(int oh32, long oh64, String in) throws Throwable { + long h = FarmHash64.farmhash64(in.getBytes()); + assertEquals(oh64, h); + } + + @ParameterizedTest + @MethodSource("hashTestData_Parameters") + public void farmhash32String(int oh32, long oh64, String in) throws Throwable { + int h = 
FarmHash64.farmhash32(in.getBytes()); + assertEquals(oh32, h); + } + + private byte[] dataSetup() { + final long k0 = 0xc3a5c85c97cb3127L; + + byte[] data = new byte[dataSize]; + + long a = 9; + long b = 777; + byte u; + + for (int i = 0; i < dataSize; i++) { + a += b; + b += a; + a = (a ^ (a >>> 41)) * k0; + b = (b ^ (b >>> 41)) * k0 + (long) i; + u = (byte) (b >>> 37); + data[i] = u; + } + + return data; + } + + private void testDataItemFarmHash64(byte[] data, int offset, int hlen, int index) { + byte[] s = Arrays.copyOfRange(data, offset, offset + hlen); + long h = FarmHash64.farmhash64(s); + int a = (int) (h >>> 32); + + long[] exp = expectedFarmHash64(); + + assertEquals((int) exp[index], a, " | index: " + index + " | hlen:" + hlen + " | h: " + h); + + a = (int) (h & 0xFFFFFFFFL); + + assertEquals((int) exp[index + 1], a, "index: " + (index + 1) + " | hlen:" + hlen + " | h: " + h); + } + + @Test + public void testFarmHash64() { + byte[] data = dataSetup(); + + int index = 0; + int i = 0; + + for (; i < testSize - 1; i++) { + testDataItemFarmHash64(data, i * i, i, index); + index += 2; + } + + for (; i < dataSize; i += i / 7) { + testDataItemFarmHash64(data, 0, i, index); + index += 2; + } + + testDataItemFarmHash64(data, 0, dataSize, index); + } +} \ No newline at end of file diff --git a/python/.gitignore b/python/.gitignore index 1e98533..15eebb5 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -1,3 +1,8 @@ +.benchmarks +.pytest_cache +c +farmhash64.egg-info + .cache .coverage* diff --git a/python/LICENSE b/python/LICENSE index b6ce10c..73ac008 100644 --- a/python/LICENSE +++ b/python/LICENSE @@ -2,11 +2,13 @@ The code in this project is a C port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). -This code has been ported/translated by Nicola Asuni to header-only C code. +This code has been ported/translated by Nicola Asuni to multiple languages. 
-The original code is released under the MIT License: +MIT License: -Copyright (c) 2014 Google, Inc. + - Copyright (c) 2014 Google, Inc. + - Copyright (c) 2014 Damian Gryski (original GO version) + - Copyright (c) 2016-2024 Nicola Asuni (versions in CGO, GO, Java, Python, Rust) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/python/Makefile b/python/Makefile index 6e2c71f..0650811 100644 --- a/python/Makefile +++ b/python/Makefile @@ -46,7 +46,7 @@ build: clean version # Remove any build artifact .PHONY: clean clean: - rm -rf venv target c Dockerfile htmlcov build dist .pytest_cache .cache .benchmarks ./test/*.so ./test/__pycache__ ./farmhash64/__pycache__ ./farmhash64.egg-info + rm -rf venv target c Dockerfile htmlcov build dist .pytest_cache .cache .benchmarks ./test/*.so ./test/__pycache__ ./farmhash64/__pycache__ ./farmhash64.egg-info farmhash64.*.so find . -type f -name '*.pyc' -exec rm -f {} \; # Generate source code documentation diff --git a/python/README.md b/python/README.md index ac4559b..06211ed 100644 --- a/python/README.md +++ b/python/README.md @@ -1,6 +1,6 @@ # FarmHash64 -*Provides farmhash64, a portable C 64-bit hash function* +*Provides farmhash64 and farmhash32 hash functions in multiple languages* [![Donate via PayPal](https://img.shields.io/badge/donate-paypal-87ceeb.svg)](https://www.paypal.com/cgi-bin/webscr?cmd=_donations¤cy_code=GBP&business=paypal@tecnick.com&item_name=donation%20for%20farmhash64%20project) *Please consider supporting this project by making a donation via [PayPal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations¤cy_code=GBP&business=paypal@tecnick.com&item_name=donation%20for%20farmhash64%20project)* @@ -19,17 +19,22 @@ FarmHash is a family of hash functions. -This is a C port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash -(https://github.com/google/farmhash). 
+FarmHash64 is a 64-bit fingerprint hash function that produces a hash value for a given string. +It is designed to be fast and provide good hash distribution but is not suitable for cryptography applications. -FarmHash64 provides a portable 64-bit hash function for strings (byte array). -The function mix the input bits thoroughly but is not suitable for cryptography. +The FarmHash32 function is also provided, which returns a 32-bit fingerprint hash for a string. All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. +This is a Java port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). -For more information please consult https://github.com/google/farmhash - +This code has been ported/translated by Nicola Asuni (Tecnick.com) to multiple languages: +- C (header-only) +- CGO +- GO +- Java +- Python +- Rust ## Getting Started @@ -44,19 +49,3 @@ make help ``` use the command ```make all``` to build and test all the implementations. - - -### Python Usage Example - -``` -# copy this code in the same directory of farmhash64 library - -import farmhash64 as vh - -print('\nUSAGE EXAMPLE:\n') - -vhash = vh.farmhash64("Lorem ipsum dolor sit amet") -print('vh.farmhash64("Lorem ipsum dolor sit amet")') -print("Variant Hash (DEC): %d" % vhash) -print("Variant Hash (HEX): %x\n" % vhash) -``` diff --git a/python/c/src/farmhash64.h b/python/c/src/farmhash64.h deleted file mode 100644 index b6a1116..0000000 --- a/python/c/src/farmhash64.h +++ /dev/null @@ -1,924 +0,0 @@ -/** - * @file farmhash64.h - * @brief File containing the definition of public functions. - * - * FarmHash is a family of hash functions. - * - * FarmHash64 provides a portable 64-bit hash function for strings (byte array). - * The function mix the input bits thoroughly but is not suitable for cryptography. 
- * - * All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. - * For more information please consult https://github.com/google/farmhash - * - * This is a C port of the Fingerprint64 (farmhashna::Hash64) code - * from Google's FarmHash (https://github.com/google/farmhash). - * - * This code has been ported/translated by Nicola Asuni to header-only C code. - * - * The public functions are: - * - farmhash64: Returns a 64-bit fingerprint hash for a byte array. - * - farmhash32: Returns a 32-bit fingerprint hash for a byte array. - * - * The original C++ code is released under the MIT License: - * - * The MIT License (MIT) - * - * Copyright (c) 2014 Google, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#ifndef FARMHASH64_H -#define FARMHASH64_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include - - -// PORTABILITY LAYER: "static inline" or similar - -#ifndef STATIC_INLINE -/** - * @brief Macro definition for static inline functions. - * - * This macro is used to define functions as static inline, which allows the compiler - * to optimize the function by inlining it at the call site. It is typically used for - * small, frequently called functions to improve performance. - * - * @private - */ -#define STATIC_INLINE static inline -#endif - -// PORTABILITY LAYER: endianness and byteswapping functions - -#ifdef WORDS_BIGENDIAN -#undef FARMHASH_BIG_ENDIAN -/** - * @brief Macro definition for indicating big endian architecture. - * - * This macro is used to indicate that the code is being compiled on a big endian architecture. - * It is defined as 1 to represent big endian architecture. - * - * @private - */ -#define FARMHASH_BIG_ENDIAN 1 -#endif - -#if defined(FARMHASH_LITTLE_ENDIAN) && defined(FARMHASH_BIG_ENDIAN) -#error -#endif - -#if !defined(FARMHASH_LITTLE_ENDIAN) && !defined(FARMHASH_BIG_ENDIAN) -/** - * @brief Macro definition to indicate unknown endianness. - * - * This macro is used to indicate that the endianness of the system is unknown. - * It is defined as 1. - * - * @private - */ -#define FARMHASH_UNKNOWN_ENDIAN 1 -#endif - -#if !defined(bswap_32) || !defined(bswap_64) -#undef bswap_32 -#undef bswap_64 - -#if defined(HAVE_BUILTIN_BSWAP) || defined(__clang__) || \ -(defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5)) -// Easy case for bswap: no header file needed. - -/** - * @brief Macro to swap the byte order of a 32-bit integer. - * - * This macro uses the __builtin_bswap32() function to swap the byte order of a 32-bit integer. - * It is typically used for converting between little-endian and big-endian byte orders. - * - * @param x The 32-bit integer to swap the byte order of. 
- * - * @return The 32-bit integer with the byte order swapped. - * - * @private - */ -#define bswap_32(x) __builtin_bswap32(x) - -/** - * @brief Macro to swap the byte order of a 64-bit value. - * - * This macro uses the __builtin_bswap64() function to swap the byte order of the given 64-bit value. - * - * @param x The 64-bit value to swap the byte order of. - * - * @return The 64-bit value with the byte order swapped. - * - * @private - */ -#define bswap_64(x) __builtin_bswap64(x) -#endif - -#endif - -#if defined(FARMHASH_UNKNOWN_ENDIAN) || !defined(bswap_64) - -#ifdef _MSC_VER - -#undef bswap_32 -#undef bswap_64 - -/** - * @brief Macro to swap the byte order of a 32-bit integer. - * - * This macro uses the _byteswap_ulong function to swap the byte order of a 32-bit integer. - * - * @param x The 32-bit integer to swap the byte order of. - * - * @return The 32-bit integer with the byte order swapped. - * - * @private - */ -#define bswap_32(x) _byteswap_ulong(x) - - -/** - * @brief Macro to swap the byte order of a 64-bit integer. - * - * This macro uses the _byteswap_uint64 function to swap the byte order of a 64-bit integer. - * - * @param x The 64-bit integer to swap the byte order of. - * - * @return The 64-bit integer with the byte order swapped. - * - * @private - */ -#define bswap_64(x) _byteswap_uint64(x) - -#elif defined(__APPLE__) - -// Mac OS X / Darwin features -#include -#undef bswap_32 -#undef bswap_64 - -/** - * @brief Macro to swap the byte order of a 32-bit integer. - * - * This macro uses the OSSwapInt32 function to swap the byte order of a 32-bit integer. - * It is typically used for converting between little-endian and big-endian byte order. - * - * @param x The 32-bit integer to swap the byte order of. - * - * @return The 32-bit integer with the byte order swapped. - * - * @private - */ -#define bswap_32(x) OSSwapInt32(x) - -/** - * @brief Macro to swap the byte order of a 64-bit value. 
- * - * This macro uses the OSSwapInt64 function to swap the byte order of a 64-bit value. - * It is typically used for converting between little-endian and big-endian byte order. - * - * @param x The 64-bit value to swap the byte order of. - * - * @return The 64-bit value with the byte order swapped. - * - * @private - */ -#define bswap_64(x) OSSwapInt64(x) - -#elif defined(__sun) || defined(sun) - -#include -#undef bswap_32 -#undef bswap_64 - -/** - * @brief Macro to swap the byte order of a 32-bit integer. - * - * This macro uses the BSWAP_32 macro to swap the byte order of a 32-bit integer. - * - * @param x The 32-bit integer to swap the byte order of. - * - * @return The 32-bit integer with the byte order swapped. - * - * @private - */ -#define bswap_32(x) BSWAP_32(x) - -/** - * @brief Macro to swap the byte order of a 64-bit value. - * - * This macro is used to swap the byte order of a 64-bit value. - * It is defined as `BSWAP_64(x)`. - * - * @param x The 64-bit value to swap the byte order of. - * - * @return The 64-bit value with the byte order swapped. - * - * @private - */ -#define bswap_64(x) BSWAP_64(x) - -#elif defined(__FreeBSD__) - -#include -#undef bswap_32 -#undef bswap_64 - -/** - * @brief Macro to swap the byte order of a 32-bit integer. - * - * This macro swaps the byte order of a 32-bit integer using the bswap32 function. - * - * @param x The 32-bit integer to swap the byte order of. - * - * @return The 32-bit integer with the byte order swapped. - * - * @private - */ -#define bswap_32(x) bswap32(x) - -/** - * @brief Macro to swap the byte order of a 64-bit value. - * - * This macro swaps the byte order of a 64-bit value using the bswap64 function. - * - * @param x The 64-bit value to swap the byte order of. - * - * @return The 64-bit value with the byte order swapped. 
- * - * @private - */ -#define bswap_64(x) bswap64(x) - -#elif defined(__OpenBSD__) - -#include -#undef bswap_32 -#undef bswap_64 - -/** - * @brief Macro definition to swap the byte order of a 32-bit integer. - * - * This macro is used to swap the byte order of a 32-bit integer. - * It is defined as `swap32(x)`, where `x` is the input value. - * - * @param x The 32-bit integer to swap the byte order of. - * - * @return The 32-bit integer with the byte order swapped. - * - * @private - */ -#define bswap_32(x) swap32(x) - -/** - * @brief Macro to swap the byte order of a 64-bit value. - * - * This macro is used to swap the byte order of a 64-bit value. It is defined as `swap64(x)`. - * - * @param x The 64-bit value to swap the byte order of. - * - * @return The 64-bit value with the byte order swapped. - * - * @private - */ -#define bswap_64(x) swap64(x) - -#elif defined(__NetBSD__) - -#include -#include -#if defined(__BSWAP_RENAME) && !defined(__bswap_32) -#undef bswap_32 -#undef bswap_64 - -/** - * @brief Macro to swap the byte order of a 32-bit value. - * - * This macro swaps the byte order of a 32-bit value using the bswap32 function. - * - * @param x The 32-bit value to swap the byte order of. - * - * @return The 32-bit value with the byte order swapped. - * - * @private - */ -#define bswap_32(x) bswap32(x) - -/** - * @brief Macro to swap the byte order of a 64-bit value. - * - * This macro uses the bswap64 function to swap the byte order of the given 64-bit value. - * It is defined as a shorthand for the bswap64 function. - * - * @param x The 64-bit value to swap the byte order of. - * - * @return The 64-bit value with the byte order swapped. - * - * @private - */ -#define bswap_64(x) bswap64(x) -#endif - -#else - -#undef bswap_32 -#undef bswap_64 -#include - -#endif - -#ifdef WORDS_BIGENDIAN - -/** - * @brief Macro definition indicating that the system is big-endian. - * - * This macro is defined as 1 to indicate that the system is big-endian. 
- * Big-endian is a byte order in which the most significant byte is stored - * at the lowest memory address. This macro is used in the FarmHash64 library - * to handle endianness-specific operations. - */ -#define FARMHASH_BIG_ENDIAN 1 -#endif - -#endif - -#ifdef FARMHASH_BIG_ENDIAN - -/** - * @brief Macro to convert a 32-bit unsigned integer to the expected byte order. - * - * This macro uses the bswap_32 function to convert the given 32-bit unsigned integer - * to the expected byte order. It is used in the farmhash64 library for byte order conversion. - * - * @param x The 32-bit unsigned integer to convert. - * - * @return The converted 32-bit unsigned integer. - * - * @private - */ -#define uint32_t_in_expected_order(x) (bswap_32(x)) - -/** - * @brief Macro to convert a 64-bit unsigned integer to the expected byte order. - * - * This macro uses the bswap_64 function to convert the given 64-bit unsigned integer - * to the expected byte order. The bswap_64 function swaps the byte order of the input - * value, ensuring that the result is in the expected byte order. - * - * @param x The 64-bit unsigned integer to convert. - * - * @return The converted 64-bit unsigned integer in the expected byte order. - * - * @private - */ -#define uint64_t_in_expected_order(x) (bswap_64(x)) -#else - -/** - * Macro to convert a 32-bit unsigned integer to the expected order. - * - * @param x The 32-bit unsigned integer to convert. - * - * @return The converted 32-bit unsigned integer. - * - * @private - */ -#define uint32_t_in_expected_order(x) (x) - -/** - * Macro to convert a 64-bit unsigned integer to the expected order. - * - * @param x The 64-bit unsigned integer to be converted. - * - * @return The converted 64-bit unsigned integer. - * - * @private - */ -#define uint64_t_in_expected_order(x) (x) -#endif - -/** - * @brief Represents a 128-bit unsigned integer. 
- * - * The uint128_t struct is used to store a 128-bit integer, which consists of a lower 64 bits (lo) and a higher 64 bits (hi). - * This struct is typically used for operations that require a larger range of values than what can be represented by a standard 64-bit integer. - * - * @private - */ -typedef struct uint128_t -{ - uint64_t lo; // Lower 64 bits of the 128-bit integer - uint64_t hi; // Higher 64 bits of the 128-bit integer -} uint128_t; - -// Some primes between 2^63 and 2^64 for various uses. -static const uint64_t k0 = 0xc3a5c85c97cb3127ULL; -static const uint64_t k1 = 0xb492b66fbe98f273ULL; -static const uint64_t k2 = 0x9ae16a3b2f90404fULL; - -// Magic numbers for 32-bit hashing. Copied from Murmur3. -static const uint32_t c1 = 0xcc9e2d51; -static const uint32_t c2 = 0x1b873593; - -/** - * @brief Get the low 64 bits of a uint128_t value. - * - * @param x uint128_t value - * - * @return The low 64 bits of x - * - * @private - */ -STATIC_INLINE uint64_t uint128_t_low64(const uint128_t x) -{ - return x.lo; -} - -/** - * @brief Get the high 64 bits of a uint128_t value. - * - * @param x uint128_t value - * - * @return The high 64 bits of x - * - * @private - */ -STATIC_INLINE uint64_t uint128_t_high64(const uint128_t x) -{ - return x.hi; -} - -/** - * @brief Create a uint128_t value from two 64-bit integers. - * - * @param lo Low 64 bits - * @param hi High 64 bits - * - * @return uint128_t value - * - * @private - */ -STATIC_INLINE uint128_t make_uint128_t(uint64_t lo, uint64_t hi) -{ - uint128_t x = {lo, hi}; - return x; -} - -/** - * @brief Convert a uint128_t value to a 64-bit hash code. - * - * This function is intended to be a reasonably good hash function. - * The result may change from time to time and may differ on different platforms. - * The result may also differ depending on the NDEBUG macro. 
- * - * @param x uint128_t value - * - * @return 64-bit hash code - * - * @private - */ -STATIC_INLINE uint64_t farmhash128_to_64(uint128_t x) -{ - // Murmur-inspired hashing. - const uint64_t k_mul = 0x9ddfea08eb382d69ULL; - uint64_t a = (uint128_t_low64(x) ^ uint128_t_high64(x)) * k_mul; - a ^= (a >> 47); - uint64_t b = (uint128_t_high64(x) ^ a) * k_mul; - b ^= (b >> 47); - b *= k_mul; - return b; -} - -/** - * @brief Fetch a 64-bit little-endian integer from a byte array. - * - * @param p Pointer to the byte array - * - * @return The fetched 64-bit integer - * - * @private - */ -STATIC_INLINE uint64_t fetch64(const char* p) -{ - uint64_t result; - memcpy(&result, p, sizeof(result)); - return uint64_t_in_expected_order(result); -} - -/** - * @brief Fetch a 32-bit little-endian integer from a byte array. - * - * @param p Pointer to the byte array - * - * @return The fetched 32-bit integer - * - * @private - */ -STATIC_INLINE uint32_t fetch32(const char* p) -{ - uint32_t result; - memcpy(&result, p, sizeof(result)); - return uint32_t_in_expected_order(result); -} - -/** - * @brief Swap the values of two 64-bit integers. - * - * @param a Pointer to the first integer - * @param b Pointer to the second integer - * - * @private - */ -STATIC_INLINE void swap64(uint64_t* a, uint64_t* b) -{ - uint64_t t; - t = *a; - *a = *b; - *b = t; -} - -/** - * @brief Rotate a 32-bit integer right by a specified number of bits. - * - * @param val The value to rotate - * @param shift The number of bits to rotate by - * - * @return The rotated value - * - * @private - */ -STATIC_INLINE uint32_t ror32(uint32_t val, size_t shift) -{ - // Avoid shifting by 32: doing so yields an undefined result. - return shift == 0 ? val : (val >> shift) | (val << (32 - shift)); -} - -/** - * @brief Rotate a 64-bit integer right by a specified number of bits. 
- * - * @param val The value to rotate - * @param shift The number of bits to rotate by - * - * @return The rotated value - * - * @private - */ -STATIC_INLINE uint64_t ror64(uint64_t val, size_t shift) -{ - // Avoid shifting by 64: doing so yields an undefined result. - return shift == 0 ? val : (val >> shift) | (val << (64 - shift)); -} - -/** - * @brief Performs a bitwise XOR operation between a 64-bit value and its right-shifted value. - * - * This function takes a 64-bit value and performs a bitwise XOR operation between the value - * and its right-shifted value by 47 bits. The result is returned. - * - * @param val The input 64-bit value. - * - * @return The result of the bitwise XOR operation between the input value and its right-shifted value. - * - * @private - */ -STATIC_INLINE uint64_t smix(uint64_t val) -{ - return val ^ (val >> 47); -} - -/** - * @brief Performs the MurmurHash3 algorithm on a 32-bit input value. - * - * This function applies the MurmurHash3 algorithm on a 32-bit input value using the provided - * constants c1 and c2. It performs a series of bitwise operations and multiplications to - * generate a hash value. - * - * @param a The input value to be hashed. - * @param h The current hash value. - * - * @return The updated hash value after applying the MurmurHash3 algorithm. - * - * @private - */ -STATIC_INLINE uint32_t mur(uint32_t a, uint32_t h) -{ - a *= c1; - a = ror32(a, 17); - a *= c2; - h ^= a; - h = ror32(h, 19); - return h * 5 + 0xe6546b64; -} - -/** - * @brief Static inline function that converts a 64-bit integer to a 32-bit integer using the MurmurHash algorithm. - * - * @param x The 64-bit integer to be converted. - * - * @return The converted 32-bit integer. - * - * @private - */ -STATIC_INLINE uint32_t mix_64_to_32(uint64_t x) -{ - return mur((uint32_t)(x >> 32), (uint32_t)((x << 32) >> 32)); -} - -/** - * @brief Calculate a 64-bit hash code for a byte array of length 16. 
- * - * @param u First 64 bits of the byte array - * @param v Last 64 bits of the byte array - * - * @return 64-bit hash code - * - * @private - */ -STATIC_INLINE uint64_t farmhash_len_16(uint64_t u, uint64_t v) -{ - return farmhash128_to_64(make_uint128_t(u, v)); -} - -/** - * @brief Calculate a 64-bit hash code for a byte array of length 16, multiplied by a constant. - * - * @param u First 64 bits of the byte array - * @param v Last 64 bits of the byte array - * @param mul The multiplication constant - * - * @return 64-bit hash code - * - * @private - */ -STATIC_INLINE uint64_t farmhash_len_16_mul(uint64_t u, uint64_t v, uint64_t mul) -{ - // Murmur-inspired hashing. - uint64_t a = (u ^ v) * mul; - a ^= (a >> 47); - uint64_t b = (v ^ a) * mul; - b ^= (b >> 47); - b *= mul; - return b; -} - -/** - * @brief Calculate a 64-bit hash code for a byte array of length 0 to 16. - * - * @param s Pointer to the byte array - * @param len Length of the byte array - * - * @return 64-bit hash code - * - * @private - */ -STATIC_INLINE uint64_t farmhash_na_len_0_to_16(const char *s, size_t len) -{ - if (len >= 8) - { - uint64_t mul = k2 + len * 2; - uint64_t a = fetch64(s) + k2; - uint64_t b = fetch64(s + len - 8); - uint64_t c = ror64(b, 37) * mul + a; - uint64_t d = (ror64(a, 25) + b) * mul; - return farmhash_len_16_mul(c, d, mul); - } - if (len >= 4) - { - uint64_t mul = k2 + len * 2; - uint64_t a = fetch32(s); - return farmhash_len_16_mul(len + (a << 3), fetch32(s + len - 4), mul); - } - if (len > 0) - { - uint8_t a = s[0]; - uint8_t b = s[len >> 1]; - uint8_t c = s[len - 1]; - uint32_t y = (uint32_t) a + ((uint32_t) b << 8); - uint32_t z = len + ((uint32_t) c << 2); - return smix(y * k2 ^ z * k0) * k2; - } - return k2; -} - -/** - * @brief Calculate a 64-bit hash code for a byte array of length 17 to 32. 
- * - * @param s Pointer to the byte array - * @param len Length of the byte array - * - * @return 64-bit hash code - * - * @private - */ -STATIC_INLINE uint64_t farmhash_na_len_17_to_32(const char *s, size_t len) -{ - uint64_t mul = k2 + len * 2; - uint64_t a = fetch64(s) * k1; - uint64_t b = fetch64(s + 8); - uint64_t c = fetch64(s + len - 8) * mul; - uint64_t d = fetch64(s + len - 16) * k2; - return farmhash_len_16_mul(ror64(a + b, 43) + ror64(c, 30) + d, a + ror64(b + k2, 18) + c, mul); -} - -/** - * @brief Calculate a 16-byte (128-bit) weak hash code for a byte array of length 48, including seeds. - * Callers do best to use "random-looking" values for a and b. - * - * @param w First 64 bits of the byte array - * @param x Second 64 bits of the byte array - * @param y Third 64 bits of the byte array - * @param z Fourth 64 bits of the byte array - * @param a First seed value - * @param b Second seed value - * - * @return 128-bit weak hash code - * - * @private - */ -STATIC_INLINE uint128_t weak_farmhash_na_len_32_with_seeds_vals(uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) -{ - a += w; - b = ror64(b + a + z, 21); - uint64_t c = a; - a += x; - a += y; - b += ror64(a, 44); - return make_uint128_t(a + z, b + c); -} - -/** - * @brief Calculate a 16-byte (128-bit) weak hash code for a byte array of length 32, including seeds. - * - * @param s Pointer to the byte array - * @param a First seed value - * @param b Second seed value - * - * @return 128-bit weak hash code - * - * @private - */ -STATIC_INLINE uint128_t weak_farmhash_na_len_32_with_seeds(const char* s, uint64_t a, uint64_t b) -{ - return weak_farmhash_na_len_32_with_seeds_vals(fetch64(s), - fetch64(s + 8), - fetch64(s + 16), - fetch64(s + 24), - a, - b); -} - -/** - * @brief Calculate a 8-byte (64-bit) hash code for a byte array of length 33 to 64. 
- * - * @param s Pointer to the byte array - * @param len Length of the byte array - * - * @return 64-bit hash code - * - * @private - */ -STATIC_INLINE uint64_t farmhash_na_len_33_to_64(const char *s, size_t len) -{ - uint64_t mul = k2 + len * 2; - uint64_t a = fetch64(s) * k2; - uint64_t b = fetch64(s + 8); - uint64_t c = fetch64(s + len - 8) * mul; - uint64_t d = fetch64(s + len - 16) * k2; - uint64_t y = ror64(a + b, 43) + ror64(c, 30) + d; - uint64_t z = farmhash_len_16_mul(y, a + ror64(b + k2, 18) + c, mul); - uint64_t e = fetch64(s + 16) * mul; - uint64_t f = fetch64(s + 24); - uint64_t g = (y + fetch64(s + len - 32)) * mul; - uint64_t h = (z + fetch64(s + len - 24)) * mul; - return farmhash_len_16_mul(ror64(e + f, 43) + ror64(g, 30) + h, e + ror64(f + a, 18) + g, mul); -} - -// ================================================================================================= -// PUBLIC FUNCTIONS -// ================================================================================================= - -/** - * @brief 64 bit hash. - * - * Returns a 64-bit fingerprint hash for a byte array. - * - * This function is not suitable for cryptography. - * - * @param s string to process - * @param len string length - * - * @return 64-bit hash code - * - * @public - */ -STATIC_INLINE uint64_t farmhash64(const char *s, size_t len) -{ - const uint64_t seed = 81; - if (len <= 32) - { - if (len <= 16) - { - return farmhash_na_len_0_to_16(s, len); - } - return farmhash_na_len_17_to_32(s, len); - } - if (len <= 64) - { - return farmhash_na_len_33_to_64(s, len); - } - // For strings over 64 bytes we loop. - // Internal state consists of 56 bytes: v, w, x, y, and z. - uint64_t x = seed; - uint64_t y = seed * k1 + 113; - uint64_t z = smix(y * k2 + 113) * k2; - uint128_t v = make_uint128_t(0, 0); - uint128_t w = make_uint128_t(0, 0); - x = x * k2 + fetch64(s); - // Set end so that after the loop we have 1 to 64 bytes left to process. 
- const char* end = s + ((len - 1) / 64) * 64; - const char* last64 = end + ((len - 1) & 63) - 63; - assert(s + len - 64 == last64); - do - { - x = ror64(x + y + v.lo + fetch64(s + 8), 37) * k1; - y = ror64(y + v.hi + fetch64(s + 48), 42) * k1; - x ^= w.hi; - y += v.lo + fetch64(s + 40); - z = ror64(z + w.lo, 33) * k1; - v = weak_farmhash_na_len_32_with_seeds(s, v.hi * k1, x + w.lo); - w = weak_farmhash_na_len_32_with_seeds(s + 32, z + w.hi, y + fetch64(s + 16)); - swap64(&z, &x); - s += 64; - } - while (s != end); - uint64_t mul = k1 + ((z & 0xff) << 1); - // Make s point to the last 64 bytes of input. - s = last64; - w.lo += ((len - 1) & 63); - v.lo += w.lo; - w.lo += v.lo; - x = ror64(x + y + v.lo + fetch64(s + 8), 37) * mul; - y = ror64(y + v.hi + fetch64(s + 48), 42) * mul; - x ^= w.hi * 9; - y += v.lo * 9 + fetch64(s + 40); - z = ror64(z + w.lo, 33) * mul; - v = weak_farmhash_na_len_32_with_seeds(s, v.hi * mul, x + w.lo); - w = weak_farmhash_na_len_32_with_seeds(s + 32, z + w.hi, y + fetch64(s + 16)); - swap64(&z, &x); - return farmhash_len_16_mul(farmhash_len_16_mul(v.lo, w.lo, mul) + smix(y) * k0 + z, - farmhash_len_16_mul(v.hi, w.hi, mul) + x, - mul); -} - -/** - * @brief 32 bit hash. - * - * Returns a 32-bit fingerprint hash for a byte array. - * - * NOTE: This is NOT equivalent to the original Fingerprint32 function. - * It is a new function based on farmhash64. - * - * This function is not suitable for cryptography. 
- * - * @param s string to process - * @param len string length - * - * @return 32-bit hash code - * - * @public - */ -STATIC_INLINE uint32_t farmhash32(const char *s, size_t len) -{ - return mix_64_to_32(farmhash64(s, len)); -} - -#ifdef __cplusplus -} -#endif - -#endif // FARMHASH64_H \ No newline at end of file diff --git a/python/setup.py b/python/setup.py index adc9c83..c69776b 100644 --- a/python/setup.py +++ b/python/setup.py @@ -30,7 +30,7 @@ def run(self): setup( name="farmhash64", - version="1.5.1.0", + version="1.6.0.0", keywords=("farmhash64"), description="farmhash64 Bindings for Python", long_description=read("../README.md"), diff --git a/rust/Cargo.toml b/rust/Cargo.toml index bc1780d..8e49507 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "farmhash64" -version = "1.5.1" +version = "1.6.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/rust/src/lib.rs b/rust/src/lib.rs index aa6db83..3de64c9 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,30 +1,17 @@ /* -Package farmhash64 implements the FarmHash64 hash function. - -That is a Rust translation of the Google's C++ code: -https://github.com/google/farmhash - - - Copyright (c) 2014 Google, Inc. - - Copyright (c) 2014 Damian Gryski - - Copyright (c) 2016-2024 Nicola Asuni - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +This library implements the farmhash64 and farmhash32 hash functions for strings. + +FarmHash is a family of hash functions. + +FarmHash64 is a 64-bit fingerprint hash function that produces a hash value for a given string. +It is designed to be fast and provide good hash distribution but is not suitable for cryptography applications. + +The FarmHash32 function is also provided, which returns a 32-bit fingerprint hash for a string. + +All members of the FarmHash family were designed with heavy reliance on previous work by Jyrki Alakuijala, Austin Appleby, Bob Jenkins, and others. +This is a Rust port of the Fingerprint64 (farmhashna::Hash64) code from Google's FarmHash (https://github.com/google/farmhash). + +This code has been ported/translated by Nicola Asuni (Tecnick.com) to Rust code. 
*/ // BASICS @@ -45,18 +32,25 @@ struct Uint128 { // PLATFORM +#[inline] fn rotate32(val: u32, shift: u32) -> u32 { val.rotate_right(shift) } +#[inline] fn rotate64(val: u64, shift: u32) -> u64 { val.rotate_right(shift) } -fn fetch32(s: &[u8], idx: usize) -> u32 { - u32::from_le_bytes([s[idx], s[idx + 1], s[idx + 2], s[idx + 3]]) +#[inline] +fn fetch32(s: &[u8], idx: usize) -> u64 { + u64::from(s[idx + 0]) + | (u64::from(s[idx + 1]) << 8) + | (u64::from(s[idx + 2]) << 16) + | (u64::from(s[idx + 3]) << 24) } +#[inline] fn fetch64(s: &[u8], idx: usize) -> u64 { u64::from(s[idx + 0]) | (u64::from(s[idx + 1]) << 8) @@ -70,10 +64,12 @@ fn fetch64(s: &[u8], idx: usize) -> u64 { // FARMHASH NA +#[inline] fn shift_mix(val: u64) -> u64 { val ^ (val >> 47) } +#[inline] fn mur(a: u32, h: u32) -> u32 { let mut a: u32 = u32::from(a); let mut h: u32 = u32::from(h); @@ -86,10 +82,12 @@ fn mur(a: u32, h: u32) -> u32 { } // Merge a 64 bit integer into 32 bit. +#[inline] fn mix_64_to_32(x: u64) -> u32 { mur((x >> 32) as u32, ((x << 32) >> 32) as u32) } +#[inline] fn hash_len_16_mul(u: u64, v: u64, mul: u64) -> u64 { let a = (u ^ v).wrapping_mul(mul); let a = a ^ (a >> 47); @@ -98,6 +96,7 @@ fn hash_len_16_mul(u: u64, v: u64, mul: u64) -> u64 { b.wrapping_mul(mul) } +#[inline] fn hash_len_0_to_16(s: &[u8]) -> u64 { let slen = s.len() as u64; @@ -116,8 +115,8 @@ fn hash_len_0_to_16(s: &[u8]) -> u64 { let a = fetch32(s, 0); return hash_len_16_mul( - slen.wrapping_add(u64::from(a) << 3), - u64::from(fetch32(s, (slen - 4) as usize)), + slen.wrapping_add(a << 3), + fetch32(s, (slen - 4) as usize), mul, ); } @@ -138,6 +137,7 @@ fn hash_len_0_to_16(s: &[u8]) -> u64 { // This probably works well for 16-byte strings as well, but it may be overkill // in that case. +#[inline] fn hash_len_17_to_32(s: &[u8]) -> u64 { let slen = s.len(); let mul = K2.wrapping_add((slen * 2) as u64); @@ -158,6 +158,7 @@ fn hash_len_17_to_32(s: &[u8]) -> u64 { // Return a 16-byte hash for 48 bytes. Quick and dirty. 
// Callers do best to use "random-looking" values for a and b. +#[inline] fn weak_hash_len_32_with_seeds_words(w: u64, x: u64, y: u64, z: u64, a: u64, b: u64) -> (u64, u64) { let a = a.wrapping_add(w); let b = rotate64(b.wrapping_add(a).wrapping_add(z), 21); @@ -170,6 +171,7 @@ fn weak_hash_len_32_with_seeds_words(w: u64, x: u64, y: u64, z: u64, a: u64, b: } // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. +#[inline] fn weak_hash_len_32_with_seeds(s: &[u8], a: u64, b: u64) -> (u64, u64) { weak_hash_len_32_with_seeds_words( fetch64(s, 0), @@ -182,6 +184,7 @@ fn weak_hash_len_32_with_seeds(s: &[u8], a: u64, b: u64) -> (u64, u64) { } // Return an 8-byte hash for 33 to 64 bytes. +#[inline] fn hash_len_33_to_64(s: &[u8]) -> u64 { let slen = s.len(); let mul = K2.wrapping_add((slen as u64).wrapping_mul(2)); @@ -214,9 +217,9 @@ fn hash_len_33_to_64(s: &[u8]) -> u64 { } // FarmHash64 returns a 64-bit fingerprint hash for a string. +#[inline] pub fn farmhash64(mut s: &[u8]) -> u64 { let slen = s.len(); - let seed: u64 = 81; if slen <= 16 { return hash_len_0_to_16(s); @@ -230,6 +233,8 @@ pub fn farmhash64(mut s: &[u8]) -> u64 { return hash_len_33_to_64(s); } + let seed: u64 = 81; + // For strings over 64 bytes we loop. // Internal state consists of 56 bytes: v, w, x, y, and z. let mut v = Uint128 { lo: 0, hi: 0 }; @@ -312,6 +317,7 @@ pub fn farmhash64(mut s: &[u8]) -> u64 { // FarmHash32 returns a 32-bit fingerprint hash for a string. // NOTE: This is NOT equivalent to the original Fingerprint32 function. +#[inline] pub fn farmhash32(s: &[u8]) -> u32 { mix_64_to_32(farmhash64(s)) }