|
| 1 | +""" |
| 2 | +FNV (Fowler-Noll-Vo) Hash Algorithm |
| 3 | +
|
| 4 | +This module implements the FNV-1a hash algorithm, a fast non-cryptographic hash |
| 5 | +function widely used in hash tables, bloom filters, and checksums. |
| 6 | +
|
| 7 | +FNV-1a is known for: |
| 8 | +- Simplicity: Very simple implementation (XOR and multiply) |
| 9 | +- Speed: Extremely fast computation |
| 10 | +- Good distribution: Excellent avalanche properties |
| 11 | +- Low collision rate: Very few collisions on short strings in practice |
| 12 | +
|
| 13 | +Common uses: |
| 14 | +- Hash tables (CPython's str/bytes hash was a modified FNV before Python 3.4) |
| 15 | +- Bloom filters and caches |
| 16 | +- Checksums for data structures |
| 17 | +- Database indexing |
| 18 | +
|
| 19 | +The algorithm combines XOR with multiplication by a fixed prime to create |
| 20 | +well-distributed hash values. The "1a" variant (used here) XORs each byte into |
| 21 | +the hash before multiplying (FNV-1 multiplies first), typically providing better distribution. |
| 22 | +
|
| 23 | +Note: FNV is NOT cryptographically secure. Use SHA-256 or similar for security. |
| 24 | +
|
| 25 | +References: |
| 26 | +- http://www.isthe.com/chongo/tech/comp/fnv/ |
| 27 | +- https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function |
| 28 | +""" |
| 29 | + |
| 30 | + |
def fnv1a_32(data: bytes) -> int:
    """
    Calculate the FNV-1a 32-bit hash of byte data.

    FNV-1a uses XOR-then-multiply instead of multiply-then-XOR,
    providing better avalanche characteristics than FNV-1.

    Args:
        data: Byte data to hash. Besides ``bytes``, the bytes-like types
            ``bytearray`` and ``memoryview`` are accepted and hash to the
            same value as the equivalent ``bytes`` object.

    Returns:
        32-bit hash value (0 to 4,294,967,295)

    Raises:
        TypeError: If data is not bytes, bytearray, or memoryview

    >>> fnv1a_32(b"")
    2166136261

    >>> fnv1a_32(b"hello")
    1335831723

    >>> fnv1a_32(bytearray(b"hello"))
    1335831723

    >>> fnv1a_32(memoryview(b"hello"))
    1335831723

    >>> fnv1a_32(b"Hello")
    4116459851

    >>> fnv1a_32(b"world")
    933488787

    >>> fnv1a_32(b"The quick brown fox jumps over the lazy dog")
    76545936

    >>> fnv1a_32(b"a")
    3826002220

    >>> fnv1a_32(b"abc")
    440920331

    >>> fnv1a_32(b"Python")
    3822946231

    >>> fnv1a_32(b"FNV-1a")
    3973616866

    >>> fnv1a_32(b"\\x00\\x00\\x00\\x00")
    1268118805

    >>> fnv1a_32(b"test" * 100) != fnv1a_32(b"test" * 101)
    True

    >>> fnv1a_32("hello")
    Traceback (most recent call last):
        ...
    TypeError: data must be bytes, not str
    """
    if not isinstance(data, (bytes, bytearray, memoryview)):
        msg = f"data must be bytes, not {type(data).__name__}"
        raise TypeError(msg)

    # A memoryview may expose multi-byte items or several dimensions;
    # flatten it so the loop below always sees individual octets.
    if isinstance(data, memoryview):
        data = data.tobytes()

    fnv_32_prime = 0x01000193
    hash_value = 0x811C9DC5  # FNV 32-bit offset basis

    for byte in data:
        # FNV-1a order: fold the byte in first, then multiply.
        hash_value ^= byte
        # Python ints are unbounded, so mask to emulate 32-bit wraparound.
        hash_value = (hash_value * fnv_32_prime) & 0xFFFFFFFF

    return hash_value
| 92 | + |
| 93 | + |
def fnv1a_64(data: bytes) -> int:
    """
    Calculate the FNV-1a 64-bit hash of byte data.

    The 64-bit variant provides a larger hash space, reducing
    collision probability for large datasets.

    Args:
        data: Byte data to hash. Besides ``bytes``, the bytes-like types
            ``bytearray`` and ``memoryview`` are accepted and hash to the
            same value as the equivalent ``bytes`` object.

    Returns:
        64-bit hash value (0 to 18,446,744,073,709,551,615)

    Raises:
        TypeError: If data is not bytes, bytearray, or memoryview

    >>> fnv1a_64(b"")
    14695981039346656037

    >>> fnv1a_64(b"hello")
    11831194018420276491

    >>> fnv1a_64(bytearray(b"hello"))
    11831194018420276491

    >>> fnv1a_64(memoryview(b"hello"))
    11831194018420276491

    >>> fnv1a_64(b"Hello")
    7201466553693376363

    >>> fnv1a_64(b"world")
    5717881983045765875

    >>> fnv1a_64(b"The quick brown fox jumps over the lazy dog")
    17580284887202820368

    >>> fnv1a_64(b"a")
    12638187200555641996

    >>> fnv1a_64(b"abc")
    16654208175385433931

    >>> fnv1a_64(b"Python")
    4148801904339793143

    >>> fnv1a_64(b"FNV-1a")
    15319149270662077890

    >>> fnv1a_64(b"\\x00\\x00\\x00\\x00")
    5558979605539197941

    >>> fnv1a_64(b"test" * 100) != fnv1a_64(b"test" * 101)
    True

    >>> fnv1a_64("hello")
    Traceback (most recent call last):
        ...
    TypeError: data must be bytes, not str
    """
    if not isinstance(data, (bytes, bytearray, memoryview)):
        msg = f"data must be bytes, not {type(data).__name__}"
        raise TypeError(msg)

    # A memoryview may expose multi-byte items or several dimensions;
    # flatten it so the loop below always sees individual octets.
    if isinstance(data, memoryview):
        data = data.tobytes()

    fnv_64_prime = 0x100000001B3
    hash_value = 0xCBF29CE484222325  # FNV 64-bit offset basis

    for byte in data:
        # FNV-1a order: fold the byte in first, then multiply.
        hash_value ^= byte
        # Python ints are unbounded, so mask to emulate 64-bit wraparound.
        hash_value = (hash_value * fnv_64_prime) & 0xFFFFFFFFFFFFFFFF

    return hash_value
| 155 | + |
| 156 | + |
if __name__ == "__main__":
    # Run the embedded doctests first, then print a short demonstration
    # of both hash widths on the same sample input.
    import doctest

    doctest.testmod()

    sample = b"Hello, World!"
    for bits, hasher in ((32, fnv1a_32), (64, fnv1a_64)):
        print(f"FNV-1a {bits}-bit hash of '{sample.decode()}': {hasher(sample)}")
0 commit comments