Skip to content

Commit d1d4c47

Browse files
author
rodrigo.nogueira
committed
feat: add CRC32 hash algorithm implementation
- Implement CRC32 with the IEEE 802.3 polynomial
- Add 17 comprehensive doctests with edge cases
- Include validation against zlib.crc32()
- Full type hints and English documentation
- Passes ruff and mypy checks

CRC32 is widely used in ZIP, Ethernet, and PNG for data integrity. This adds an important non-cryptographic hash algorithm that was missing from the repository.
1 parent 2c15b8c commit d1d4c47

File tree

1 file changed

+124
-0
lines changed

1 file changed

+124
-0
lines changed

hashes/crc32.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""
2+
CRC32 (Cyclic Redundancy Check 32-bit) Hash Algorithm
3+
4+
This module implements the CRC32 hash algorithm, a non-cryptographic hash function
5+
widely used for error detection and data integrity verification.
6+
7+
CRC32 is commonly used in:
8+
- ZIP file format for data integrity
9+
- Ethernet frame check sequences
10+
- PNG image format for chunk verification
11+
- Gzip compression
12+
13+
The algorithm uses the IEEE 802.3 polynomial (0xEDB88320 in reversed bit order)
14+
and produces a 32-bit hash value.
15+
16+
Note: CRC32 is NOT suitable for cryptographic purposes. It's designed for
17+
error detection, not security. For cryptographic hashing, use SHA-256 or similar.
18+
19+
Reference:
20+
- https://en.wikipedia.org/wiki/Cyclic_redundancy_check
21+
- https://www.rfc-editor.org/rfc/rfc1952.html (GZIP specification)
22+
"""
23+
24+
25+
def _generate_crc32_table() -> list[int]:
26+
"""
27+
Generate the CRC32 lookup table for optimized calculation.
28+
29+
Uses the IEEE 802.3 polynomial: 0xEDB88320 (reversed bit order)
30+
31+
>>> table = _generate_crc32_table()
32+
>>> len(table)
33+
256
34+
>>> hex(table[0])
35+
'0x0'
36+
>>> hex(table[128])
37+
'0xedb88320'
38+
"""
39+
polynomial = 0xEDB88320
40+
table = []
41+
42+
for i in range(256):
43+
crc = i
44+
for _ in range(8):
45+
if crc & 1:
46+
crc = (crc >> 1) ^ polynomial
47+
else:
48+
crc >>= 1
49+
table.append(crc)
50+
51+
return table
52+
53+
54+
# Lookup table built once when the module is imported; crc32() indexes it
# for every input byte instead of recomputing the eight bit-steps each time.
CRC32_TABLE: list[int] = _generate_crc32_table()
55+
56+
57+
def crc32(data: bytes, value: int = 0) -> int:
    """
    Calculate the CRC32 checksum of byte data.

    The checksum register is pre-conditioned, every input byte is folded in
    through ``CRC32_TABLE``, and the result is XORed with 0xFFFFFFFF before
    being returned.

    Args:
        data: Byte data to calculate the checksum for
        value: CRC32 of the data processed so far. Pass the result of a
            previous call to continue a running checksum over several
            chunks, in the same way as the optional second argument of
            ``zlib.crc32()``. Only the low 32 bits are used. Defaults to 0,
            which starts a fresh checksum.

    Returns:
        CRC32 checksum as a 32-bit integer (0 to 4294967295)

    Raises:
        TypeError: If data is not of type bytes

    >>> crc32(b"Hello World")
    1243066710

    >>> crc32(b"")
    0

    >>> crc32(b"The quick brown fox jumps over the lazy dog")
    1095738169

    >>> crc32(b"a")
    3904355907

    >>> crc32(b"abc")
    891568578

    >>> crc32(b"123456789")
    3421780262

    >>> crc32(b"Python")
    2742599054

    >>> crc32(b"Algorithms")
    3866870335

    >>> crc32(b"CRC32")
    4128576900

    >>> crc32(b"\\x00\\x00\\x00\\x00")
    558161692

    >>> import zlib
    >>> test_data = b"Verify with zlib"
    >>> crc32(test_data) == zlib.crc32(test_data)
    True

    A large input can be processed in chunks by feeding each result back in:

    >>> crc32(b" World", crc32(b"Hello")) == crc32(b"Hello World")
    True

    >>> crc32(b"", 1243066710)
    1243066710

    >>> crc32("text")
    Traceback (most recent call last):
        ...
    TypeError: data must be bytes, not str
    """
    if not isinstance(data, bytes):
        msg = f"data must be bytes, not {type(data).__name__}"
        raise TypeError(msg)

    # Undo the final XOR applied to the previous chunk's result so the
    # register resumes where it left off; for value == 0 this yields the
    # standard all-ones initial state 0xFFFFFFFF.
    crc = (value & 0xFFFFFFFF) ^ 0xFFFFFFFF

    for byte in data:
        # The low byte of the register combined with the input byte selects
        # the precomputed remainder for eight bit-steps at once.
        table_index = (crc ^ byte) & 0xFF
        crc = (crc >> 8) ^ CRC32_TABLE[table_index]

    return crc ^ 0xFFFFFFFF
116+
117+
118+
if __name__ == "__main__":
    import doctest

    # Run the embedded examples first, then show two sample checksums.
    doctest.testmod()

    hello_checksum = crc32(b"Hello World")
    empty_checksum = crc32(b"")
    print(f"CRC32 of 'Hello World': {hello_checksum}")
    print(f"CRC32 of empty bytes: {empty_checksum}")

0 commit comments

Comments
 (0)