|
| 1 | +""" |
| 2 | +CRC32 (Cyclic Redundancy Check 32-bit) Hash Algorithm |
| 3 | +
|
| 4 | +This module implements the CRC32 hash algorithm, a non-cryptographic hash function |
| 5 | +widely used for error detection and data integrity verification. |
| 6 | +
|
| 7 | +CRC32 is commonly used in: |
| 8 | +- ZIP file format for data integrity |
| 9 | +- Ethernet frame check sequences |
| 10 | +- PNG image format for chunk verification |
| 11 | +- Gzip compression |
| 12 | +
|
| 13 | +The algorithm uses the IEEE 802.3 polynomial (0xEDB88320 in reversed bit order) |
| 14 | +and produces a 32-bit hash value. |
| 15 | +
|
| 16 | +Note: CRC32 is NOT suitable for cryptographic purposes. It's designed for |
| 17 | +error detection, not security. For cryptographic hashing, use SHA-256 or similar. |
| 18 | +
|
| 19 | +Reference: |
| 20 | +- https://en.wikipedia.org/wiki/Cyclic_redundancy_check |
| 21 | +- https://www.rfc-editor.org/rfc/rfc1952.html (GZIP specification) |
| 22 | +""" |
| 23 | + |
| 24 | + |
| 25 | +def _generate_crc32_table() -> list[int]: |
| 26 | + """ |
| 27 | + Generate the CRC32 lookup table for optimized calculation. |
| 28 | +
|
| 29 | + Uses the IEEE 802.3 polynomial: 0xEDB88320 (reversed bit order) |
| 30 | +
|
| 31 | + >>> table = _generate_crc32_table() |
| 32 | + >>> len(table) |
| 33 | + 256 |
| 34 | + >>> hex(table[0]) |
| 35 | + '0x0' |
| 36 | + >>> hex(table[128]) |
| 37 | + '0xedb88320' |
| 38 | + """ |
| 39 | + polynomial = 0xEDB88320 |
| 40 | + table = [] |
| 41 | + |
| 42 | + for i in range(256): |
| 43 | + crc = i |
| 44 | + for _ in range(8): |
| 45 | + if crc & 1: |
| 46 | + crc = (crc >> 1) ^ polynomial |
| 47 | + else: |
| 48 | + crc >>= 1 |
| 49 | + table.append(crc) |
| 50 | + |
| 51 | + return table |
| 52 | + |
| 53 | + |
# Lookup table built once when the module is imported; crc32() reads one
# entry from it per input byte.
CRC32_TABLE = _generate_crc32_table()
| 55 | + |
| 56 | + |
def crc32(data: bytes) -> int:
    """
    Compute the CRC32 checksum of a bytes object.

    The running value starts at 0xFFFFFFFF, each input byte is folded in
    through one CRC32_TABLE lookup, and the result is XORed with 0xFFFFFFFF
    before it is returned.

    Args:
        data: The bytes to checksum

    Returns:
        The checksum as an unsigned 32-bit integer (0 to 4294967295)

    Raises:
        TypeError: If data is not an instance of bytes

    >>> crc32(b"Hello World")
    1243066710

    >>> crc32(b"")
    0

    >>> crc32(b"The quick brown fox jumps over the lazy dog")
    1095738169

    >>> crc32(b"a")
    3904355907

    >>> crc32(b"abc")
    891568578

    >>> crc32(b"123456789")
    3421780262

    >>> crc32(b"Python")
    2742599054

    >>> crc32(b"Algorithms")
    3866870335

    >>> crc32(b"CRC32")
    4128576900

    >>> crc32(b"\\x00\\x00\\x00\\x00")
    558161692

    >>> import zlib
    >>> test_data = b"Verify with zlib"
    >>> crc32(test_data) == zlib.crc32(test_data)
    True
    """
    if not isinstance(data, bytes):
        msg = f"data must be bytes, not {type(data).__name__}"
        raise TypeError(msg)

    mask = 0xFFFFFFFF
    register = mask  # all-ones initial value

    for octet in data:
        # The low byte of the register, mixed with the input byte, selects
        # the table entry; the remaining 24 bits shift down by one byte.
        register = CRC32_TABLE[(register ^ octet) & 0xFF] ^ (register >> 8)

    # Final inversion of all 32 bits.
    return register ^ mask
| 116 | + |
| 117 | + |
if __name__ == "__main__":
    import doctest

    # Run the doctests embedded in this module's docstrings.
    doctest.testmod()

    # Small demonstration of the public function.
    hello_checksum = crc32(b"Hello World")
    empty_checksum = crc32(b"")
    print(f"CRC32 of 'Hello World': {hello_checksum}")
    print(f"CRC32 of empty bytes: {empty_checksum}")
0 commit comments