Skip to content

Commit 06a5f79

Browse files
author
rodrigo.nogueira
committed
Add Merkle tree data structure
- Implements complete Merkle tree (hash tree) for efficient data verification - Supports tree construction, root hash calculation, and proof generation - Includes proof verification without rebuilding tree - Comprehensive documentation with real-world use cases - 37 passing doctests covering all functionality - All tests pass: ruff, mypy, doctests
1 parent aca55f4 commit 06a5f79

File tree

1 file changed

+185
-0
lines changed

1 file changed

+185
-0
lines changed
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
"""
2+
Merkle Tree (Hash Tree) Implementation
3+
4+
A Merkle tree is a tree data structure where every leaf node is labeled with the
5+
cryptographic hash of a data block, and every non-leaf node is labeled with the
6+
hash of its child nodes. This allows efficient and secure verification of large
7+
data structures.
8+
9+
How it works:
10+
1. Hash each data block to create leaf nodes
11+
2. Pair adjacent hashes and hash them together to create parent nodes
12+
3. Repeat until a single root hash remains
13+
4. If a level has an odd number of nodes, pair the last node with itself
14+
15+
Use cases:
16+
- Bitcoin and Blockchain: Verify transactions in blocks efficiently
17+
- Git: Verify repository integrity and commit history
18+
- IPFS: Content-addressed distributed file system
19+
- Certificate Transparency: SSL/TLS certificate verification logs
20+
- Apache Cassandra: Anti-entropy for data synchronization
21+
- BitTorrent: Verify pieces of downloaded files
22+
23+
Time Complexity:
24+
- Build tree: O(n) where n is number of data blocks
25+
- Generate proof: O(log n)
26+
- Verify proof: O(log n)
27+
28+
Space Complexity: O(n)
29+
30+
References:
31+
- https://en.wikipedia.org/wiki/Merkle_tree
32+
- https://bitcoin.org/bitcoin.pdf (Section 7: Reclaiming Disk Space)
33+
- https://tools.ietf.org/html/rfc9162 (Certificate Transparency)
34+
"""
35+
36+
from hashlib import sha256
37+
38+
39+
class MerkleTree:
    """
    Merkle tree implementation for efficient data verification.

    Leaves are the hex-encoded SHA-256 digests of the data blocks. Every parent
    is the SHA-256 digest of the concatenation of its two children's hex digests.
    When a level has an odd number of nodes, the last node is paired with itself.

    Attributes:
        leaves: hex digests of the data blocks, in input order.
        tree: all levels of the tree, from the leaves (index 0) up to the root.

    >>> tree = MerkleTree([b"a", b"b", b"c", b"d"])
    >>> root = tree.get_root()
    >>> len(root)
    64

    >>> tree = MerkleTree([b"hello", b"world"])
    >>> proof = tree.get_proof(0)
    >>> len(proof) > 0
    True

    >>> tree = MerkleTree([b"data"])
    >>> root = tree.get_root()
    >>> len(root)
    64

    >>> MerkleTree([])
    Traceback (most recent call last):
        ...
    ValueError: Cannot create Merkle tree from empty data
    """

    def __init__(self, data_blocks: list[bytes]) -> None:
        """
        Hash every data block and build the full tree.

        Raises:
            ValueError: if ``data_blocks`` is empty.
        """
        if not data_blocks:
            msg = "Cannot create Merkle tree from empty data"
            raise ValueError(msg)
        self.leaves = [sha256(block).hexdigest() for block in data_blocks]
        self.tree = self._build_tree()

    @staticmethod
    def _hash_pair(left: str, right: str) -> str:
        """Return the hex SHA-256 digest of two concatenated hex digests."""
        return sha256((left + right).encode()).hexdigest()

    def _build_tree(self) -> list[list[str]]:
        """
        Build every level of the tree, starting from the leaves.

        Returns a list of levels; the first is a copy of ``self.leaves`` and the
        last contains only the root hash.
        """
        levels = [self.leaves[:]]
        while len(levels[-1]) > 1:
            current = levels[-1]
            parents = []
            for i in range(0, len(current), 2):
                left = current[i]
                # An unpaired last node is hashed together with itself.
                right = current[i + 1] if i + 1 < len(current) else left
                parents.append(self._hash_pair(left, right))
            levels.append(parents)
        return levels

    def get_root(self) -> str:
        """
        Get the Merkle root hash.

        >>> tree = MerkleTree([b"a", b"b", b"c", b"d"])
        >>> root = tree.get_root()
        >>> isinstance(root, str)
        True

        >>> tree = MerkleTree([b"single"])
        >>> root = tree.get_root()
        >>> len(root)
        64
        """
        return self.tree[-1][0]

    def get_proof(self, index: int) -> list[tuple[str, str]]:
        """
        Generate a Merkle proof for a data block at the given index.

        Returns list of (hash, position) tuples where position is 'left' or 'right',
        ordered from the leaf level up to the level just below the root.

        Raises:
            ValueError: if ``index`` is not a valid leaf index.

        >>> tree = MerkleTree([b"a", b"b", b"c", b"d"])
        >>> proof = tree.get_proof(0)
        >>> len(proof) > 0
        True

        >>> tree = MerkleTree([b"a", b"b"])
        >>> proof = tree.get_proof(0)
        >>> all(isinstance(p, tuple) and len(p) == 2 for p in proof)
        True

        >>> tree = MerkleTree([b"only_one"])
        >>> proof = tree.get_proof(0)
        >>> len(proof)
        0

        The last leaf of an odd-sized level is paired with itself:

        >>> tree = MerkleTree([b"x", b"y", b"z"])
        >>> proof = tree.get_proof(2)
        >>> proof[0] == (sha256(b"z").hexdigest(), "right")
        True
        >>> MerkleTree.verify_proof(sha256(b"z").hexdigest(), proof, tree.get_root())
        True

        >>> tree.get_proof(3)
        Traceback (most recent call last):
            ...
        ValueError: Index 3 out of range
        """
        if index < 0 or index >= len(self.leaves):
            msg = f"Index {index} out of range"
            raise ValueError(msg)
        proof: list[tuple[str, str]] = []
        # The root level has no siblings, so it is excluded.
        for level in self.tree[:-1]:
            sibling_index = index ^ 1
            if sibling_index < len(level):
                position = "left" if index % 2 == 1 else "right"
                proof.append((level[sibling_index], position))
            else:
                # No sibling: _build_tree hashed this node with itself, so the
                # proof must contain the node's own hash for verification to
                # reproduce the parent.
                proof.append((level[index], "right"))
            index //= 2
        return proof

    @staticmethod
    def verify_proof(
        leaf_hash: str, proof: list[tuple[str, str]], root_hash: str
    ) -> bool:
        """
        Verify a Merkle proof.

        Starting from ``leaf_hash``, each proof step combines the running hash
        with the sibling hash (sibling first when its position is 'left',
        otherwise sibling last). The proof is valid when the final hash equals
        ``root_hash``.

        >>> tree = MerkleTree([b"a", b"b", b"c", b"d"])
        >>> root = tree.get_root()
        >>> leaf = sha256(b"a").hexdigest()
        >>> proof = tree.get_proof(0)
        >>> MerkleTree.verify_proof(leaf, proof, root)
        True

        >>> MerkleTree.verify_proof(leaf, proof, "wrong_root")
        False

        >>> tree = MerkleTree([b"x", b"y", b"z"])
        >>> root = tree.get_root()
        >>> leaf = sha256(b"y").hexdigest()
        >>> proof = tree.get_proof(1)
        >>> MerkleTree.verify_proof(leaf, proof, root)
        True

        >>> wrong_leaf = sha256(b"wrong").hexdigest()
        >>> MerkleTree.verify_proof(wrong_leaf, proof, root)
        False
        """
        current_hash = leaf_hash
        for sibling_hash, position in proof:
            if position == "left":
                current_hash = MerkleTree._hash_pair(sibling_hash, current_hash)
            else:
                current_hash = MerkleTree._hash_pair(current_hash, sibling_hash)
        return current_hash == root_hash
170+
171+
172+
if __name__ == "__main__":
    import doctest

    _ = doctest.testmod()

    # Small demonstration: build a tree over four blocks, then generate and
    # verify an inclusion proof for the first block.
    print("Merkle Tree Example:")
    data = [b"Transaction 1", b"Transaction 2", b"Transaction 3", b"Transaction 4"]
    tree = MerkleTree(data)
    root_hash = tree.get_root()
    proof = tree.get_proof(0)
    print(f"Root hash: {root_hash}")
    print(f"Proof for index 0: {proof}")
    leaf_hash = sha256(data[0]).hexdigest()
    is_valid = MerkleTree.verify_proof(leaf_hash, proof, root_hash)
    print(f"Proof valid: {is_valid}")

0 commit comments

Comments
 (0)