Skip to content

Commit 44df2f7

Browse files
FlyCloudC authored and bobzhang committed
perf(HAMT): flatten nested Branch structure
1 parent b76d0ca commit 44df2f7

File tree

7 files changed: +79 -18 lines changed

immut/hashset/HAMT.mbt

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pub fn[A : Eq + Hash] contains(self : T[A], key : A) -> Bool {
3838
guard self._ is Some(node) else { return false }
3939
loop (node, @path.of(key)) {
4040
(Leaf(key1, bucket), _) => key == key1 || bucket.contains(key)
41+
(Flat(key1, path1), path) => path == path1 && key == key1
4142
(Branch(children), path) => {
4243
let idx = path.idx()
4344
if children.has(idx) {
@@ -50,13 +51,24 @@ pub fn[A : Eq + Hash] contains(self : T[A], key : A) -> Bool {
5051
}
5152

5253
///|
53-
fn[A] make_leaf(key : A, path : Path) -> Node[A] {
54-
// make sure leaf nodes always appear at the bottom of the tree
55-
if path.is_end() {
56-
Leaf(key, @list.empty())
54+
/// Requires: key1 != key2, and path1 and path2 have the same length.
55+
fn[A] join_2(key1 : A, path1 : Path, key2 : A, path2 : Path) -> Node[A] {
56+
let idx1 = path1.idx()
57+
let idx2 = path2.idx()
58+
if idx1 == idx2 {
59+
let node = if path1.is_last() {
60+
Leaf(key2, @list.singleton(key1))
61+
} else {
62+
join_2(key1, path1.next(), key2, path2.next())
63+
}
64+
Branch(@sparse_array.singleton(idx1, node))
5765
} else {
58-
let child = make_leaf(key, path.next())
59-
Branch(@sparse_array.singleton(path.idx(), child))
66+
let (node1, node2) = if path1.is_last() {
67+
(Leaf(key1, @list.empty()), Leaf(key2, @list.empty()))
68+
} else {
69+
(Flat(key1, path1.next()), Flat(key2, path2.next()))
70+
}
71+
Branch(@sparse_array.doubleton(idx1, node1, idx2, node2))
6072
}
6173
}
6274

@@ -69,14 +81,20 @@ fn[A : Eq] add_with_path(self : Node[A], key : A, path : Path) -> Node[A] {
6981
} else {
7082
Leaf(key, bucket.add(key1))
7183
}
84+
Flat(key1, path1) =>
85+
if path == path1 && key == key1 {
86+
self
87+
} else {
88+
join_2(key1, path1, key, path)
89+
}
7290
Branch(children) => {
7391
let idx = path.idx()
7492
if children.has(idx) {
7593
let child = children[idx]
7694
let child = child.add_with_path(key, path.next())
7795
Branch(children.replace(idx, child))
7896
} else {
79-
let child = make_leaf(key, path.next())
97+
let child = Flat(key, path.next())
8098
Branch(children.add(idx, child))
8199
}
82100
}
@@ -87,7 +105,7 @@ fn[A : Eq] add_with_path(self : Node[A], key : A, path : Path) -> Node[A] {
87105
/// Add a key to the hashset.
88106
pub fn[A : Eq + Hash] add(self : T[A], key : A) -> T[A] {
89107
match self._ {
90-
None => Some(make_leaf(key, @path.of(key)))
108+
None => Some(Flat(key, @path.of(key)))
91109
Some(node) => Some(node.add_with_path(key, @path.of(key)))
92110
}
93111
}
@@ -115,15 +133,26 @@ fn[A : Eq] remove_with_path(self : Node[A], key : A, path : Path) -> Node[A]? {
115133
} else {
116134
Some(self)
117135
}
136+
Flat(key1, path1) =>
137+
if path == path1 && key == key1 {
138+
None
139+
} else {
140+
Some(self)
141+
}
118142
Branch(children) => {
119143
let idx = path.idx()
120144
if children.has(idx) {
121145
let child = children[idx]
122146
let new_child = child.remove_with_path(key, path.next())
123-
match (children.size(), new_child) {
124-
(1, None) => None
125-
(_, None) => Some(Branch(children.remove(idx)))
126-
(_, Some(new_child)) => Some(Branch(children.replace(idx, new_child)))
147+
let new_children = match (children.size(), new_child) {
148+
(1, None) => return None
149+
(_, None) => children.remove(idx)
150+
(_, Some(new_child)) => children.replace(idx, new_child)
151+
}
152+
match new_children.data {
153+
[Flat(key1, path1)] =>
154+
Some(Flat(key1, path1.push(new_children.elem_info.fitst_idx())))
155+
_ => Some(Branch(new_children))
127156
}
128157
} else {
129158
Some(self)
@@ -140,6 +169,7 @@ pub fn[A] size(self : T[A]) -> Int {
140169
fn node_size(node) {
141170
match node {
142171
Leaf(_, bucket) => 1 + bucket.length()
172+
Flat(_) => 1
143173
Branch(children) =>
144174
for i = 0, total_size = 0 {
145175
if i < children.data.length() {
@@ -194,6 +224,7 @@ pub fn[A] each(self : T[A], f : (A) -> Unit) -> Unit {
194224
f(k)
195225
bucket.each(f)
196226
}
227+
Flat(k, _) => f(k)
197228
Branch(children) => children.each(go)
198229
}
199230
}
@@ -210,6 +241,7 @@ pub fn[A] iter(self : T[A]) -> Iter[A] {
210241
fn go(node) -> Iter[A] {
211242
match node {
212243
Leaf(k, bucket) => Iter::singleton(k) + bucket.iter()
244+
Flat(k, _) => Iter::singleton(k)
213245
Branch(children) => children.data.iter().flat_map(go)
214246
}
215247
}
@@ -262,6 +294,7 @@ impl[A : Eq] Eq for Node[A] with op_equal(self, other) {
262294
match (self, other) {
263295
(Leaf(x, xs), Leaf(y, ys)) =>
264296
xs.length() == ys.length() && xs.add(x).iter().all(ys.add(y).contains(_))
297+
(Flat(x, pathx), Flat(y, pathy)) => pathx == pathy && x == y
265298
(Branch(xs), Branch(ys)) => xs == ys
266299
_ => false
267300
}

immut/hashset/types.mbt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ typealias @path.Path
1818
///|
1919
/// A non-empty immutable hash set data structure
2020
priv enum Node[A] {
21+
Flat(A, Path)
2122
Leaf(A, @list.T[A]) // use a list of buckets to resolve collision
23+
/// The number of leaves under a branch is greater than 1; if it equals 1, the node should be represented as `Flat` instead.
2224
Branch(@sparse_array.SparseArray[Node[A]])
2325
}
2426

immut/internal/path/path.mbt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
/// A Path is a binary string divided into several segments, for example:
1717
/// 0b11_10000_10101_00100_11111_01010
1818
/// where the last segment is 10000, and the first segment is 01010.
19-
pub(all) type Path UInt
19+
pub(all) type Path UInt derive(Eq)
2020

2121
///|
2222
const SEGMENT_LENGTH : Int = 5
@@ -36,13 +36,13 @@ pub fn[A : Hash] of(key : A) -> Path {
3636
}
3737

3838
///|
39-
/// If SEGMENT_LENGTH == 5, END == 0b11
40-
const END : UInt = 0xffffffffU >> (SEGMENT_LENGTH * SEGMENT_NUM)
39+
/// If SEGMENT_LENGTH == 5, MAX_TAIL == 0b11_11111
40+
const MAX_TAIL : UInt = 0xffffffffU >> (SEGMENT_LENGTH * (SEGMENT_NUM - 1))
4141

4242
///|
43-
pub fn Path::is_end(self : Path) -> Bool {
43+
pub fn Path::is_last(self : Path) -> Bool {
4444
let Path(self) = self
45-
self == END
45+
self <= MAX_TAIL
4646
}
4747

4848
///|

immut/internal/path/path.mbti

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@ fn[A : Hash] of(A) -> Path
66
// Types and methods
77
pub(all) type Path UInt
88
fn Path::idx(Self) -> Int
9-
fn Path::is_end(Self) -> Bool
9+
fn Path::is_last(Self) -> Bool
1010
fn Path::next(Self) -> Self
1111
fn Path::push(Self, Int) -> Self
12+
impl Eq for Path
1213

1314
// Type aliases
1415

immut/internal/sparse_array/bitset.mbt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ pub fn Bitset::index_of(self : Bitset, idx : Int) -> Int {
3131
(self._ & ((1U << idx) - 1)).popcnt()
3232
}
3333

34+
///|
35+
pub fn Bitset::fitst_idx(self : Bitset) -> Int {
36+
self._.ctz()
37+
}
38+
3439
///|
3540
pub fn Bitset::union(self : Bitset, other : Bitset) -> Bitset {
3641
Bitset(self._ | other._)

immut/internal/sparse_array/sparse_array.mbt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,23 @@ pub fn[X] singleton(idx : Int, value : X) -> SparseArray[X] {
3131
{ elem_info: empty_bitset.add(idx), data: [value] }
3232
}
3333

34+
///|
35+
pub fn[X] doubleton(
36+
idx1 : Int,
37+
value1 : X,
38+
idx2 : Int,
39+
value2 : X
40+
) -> SparseArray[X] {
41+
{
42+
elem_info: empty_bitset.add(idx1).add(idx2),
43+
data: if idx1 < idx2 {
44+
[value1, value2]
45+
} else {
46+
[value2, value1]
47+
},
48+
}
49+
}
50+
3451
///|
3552
pub fn[X] has(self : SparseArray[X], idx : Int) -> Bool {
3653
self.elem_info.has(idx)

immut/internal/sparse_array/sparse_array.mbti

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package "moonbitlang/core/immut/internal/sparse_array"
33
// Values
44
fn[X] add(SparseArray[X], Int, X) -> SparseArray[X]
55

6+
fn[X] doubleton(Int, X, Int, X) -> SparseArray[X]
7+
68
fn[X] each(SparseArray[X], (X) -> Unit) -> Unit
79

810
fn[X] empty() -> SparseArray[X]
@@ -23,6 +25,7 @@ fn[X] size(SparseArray[X]) -> Int
2325
pub(all) type Bitset UInt
2426
fn Bitset::add(Self, Int) -> Self
2527
fn Bitset::difference(Self, Self) -> Self
28+
fn Bitset::fitst_idx(Self) -> Int
2629
fn Bitset::has(Self, Int) -> Bool
2730
fn Bitset::index_of(Self, Int) -> Int
2831
fn Bitset::intersection(Self, Self) -> Self

0 commit comments

Comments
 (0)