Skip to content

Commit

Permalink
feat: Implement MutableIntMap (#3303)
Browse files Browse the repository at this point in the history
`MutableIntMap` is an internally mutable variant of `IntMap`. The
underlying tree is still a persistent data structure, so cloning is
constant time and different copies of the same `MutableIntMap` can be
modified independently. Performance is similar to that of `IntMap`,
since both are built on the same internals.

It is intended as a drop-in replacement for `BTreeMap` (hence the
internal mutability, as opposed to the purely functional interface of
`IntMap`). For that reason, it also implements `remove()`, `split_off()`
and `extend()`, as well as a couple of syntactic sugar methods (e.g.
`keys()` and `values()`). It also supports any integer-like key type
(one that can be efficiently mapped to `u64` or `u128`).

The main performance difference between `IntMap` and `MutableIntMap`
comes from (i) having `insert()` return the previous value; and (ii)
keeping track of / recalculating the size of the map, so that `len()`
completes in constant time.

---------

Co-authored-by: Stefan Schneider <[email protected]>
  • Loading branch information
alin-at-dfinity and schneiderstefan authored Jan 9, 2025
1 parent 6704c14 commit 760e1f7
Show file tree
Hide file tree
Showing 9 changed files with 1,456 additions and 239 deletions.
2 changes: 1 addition & 1 deletion rs/replicated_state/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ rust_bench(
name = "replicated_state_intmap_bench",
testonly = True,
srcs = [
"benches/bench_allocator.rs",
"benches/bench_intmap.rs",
],
deps = [":replicated_state"] + BIN_DEPENDENCIES,
)
Expand Down
211 changes: 192 additions & 19 deletions rs/replicated_state/benches/bench_intmap.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use criterion::{black_box, BatchSize, BenchmarkId, Criterion};
use criterion_time::ProcessTime;

use ic_replicated_state::page_map::int_map::IntMap;
use ic_replicated_state::page_map::int_map::{AsInt, IntMap, MutableIntMap};
use std::collections::{BTreeMap, HashMap};
use std::sync::Arc;

Expand All @@ -11,67 +11,222 @@ fn value(k: u64) -> Value {
Arc::new(k.to_be_bytes().to_vec())
}

const BENCH_SIZES: &[u64] = &[10, 100, 1000];

/// Two-component benchmark key made of an `i32` and a `usize`.
///
/// Its `AsInt` implementation in this file packs both fields into a single
/// `u128`, so it is used for the `*_128` benchmark variants.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct Key128(i32, usize);

impl AsInt for Key128 {
type Repr = u128;

#[inline]
fn as_int(&self) -> u128 {
(self.0 as u128) << 64 | self.1 as u128
}
}

/// Builds the `Key128` for index `i`: the first field is `i` cast to `i32`
/// and divided by 4, the second is `i` cast to `usize` modulo 4.
fn key128(i: u64) -> Key128 {
    let quotient = (i as i32) / 4;
    let remainder = (i as usize) % 4;
    Key128(quotient, remainder)
}

fn bench_intmap(c: &mut Criterion<ProcessTime>) {
let mut group = c.benchmark_group("Insert");
for n in [10u64, 100, 1000].iter().cloned() {
for &n in BENCH_SIZES.iter() {
group.bench_function(BenchmarkId::new("patricia", n), |b| {
b.iter(|| {
let m: IntMap<_> = (0..n).map(|x| (x, value(x))).collect();
let m: IntMap<u64, _> = (0..n).map(|x| (x * 13 % n, value(x))).collect();
black_box(m);
})
});
group.bench_function(BenchmarkId::new("mpatricia", n), |b| {
b.iter(|| {
let m: MutableIntMap<u64, _> = (0..n).map(|x| (x * 13 % n, value(x))).collect();
black_box(m);
})
});
group.bench_function(BenchmarkId::new("mpatricia_128", n), |b| {
b.iter(|| {
let m: MutableIntMap<Key128, _> =
(0..n).map(|x| (key128(x * 13 % n), value(x))).collect();
black_box(m);
})
});
group.bench_function(BenchmarkId::new("cow_btree", n), |b| {
b.iter(|| {
let m: Arc<BTreeMap<_, _>> = Arc::new((0..n).map(|x| (x, value(x))).collect());
let m: Arc<BTreeMap<_, _>> =
Arc::new((0..n).map(|x| (x * 13 % n, value(x))).collect());
black_box(m);
})
});
group.bench_function(BenchmarkId::new("cow_btree_128", n), |b| {
b.iter(|| {
let m: Arc<BTreeMap<_, _>> =
Arc::new((0..n).map(|x| (key128(x * 13 % n), value(x))).collect());
black_box(m);
})
});
group.bench_function(BenchmarkId::new("cow_hash", n), |b| {
b.iter(|| {
let m: Arc<HashMap<_, _>> = Arc::new((0..n).map(|x| (x, value(x))).collect());
let m: Arc<HashMap<_, _>> =
Arc::new((0..n).map(|x| (x * 13 % n, value(x))).collect());
black_box(m);
})
});
group.bench_function(BenchmarkId::new("cow_hash_128", n), |b| {
b.iter(|| {
let m: Arc<HashMap<_, _>> =
Arc::new((0..n).map(|x| (key128(x * 13 % n), value(x))).collect());
black_box(m);
})
});
}
group.finish();

let mut group = c.benchmark_group("Lookup");
for n in [10u64, 100, 1000].iter().cloned() {
let patricia_map: IntMap<Value> = (0..n).map(|x| (x, value(x))).collect();
let mut group = c.benchmark_group("Remove");
for &n in BENCH_SIZES.iter() {
let patricia_map: IntMap<u64, Value> = (0..n).map(|x| (x, value(x))).collect();
let mpatricia_map: MutableIntMap<u64, Value> = (0..n).map(|x| (x, value(x))).collect();
let btree_map: Arc<BTreeMap<u64, Value>> =
Arc::new((0..n).map(|x| (x, value(x))).collect());
let hash_map: Arc<HashMap<u64, Value>> = Arc::new((0..n).map(|x| (x, value(x))).collect());

group.bench_function(BenchmarkId::new("patricia", n), |b| {
b.iter_batched(
|| patricia_map.clone(),
|mut map| {
for i in 0..n {
map = map.remove(&(i * 13 % n)).0;
map = map.remove(&(i * 13 % n + n)).0;
}
black_box(map);
},
BatchSize::SmallInput,
);
});
group.bench_function(BenchmarkId::new("mpatricia", n), |b| {
b.iter_batched(
|| mpatricia_map.clone(),
|mut map| {
for i in 0..n {
map.remove(&(i * 13 % n));
map.remove(&(i * 13 % n + n));
}
black_box(map);
},
BatchSize::SmallInput,
);
});
group.bench_function(BenchmarkId::new("cow_btree", n), |b| {
b.iter_batched(
|| Arc::clone(&btree_map),
|mut map| {
let map = Arc::make_mut(&mut map);
for i in 0..n {
map.remove(&(i * 13 % n));
map.remove(&(i * 13 % n + n));
}
black_box(map);
},
BatchSize::SmallInput,
);
});
group.bench_function(BenchmarkId::new("cow_hash", n), |b| {
b.iter_batched(
|| Arc::clone(&hash_map),
|mut map| {
let map = Arc::make_mut(&mut map);
for i in 0..n {
map.remove(&(i * 13 % n));
map.remove(&(i * 13 % n + n));
}
black_box(map);
},
BatchSize::SmallInput,
);
});
}
group.finish();

let mut group = c.benchmark_group("Lookup");
for &n in BENCH_SIZES.iter() {
const N: u64 = 5;
let kv = |x| (N * x, value(x));
let kv128 = |x| (key128(N * x), value(x));

let patricia_map: IntMap<u64, Value> = (0..n).map(kv).collect();
let mpatricia_map: MutableIntMap<u64, Value> = (0..n).map(kv).collect();
let patricia_128_map: MutableIntMap<Key128, Value> = (0..n).map(kv128).collect();
let btree_map: Arc<BTreeMap<u64, Value>> = Arc::new((0..n).map(kv).collect());
let btree_128_map: Arc<BTreeMap<Key128, Value>> = Arc::new((0..n).map(kv128).collect());
let hash_map: Arc<HashMap<u64, Value>> = Arc::new((0..n).map(kv).collect());
let hash_128_map: Arc<HashMap<Key128, Value>> = Arc::new((0..n).map(kv128).collect());
group.bench_function(BenchmarkId::new("patricia", n), |b| {
b.iter(|| {
for i in 0..n {
black_box(patricia_map.get(i));
black_box(patricia_map.get(i + n));
black_box(patricia_map.get(&(i * N)));
black_box(patricia_map.get(&(i * N + 3)));
}
});
});
group.bench_function(BenchmarkId::new("mpatricia", n), |b| {
b.iter(|| {
for i in 0..n {
black_box(mpatricia_map.get(&(i * N)));
black_box(mpatricia_map.get(&(i * N + 3)));
}
});
});
group.bench_function(BenchmarkId::new("mpatricia_128", n), |b| {
b.iter(|| {
for i in 0..n {
black_box(patricia_128_map.get(&key128(i * N)));
black_box(patricia_128_map.get(&key128(i * N + 3)));
}
});
});
group.bench_function(BenchmarkId::new("cow_btree", n), |b| {
b.iter(|| {
for i in 0..n {
black_box(btree_map.get(&i));
black_box(btree_map.get(&(i + n)));
black_box(btree_map.get(&(i * N)));
black_box(btree_map.get(&(i * N + 3)));
}
});
});
group.bench_function(BenchmarkId::new("cow_btree_128", n), |b| {
b.iter(|| {
for i in 0..n {
black_box(btree_128_map.get(&key128(i * N)));
black_box(btree_128_map.get(&key128(i * N + 3)));
}
});
});
group.bench_function(BenchmarkId::new("cow_hash", n), |b| {
b.iter(|| {
for i in 0..n {
black_box(hash_map.get(&i));
black_box(hash_map.get(&(i + n)));
black_box(hash_map.get(&(i * N)));
black_box(hash_map.get(&(i * N + 3)));
}
});
});
group.bench_function(BenchmarkId::new("cow_hash_128", n), |b| {
b.iter(|| {
for i in 0..n {
black_box(hash_128_map.get(&key128(i * N)));
black_box(hash_128_map.get(&key128(i * N + 3)));
}
});
});
}
group.finish();

let mut group = c.benchmark_group("Union");
for n in [10u64, 100, 1000].iter().cloned() {
let patricia_lmap: IntMap<Value> = (0..n).map(|x| (x, value(x))).collect();
let patricia_rmap: IntMap<Value> = (n / 2..n + n / 2).map(|x| (x, value(x))).collect();
for &n in BENCH_SIZES.iter() {
let patricia_lmap: IntMap<u64, Value> = (0..n).map(|x| (x, value(x))).collect();
let patricia_rmap: IntMap<u64, Value> = (n / 2..n + n / 2).map(|x| (x, value(x))).collect();

let mpatricia_lmap: MutableIntMap<u64, Value> = (0..n).map(|x| (x, value(x))).collect();
let mpatricia_rmap: MutableIntMap<u64, Value> =
(n / 2..n + n / 2).map(|x| (x, value(x))).collect();

let btree_lmap: Arc<BTreeMap<u64, Value>> =
Arc::new((0..n).map(|x| (x, value(x))).collect());
Expand All @@ -91,6 +246,16 @@ fn bench_intmap(c: &mut Criterion<ProcessTime>) {
BatchSize::SmallInput,
);
});
group.bench_function(BenchmarkId::new("mpatricia", n), |b| {
b.iter_batched(
|| (mpatricia_lmap.clone(), mpatricia_rmap.clone()),
|(mut l, r)| {
l.union(r);
black_box(l);
},
BatchSize::SmallInput,
);
});
group.bench_function(BenchmarkId::new("cow_btree", n), |b| {
b.iter_batched(
|| (Arc::clone(&btree_lmap), Arc::clone(&btree_rmap)),
Expand Down Expand Up @@ -121,8 +286,9 @@ fn bench_intmap(c: &mut Criterion<ProcessTime>) {
group.finish();

let mut group = c.benchmark_group("Iter");
for n in [10u64, 100, 1000].iter().cloned() {
let patricia_map: IntMap<Value> = (0..n).map(|x| (x, value(x))).collect();
for &n in BENCH_SIZES.iter() {
let patricia_map: IntMap<u64, Value> = (0..n).map(|x| (x, value(x))).collect();
let mpatricia_map: MutableIntMap<u64, Value> = (0..n).map(|x| (x, value(x))).collect();
let btree_map: Arc<BTreeMap<u64, Value>> =
Arc::new((0..n).map(|x| (x, value(x))).collect());
let hash_map: Arc<HashMap<u64, Value>> = Arc::new((0..n).map(|x| (x, value(x))).collect());
Expand All @@ -134,6 +300,13 @@ fn bench_intmap(c: &mut Criterion<ProcessTime>) {
}
});
});
group.bench_function(BenchmarkId::new("mpatricia", n), |b| {
b.iter(|| {
for e in mpatricia_map.iter() {
black_box(e);
}
});
});
group.bench_function(BenchmarkId::new("cow_btree", n), |b| {
b.iter(|| {
for e in btree_map.iter() {
Expand Down
Loading

0 comments on commit 760e1f7

Please sign in to comment.