From c9139d92ffa7a3c94c31b7df8e40c148b823f090 Mon Sep 17 00:00:00 2001 From: behzad nouri Date: Thu, 19 Dec 2024 09:12:02 -0600 Subject: [PATCH 1/2] builds deduper hashers once per reset cycle The commit avoids RandomState::build_hasher on each item and instead just clones initialized hashers which are pretty cheap to clone: https://github.com/tkaitchuck/aHash/blob/7d5c661a7/src/aes_hash.rs#L18-L23 --- perf/src/deduper.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/perf/src/deduper.rs b/perf/src/deduper.rs index b25bcc93ed4449..2a985b9c7a0383 100644 --- a/perf/src/deduper.rs +++ b/perf/src/deduper.rs @@ -2,7 +2,7 @@ use { crate::packet::PacketBatch, - ahash::RandomState, + ahash::{AHasher, RandomState}, rand::Rng, std::{ hash::{BuildHasher, Hash, Hasher}, @@ -16,7 +16,7 @@ use { pub struct Deduper { num_bits: u64, bits: Vec, - state: [RandomState; K], + hashers: [AHasher; K], clock: Instant, popcount: AtomicU64, // Number of one bits in self.bits. _phantom: PhantomData, @@ -28,7 +28,7 @@ impl Deduper { let size = usize::try_from(size).unwrap(); Self { num_bits, - state: std::array::from_fn(|_| new_random_state(rng)), + hashers: std::array::from_fn(|_| new_hasher(rng)), clock: Instant::now(), bits: repeat_with(AtomicU64::default).take(size).collect(), popcount: AtomicU64::default(), @@ -54,7 +54,7 @@ impl Deduper { assert!(0.0 < false_positive_rate && false_positive_rate < 1.0); let saturated = self.false_positive_rate() >= false_positive_rate; if saturated || self.clock.elapsed() >= reset_cycle { - self.state = std::array::from_fn(|_| new_random_state(rng)); + self.hashers = std::array::from_fn(|_| new_hasher(rng)); self.clock = Instant::now(); self.bits.fill_with(AtomicU64::default); self.popcount = AtomicU64::default(); @@ -67,8 +67,7 @@ impl Deduper { #[allow(clippy::arithmetic_side_effects)] pub fn dedup(&self, data: &T) -> bool { let mut out = true; - let hashers = self.state.iter().map(RandomState::build_hasher); - for 
mut hasher in hashers { + for mut hasher in self.hashers.iter().map(AHasher::clone) { data.hash(&mut hasher); let hash: u64 = hasher.finish() % self.num_bits; let index = (hash >> 6) as usize; @@ -83,8 +82,8 @@ impl Deduper { } } -fn new_random_state(rng: &mut R) -> RandomState { - RandomState::with_seeds(rng.gen(), rng.gen(), rng.gen(), rng.gen()) +fn new_hasher(rng: &mut R) -> AHasher { + RandomState::with_seeds(rng.gen(), rng.gen(), rng.gen(), rng.gen()).build_hasher() } pub fn dedup_packets_and_count_discards( From 1a042860d244a400be9bfe625de9edf2fcfb72ba Mon Sep 17 00:00:00 2001 From: behzad nouri Date: Thu, 19 Dec 2024 11:56:37 -0600 Subject: [PATCH 2/2] clones inside the loop --- perf/src/deduper.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/perf/src/deduper.rs b/perf/src/deduper.rs index 2a985b9c7a0383..36213cf1f65877 100644 --- a/perf/src/deduper.rs +++ b/perf/src/deduper.rs @@ -67,7 +67,8 @@ impl Deduper { #[allow(clippy::arithmetic_side_effects)] pub fn dedup(&self, data: &T) -> bool { let mut out = true; - for mut hasher in self.hashers.iter().map(AHasher::clone) { + for hasher in &self.hashers { + let mut hasher = hasher.clone(); data.hash(&mut hasher); let hash: u64 = hasher.finish() % self.num_bits; let index = (hash >> 6) as usize;