diff --git a/src/lib.rs b/src/lib.rs index ed04c2d..882ec4c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -224,6 +224,7 @@ The `Guard` trait supports both local and owned guards. Note the `'guard` lifeti mod map; mod raw; +mod set; #[cfg(feature = "serde")] mod serde_impls; @@ -233,3 +234,4 @@ pub use map::{ Values, }; pub use seize::{Guard, LocalGuard, OwnedGuard}; +pub use set::{HashSet, HashSetBuilder, HashSetRef}; diff --git a/src/set.rs b/src/set.rs new file mode 100644 index 0000000..0ab754e --- /dev/null +++ b/src/set.rs @@ -0,0 +1,913 @@ +use crate::raw::{self, InsertResult}; +use seize::{Collector, Guard, LocalGuard, OwnedGuard}; + +use crate::map::ResizeMode; +use std::borrow::Borrow; +use std::collections::hash_map::RandomState; +use std::fmt; +use std::hash::{BuildHasher, Hash}; +use std::marker::PhantomData; + +/// A concurrent hash set. +/// +/// Most hash set operations require a [`Guard`](crate::Guard), which can be acquired through +/// [`HashSet::guard`] or using the [`HashSet::pin`] API. See the [crate-level documentation](crate#usage) +/// for details. +pub struct HashSet { + raw: raw::HashMap, +} + +// Safety: We only ever hand out &K/V through shared references to the map, +// so normal Send/Sync rules apply. We never expose owned or mutable references +// to keys or values. +// TODO +unsafe impl Send for HashSet {} +unsafe impl Sync for HashSet {} + +/// A builder for a [`HashSet`]. +/// +/// # Examples +/// +/// ```rust +/// use papaya::{HashSet, ResizeMode}; +/// use seize::Collector; +/// use std::collections::hash_map::RandomState; +/// +/// let set: HashSet = HashSet::builder() +/// // Set the initial capacity. +/// .capacity(2048) +/// // Set the hasher. +/// .hasher(RandomState::new()) +/// // Set the resize mode. +/// .resize_mode(ResizeMode::Blocking) +/// // Set a custom garbage collector. +/// .collector(Collector::new().batch_size(128)) +/// // Construct the hash set. 
+/// .build(); +/// ``` +pub struct HashSetBuilder { + hasher: S, + capacity: usize, + collector: Collector, + resize_mode: ResizeMode, + _kv: PhantomData, +} + +impl HashSetBuilder { + /// Set the hash builder used to hash keys. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed + /// to allow HashSets to be resistant to attacks that cause many collisions + /// and very poor performance. Setting it manually using this function can + /// expose a DoS attack vector. + /// + /// The `hash_builder` passed should implement the [`BuildHasher`] trait for + /// the HashSet to be useful, see its documentation for details. + pub fn hasher(self, hasher: S) -> HashSetBuilder { + HashSetBuilder { + hasher, + capacity: self.capacity, + collector: self.collector, + resize_mode: self.resize_mode, + _kv: PhantomData, + } + } +} + +impl HashSetBuilder { + /// Set the initial capacity of the set. + /// + /// The set should be able to hold at least `capacity` elements before resizing. + /// However, the capacity is an estimate, and the set may prematurely resize due + /// to poor hash distribution. If `capacity` is 0, the hash set will not allocate. + pub fn capacity(self, capacity: usize) -> HashSetBuilder { + HashSetBuilder { + capacity, + hasher: self.hasher, + collector: self.collector, + resize_mode: self.resize_mode, + _kv: PhantomData, + } + } + + /// Set the resizing mode of the set. See [`ResizeMode`] for details. + pub fn resize_mode(self, resize_mode: ResizeMode) -> Self { + HashSetBuilder { + resize_mode, + hasher: self.hasher, + capacity: self.capacity, + collector: self.collector, + _kv: PhantomData, + } + } + + /// Set the [`seize::Collector`] used for garbage collection. + /// + /// This method may be useful when you want more control over garbage collection. + /// + /// Note that all `Guard` references used to access the set must be produced by + /// the provided `collector`. 
+ pub fn collector(self, collector: Collector) -> Self { + HashSetBuilder { + collector, + hasher: self.hasher, + capacity: self.capacity, + resize_mode: self.resize_mode, + _kv: PhantomData, + } + } + + /// Construct a [`HashSet`] from the builder, using the configured options. + pub fn build(self) -> HashSet { + HashSet { + raw: raw::HashMap::new(self.capacity, self.hasher, self.collector, self.resize_mode), + } + } +} + +impl fmt::Debug for HashSetBuilder { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("HashSetBuilder") + .field("capacity", &self.capacity) + .field("collector", &self.collector) + .field("resize_mode", &self.resize_mode) + .finish() + } +} + +impl HashSet { + /// Creates an empty `HashSet`. + /// + /// The hash set is initially created with a capacity of 0, so it will not allocate + /// until it is first inserted into. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// let set: HashSet<&str> = HashSet::new(); + /// ``` + pub fn new() -> HashSet { + HashSet::with_capacity_and_hasher(0, RandomState::new()) + } + + /// Creates an empty `HashSet` with the specified capacity. + /// + /// The set should be able to hold at least `capacity` elements before resizing. + /// However, the capacity is an estimate, and the set may prematurely resize due + /// to poor hash distribution. If `capacity` is 0, the hash set will not allocate. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// let set: HashSet<&str> = HashSet::with_capacity(10); + /// ``` + pub fn with_capacity(capacity: usize) -> HashSet { + HashSet::with_capacity_and_hasher(capacity, RandomState::new()) + } + + /// Returns a builder for a `HashSet`. + /// + /// The builder can be used for more complex configuration, such as using + /// a custom [`Collector`], or [`ResizeMode`]. 
+ pub fn builder() -> HashSetBuilder { + HashSetBuilder { + capacity: 0, + hasher: RandomState::default(), + collector: Collector::new(), + resize_mode: ResizeMode::default(), + _kv: PhantomData, + } + } +} + +impl Default for HashSet +where + S: Default, +{ + fn default() -> Self { + HashSet::with_hasher(S::default()) + } +} + +impl HashSet { + /// Creates an empty `HashSet` which will use the given hash builder to hash + /// keys. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed + /// to allow HashSets to be resistant to attacks that cause many collisions + /// and very poor performance. Setting it manually using this function can + /// expose a DoS attack vector. + /// + /// The `hash_builder` passed should implement the [`BuildHasher`] trait for + /// the HashSet to be useful, see its documentation for details. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// use std::hash::RandomState; + /// + /// let s = RandomState::new(); + /// let set = HashSet::with_hasher(s); + /// set.pin().insert(1); + /// ``` + pub fn with_hasher(hash_builder: S) -> HashSet { + HashSet::with_capacity_and_hasher(0, hash_builder) + } + + /// Creates an empty `HashSet` with at least the specified capacity, using + /// `hash_builder` to hash the keys. + /// + /// The set should be able to hold at least `capacity` elements before resizing. + /// However, the capacity is an estimate, and the set may prematurely resize due + /// to poor hash distribution. If `capacity` is 0, the hash set will not allocate. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed + /// to allow HashSets to be resistant to attacks that cause many collisions + /// and very poor performance. Setting it manually using this function can + /// expose a DoS attack vector. + /// + /// The `hasher` passed should implement the [`BuildHasher`] trait for + /// the HashSet to be useful, see its documentation for details. 
+ /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// use std::hash::RandomState; + /// + /// let s = RandomState::new(); + /// let set = HashSet::with_capacity_and_hasher(10, s); + /// set.pin().insert(1); + /// ``` + pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> HashSet { + HashSet { + raw: raw::HashMap::new( + capacity, + hash_builder, + Collector::default(), + ResizeMode::default(), + ), + } + } + + /// Returns a pinned reference to the set. + /// + /// The returned reference manages a guard internally, preventing garbage collection + /// for as long as it is held. See the [crate-level documentation](crate#usage) for details. + #[inline] + pub fn pin(&self) -> HashSetRef<'_, K, S, LocalGuard<'_>> { + HashSetRef { + guard: self.guard(), + set: self, + } + } + + /// Returns a pinned reference to the set. + /// + /// Unlike [`HashSet::pin`], the returned reference implements `Send` and `Sync`, + /// allowing it to be held across `.await` points in work-stealing schedulers. + /// This is especially useful for iterators. + /// + /// The returned reference manages a guard internally, preventing garbage collection + /// for as long as it is held. See the [crate-level documentation](crate#usage) for details. + #[inline] + pub fn pin_owned(&self) -> HashSetRef<'_, K, S, OwnedGuard<'_>> { + HashSetRef { + guard: self.owned_guard(), + set: self, + } + } + + /// Returns a guard for use with this set. + /// + /// Note that holding on to a guard prevents garbage collection. + /// See the [crate-level documentation](crate#usage) for details. + #[inline] + pub fn guard(&self) -> LocalGuard<'_> { + self.raw.collector().enter() + } + + /// Returns an owned guard for use with this set. + /// + /// Owned guards implement `Send` and `Sync`, allowing them to be held across + /// `.await` points in work-stealing schedulers. This is especially useful + /// for iterators. + /// + /// Note that holding on to a guard prevents garbage collection. 
+ /// See the [crate-level documentation](crate#usage) for details. + #[inline] + pub fn owned_guard(&self) -> OwnedGuard<'_> { + self.raw.collector().enter_owned() + } +} + +impl HashSet +where + K: Hash + Eq, + S: BuildHasher, +{ + /// Returns the number of entries in the set. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set = HashSet::new(); + /// + /// set.pin().insert(1); + /// set.pin().insert(2); + /// assert!(set.len() == 2); + /// ``` + #[inline] + pub fn len(&self) -> usize { + self.raw.len() + } + + /// Returns `true` if the set is empty. Otherwise returns `false`. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set = HashSet::new(); + /// assert!(set.is_empty()); + /// set.pin().insert("a"); + /// assert!(!set.is_empty()); + /// ``` + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns `true` if the set contains a value for the specified key. + /// + /// The key may be any borrowed form of the set's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// [`Eq`]: std::cmp::Eq + /// [`Hash`]: std::hash::Hash + /// + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set = HashSet::new(); + /// let m = set.pin(); + /// m.insert(1); + /// assert_eq!(m.contains(&1), true); + /// assert_eq!(m.contains(&2), false); + /// ``` + #[inline] + pub fn contains(&self, key: &Q, guard: &impl Guard) -> bool + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { + self.get(key, guard).is_some() + } + + /// Returns a reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the set's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. 
+ /// + /// [`Eq`]: std::cmp::Eq + /// [`Hash`]: std::hash::Hash + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set = HashSet::new(); + /// let m = set.pin(); + /// m.insert(1); + /// assert_eq!(m.get(&1), Some(&1)); + /// assert_eq!(m.get(&2), None); + /// ``` + #[inline] + pub fn get<'g, Q>(&self, key: &Q, guard: &'g impl Guard) -> Option<&'g K> + where + K: Borrow + 'g, + Q: Hash + Eq + ?Sized, + { + match self.raw.root(guard).get(key, guard) { + Some((k, _)) => Some(k), + None => None, + } + } + + /// Inserts a value into the set. + /// + /// If the set did not have this key present, `true` is returned. + /// + /// If the set did have this key present, `false` is returned. The raw table + /// reports this case as `Replaced`, so the stored key is presumably swapped + /// for the new one (TODO confirm); this matters for types that can be `==` + /// without being identical. See the [standard library documentation]. + /// + /// [standard library documentation]: https://doc.rust-lang.org/std/collections/index.html#insert-and-complex-keys + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set = HashSet::new(); + /// assert_eq!(set.pin().insert(37), true); + /// assert_eq!(set.pin().is_empty(), false); + /// + /// // note: you can also re-use a set pin like so: + /// let m = set.pin(); + /// + /// m.insert(37); + /// assert_eq!(m.insert(37), false); + /// assert_eq!(m.get(&37), Some(&37)); + /// ``` + #[inline] + pub fn insert(&self, key: K, guard: &impl Guard) -> bool { + match self.raw.root(guard).insert(key, (), true, guard) { + InsertResult::Inserted(_) => true, + InsertResult::Replaced(_) => false, + InsertResult::Error { .. } => unreachable!(), + } + } + + /// Removes a key from the set, returning `true` if the key + /// was previously in the set. + /// + /// The key may be any borrowed form of the set's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. 
+ /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set = HashSet::new(); + /// set.pin().insert(1); + /// assert_eq!(set.pin().remove(&1), true); + /// assert_eq!(set.pin().remove(&1), false); + /// ``` + #[inline] + pub fn remove<'g, Q>(&self, key: &Q, guard: &'g impl Guard) -> bool + where + K: Borrow + 'g, + Q: Hash + Eq + ?Sized, + { + match self.raw.root(guard).remove(key, guard) { + Some((_, _)) => true, + None => false, + } + } + + /// Tries to reserve capacity for `additional` more elements to be inserted + /// in the `HashSet`. + /// + /// After calling this method, the set should be able to hold at least `additional` more + /// elements before resizing. However, the capacity is an estimate, and the set may prematurely + /// resize due to poor hash distribution. The collection may also reserve more space to avoid + /// frequent reallocations. + /// + /// # Panics + /// + /// Panics if the new allocation size overflows `usize`. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set: HashSet<&str> = HashSet::new(); + /// set.pin().reserve(10); + /// ``` + #[inline] + pub fn reserve(&self, additional: usize, guard: &impl Guard) { + self.raw.root(guard).reserve(additional, guard); + } + + /// Clears the set, removing all values. + /// + /// Note that this method will block until any in-progress resizes are + /// completed before proceeding. See the [consistency](crate#consistency) + /// section for details. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set = HashSet::new(); + /// + /// set.pin().insert(1); + /// set.pin().clear(); + /// assert!(set.pin().is_empty()); + /// ``` + #[inline] + pub fn clear(&self, guard: &impl Guard) { + self.raw.root(guard).clear(guard) + } + + /// Retains only the elements specified by the predicate. + /// + /// In other words, remove all values `v` for which `f(&v)` returns `false`. 
+ /// The elements are visited in unsorted (and unspecified) order. + /// + /// Note the function may be called more than once for a given key if its value is + /// concurrently modified during removal. + /// + /// Additionally, this method will block until any in-progress resizes are + /// completed before proceeding. See the [consistency](crate#consistency) + /// section for details. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let mut set: HashSet = (0..8).collect(); + /// set.pin().retain(|&v| v % 2 == 0); + /// assert_eq!(set.len(), 4); + /// assert_eq!(set.pin().contains(&1), false); + /// assert_eq!(set.pin().contains(&2), true); + /// ``` + #[inline] + pub fn retain(&mut self, mut f: F, guard: &impl Guard) + where + F: FnMut(&K) -> bool, + { + self.raw.root(guard).retain(|k, _| f(k), guard) + } + + /// An iterator visiting all values in arbitrary order. + /// + /// Note that this method will block until any in-progress resizes are + /// completed before proceeding. See the [consistency](crate#consistency) + /// section for details. 
+ /// + /// # Examples + /// + /// ``` + /// use papaya::HashSet; + /// + /// let set = HashSet::from([ + /// "a", + /// "b", + /// "c" + /// ]); + /// for val in set.pin().iter() { + /// println!("val: {val}"); + /// } + /// ``` + #[inline] + pub fn iter<'g, G>(&self, guard: &'g G) -> Iter<'g, K, G> + where + G: Guard, + { + Iter { + raw: self.raw.root(guard).iter(guard), + } + } +} + +impl PartialEq for HashSet +where + K: Hash + Eq, + S: BuildHasher, +{ + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + + let (guard1, guard2) = (&self.guard(), &other.guard()); + + let mut iter = self.iter(guard1); + iter.all(|key| other.get(key, guard2).is_some()) + } +} + +impl Eq for HashSet +where + K: Hash + Eq, + S: BuildHasher, +{ +} + +impl fmt::Debug for HashSet +where + K: Hash + Eq + fmt::Debug, + S: BuildHasher, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let guard = self.guard(); + f.debug_set().entries(self.iter(&guard)).finish() + } +} + +impl Extend for &HashSet +where + K: Hash + Eq, + S: BuildHasher, +{ + fn extend>(&mut self, iter: T) { + // from `hashbrown::HashSet::extend`: + // Keys may be already present or show multiple times in the iterator. + // Reserve the entire hint lower bound if the set is empty. + // Otherwise reserve half the hint (rounded up), so the set + // will only resize twice in the worst case. 
+ let iter = iter.into_iter(); + let reserve = if self.is_empty() { + iter.size_hint().0 + } else { + (iter.size_hint().0 + 1) / 2 + }; + + let guard = self.guard(); + self.reserve(reserve, &guard); + + for key in iter { + self.insert(key, &guard); + } + } +} + +impl<'a, K, S> Extend<&'a K> for &HashSet +where + K: Copy + Hash + Eq + 'a, + S: BuildHasher, +{ + fn extend>(&mut self, iter: T) { + self.extend(iter.into_iter().copied()); + } +} + +impl From<[K; N]> for HashSet +where + K: Hash + Eq, +{ + fn from(arr: [K; N]) -> Self { + HashSet::from_iter(arr) + } +} + +impl FromIterator for HashSet +where + K: Hash + Eq, + S: BuildHasher + Default, +{ + fn from_iter>(iter: T) -> Self { + let mut iter = iter.into_iter(); + + if let Some(key) = iter.next() { + let (lower, _) = iter.size_hint(); + let set = HashSet::with_capacity_and_hasher(lower.saturating_add(1), S::default()); + + // Ideally we could use an unprotected guard here. However, `insert` + // returns references to values that were replaced and retired, so + // we need a "real" guard. A `raw_insert` method that strictly returns + // pointers would fix this. + { + let set = set.pin(); + set.insert(key); + for key in iter { + set.insert(key); + } + } + + set + } else { + Self::default() + } + } +} + +impl Clone for HashSet +where + K: Clone + Hash + Eq, + S: BuildHasher + Clone, +{ + fn clone(&self) -> HashSet { + let other = HashSet::builder() + .capacity(self.len()) + .hasher(self.raw.hasher.clone()) + .collector(self.raw.collector().clone()) + .build(); + + { + let (guard1, guard2) = (&self.guard(), &other.guard()); + for key in self.iter(guard1) { + other.insert(key.clone(), guard2); + } + } + + other + } +} + +/// A pinned reference to a [`HashSet`]. +/// +/// This type is created with [`HashSet::pin`] and can be used to easily access a [`HashSet`] +/// without explicitly managing a guard. See the [crate-level documentation](crate#usage) for details. 
+pub struct HashSetRef<'set, K, S, G> { + guard: G, + set: &'set HashSet, +} + +impl<'set, K, S, G> HashSetRef<'set, K, S, G> +where + K: Hash + Eq, + S: BuildHasher, + G: Guard, +{ + /// Returns a reference to the inner [`HashSet`]. + #[inline] + pub fn set(&self) -> &'set HashSet { + self.set + } + + /// Returns the number of entries in the set. + /// + /// See [`HashSet::len`] for details. + #[inline] + pub fn len(&self) -> usize { + self.set.raw.len() + } + + /// Returns `true` if the set is empty. Otherwise returns `false`. + /// + /// See [`HashSet::is_empty`] for details. + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Returns `true` if the set contains a value for the specified key. + /// + /// See [`HashSet::contains`] for details. + #[inline] + pub fn contains(&self, key: &Q) -> bool + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { + self.get(key).is_some() + } + + /// Returns a reference to the value corresponding to the key. + /// + /// See [`HashSet::get`] for details. + #[inline] + pub fn get(&self, key: &Q) -> Option<&K> + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { + match self.root().get(key, &self.guard) { + Some((k, _)) => Some(k), + None => None, + } + } + + /// Inserts a value into the set. + /// + /// See [`HashSet::insert`] for details. + #[inline] + pub fn insert(&self, key: K) -> bool { + match self.root().insert(key, (), true, &self.guard) { + InsertResult::Inserted(_) => true, + InsertResult::Replaced(_) => false, + InsertResult::Error { .. } => unreachable!(), + } + } + + /// Removes a key from the set, returning `true` if the key + /// was previously in the set. + /// + /// See [`HashSet::remove`] for details. + #[inline] + pub fn remove(&self, key: &Q) -> bool + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { + match self.root().remove(key, &self.guard) { + Some((_, _)) => true, + None => false, + } + } + + /// Clears the set, removing all values. 
+ /// + /// See [`HashSet::clear`] for details. + #[inline] + pub fn clear(&self) { + self.root().clear(&self.guard) + } + + /// Retains only the elements specified by the predicate. + /// + /// See [`HashSet::retain`] for details. + #[inline] + pub fn retain(&mut self, mut f: F) + where + F: FnMut(&K) -> bool, + { + self.root().retain(|k, _| f(k), &self.guard) + } + + /// Tries to reserve capacity for `additional` more elements to be inserted + /// in the set. + /// + /// See [`HashSet::reserve`] for details. + #[inline] + pub fn reserve(&self, additional: usize) { + self.root().reserve(additional, &self.guard) + } + + /// An iterator visiting all values in arbitrary order. + /// The iterator element type is `&K`. + /// + /// See [`HashSet::iter`] for details. + #[inline] + pub fn iter(&self) -> Iter<'_, K, G> { + Iter { + raw: self.root().iter(&self.guard), + } + } + + #[inline] + fn root(&self) -> raw::HashMapRef<'_, K, (), S> { + // Safety: A `HashSetRef` can only be created through `HashSet::pin` or + // `HashSet::pin_owned`, so we know the guard belongs to our collector. + unsafe { self.set.raw.root_unchecked(&self.guard) } + } +} + +impl fmt::Debug for HashSetRef<'_, K, S, G> +where + K: Hash + Eq + fmt::Debug, + S: BuildHasher, + G: Guard, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() + } +} + +impl<'a, K, S, G> IntoIterator for &'a HashSetRef<'_, K, S, G> +where + K: Hash + Eq, + S: BuildHasher, + G: Guard, +{ + type Item = &'a K; + type IntoIter = Iter<'a, K, G>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An iterator over a set's entries. +/// +/// This struct is created by the [`iter`](HashSet::iter) method on [`HashSet`]. See its documentation for details. 
+pub struct Iter<'g, K, G> { + raw: raw::Iter<'g, K, (), G>, +} + +impl<'g, K: 'g, G> Iterator for Iter<'g, K, G> +where + G: Guard, +{ + type Item = &'g K; + + #[inline] + fn next(&mut self) -> Option { + self.raw.next().map(|(k, _)| k) + } +} + +impl fmt::Debug for Iter<'_, K, G> +where + K: fmt::Debug, + G: Guard, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list() + .entries(Iter { + raw: self.raw.clone(), + }) + .finish() + } +} diff --git a/tests/basic_set.rs b/tests/basic_set.rs new file mode 100644 index 0000000..cb01882 --- /dev/null +++ b/tests/basic_set.rs @@ -0,0 +1,613 @@ +// Adapted from: https://github.com/jonhoo/flurry/blob/main/tests/basic.rs + +use papaya::HashSet; + +use std::hash::{BuildHasher, BuildHasherDefault, Hasher}; +use std::sync::Arc; + +mod common; +use common::with_set; + +#[test] +fn new() { + with_set::(|set| drop(set())); +} + +#[test] +fn clear() { + with_set::(|set| { + let set = set(); + let guard = set.guard(); + { + set.insert(0, &guard); + set.insert(1, &guard); + set.insert(2, &guard); + set.insert(3, &guard); + set.insert(4, &guard); + } + set.clear(&guard); + assert!(set.is_empty()); + }); +} + +#[test] +fn insert() { + with_set::(|set| { + let set = set(); + let guard = set.guard(); + assert_eq!(set.insert(42, &guard), true); + assert_eq!(set.insert(42, &guard), false); + assert_eq!(set.len(), 1); + }); +} + +#[test] +fn get_empty() { + with_set::(|set| { + let set = set(); + let guard = set.guard(); + let e = set.get(&42, &guard); + assert!(e.is_none()); + }); +} + +#[test] +fn remove_empty() { + with_set::(|set| { + let set = set(); + let guard = set.guard(); + assert_eq!(set.remove(&42, &guard), false); + }); +} + +#[test] +fn insert_and_remove() { + with_set::(|set| { + let set = set(); + let guard = set.guard(); + assert!(set.insert(42, &guard)); + assert!(set.remove(&42, &guard)); + assert!(set.get(&42, &guard).is_none()); + }); +} + +#[test] +fn insert_and_get() { + with_set::(|set| { + 
let set = set(); + set.insert(42, &set.guard()); + + { + let guard = set.guard(); + let e = set.get(&42, &guard).unwrap(); + assert_eq!(e, &42); + } + }); +} + +#[test] +fn reinsert() { + with_set::(|set| { + let set = set(); + let guard = set.guard(); + assert!(set.insert(42, &guard)); + assert!(!set.insert(42, &guard)); + { + let guard = set.guard(); + let e = set.get(&42, &guard).unwrap(); + assert_eq!(e, &42); + } + }); +} + +#[test] +fn concurrent_insert() { + with_set::(|set| { + let set = set(); + let set = Arc::new(set); + + let set1 = set.clone(); + let t1 = std::thread::spawn(move || { + for i in 0..64 { + set1.insert(i, &set1.guard()); + } + }); + let set2 = set.clone(); + let t2 = std::thread::spawn(move || { + for i in 0..64 { + set2.insert(i, &set2.guard()); + } + }); + + t1.join().unwrap(); + t2.join().unwrap(); + + let guard = set.guard(); + for i in 0..64 { + let v = set.get(&i, &guard).unwrap(); + assert!(v == &i); + } + }); +} + +#[test] +fn concurrent_remove() { + with_set::(|set| { + let set = set(); + let set = Arc::new(set); + + { + let guard = set.guard(); + for i in 0..64 { + set.insert(i, &guard); + } + } + + let set1 = set.clone(); + let t1 = std::thread::spawn(move || { + let guard = set1.guard(); + for i in 0..64 { + set1.remove(&i, &guard); + } + }); + let set2 = set.clone(); + let t2 = std::thread::spawn(move || { + let guard = set2.guard(); + for i in 0..64 { + set2.remove(&i, &guard); + } + }); + + t1.join().unwrap(); + t2.join().unwrap(); + + // after joining the threads, the set should be empty + let guard = set.guard(); + for i in 0..64 { + assert!(set.get(&i, &guard).is_none()); + } + }); +} + +#[test] +#[cfg(not(miri))] +fn concurrent_resize_and_get() { + if cfg!(papaya_stress) { + return; + } + + with_set::(|set| { + let set = set(); + let set = Arc::new(set); + + { + let guard = set.guard(); + for i in 0..1024 { + set.insert(i, &guard); + } + } + + let set1 = set.clone(); + // t1 is using reserve to trigger a bunch of resizes 
+ let t1 = std::thread::spawn(move || { + let guard = set1.guard(); + // there should be 2 ** 10 capacity already, so trigger additional resizes + for power in 11..16 { + set1.reserve(1 << power, &guard); + } + }); + let set2 = set.clone(); + // t2 is retrieving existing keys a lot, attempting to encounter a BinEntry::Moved + let t2 = std::thread::spawn(move || { + let guard = set2.guard(); + for _ in 0..32 { + for i in 0..1024 { + let v = set2.get(&i, &guard).unwrap(); + assert_eq!(v, &i); + } + } + }); + + t1.join().unwrap(); + t2.join().unwrap(); + + // make sure all the entries still exist after all the resizes + { + let guard = set.guard(); + + for i in 0..1024 { + let v = set.get(&i, &guard).unwrap(); + assert_eq!(v, &i); + } + } + }); +} + +#[test] +fn current_kv_dropped() { + let dropped1 = Arc::new(0); + + with_set::>(|set| { + let set = set(); + set.insert(dropped1.clone(), &set.guard()); + assert_eq!(Arc::strong_count(&dropped1), 2); + + drop(set); + + // dropping the set should immediately drop (not deferred) all keys and values + assert_eq!(Arc::strong_count(&dropped1), 1); + }); +} + +#[test] +fn empty_sets_equal() { + with_set::(|set1| { + let set1 = set1(); + with_set::(|set2| { + let set2 = set2(); + assert_eq!(set1, set2); + assert_eq!(set2, set1); + }); + }); +} + +#[test] +fn different_size_sets_not_equal() { + with_set::(|set1| { + let set1 = set1(); + with_set::(|set2| { + let set2 = set2(); + { + let guard1 = set1.guard(); + let guard2 = set2.guard(); + + set1.insert(1, &guard1); + set1.insert(2, &guard1); + set1.insert(3, &guard1); + + set2.insert(1, &guard2); + set2.insert(2, &guard2); + } + + assert_ne!(set1, set2); + assert_ne!(set2, set1); + }); + }); +} + +#[test] +fn same_values_equal() { + with_set::(|set1| { + let set1 = set1(); + with_set::(|set2| { + let set2 = set2(); + { + set1.pin().insert(1); + set2.pin().insert(1); + } + + assert_eq!(set1, set2); + assert_eq!(set2, set1); + }); + }); +} + +#[test] +fn 
different_values_not_equal() { + with_set::(|set1| { + let set1 = set1(); + with_set::(|set2| { + let set2 = set2(); + { + set1.pin().insert(1); + set2.pin().insert(2); + } + + assert_ne!(set1, set2); + assert_ne!(set2, set1); + }); + }); +} + +#[test] +fn clone_set_empty() { + with_set::<&'static str>(|set| { + let set = set(); + let cloned_set = set.clone(); + assert_eq!(set.len(), cloned_set.len()); + assert_eq!(&set, &cloned_set); + assert_eq!(cloned_set.len(), 0); + }); +} + +#[test] +// Test that same values exists in both sets (original and cloned) +fn clone_set_filled() { + with_set::<&'static str>(|set| { + let set = set(); + set.insert("FooKey", &set.guard()); + set.insert("BarKey", &set.guard()); + let cloned_set = set.clone(); + assert_eq!(set.len(), cloned_set.len()); + assert_eq!(&set, &cloned_set); + + // test that we are not setting the same tables + set.insert("NewItem", &set.guard()); + assert_ne!(&set, &cloned_set); + }); +} + +#[test] +fn default() { + with_set::(|set| { + let set = set(); + let guard = set.guard(); + set.insert(42, &guard); + + assert_eq!(set.get(&42, &guard), Some(&42)); + }); +} + +#[test] +fn debug() { + with_set::(|set| { + let set = set(); + let guard = set.guard(); + set.insert(42, &guard); + set.insert(16, &guard); + + let formatted = format!("{:?}", set); + + assert!(formatted == "{42, 16}" || formatted == "{16, 42}"); + }); +} + +#[test] +fn extend() { + if cfg!(papaya_stress) { + return; + } + + with_set::(|set| { + let set = set(); + let guard = set.guard(); + + let mut entries: Vec = vec![42, 16, 38]; + entries.sort_unstable(); + + (&set).extend(entries.clone().into_iter()); + + let mut collected: Vec = set.iter(&guard).map(|key| *key).collect(); + collected.sort_unstable(); + + assert_eq!(entries, collected); + }); +} + +#[test] +fn extend_ref() { + if cfg!(papaya_stress) { + return; + } + + with_set::(|set| { + let set = set(); + let mut entries: Vec<&usize> = vec![&42, &36, &18]; + entries.sort(); + + 
(&set).extend(entries.clone().into_iter()); + + let guard = set.guard(); + let mut collected: Vec<&usize> = set.iter(&guard).collect(); + collected.sort(); + + assert_eq!(entries, collected); + }); +} + +#[test] +fn from_iter_empty() { + use std::iter::FromIterator; + + let entries: Vec = Vec::new(); + let set: HashSet = HashSet::from_iter(entries.into_iter()); + + assert_eq!(set.len(), 0) +} + +#[test] +fn from_iter_repeated() { + use std::iter::FromIterator; + + let entries = vec![0, 0, 0]; + let set: HashSet<_> = HashSet::from_iter(entries.into_iter()); + let set = set.pin(); + assert_eq!(set.len(), 1); + assert_eq!(set.iter().collect::>(), vec![&0]) +} + +#[test] +fn len() { + with_set::(|set| { + let set = set(); + let len = if cfg!(miri) { 100 } else { 10_000 }; + for i in 0..len { + set.pin().insert(i); + } + assert_eq!(set.len(), len); + }); +} + +#[test] +fn iter() { + if cfg!(papaya_stress) { + return; + } + + with_set::(|set| { + let set = set(); + let len = if cfg!(miri) { 100 } else { 10_000 }; + for i in 0..len { + assert_eq!(set.pin().insert(i), true); + } + + let v: Vec<_> = (0..len).collect(); + let mut got: Vec<_> = set.pin().iter().map(|&k| k).collect(); + got.sort(); + assert_eq!(v, got); + }); +} + +#[test] +fn retain_empty() { + with_set::(|set| { + let set = set(); + set.pin().retain(|_| false); + assert_eq!(set.len(), 0); + }); +} + +#[test] +fn retain_all_false() { + with_set::(|set| { + let set = set(); + for i in 0..10 { + set.pin().insert(i); + } + set.pin().retain(|_| false); + assert_eq!(set.len(), 0); + }); +} + +#[test] +fn retain_all_true() { + with_set::(|set| { + let set = set(); + for i in 0..10 { + set.pin().insert(i); + } + set.pin().retain(|_| true); + assert_eq!(set.len(), 10); + }); +} + +#[test] +fn retain_some() { + with_set::(|set| { + let set = set(); + for i in 0..10 { + set.pin().insert(i); + } + set.pin().retain(|&k| k >= 5); + assert_eq!(set.len(), 5); + let mut got: Vec<_> = set.pin().iter().copied().collect(); + 
got.sort(); + assert_eq!(got, [5, 6, 7, 8, 9]); + }); +} + +#[test] +fn mixed() { + const LEN: usize = if cfg!(miri) { 48 } else { 1024 }; + with_set::(|set| { + let set = set(); + assert!(set.pin().get(&100).is_none()); + set.pin().insert(100); + assert_eq!(set.pin().get(&100), Some(&100)); + + assert!(set.pin().get(&200).is_none()); + set.pin().insert(200); + assert_eq!(set.pin().get(&200), Some(&200)); + + assert!(set.pin().get(&300).is_none()); + + assert_eq!(set.pin().remove(&100), true); + assert_eq!(set.pin().remove(&200), true); + assert_eq!(set.pin().remove(&300), false); + + assert!(set.pin().get(&100).is_none()); + assert!(set.pin().get(&200).is_none()); + assert!(set.pin().get(&300).is_none()); + + for i in 0..LEN { + assert_eq!(set.pin().insert(i), true); + } + + for i in 0..LEN { + assert_eq!(set.pin().get(&i), Some(&i)); + } + + for i in 0..LEN { + assert_eq!(set.pin().remove(&i), true); + } + + for i in 0..LEN { + assert_eq!(set.pin().get(&i), None); + } + + for i in 0..(LEN * 2) { + assert_eq!(set.pin().insert(i), true); + } + + for i in 0..(LEN * 2) { + assert_eq!(set.pin().get(&i), Some(&i)); + } + }); +} + +// run tests with hashers that create unrealistically long probe sequences +mod hasher { + use super::*; + + fn check() { + let range = if cfg!(miri) { 0..16 } else { 0..100 }; + + with_set::(|set| { + let set = set(); + let guard = set.guard(); + for i in range.clone() { + set.insert(i, &guard); + } + + assert!(!set.contains(&i32::min_value(), &guard)); + assert!(!set.contains(&(range.start - 1), &guard)); + for i in range.clone() { + assert!(set.contains(&i, &guard)); + } + assert!(!set.contains(&range.end, &guard)); + assert!(!set.contains(&i32::max_value(), &guard)); + }); + } + + #[test] + fn test_zero_hasher() { + #[derive(Default)] + pub struct ZeroHasher; + + impl Hasher for ZeroHasher { + fn finish(&self) -> u64 { + 0 + } + + fn write(&mut self, _: &[u8]) {} + } + + check::>(); + } + + #[test] + fn test_max_hasher() { + 
#[derive(Default)] + struct MaxHasher; + + impl Hasher for MaxHasher { + fn finish(&self) -> u64 { + u64::max_value() + } + + fn write(&mut self, _: &[u8]) {} + } + + check::>(); + } +} diff --git a/tests/common.rs b/tests/common.rs index e537180..704d018 100644 --- a/tests/common.rs +++ b/tests/common.rs @@ -1,6 +1,6 @@ #![allow(dead_code)] -use papaya::{HashMap, ResizeMode}; +use papaya::{HashMap, HashSet, ResizeMode}; // Run the test on different configurations of a `HashMap`. pub fn with_map(mut test: impl FnMut(&dyn Fn() -> HashMap)) { @@ -29,6 +29,33 @@ pub fn with_map(mut test: impl FnMut(&dyn Fn() -> HashMap)) { ); } +// Run the test on different configurations of a `HashSet`. +pub fn with_set(mut test: impl FnMut(&dyn Fn() -> HashSet)) { + // Blocking resize mode. + if !cfg!(papaya_stress) { + test(&(|| HashSet::builder().resize_mode(ResizeMode::Blocking).build())); + } + + // Incremental resize mode with a small chunk to stress operations on nested tables. + test( + &(|| { + HashSet::builder() + .resize_mode(ResizeMode::Incremental(1)) + .build() + }), + ); + + // Incremental resize mode with a medium-sized chunk to promote interference with incremental + // resizing. + test( + &(|| { + HashSet::builder() + .resize_mode(ResizeMode::Incremental(128)) + .build() + }), + ); +} + // Prints a log message if `RUST_LOG=debug` is set. #[macro_export] macro_rules! debug {