From 3baf4e0bb2c7050124253b5d85735e1a40dcc7bd Mon Sep 17 00:00:00 2001 From: Ryan Daum Date: Fri, 5 Jan 2024 00:02:41 -0500 Subject: [PATCH] Reduce copies in relation indexes; replace use of Vec for keys with the actual domain/codomain slices, eliminating a copy. 3-4% throughput improvement. --- crates/db/src/tuplebox/base_relation.rs | 33 ++++++++---------- crates/db/src/tuplebox/tx/transaction.rs | 25 +++++++------- crates/db/src/tuplebox/tx/working_set.rs | 44 +++++++++--------------- crates/values/src/util/slice_ref.rs | 6 ++++ 4 files changed, 49 insertions(+), 59 deletions(-) diff --git a/crates/db/src/tuplebox/base_relation.rs b/crates/db/src/tuplebox/base_relation.rs index 8e842f8d..277541ed 100644 --- a/crates/db/src/tuplebox/base_relation.rs +++ b/crates/db/src/tuplebox/base_relation.rs @@ -55,11 +55,11 @@ pub struct BaseRelation { /// The domain-indexed tuples in this relation, which are in this case expressed purely as bytes. /// It is up to the caller to interpret them. - index_domain: im::HashMap, TupleRef>, + index_domain: im::HashMap, /// Optional reverse index from codomain -> tuples, which is used to support (more) efficient /// reverse lookups. - index_codomain: Option, HashSet>>, + index_codomain: Option>>, } impl BaseRelation { @@ -87,7 +87,7 @@ impl BaseRelation { self.index_codomain .as_mut() .unwrap() - .entry(tuple.codomain().as_slice().to_vec()) + .entry(tuple.codomain()) .or_default() .insert(tuple.clone()); } @@ -102,20 +102,19 @@ impl BaseRelation { tuple.update_timestamp(self.id, self.slotbox.clone(), 0); // Update the domain index to point to the tuple... - self.index_domain - .insert(tuple.domain().as_slice().to_vec(), tuple.clone()); + self.index_domain.insert(tuple.domain(), tuple.clone()); // ... and update the secondary index if there is one. if let Some(index) = &mut self.index_codomain { index - .entry(tuple.codomain().as_slice().to_vec()) + .entry(tuple.codomain()) .or_insert_with(HashSet::new) .insert(tuple); } } pub fn seek_by_domain(&self, domain: SliceRef) -> Option { - self.index_domain.get(domain.as_slice()).cloned() + self.index_domain.get(&domain).cloned() } pub fn predicate_scan bool>(&self, f: &F) -> HashSet { @@ -131,7 +130,7 @@ impl BaseRelation { // We could do full-scan, but in this case we're going to assume that the caller knows // what they're doing. let codomain_index = self.index_codomain.as_ref().expect("No codomain index"); - if let Some(tuple_refs) = codomain_index.get(codomain.as_slice()) { + if let Some(tuple_refs) = codomain_index.get(&codomain) { tuple_refs.iter().cloned().collect() } else { HashSet::new() @@ -139,13 +138,13 @@ impl BaseRelation { } pub fn remove_by_domain(&mut self, domain: SliceRef) { // Seek the tuple id... - if let Some(tuple_ref) = self.index_domain.remove(domain.as_slice()) { + if let Some(tuple_ref) = self.index_domain.remove(&domain) { self.tuples.remove(&tuple_ref); // And remove from codomain index, if it exists in there if let Some(index) = &mut self.index_codomain { index - .entry(domain.as_slice().to_vec()) + .entry(domain) .or_insert_with(HashSet::new) .remove(&tuple_ref); } @@ -155,16 +154,15 @@ impl BaseRelation { /// Update or insert a tuple into the relation. pub fn upsert_tuple(&mut self, tuple: TupleRef) { // First check the domain->tuple id index to see if we're inserting or updating. - let existing_tuple_ref = self.index_domain.get(tuple.domain().as_slice()).cloned(); + let existing_tuple_ref = self.index_domain.get(&tuple.domain()).cloned(); match existing_tuple_ref { None => { // Insert into the tuple list and the index. - self.index_domain - .insert(tuple.domain().as_slice().to_vec(), tuple.clone()); + self.index_domain.insert(tuple.domain(), tuple.clone()); self.tuples.insert(tuple.clone()); if let Some(codomain_index) = &mut self.index_codomain { codomain_index - .entry(tuple.codomain().as_slice().to_vec()) + .entry(tuple.codomain()) .or_insert_with(HashSet::new) .insert(tuple); } @@ -173,16 +171,15 @@ impl BaseRelation { // We need the old value so we can update the codomain index. if let Some(codomain_index) = &mut self.index_codomain { codomain_index - .entry(existing_tuple.codomain().as_slice().to_vec()) + .entry(existing_tuple.codomain()) .or_insert_with(HashSet::new) .remove(&existing_tuple); codomain_index - .entry(tuple.codomain().as_slice().to_vec()) + .entry(tuple.codomain()) .or_insert_with(HashSet::new) .insert(tuple.clone()); } - self.index_domain - .insert(tuple.domain().as_slice().to_vec(), tuple.clone()); + self.index_domain.insert(tuple.domain(), tuple.clone()); self.tuples.remove(&existing_tuple); self.tuples.insert(tuple); } diff --git a/crates/db/src/tuplebox/tx/transaction.rs b/crates/db/src/tuplebox/tx/transaction.rs index d5928fc0..3e98ffef 100644 --- a/crates/db/src/tuplebox/tx/transaction.rs +++ b/crates/db/src/tuplebox/tx/transaction.rs @@ -331,8 +331,8 @@ impl CommitSet { struct TransientRelation { _id: RelationId, tuples: Vec<(SliceRef, SliceRef)>, - domain_tuples: HashMap, usize>, - codomain_domain: Option, HashSet>>, + domain_tuples: HashMap, + codomain_domain: Option>>, } impl TransientRelation { @@ -343,7 +343,7 @@ impl TransientRelation { ) -> Result<(SliceRef, SliceRef), TupleError> { let tuple_idx = self .domain_tuples - .get(domain.as_slice()) + .get(&domain) .copied() .ok_or(TupleError::NotFound); tuple_idx.map(|id| self.tuples[id].clone()) @@ -360,7 +360,7 @@ impl TransientRelation { // what they're doing. let codomain_domain = self.codomain_domain.as_ref().expect("No codomain index"); let tuple_indexes = codomain_domain - .get(codomain.as_slice()) + .get(&codomain) .cloned() .ok_or(TupleError::NotFound)?; Ok(tuple_indexes @@ -382,13 +382,13 @@ impl TransientRelation { domain: SliceRef, codomain: SliceRef, ) -> Result<(), TupleError> { - if self.domain_tuples.contains_key(domain.as_slice()) { + if self.domain_tuples.contains_key(&domain) { return Err(TupleError::Duplicate); } let tuple_idx = self.tuples.len(); self.tuples.push((domain.clone(), codomain.clone())); self.domain_tuples - .insert(domain.as_slice().to_vec(), tuple_idx) + .insert(domain, tuple_idx) .map(|_| ()) .ok_or(TupleError::Duplicate) } @@ -401,7 +401,7 @@ impl TransientRelation { ) -> Result<(), TupleError> { let tuple_idx = self .domain_tuples - .get(domain.as_slice()) + .get(&domain) .copied() .ok_or(TupleError::NotFound)?; if self.codomain_domain.is_some() { @@ -417,7 +417,7 @@ impl TransientRelation { domain: SliceRef, codomain: SliceRef, ) -> Result<(), TupleError> { - let tuple_idx = match self.domain_tuples.get(domain.as_slice()) { + let tuple_idx = match self.domain_tuples.get(&domain) { Some(tuple_idx) => { self.tuples[*tuple_idx] = (domain, codomain.clone()); *tuple_idx @@ -425,8 +425,7 @@ impl TransientRelation { None => { let tuple_idx = self.tuples.len(); self.tuples.push((domain.clone(), codomain.clone())); - self.domain_tuples - .insert(domain.as_slice().to_vec(), tuple_idx); + self.domain_tuples.insert(domain, tuple_idx); tuple_idx } }; @@ -439,7 +438,7 @@ impl TransientRelation { pub async fn remove_by_domain(&mut self, domain: SliceRef) -> Result<(), TupleError> { let tuple_idx = self .domain_tuples - .remove(domain.as_slice()) + .remove(&domain) .ok_or(TupleError::NotFound)?; if self.codomain_domain.is_some() { @@ -462,13 +461,13 @@ impl TransientRelation { // Clear out the old entry, if there was one. if let Some(old_codomain) = old_codomain { index - .entry(old_codomain.as_slice().to_vec()) + .entry(old_codomain) .or_insert_with(HashSet::new) .remove(&tuple_idx); } if let Some(new_codomain) = new_codomain { index - .entry(new_codomain.as_slice().to_vec()) + .entry(new_codomain) .or_insert_with(HashSet::new) .insert(tuple_idx); } diff --git a/crates/db/src/tuplebox/tx/working_set.rs b/crates/db/src/tuplebox/tx/working_set.rs index dc6c6380..b4f753e6 100644 --- a/crates/db/src/tuplebox/tx/working_set.rs +++ b/crates/db/src/tuplebox/tx/working_set.rs @@ -70,7 +70,7 @@ impl WorkingSet { let relation = &mut self.relations[relation_id.0]; // Check local first. - if let Some(tuple_idx) = relation.domain_index.get(domain.as_slice()) { + if let Some(tuple_idx) = relation.domain_index.get(&domain) { let local_version = relation.tuples.get(*tuple_idx).unwrap(); return match &local_version { TxTuple::Insert(t) | TxTuple::Update(t) | TxTuple::Value(t) => { @@ -91,12 +91,10 @@ impl WorkingSet { .await?; let tuple_idx = relation.tuples.len(); relation.tuples.push(TxTuple::Value(canon_t.clone())); - relation - .domain_index - .insert(domain.as_slice().to_vec(), tuple_idx); + relation.domain_index.insert(domain, tuple_idx); if let Some(ref mut codomain_index) = relation.codomain_index { codomain_index - .entry(canon_t.codomain().as_slice().to_vec()) + .entry(canon_t.codomain()) .or_insert_with(HashSet::new) .insert(tuple_idx); } @@ -139,7 +137,7 @@ impl WorkingSet { let relation = &mut self.relations[relation_id.0]; let codomain_index = relation.codomain_index.as_ref().expect("No codomain index"); let tuple_indexes = codomain_index - .get(codomain.as_slice()) + .get(&codomain) .cloned() .unwrap_or_else(HashSet::new) .into_iter(); @@ -165,7 +163,7 @@ impl WorkingSet { let relation = &mut self.relations[relation_id.0]; // If we already have a local version, that's a dupe, so return an error for that. - if relation.domain_index.get(domain.as_slice()).is_some() { + if relation.domain_index.get(&domain).is_some() { return Err(TupleError::Duplicate); } @@ -187,9 +185,7 @@ impl WorkingSet { codomain.as_slice(), ); relation.tuples.push(TxTuple::Insert(new_t.unwrap())); - relation - .domain_index - .insert(domain.as_slice().to_vec(), tuple_idx); + relation.domain_index.insert(domain, tuple_idx); relation.update_secondary(tuple_idx, None, Some(codomain.clone())); Ok(()) @@ -252,7 +248,7 @@ impl WorkingSet { // If we have an existing copy, we will update it, but keep its existing derivation // timestamp and operation type. - if let Some(tuple_idx) = relation.domain_index.get_mut(domain.as_slice()).cloned() { + if let Some(tuple_idx) = relation.domain_index.get_mut(&domain).cloned() { let existing = relation.tuples.get_mut(tuple_idx).expect("Tuple not found"); let (replacement, old_value) = match &existing { TxTuple::Tombstone { .. } => return Err(TupleError::NotFound), @@ -305,9 +301,7 @@ impl WorkingSet { codomain.as_slice(), ); relation.tuples.push(TxTuple::Update(new_t.unwrap())); - relation - .domain_index - .insert(domain.as_slice().to_vec(), tuple_idx); + relation.domain_index.insert(domain, tuple_idx); relation.update_secondary(tuple_idx, Some(old_codomain), Some(codomain.clone())); Ok(()) } @@ -327,7 +321,7 @@ impl WorkingSet { // timestamp. // If it's an insert, we have to keep it an insert, same for update, but if it's a delete, // we have to turn it into an update. - if let Some(tuple_idx) = relation.domain_index.get_mut(domain.as_slice()).cloned() { + if let Some(tuple_idx) = relation.domain_index.get_mut(&domain).cloned() { let existing = relation.tuples.get_mut(tuple_idx).expect("Tuple not found"); let (replacement, old) = match &existing { TxTuple::Insert(t) => { @@ -405,9 +399,7 @@ impl WorkingSet { .await; let tuple_idx = relation.tuples.len(); relation.tuples.push(operation); - relation - .domain_index - .insert(domain.as_slice().to_vec(), tuple_idx); + relation.domain_index.insert(domain, tuple_idx); // Remove the old codomain->domain index entry if it exists, and then add the new one. relation.update_secondary(tuple_idx, old.map(|o| o.0), Some(codomain.clone())); @@ -425,7 +417,7 @@ impl WorkingSet { let relation = &mut self.relations[relation_id.0]; // Delete is basically an update but where we stick a Tombstone. - if let Some(tuple_index) = relation.domain_index.get_mut(domain.as_slice()).cloned() { + if let Some(tuple_index) = relation.domain_index.get_mut(&domain).cloned() { let tuple_v = relation .tuples .get_mut(tuple_index) @@ -464,9 +456,7 @@ impl WorkingSet { domain: domain.clone(), tuple_id: tuple.id(), }); - relation - .domain_index - .insert(domain.as_slice().to_vec(), local_tuple_idx); + relation.domain_index.insert(domain, local_tuple_idx); relation.update_secondary(local_tuple_idx, Some(old_codomain), None); Ok(()) } @@ -476,8 +466,8 @@ impl WorkingSet { pub(crate) struct TxBaseRelation { pub id: RelationId, tuples: Vec, - domain_index: HashMap, usize>, - codomain_index: Option, HashSet>>, + domain_index: HashMap, + codomain_index: Option>>, } impl TxBaseRelation { @@ -506,16 +496,14 @@ impl TxBaseRelation { // Clear out the old entry, if there was one. if let Some(old_codomain) = old_codomain { - let old_codomain_bytes = old_codomain.as_slice().to_vec(); codomain_index - .entry(old_codomain_bytes) + .entry(old_codomain) .or_insert_with(HashSet::new) .remove(&tuple_id); } if let Some(new_codomain) = new_codomain { - let codomain_bytes = new_codomain.as_slice().to_vec(); codomain_index - .entry(codomain_bytes) + .entry(new_codomain) .or_insert_with(HashSet::new) .insert(tuple_id); } diff --git a/crates/values/src/util/slice_ref.rs b/crates/values/src/util/slice_ref.rs index 18deb88f..14948084 100644 --- a/crates/values/src/util/slice_ref.rs +++ b/crates/values/src/util/slice_ref.rs @@ -13,6 +13,7 @@ // use std::fmt::{Debug, Display, Formatter}; +use std::hash::{Hash, Hasher}; use std::ops::RangeBounds; use std::sync::Arc; use yoke::Yoke; @@ -50,6 +51,11 @@ impl Display for SliceRef { } } +impl Hash for SliceRef { + fn hash(&self, state: &mut H) { + self.as_slice().hash(state) + } +} pub trait ByteSource: Send + Sync { fn as_slice(&self) -> &[u8]; fn len(&self) -> usize;