Skip to content

Commit

Permalink
Reduce copies in relation indexes; replace use of Vec<u8> for keys with the actual domain/codomain slices, eliminating a copy. 3-4% throughput improvement.
Browse files Browse the repository at this point in the history
  • Loading branch information
rdaum committed Jan 5, 2024
1 parent a7cfde5 commit 3baf4e0
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 59 deletions.
33 changes: 15 additions & 18 deletions crates/db/src/tuplebox/base_relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ pub struct BaseRelation {

/// The domain-indexed tuples in this relation, which are in this case expressed purely as bytes.
/// It is up to the caller to interpret them.
index_domain: im::HashMap<Vec<u8>, TupleRef>,
index_domain: im::HashMap<SliceRef, TupleRef>,

/// Optional reverse index from codomain -> tuples, which is used to support (more) efficient
/// reverse lookups.
index_codomain: Option<im::HashMap<Vec<u8>, HashSet<TupleRef>>>,
index_codomain: Option<im::HashMap<SliceRef, HashSet<TupleRef>>>,
}

impl BaseRelation {
Expand Down Expand Up @@ -87,7 +87,7 @@ impl BaseRelation {
self.index_codomain
.as_mut()
.unwrap()
.entry(tuple.codomain().as_slice().to_vec())
.entry(tuple.codomain())
.or_default()
.insert(tuple.clone());
}
Expand All @@ -102,20 +102,19 @@ impl BaseRelation {
tuple.update_timestamp(self.id, self.slotbox.clone(), 0);

// Update the domain index to point to the tuple...
self.index_domain
.insert(tuple.domain().as_slice().to_vec(), tuple.clone());
self.index_domain.insert(tuple.domain(), tuple.clone());

// ... and update the secondary index if there is one.
if let Some(index) = &mut self.index_codomain {
index
.entry(tuple.codomain().as_slice().to_vec())
.entry(tuple.codomain())
.or_insert_with(HashSet::new)
.insert(tuple);
}
}

pub fn seek_by_domain(&self, domain: SliceRef) -> Option<TupleRef> {
self.index_domain.get(domain.as_slice()).cloned()
self.index_domain.get(&domain).cloned()
}

pub fn predicate_scan<F: Fn(&(SliceRef, SliceRef)) -> bool>(&self, f: &F) -> HashSet<TupleRef> {
Expand All @@ -131,21 +130,21 @@ impl BaseRelation {
// We could do full-scan, but in this case we're going to assume that the caller knows
// what they're doing.
let codomain_index = self.index_codomain.as_ref().expect("No codomain index");
if let Some(tuple_refs) = codomain_index.get(codomain.as_slice()) {
if let Some(tuple_refs) = codomain_index.get(&codomain) {
tuple_refs.iter().cloned().collect()
} else {
HashSet::new()
}
}
pub fn remove_by_domain(&mut self, domain: SliceRef) {
// Seek the tuple id...
if let Some(tuple_ref) = self.index_domain.remove(domain.as_slice()) {
if let Some(tuple_ref) = self.index_domain.remove(&domain) {
self.tuples.remove(&tuple_ref);

// And remove from codomain index, if it exists in there
if let Some(index) = &mut self.index_codomain {
index
.entry(domain.as_slice().to_vec())
.entry(domain)
.or_insert_with(HashSet::new)
.remove(&tuple_ref);
}
Expand All @@ -155,16 +154,15 @@ impl BaseRelation {
/// Update or insert a tuple into the relation.
pub fn upsert_tuple(&mut self, tuple: TupleRef) {
// First check the domain->tuple id index to see if we're inserting or updating.
let existing_tuple_ref = self.index_domain.get(tuple.domain().as_slice()).cloned();
let existing_tuple_ref = self.index_domain.get(&tuple.domain()).cloned();
match existing_tuple_ref {
None => {
// Insert into the tuple list and the index.
self.index_domain
.insert(tuple.domain().as_slice().to_vec(), tuple.clone());
self.index_domain.insert(tuple.domain(), tuple.clone());
self.tuples.insert(tuple.clone());
if let Some(codomain_index) = &mut self.index_codomain {
codomain_index
.entry(tuple.codomain().as_slice().to_vec())
.entry(tuple.codomain())
.or_insert_with(HashSet::new)
.insert(tuple);
}
Expand All @@ -173,16 +171,15 @@ impl BaseRelation {
// We need the old value so we can update the codomain index.
if let Some(codomain_index) = &mut self.index_codomain {
codomain_index
.entry(existing_tuple.codomain().as_slice().to_vec())
.entry(existing_tuple.codomain())
.or_insert_with(HashSet::new)
.remove(&existing_tuple);
codomain_index
.entry(tuple.codomain().as_slice().to_vec())
.entry(tuple.codomain())
.or_insert_with(HashSet::new)
.insert(tuple.clone());
}
self.index_domain
.insert(tuple.domain().as_slice().to_vec(), tuple.clone());
self.index_domain.insert(tuple.domain(), tuple.clone());
self.tuples.remove(&existing_tuple);
self.tuples.insert(tuple);
}
Expand Down
25 changes: 12 additions & 13 deletions crates/db/src/tuplebox/tx/transaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,8 +331,8 @@ impl CommitSet {
struct TransientRelation {
_id: RelationId,
tuples: Vec<(SliceRef, SliceRef)>,
domain_tuples: HashMap<Vec<u8>, usize>,
codomain_domain: Option<HashMap<Vec<u8>, HashSet<usize>>>,
domain_tuples: HashMap<SliceRef, usize>,
codomain_domain: Option<HashMap<SliceRef, HashSet<usize>>>,
}

impl TransientRelation {
Expand All @@ -343,7 +343,7 @@ impl TransientRelation {
) -> Result<(SliceRef, SliceRef), TupleError> {
let tuple_idx = self
.domain_tuples
.get(domain.as_slice())
.get(&domain)
.copied()
.ok_or(TupleError::NotFound);
tuple_idx.map(|id| self.tuples[id].clone())
Expand All @@ -360,7 +360,7 @@ impl TransientRelation {
// what they're doing.
let codomain_domain = self.codomain_domain.as_ref().expect("No codomain index");
let tuple_indexes = codomain_domain
.get(codomain.as_slice())
.get(&codomain)
.cloned()
.ok_or(TupleError::NotFound)?;
Ok(tuple_indexes
Expand All @@ -382,13 +382,13 @@ impl TransientRelation {
domain: SliceRef,
codomain: SliceRef,
) -> Result<(), TupleError> {
if self.domain_tuples.contains_key(domain.as_slice()) {
if self.domain_tuples.contains_key(&domain) {
return Err(TupleError::Duplicate);
}
let tuple_idx = self.tuples.len();
self.tuples.push((domain.clone(), codomain.clone()));
self.domain_tuples
.insert(domain.as_slice().to_vec(), tuple_idx)
.insert(domain, tuple_idx)
.map(|_| ())
.ok_or(TupleError::Duplicate)
}
Expand All @@ -401,7 +401,7 @@ impl TransientRelation {
) -> Result<(), TupleError> {
let tuple_idx = self
.domain_tuples
.get(domain.as_slice())
.get(&domain)
.copied()
.ok_or(TupleError::NotFound)?;
if self.codomain_domain.is_some() {
Expand All @@ -417,16 +417,15 @@ impl TransientRelation {
domain: SliceRef,
codomain: SliceRef,
) -> Result<(), TupleError> {
let tuple_idx = match self.domain_tuples.get(domain.as_slice()) {
let tuple_idx = match self.domain_tuples.get(&domain) {
Some(tuple_idx) => {
self.tuples[*tuple_idx] = (domain, codomain.clone());
*tuple_idx
}
None => {
let tuple_idx = self.tuples.len();
self.tuples.push((domain.clone(), codomain.clone()));
self.domain_tuples
.insert(domain.as_slice().to_vec(), tuple_idx);
self.domain_tuples.insert(domain, tuple_idx);
tuple_idx
}
};
Expand All @@ -439,7 +438,7 @@ impl TransientRelation {
pub async fn remove_by_domain(&mut self, domain: SliceRef) -> Result<(), TupleError> {
let tuple_idx = self
.domain_tuples
.remove(domain.as_slice())
.remove(&domain)
.ok_or(TupleError::NotFound)?;

if self.codomain_domain.is_some() {
Expand All @@ -462,13 +461,13 @@ impl TransientRelation {
// Clear out the old entry, if there was one.
if let Some(old_codomain) = old_codomain {
index
.entry(old_codomain.as_slice().to_vec())
.entry(old_codomain)
.or_insert_with(HashSet::new)
.remove(&tuple_idx);
}
if let Some(new_codomain) = new_codomain {
index
.entry(new_codomain.as_slice().to_vec())
.entry(new_codomain)
.or_insert_with(HashSet::new)
.insert(tuple_idx);
}
Expand Down
44 changes: 16 additions & 28 deletions crates/db/src/tuplebox/tx/working_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl WorkingSet {
let relation = &mut self.relations[relation_id.0];

// Check local first.
if let Some(tuple_idx) = relation.domain_index.get(domain.as_slice()) {
if let Some(tuple_idx) = relation.domain_index.get(&domain) {
let local_version = relation.tuples.get(*tuple_idx).unwrap();
return match &local_version {
TxTuple::Insert(t) | TxTuple::Update(t) | TxTuple::Value(t) => {
Expand All @@ -91,12 +91,10 @@ impl WorkingSet {
.await?;
let tuple_idx = relation.tuples.len();
relation.tuples.push(TxTuple::Value(canon_t.clone()));
relation
.domain_index
.insert(domain.as_slice().to_vec(), tuple_idx);
relation.domain_index.insert(domain, tuple_idx);
if let Some(ref mut codomain_index) = relation.codomain_index {
codomain_index
.entry(canon_t.codomain().as_slice().to_vec())
.entry(canon_t.codomain())
.or_insert_with(HashSet::new)
.insert(tuple_idx);
}
Expand Down Expand Up @@ -139,7 +137,7 @@ impl WorkingSet {
let relation = &mut self.relations[relation_id.0];
let codomain_index = relation.codomain_index.as_ref().expect("No codomain index");
let tuple_indexes = codomain_index
.get(codomain.as_slice())
.get(&codomain)
.cloned()
.unwrap_or_else(HashSet::new)
.into_iter();
Expand All @@ -165,7 +163,7 @@ impl WorkingSet {
let relation = &mut self.relations[relation_id.0];

// If we already have a local version, that's a dupe, so return an error for that.
if relation.domain_index.get(domain.as_slice()).is_some() {
if relation.domain_index.get(&domain).is_some() {
return Err(TupleError::Duplicate);
}

Expand All @@ -187,9 +185,7 @@ impl WorkingSet {
codomain.as_slice(),
);
relation.tuples.push(TxTuple::Insert(new_t.unwrap()));
relation
.domain_index
.insert(domain.as_slice().to_vec(), tuple_idx);
relation.domain_index.insert(domain, tuple_idx);
relation.update_secondary(tuple_idx, None, Some(codomain.clone()));

Ok(())
Expand Down Expand Up @@ -252,7 +248,7 @@ impl WorkingSet {

// If we have an existing copy, we will update it, but keep its existing derivation
// timestamp and operation type.
if let Some(tuple_idx) = relation.domain_index.get_mut(domain.as_slice()).cloned() {
if let Some(tuple_idx) = relation.domain_index.get_mut(&domain).cloned() {
let existing = relation.tuples.get_mut(tuple_idx).expect("Tuple not found");
let (replacement, old_value) = match &existing {
TxTuple::Tombstone { .. } => return Err(TupleError::NotFound),
Expand Down Expand Up @@ -305,9 +301,7 @@ impl WorkingSet {
codomain.as_slice(),
);
relation.tuples.push(TxTuple::Update(new_t.unwrap()));
relation
.domain_index
.insert(domain.as_slice().to_vec(), tuple_idx);
relation.domain_index.insert(domain, tuple_idx);
relation.update_secondary(tuple_idx, Some(old_codomain), Some(codomain.clone()));
Ok(())
}
Expand All @@ -327,7 +321,7 @@ impl WorkingSet {
// timestamp.
// If it's an insert, we have to keep it an insert, same for update, but if it's a delete,
// we have to turn it into an update.
if let Some(tuple_idx) = relation.domain_index.get_mut(domain.as_slice()).cloned() {
if let Some(tuple_idx) = relation.domain_index.get_mut(&domain).cloned() {
let existing = relation.tuples.get_mut(tuple_idx).expect("Tuple not found");
let (replacement, old) = match &existing {
TxTuple::Insert(t) => {
Expand Down Expand Up @@ -405,9 +399,7 @@ impl WorkingSet {
.await;
let tuple_idx = relation.tuples.len();
relation.tuples.push(operation);
relation
.domain_index
.insert(domain.as_slice().to_vec(), tuple_idx);
relation.domain_index.insert(domain, tuple_idx);

// Remove the old codomain->domain index entry if it exists, and then add the new one.
relation.update_secondary(tuple_idx, old.map(|o| o.0), Some(codomain.clone()));
Expand All @@ -425,7 +417,7 @@ impl WorkingSet {
let relation = &mut self.relations[relation_id.0];

// Delete is basically an update but where we stick a Tombstone.
if let Some(tuple_index) = relation.domain_index.get_mut(domain.as_slice()).cloned() {
if let Some(tuple_index) = relation.domain_index.get_mut(&domain).cloned() {
let tuple_v = relation
.tuples
.get_mut(tuple_index)
Expand Down Expand Up @@ -464,9 +456,7 @@ impl WorkingSet {
domain: domain.clone(),
tuple_id: tuple.id(),
});
relation
.domain_index
.insert(domain.as_slice().to_vec(), local_tuple_idx);
relation.domain_index.insert(domain, local_tuple_idx);
relation.update_secondary(local_tuple_idx, Some(old_codomain), None);
Ok(())
}
Expand All @@ -476,8 +466,8 @@ impl WorkingSet {
pub(crate) struct TxBaseRelation {
pub id: RelationId,
tuples: Vec<TxTuple>,
domain_index: HashMap<Vec<u8>, usize>,
codomain_index: Option<HashMap<Vec<u8>, HashSet<usize>>>,
domain_index: HashMap<SliceRef, usize>,
codomain_index: Option<HashMap<SliceRef, HashSet<usize>>>,
}

impl TxBaseRelation {
Expand Down Expand Up @@ -506,16 +496,14 @@ impl TxBaseRelation {

// Clear out the old entry, if there was one.
if let Some(old_codomain) = old_codomain {
let old_codomain_bytes = old_codomain.as_slice().to_vec();
codomain_index
.entry(old_codomain_bytes)
.entry(old_codomain)
.or_insert_with(HashSet::new)
.remove(&tuple_id);
}
if let Some(new_codomain) = new_codomain {
let codomain_bytes = new_codomain.as_slice().to_vec();
codomain_index
.entry(codomain_bytes)
.entry(new_codomain)
.or_insert_with(HashSet::new)
.insert(tuple_id);
}
Expand Down
6 changes: 6 additions & 0 deletions crates/values/src/util/slice_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
//

use std::fmt::{Debug, Display, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::RangeBounds;
use std::sync::Arc;
use yoke::Yoke;
Expand Down Expand Up @@ -50,6 +51,11 @@ impl Display for SliceRef {
}
}

/// Hashes a `SliceRef` through the bytes it exposes via `as_slice()`.
///
/// The work is delegated to the `Hash` implementation of the byte slice, so the
/// hash is derived from those bytes alone.
// NOTE(review): hash-map keys also rely on `Eq`; presumably `SliceRef` equality
// compares the same bytes, keeping `Hash` and `Eq` consistent — confirm.
impl Hash for SliceRef {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let bytes: &[u8] = self.as_slice();
        Hash::hash(bytes, state);
    }
}
pub trait ByteSource: Send + Sync {
fn as_slice(&self) -> &[u8];
fn len(&self) -> usize;
Expand Down

0 comments on commit 3baf4e0

Please sign in to comment.