Skip to content

Commit

Permalink
Merge pull request #16 from froth/string_interning
Browse files Browse the repository at this point in the history
Implement string interning
  • Loading branch information
froth authored Oct 22, 2024
2 parents 68c0975 + 7a78ae6 commit 27dfca8
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 68 deletions.
2 changes: 1 addition & 1 deletion src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ impl<'a, 'gc> Compiler<'a, 'gc> {
TokenType::True => self.chunk.write(Op::True, token.location),
TokenType::False => self.chunk.write(Op::False, token.location),
TokenType::String(s) => {
let obj = self.gc.manage(Obj::from_str(s));
let obj = self.gc.manage_str(s);
self.emit_constant(Value::Obj(obj), token.location)
}
_ => unreachable!(), // guarded by is_prefix
Expand Down
82 changes: 64 additions & 18 deletions src/datastructures/hash_table.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use crate::types::obj::Obj;
use crate::types::obj_ref::ObjRef;
use crate::types::string::hash_str;
use crate::types::Hashable;
use std::fmt::Write as _;
use std::ptr::NonNull;
use std::{fmt::Write as _, mem};

use tracing::debug;

use crate::types::string::LoxString;
use crate::types::value::Value;

use super::memory;
Expand Down Expand Up @@ -38,16 +38,15 @@ impl HashTable {

let entry = Self::find_entry(self.entries, self.capacity, key);
//SAFETY: we are sure the pointer points into valid HashTable memory
let is_new_key = unsafe { (*entry).key.is_none() };
if is_new_key && unsafe { !(*entry).is_tombstone() } {
self.count += 1;
}
//SAFETY: we are sure the pointer points into valid HashTable memory
unsafe {
let is_new_key = (*entry).key.is_none();
if is_new_key && !(*entry).is_tombstone() {
self.count += 1;
}
(*entry).key = Some(key);
(*entry).value = value;
is_new_key
}
is_new_key
}

pub fn get(&self, key: Value) -> Option<Value> {
Expand Down Expand Up @@ -100,6 +99,7 @@ impl HashTable {
unsafe {
let entry = entries.as_ptr().add(index as usize);
if let Some(entry_key) = (*entry).key {
// This compares Obj keys by pointer equality, which only works because all strings are interned.
if entry_key == key {
return entry;
}
Expand All @@ -113,6 +113,31 @@ impl HashTable {
}
}

/// Looks up a stored string key by its text. Only for string interning.
///
/// Probing starts at `hash_str(string) % capacity` and moves forward one slot
/// at a time, wrapping around, until either a string key with equal text is
/// found or a slot that is neither an object key nor a tombstone is reached.
///
/// Returns the stored `ObjRef` whose string equals `string`, or `None` when
/// the table is empty or no such key exists.
pub fn find_string(&self, string: &str) -> Option<ObjRef> {
    if self.count == 0 {
        return None;
    }
    let mut slot = hash_str(string).0 % self.capacity;
    loop {
        // SAFETY: we know this ends in valid memory of HashTable
        let entry = unsafe { &*self.entries.as_ptr().add(slot as usize) };
        match entry.key {
            Some(Value::Obj(obj_ref)) => {
                // Compare by content: the caller only has the text, not a reference.
                if matches!(&*obj_ref, Obj::String(s) if s.string == string) {
                    return Some(obj_ref);
                }
            }
            // Tombstones keep the probe going; any other slot ends the search.
            _ => {
                if !entry.is_tombstone() {
                    return None;
                }
            }
        }
        slot = slot.wrapping_add(1) % self.capacity;
    }
}

fn adjust_capacity(&mut self, new_capacity: u32) {
let new_pointer: NonNull<Entry> = memory::alloc_array(new_capacity as usize);
for i in 0..new_capacity {
Expand Down Expand Up @@ -239,7 +264,7 @@ mod tests {
let mut gc = Gc::new();
let mut table: HashTable = HashTable::new();
for i in 0..2049 {
let obj_ref = gc.manage_string(LoxString::string(format!("key{}", i)));
let obj_ref = gc.manage_string(format!("key{}", i));
let key = Value::Obj(obj_ref);
let inserted = table.insert(key, Value::Number(f64::from(i)));
assert!(inserted);
Expand All @@ -255,15 +280,15 @@ mod tests {
let mut gc = Gc::new();
let mut table: HashTable = HashTable::new();
for i in 0..5 {
let obj_ref = gc.manage_string(LoxString::string(format!("key{}", i)));
let obj_ref = gc.manage_string(format!("key{}", i));
let key = Value::Obj(obj_ref);
table.insert(key, Value::Number(f64::from(i)));
table.delete(key);
}
assert_eq!(table.count, 5);
assert_eq!(table.capacity, 8);
for i in 6..14 {
let obj_ref = gc.manage_string(LoxString::string(format!("key{}", i)));
let obj_ref = gc.manage_string(format!("key{}", i));
let key = Value::Obj(obj_ref);
table.insert(key, Value::Number(f64::from(i)));
}
Expand All @@ -275,15 +300,15 @@ mod tests {
fn handle_tombstones_correctly() {
let mut gc = Gc::new();
// all those keys have hash % 8 == 2
let key1 = Value::Obj(gc.manage_string(LoxString::string("3".to_string())));
let key1 = Value::Obj(gc.manage_string("3".to_string()));
let value1 = Value::Number(1.0);
let key2 = Value::Obj(gc.manage_string(LoxString::string("12".to_string())));
let key2 = Value::Obj(gc.manage_string("12".to_string()));
let value2 = Value::Number(2.0);
let key3 = Value::Obj(gc.manage_string(LoxString::string("23".to_string())));
let key3 = Value::Obj(gc.manage_string("23".to_string()));
let value3 = Value::Number(3.0);

// has hash % 8 == 3
let key4 = Value::Obj(gc.manage_string(LoxString::string("key5".to_string())));
let key4 = Value::Obj(gc.manage_string("key5".to_string()));
let value4 = Value::Number(4.0);
let mut table: HashTable = HashTable::new();

Expand All @@ -304,12 +329,33 @@ mod tests {
assert_eq!(table.count, 3);
}

/// `find_string` must return the exact reference that was inserted for a
/// given text, even when other keys share its starting slot.
#[test]
fn find_str() {
    let mut gc = Gc::new();
    let mut table: HashTable = HashTable::new();

    // all those keys have hash % 8 == 2
    let first = gc.manage_string("3".to_string());
    let wanted = gc.manage_string("12".to_string());
    let last = gc.manage_string("23".to_string());

    table.insert(Value::Obj(first), Value::Number(1.0));
    table.insert(Value::Obj(wanted), Value::Number(2.0));
    table.insert(Value::Obj(last), Value::Number(3.0));

    let found = table.find_string("12").unwrap();
    assert_eq!(found, wanted);
}
#[test]
fn add_all() {
let mut gc = Gc::new();
let mut from: HashTable = HashTable::new();
for i in 0..2049 {
let obj_ref = gc.manage_string(LoxString::string(format!("key{}", i)));
let obj_ref = gc.manage_string(format!("key{}", i));
let key = Value::Obj(obj_ref);
from.insert(key, Value::Number(f64::from(i)));
}
Expand Down
61 changes: 37 additions & 24 deletions src/gc.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use std::{hint::unreachable_unchecked, ptr::NonNull};

use crate::types::{obj::Obj, obj_ref::ObjRef, string::LoxString};
use crate::{
datastructures::hash_table::HashTable,
types::{obj::Obj, obj_ref::ObjRef, string::LoxString, value::Value},
};

/// Owner of the objects handed out as `ObjRef`s.
///
/// Objects are kept in a singly linked list of boxed nodes, and every string
/// that gets managed is also recorded in `strings` so that equal text can be
/// resolved to an existing `ObjRef`.
pub struct Gc {
// Most recently managed node; each node links to the previously managed one.
head: Option<Box<Node>>,
// Intern table: keys are `Value::Obj` string references, values are `Value::Nil`.
strings: HashTable,
}

struct Node {
Expand All @@ -13,30 +15,35 @@ struct Node {

impl Gc {
pub fn new() -> Self {
Self { head: None }
Self {
head: None,
strings: HashTable::new(),
}
}

pub fn manage(&mut self, obj: Obj) -> ObjRef {
let old_head = self.head.take();
let mut new_node = Box::new(Node {
next: old_head,
obj,
});
let ptr: *mut Obj = &mut new_node.obj;
self.head = Some(new_node);
// SAFETY: guaranteed to be not null
ObjRef::new(unsafe { NonNull::new_unchecked(ptr) })
pub fn manage_string(&mut self, string: String) -> ObjRef {
self.strings
.find_string(&string)
.unwrap_or_else(|| self.manage_lox_string(LoxString::string(string)))
}

pub fn manage_string(&mut self, string: LoxString) -> ObjRef {
pub fn manage_str(&mut self, string: &str) -> ObjRef {
self.strings
.find_string(string)
.unwrap_or_else(|| self.manage_lox_string(LoxString::from_str(string)))
}

fn manage_lox_string(&mut self, lox_string: LoxString) -> ObjRef {
let obj = Obj::String(lox_string);
let old_head = self.head.take();
let obj = Obj::String(string);
let mut new_node = Box::new(Node {
next: old_head,
obj,
});
let obj_ref = ObjRef::from_obj(&mut new_node.obj);
self.head = Some(new_node);
// intern the string
self.strings.insert(Value::Obj(obj_ref), Value::Nil);
obj_ref
}
}
Expand All @@ -53,18 +60,24 @@ impl Drop for Gc {
#[cfg(test)]
mod tests {

use std::ops::Deref;

use super::*;

#[test]
fn push() {
let mut gc = Gc::new();
let one = gc.manage(Obj::from_str("asfsaf"));
assert_eq!(one.deref(), &Obj::from_str("asfsaf"));
let two = gc.manage(Obj::from_str("sfdsdfsaf"));
assert_eq!(two.deref(), &Obj::from_str("sfdsdfsaf"));
let three = gc.manage(Obj::from_str("sfdsasdasddfsaf"));
assert_eq!(three.deref(), &Obj::from_str("sfdsasdasddfsaf"));
let one = gc.manage_str("asfsaf");
assert_eq!(*one, Obj::String(LoxString::from_str("asfsaf")));
let two = gc.manage_str("sfdsdfsaf");
assert_eq!(*two, Obj::String(LoxString::from_str("sfdsdfsaf")));
let three = gc.manage_str("sfdsasdasddfsaf");
assert_eq!(*three, Obj::String(LoxString::from_str("sfdsasdasddfsaf")));
}

#[test]
fn string_interning() {
    let mut gc = Gc::new();
    // Managing the same text twice must yield equal `ObjRef`s.
    let first = gc.manage_str("asfsaf");
    let second = gc.manage_str("asfsaf");
    assert_eq!(first, second);
}
}
10 changes: 0 additions & 10 deletions src/types/obj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,6 @@ impl Hashable for Obj {
}
}

impl Obj {
pub fn from_str(s: &str) -> Self {
Self::String(LoxString::string(s.to_owned()))
}

pub fn string(string: String) -> Self {
Self::String(LoxString::string(string))
}
}

impl Display for Obj {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Expand Down
4 changes: 0 additions & 4 deletions src/types/obj_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ use super::obj::Obj;
pub struct ObjRef(NonNull<Obj>);

impl ObjRef {
pub fn new(ptr: NonNull<Obj>) -> Self {
Self(ptr)
}

/// Wraps a mutable reference to an [`Obj`] in an `ObjRef`.
///
/// A reference can never be null, so the pointer is built with the safe
/// `NonNull::from` conversion; no `unsafe` is needed here.
///
/// The returned `ObjRef` stores a raw pointer and does not carry the borrow's
/// lifetime, so the caller must keep the object alive and in place for as
/// long as the `ObjRef` is used.
pub fn from_obj(obj: &mut Obj) -> Self {
    Self(NonNull::from(obj))
}
Expand Down
22 changes: 12 additions & 10 deletions src/types/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,21 @@ impl LoxString {
}

pub fn string(string: String) -> Self {
const PRIME: u32 = 16777619;
let mut hash: u32 = 2166136261;
for b in string.bytes() {
hash ^= b as u32;
hash = hash.wrapping_mul(PRIME);
}
Self {
string,
hash: Hash(hash),
}
let hash = hash_str(&string);
Self { string, hash }
}
}

/// Computes the 32-bit FNV-1a hash of the bytes of `str`.
///
/// Starting from the offset basis, each byte is XORed into the running value,
/// which is then multiplied by the FNV prime with wrapping arithmetic.
pub fn hash_str(str: &str) -> Hash {
    const OFFSET_BASIS: u32 = 2166136261;
    const PRIME: u32 = 16777619;
    let digest = str
        .bytes()
        .fold(OFFSET_BASIS, |acc, byte| (acc ^ u32::from(byte)).wrapping_mul(PRIME));
    Hash(digest)
}

impl Hashable for LoxString {
fn hash(&self) -> Hash {
self.hash
Expand Down
2 changes: 1 addition & 1 deletion src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ impl VM {
if let (Obj::String(a), Obj::String(b)) = (a.deref(), b.deref()) {
self.pop();
self.pop();
let concated = self.gc.manage(Obj::string(a.string.to_owned() + &b.string));
let concated = self.gc.manage_string(a.string.to_owned() + &b.string);
self.push(Value::Obj(concated));
} else {
miette::bail!(
Expand Down

0 comments on commit 27dfca8

Please sign in to comment.