Skip to content

Commit

Permalink
Introduces hashdb and memorydb packages. Integrates HashDB into TrieD…
Browse files Browse the repository at this point in the history
…B, update tests, add required iterators, bug fixes.
  • Loading branch information
timwu20 committed Nov 4, 2024
1 parent 55efa14 commit 929ac3c
Show file tree
Hide file tree
Showing 25 changed files with 1,677 additions and 495 deletions.
51 changes: 51 additions & 0 deletions internal/hash-db/hash_db.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package hashdb

import "golang.org/x/exp/constraints"

// Prefix is a trie node prefix: the nibble path from the trie root
// to the trie node.
// For a node containing no partial key value it is the full key.
// For a value node or node containing a partial key, it is the full key minus its node partial
// nibbles (the node key can be split into prefix and node partial).
// Therefore it is always the leftmost portion of the node key, so its internal representation
// is a non expanded byte slice followed by a last padded byte representation.
// The padded byte is an optional padded value.
type Prefix struct {
// Key is the non-expanded byte slice part of the prefix.
Key []byte
// Padded is the optional last padded byte; it is nil when absent.
Padded *byte
}

// EmptyPrefix is an empty prefix value.
// It can be used when the prefix is not used internally or for root nodes.
var EmptyPrefix = Prefix{}

// Hasher is an interface describing an object that can hash a slice of bytes. It is used to
// abstract other types over the hashing algorithm. It defines a single Hash method; the type
// of the produced hash is given by the Out type parameter.
type Hasher[Out constraints.Ordered] interface {
// Hash computes the hash of the provided slice of bytes, returning the Out type of the Hasher.
Hash(x []byte) Out
}

// HashDB is an interface modelling a datastore keyed by a hash defined by the Hasher.
type HashDB[Hash comparable] interface {
// Get looks up a given hash and returns the bytes that hash to it, or nil if the
// hash is not known.
Get(key Hash, prefix Prefix) []byte

// Contains checks for the existence of a hash-key.
Contains(key Hash, prefix Prefix) bool

// Insert inserts a datum item into the DB and returns the datum's hash for a later lookup.
// Insertions are counted and the equivalent number of Remove calls must be performed before
// the data is considered dead.
Insert(prefix Prefix, value []byte) Hash

// Emplace is like Insert, except that the caller provides the key.
Emplace(key Hash, prefix Prefix, value []byte)

// Remove removes a datum previously inserted. Insertions can be "owed", such that the same
// number of inserts may happen without the data eventually being inserted into the DB.
// It can be "owed" more than once.
Remove(key Hash, prefix Prefix)
}
239 changes: 239 additions & 0 deletions internal/memory-db/memory_db.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
package memorydb

import (
"maps"

hashdb "github.com/ChainSafe/gossamer/internal/hash-db"
"golang.org/x/exp/constraints"
)

// dataRC pairs a stored value with its reference count.
type dataRC struct {
// Data is the stored value bytes.
Data []byte
// RC is the reference count. It may be zero or negative: Remove decrements it without a
// lower bound.
RC int32
}

// Hash is the constraint for hash types used by MemoryDB: an ordered type that can also
// expose itself as a byte slice.
type Hash interface {
constraints.Ordered
// Bytes returns the hash as a byte slice.
Bytes() []byte
}

// Value is the constraint for null-node data: any type whose underlying type is []byte.
type Value interface {
~[]byte
}

// MemoryDB is a reference-counted, memory-based [hashdb.HashDB] implementation.
//
// Type parameters: H is the hash type, Hasher computes an H from bytes, Key is the key type
// of the internal map, and KF derives a Key from a hash and a prefix.
type MemoryDB[H Hash, Hasher hashdb.Hasher[H], Key constraints.Ordered, KF KeyFunction[H, Key]] struct {
// data maps derived keys to stored values and their reference counts.
data map[Key]dataRC
// hashedNullNode is the hash computed for the null node at construction; operations on this
// hash are answered without consulting data.
hashedNullNode H
// nullNodeData is the value reported for hashedNullNode.
nullNodeData []byte
}

// NewMemoryDB creates an empty MemoryDB whose null node is data: the same bytes are hashed to
// obtain the null-node hash and are stored as the null-node value.
func NewMemoryDB[H Hash, Hasher hashdb.Hasher[H], Key constraints.Ordered, KF KeyFunction[H, Key]](
	data []byte,
) MemoryDB[H, Hasher, Key, KF] {
	nullKey, nullNodeData := data, data
	return newMemoryDBFromNullNode[H, Hasher, Key, KF](nullKey, nullNodeData)
}

// newMemoryDBFromNullNode creates an empty MemoryDB. The null-node hash is obtained by hashing
// nullKey with a zero-value Hasher, and nullNodeData is stored as the null-node value.
func newMemoryDBFromNullNode[H Hash, Hasher hashdb.Hasher[H], Key constraints.Ordered, KF KeyFunction[H, Key], T Value](
	nullKey []byte,
	nullNodeData T,
) MemoryDB[H, Hasher, Key, KF] {
	var hasher Hasher
	db := MemoryDB[H, Hasher, Key, KF]{
		data:           make(map[Key]dataRC),
		hashedNullNode: hasher.Hash(nullKey),
		nullNodeData:   []byte(nullNodeData),
	}
	return db
}

// Clone returns a copy of the database that owns its own entry map. The copy is shallow: the
// stored byte slices and the null-node data are shared with the receiver.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Clone() MemoryDB[H, Hasher, Key, KF] {
	var clone MemoryDB[H, Hasher, Key, KF]
	clone.data = maps.Clone(mdb.data)
	clone.hashedNullNode = mdb.hashedNullNode
	clone.nullNodeData = mdb.nullNodeData
	return clone
}

// Purge deletes every entry whose reference count is exactly zero. Entries with a positive or
// negative count are kept.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Purge() {
	maps.DeleteFunc(mdb.data, func(_ Key, entry dataRC) bool {
		return entry.RC == 0
	})
}

// Drain returns the internal key-value map and leaves the database with a fresh, empty one.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Drain() map[Key]dataRC {
	drained, fresh := mdb.data, make(map[Key]dataRC)
	mdb.data = fresh
	return drained
}

// raw returns the raw information associated with a key, or nil if the key has no entry.
// For the null-node hash it returns the null-node data with a reference count of 1.
//
// Even when a non-nil result is returned, the data is only guaranteed to be useful
// when the reference count is greater than zero. The returned struct is a copy of the entry,
// although its Data slice still refers to the stored bytes.
func (mdb *MemoryDB[H, Hasher, Key, KF]) raw(key H, prefix hashdb.Prefix) *dataRC {
	if key == mdb.hashedNullNode {
		return &dataRC{Data: mdb.nullNodeData, RC: 1}
	}

	var kf KF
	if entry, found := mdb.data[kf.Key(key, prefix)]; found {
		return &entry
	}
	return nil
}

// Consolidate drains other and merges all of its entries into the receiver. For a key present
// in both, the reference counts are summed; the receiver's data is replaced by the incoming
// data only when the receiver's count was negative. Keys only present in other are copied over.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Consolidate(other *MemoryDB[H, Hasher, Key, KF]) {
	for key, incoming := range other.Drain() {
		existing, found := mdb.data[key]
		if !found {
			mdb.data[key] = dataRC{Data: incoming.Data, RC: incoming.RC}
			continue
		}

		// A negative count means removals were recorded before the data itself was seen,
		// so take the data from the incoming entry.
		if existing.RC < 0 {
			existing.Data = incoming.Data
		}
		existing.RC += incoming.RC
		mdb.data[key] = existing
	}
}

// removeAndPurge removes one reference to an element and deletes it from storage when its
// reference count was exactly 1. If the entry was deleted, the old value is returned;
// otherwise nil is returned. A key without an entry gets one with a count of -1, and the
// null-node hash is ignored.
func (mdb *MemoryDB[H, Hasher, Key, KF]) removeAndPurge(key H, prefix hashdb.Prefix) []byte {
	if key == mdb.hashedNullNode {
		return nil
	}

	var kf KF
	dbKey := kf.Key(key, prefix)
	entry, found := mdb.data[dbKey]
	switch {
	case !found:
		mdb.data[dbKey] = dataRC{RC: -1}
	case entry.RC == 1:
		delete(mdb.data, dbKey)
		return entry.Data
	default:
		entry.RC--
		mdb.data[dbKey] = entry
	}
	return nil
}

// Get returns the data stored for key and prefix, or nil when there is no entry or its
// reference count is not positive. The null-node hash always yields the null-node data.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Get(key H, prefix hashdb.Prefix) []byte {
	if key == mdb.hashedNullNode {
		return mdb.nullNodeData
	}

	var kf KF
	entry, found := mdb.data[kf.Key(key, prefix)]
	if !found || entry.RC <= 0 {
		return nil
	}
	return entry.Data
}

// Contains reports whether key and prefix refer to an entry with a positive reference count.
// The null-node hash is always reported as present.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Contains(key H, prefix hashdb.Prefix) bool {
	if key == mdb.hashedNullNode {
		return true
	}

	var kf KF
	entry, found := mdb.data[kf.Key(key, prefix)]
	return found && entry.RC > 0
}

// Emplace stores value under the caller-provided key and prefix and adds one reference.
// If an entry already exists, its data is replaced only when its reference count is zero or
// negative. Values equal to the null-node data are never stored.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Emplace(key H, prefix hashdb.Prefix, value []byte) {
	if string(mdb.nullNodeData) == string(value) {
		return
	}

	var kf KF
	dbKey := kf.Key(key, prefix)
	entry, found := mdb.data[dbKey]
	if !found {
		mdb.data[dbKey] = dataRC{Data: value, RC: 1}
		return
	}

	if entry.RC <= 0 {
		entry.Data = value
	}
	entry.RC++
	mdb.data[dbKey] = entry
}

// Insert hashes value with a zero-value Hasher, stores it through Emplace under that hash and
// prefix, and returns the hash. A value equal to the null-node data is not stored; the
// null-node hash is returned instead.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Insert(prefix hashdb.Prefix, value []byte) H {
	if string(mdb.nullNodeData) == string(value) {
		return mdb.hashedNullNode
	}

	var hasher Hasher
	hashed := hasher.Hash(value)
	mdb.Emplace(hashed, prefix, value)
	return hashed
}

// Remove drops one reference from the entry for key and prefix. The entry itself is kept,
// whatever its resulting count. A key without an entry gets one with no data and a count of
// -1. The null-node hash is ignored.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Remove(key H, prefix hashdb.Prefix) {
	if key == mdb.hashedNullNode {
		return
	}

	var kf KF
	dbKey := kf.Key(key, prefix)
	// A missing key reads as the zero dataRC (nil data, count 0), so a single decrement covers
	// both the existing-entry and the new-entry case.
	entry := mdb.data[dbKey]
	entry.RC--
	mdb.data[dbKey] = entry
}

// Keys returns a new map from every stored key to its reference count, leaving out entries
// whose count is zero.
func (mdb *MemoryDB[H, Hasher, Key, KF]) Keys() map[Key]int32 {
	counts := make(map[Key]int32)
	for key, entry := range mdb.data {
		if entry.RC == 0 {
			continue
		}
		counts[key] = entry.RC
	}
	return counts
}

// KeyFunction derives the key under which MemoryDB stores a value from the value's hash and
// its prefix.
type KeyFunction[Hash constraints.Ordered, Key any] interface {
// Key returns the storage key for hash and prefix.
Key(hash Hash, prefix hashdb.Prefix) Key
}

// HashKey is a key function that only uses the hash; the prefix is ignored.
type HashKey[H Hash] struct{}

// Key returns hash unchanged.
func (HashKey[H]) Key(hash H, prefix hashdb.Prefix) H {
	return hash
}

// PrefixedKey is a key function that concatenates prefix and hash.
type PrefixedKey[H Hash] struct{}

// Key returns the bytes produced by NewPrefixedKey for key and prefix, converted to a string.
func (PrefixedKey[H]) Key(key H, prefix hashdb.Prefix) string {
	prefixed := NewPrefixedKey(key, prefix)
	return string(prefixed)
}

// NewPrefixedKey derives a database key from the hash value of the node (key) and the node
// prefix. The result is prefix.Key, followed by the padded byte when prefix.Padded is non-nil,
// followed by key.Bytes().
//
// The key is built in a freshly allocated buffer. Appending directly to prefix.Key would write
// into the caller's backing array whenever that slice has spare capacity (for example when it
// is a sub-slice of a longer key), silently corrupting bytes the caller still owns.
func NewPrefixedKey[H Hash](key H, prefix hashdb.Prefix) []byte {
	hashBytes := key.Bytes()

	size := len(prefix.Key) + len(hashBytes)
	if prefix.Padded != nil {
		size++
	}

	prefixedKey := make([]byte, 0, size)
	prefixedKey = append(prefixedKey, prefix.Key...)
	if prefix.Padded != nil {
		prefixedKey = append(prefixedKey, *prefix.Padded)
	}
	return append(prefixedKey, hashBytes...)
}
81 changes: 81 additions & 0 deletions internal/memory-db/memory_db_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package memorydb

import (
"testing"

hashdb "github.com/ChainSafe/gossamer/internal/hash-db"
"github.com/ChainSafe/gossamer/internal/primitives/core/hash"
"github.com/ChainSafe/gossamer/internal/primitives/core/hashing"
"github.com/stretchr/testify/assert"
)

// Compile-time checks that HashKey and PrefixedKey satisfy KeyFunction.
var (
_ KeyFunction[hash.H256, hash.H256] = HashKey[hash.H256]{}
_ KeyFunction[hash.H256, string] = PrefixedKey[hash.H256]{}
)

// Keccak256 is the Keccak-256 hasher implementation used by these tests.
type Keccak256 struct{}

// Produce the hash of some byte-slice.
func (k256 Keccak256) Hash(s []byte) hash.H256 {
h := hashing.Keccak256(s)

Check failure on line 22 in internal/memory-db/memory_db_test.go

View workflow job for this annotation

GitHub Actions / linting

undefined: hashing.Keccak256 (typecheck)

Check failure on line 22 in internal/memory-db/memory_db_test.go

View workflow job for this annotation

GitHub Actions / unit-tests

undefined: hashing.Keccak256
return hash.H256(h[:])
}

// TestMemoryDB_RemoveAndPurge checks how Remove, Purge, Insert and removeAndPurge affect the
// reference count of a single entry.
func TestMemoryDB_RemoveAndPurge(t *testing.T) {
helloBytes := []byte("Hello world!")
helloKey := Keccak256{}.Hash(helloBytes)

// Removing a key that was never inserted records an entry with a count of -1.
m := NewMemoryDB[hash.H256, Keccak256, hash.H256, HashKey[hash.H256]]([]byte{0})
m.Remove(helloKey, hashdb.EmptyPrefix)
assert.Equal(t, int32(-1), m.raw(helloKey, hashdb.EmptyPrefix).RC)
// Purge only deletes entries whose count is exactly zero, so the -1 entry survives.
m.Purge()
assert.Equal(t, int32(-1), m.raw(helloKey, hashdb.EmptyPrefix).RC)
// Inserting the value brings the count back to zero.
m.Insert(hashdb.EmptyPrefix, helloBytes)
assert.Equal(t, int32(0), m.raw(helloKey, hashdb.EmptyPrefix).RC)
// With the count at zero, Purge deletes the entry.
m.Purge()
assert.Nil(t, m.raw(helloKey, hashdb.EmptyPrefix))

// Same scenario through removeAndPurge on a fresh database: the key is absent, so nothing is
// returned and a -1 entry is recorded.
m = NewMemoryDB[hash.H256, Keccak256, hash.H256, HashKey[hash.H256]]([]byte{0})
assert.Nil(t, m.removeAndPurge(helloKey, hashdb.EmptyPrefix))
assert.Equal(t, int32(-1), m.raw(helloKey, hashdb.EmptyPrefix).RC)
// Two inserts raise the count from -1 to 1.
m.Insert(hashdb.EmptyPrefix, helloBytes)
m.Insert(hashdb.EmptyPrefix, helloBytes)
assert.Equal(t, int32(1), m.raw(helloKey, hashdb.EmptyPrefix).RC)
// With the count at 1, removeAndPurge deletes the entry and returns its data.
assert.Equal(t, helloBytes, m.removeAndPurge(helloKey, hashdb.EmptyPrefix))
assert.Nil(t, m.raw(helloKey, hashdb.EmptyPrefix))
// The entry is gone again, so a further call returns nil.
assert.Nil(t, m.removeAndPurge(helloKey, hashdb.EmptyPrefix))
}

// TestMemoryDB_Consolidate checks that Consolidate sums reference counts and takes the
// incoming data when the receiving entry has a negative count.
func TestMemoryDB_Consolidate(t *testing.T) {
main := NewMemoryDB[hash.H256, Keccak256, hash.H256, HashKey[hash.H256]]([]byte{0})
other := NewMemoryDB[hash.H256, Keccak256, hash.H256, HashKey[hash.H256]]([]byte{0})
// "doggo": count 1 in other, count -1 (no data) in main.
removeKey := other.Insert(hashdb.EmptyPrefix, []byte("doggo"))
main.Remove(removeKey, hashdb.EmptyPrefix)

// "arf": count 1 in both databases.
insertKey := other.Insert(hashdb.EmptyPrefix, []byte("arf"))
main.Emplace(insertKey, hashdb.EmptyPrefix, []byte("arf"))

// "negative": count 1 - 2 = -1 in other, count -1 (no data) in main.
negativeRemoveKey := other.Insert(hashdb.EmptyPrefix, []byte("negative"))
other.Remove(negativeRemoveKey, hashdb.EmptyPrefix)
other.Remove(negativeRemoveKey, hashdb.EmptyPrefix)
main.Remove(negativeRemoveKey, hashdb.EmptyPrefix)

main.Consolidate(&other)

// Counts are summed per key; main's data-less entries pick up the data from other.
assert.Equal(t, &dataRC{[]byte("doggo"), 0}, main.raw(removeKey, hashdb.EmptyPrefix))
assert.Equal(t, &dataRC{[]byte("arf"), 2}, main.raw(insertKey, hashdb.EmptyPrefix))
assert.Equal(t, &dataRC{[]byte("negative"), -2}, main.raw(negativeRemoveKey, hashdb.EmptyPrefix))
}

// TestMemoryDB_DefaultWorks checks the null-node handling: inserting the null-node data returns
// the null-node hash, and that hash is always reported as contained.
func TestMemoryDB_DefaultWorks(t *testing.T) {
db := NewMemoryDB[hash.H256, Keccak256, hash.H256, HashKey[hash.H256]]([]byte{0})
hashedNullNode := Keccak256{}.Hash([]byte{0})
// Inserting bytes equal to the null-node data yields the null-node hash.
assert.Equal(t, hashedNullNode, db.Insert(hashdb.EmptyPrefix, []byte{0}))

// Both databases were built with the same null-node data, so each contains the other's root.
db2 := NewMemoryDB[hash.H256, Keccak256, hash.H256, HashKey[hash.H256]]([]byte{0})
root := db2.hashedNullNode
assert.True(t, db2.Contains(root, hashdb.EmptyPrefix))
assert.True(t, db.Contains(root, hashdb.EmptyPrefix))
}
4 changes: 2 additions & 2 deletions pkg/trie/triedb/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ It offers functionalities for writing and reading operations and uses lazy loadi
- **Reads**: Basic functions to get data from the trie.
- **Lazy Loading**: Load data on demand.
- **Caching**: Enhances search performance.
- **Compatibility**: Works with any database implementing the `db.RWDatabase` interface and any cache implementing the `Cache` interface.
- **Compatibility**: Works with any database implementing the `hashdb.HashDB` interface and any cache implementing the `TrieCache` interface.
- **Merkle proofs**: Create and verify merkle proofs.
- **Iterator**: Traverse the trie keys in order.

Expand All @@ -29,7 +29,7 @@ trie := triedb.NewEmptyTrieDB(db)
To insert a key and its associated value:

```go
err := trie.Put([]byte("key"), []byte("value"))
err := trie.Set([]byte("key"), []byte("value"))
```

### Get Data
Expand Down
Loading

0 comments on commit 929ac3c

Please sign in to comment.