From 917f1e1252050c6a833a64422e51cd08fc222f0d Mon Sep 17 00:00:00 2001
From: Calvin Kim
Date: Mon, 22 Apr 2024 16:12:03 +0900
Subject: [PATCH 1/2] utreexobackends, blockchain: move nodesMapSlice to
 utreexobackends

Moving nodesMapSlice to a separate package ensures that its private
fields cannot be accessed from outside the package, which makes the
code safer for concurrent use.

This addresses a bug where concurrent access to a map in nodesMapSlice
resulted in a runtime panic.
---
 .../internal/utreexobackends/nodesmap.go      | 226 ++++++++++++++
 .../internal/utreexobackends/nodesmap_test.go |  66 ++++
 blockchain/utreexoio.go                       | 283 +++----------------
 blockchain/utreexoio_test.go                  |  76 +----
 4 files changed, 343 insertions(+), 308 deletions(-)
 create mode 100644 blockchain/internal/utreexobackends/nodesmap.go
 create mode 100644 blockchain/internal/utreexobackends/nodesmap_test.go

diff --git a/blockchain/internal/utreexobackends/nodesmap.go b/blockchain/internal/utreexobackends/nodesmap.go
new file mode 100644
index 00000000..edf6693d
--- /dev/null
+++ b/blockchain/internal/utreexobackends/nodesmap.go
@@ -0,0 +1,226 @@
+package utreexobackends
+
+import (
+	"sync"
+
+	"github.com/utreexo/utreexo"
+	"github.com/utreexo/utreexod/blockchain/internal/sizehelper"
+)
+
+const (
+	// Calculated with unsafe.Sizeof(cachedLeaf{}).
+	cachedLeafSize = 34
+
+	// Bucket size for the node map.
+	nodesMapBucketSize = 16 + sizehelper.Uint64Size*sizehelper.Uint64Size + sizehelper.Uint64Size*cachedLeafSize
+)
+
+// CachedFlag is the status of each of the cached elements in the NodesBackEnd.
+type CachedFlag uint8
+
+const (
+	// Fresh means it's never been in the database.
+	Fresh CachedFlag = 1 << iota
+
+	// Modified means it's been in the database and has been modified in the cache.
+	Modified
+
+	// Removed means that the key it belongs to has been removed but it's still
+	// in the cache.
+	Removed
+)
+
+// CachedLeaf has the leaf and a flag for the status in the cache.
+type CachedLeaf struct {
+	Leaf  utreexo.Leaf
+	Flags CachedFlag
+}
+
+// IsFresh returns whether the cached leaf has never been in the database.
+func (c *CachedLeaf) IsFresh() bool {
+	return c.Flags&Fresh == Fresh
+}
+
+// IsModified returns whether the cached leaf has been in the database and was modified in the cache.
+func (c *CachedLeaf) IsModified() bool {
+	return c.Flags&Modified == Modified
+}
+
+// IsRemoved returns whether the key for this cached leaf has been removed.
+func (c *CachedLeaf) IsRemoved() bool {
+	return c.Flags&Removed == Removed
+}
+
+// NodesMapSlice is a slice of maps for cached utreexo leaves. The slice of maps is needed to
+// guarantee that the maps will only take up N bytes. As of v1.20, the
+// Go runtime will allocate 2^N plus a few extra buckets, meaning that for large N, we'll
+// allocate a lot of extra memory if the amount of entries goes over the previously
+// allocated buckets. A slice of maps allows us to have better control of how much
+// total memory gets allocated by all the maps.
+type NodesMapSlice struct {
+	// mtx protects against concurrent access for the map slice.
+	mtx *sync.Mutex
+
+	// maps are the underlying maps in the slice of maps.
+	maps []map[uint64]CachedLeaf
+
+	// maxEntries is the maximum number of elements that each map is allocated for.
+	maxEntries []int
+
+	// maxTotalMemoryUsage is the maximum memory usage in bytes that the state
+	// should contain in normal circumstances.
+	maxTotalMemoryUsage uint64
+}
+
+// Length returns the length of all the maps in the map slice added together.
+//
+// This function is safe for concurrent access.
+func (ms *NodesMapSlice) Length() int {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	var l int
+	for _, m := range ms.maps {
+		l += len(m)
+	}
+
+	return l
+}
+
+// Get looks for the key in all the maps in the map slice and returns
+// the entry. A zero value and false are returned if the key is not found.
+//
+// This function is safe for concurrent access.
+func (ms *NodesMapSlice) Get(k uint64) (CachedLeaf, bool) {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	var v CachedLeaf
+	var found bool
+
+	for _, m := range ms.maps {
+		v, found = m[k]
+		if found {
+			return v, found
+		}
+	}
+
+	return v, found
+}
+
+// Put puts the key and the value into one of the maps in the map slice. If the
+// existing maps are all full and it fails to put the entry in the cache, it will
+// return false.
+//
+// This function is safe for concurrent access.
+func (ms *NodesMapSlice) Put(k uint64, v CachedLeaf) bool {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	for i := range ms.maxEntries {
+		m := ms.maps[i]
+		_, found := m[k]
+		if found {
+			m[k] = v
+			return true
+		}
+	}
+
+	for i, maxNum := range ms.maxEntries {
+		m := ms.maps[i]
+		if len(m) >= maxNum {
+			// Don't try to insert if the map is already at max since
+			// that'll force the map to allocate double the memory it's
+			// currently taking up.
+			continue
+		}
+
+		m[k] = v
+		return true // Return as we were successful in adding the entry.
+	}
+
+	// We only reach this code if we've failed to insert into the map above as
+	// all the current maps were full.
+	return false
+}
+
+// delete attempts to delete the given key in all of the maps. No-op if the
+// key doesn't exist.
+//
+// This function is safe for concurrent access.
+func (ms *NodesMapSlice) delete(k uint64) {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	for i := 0; i < len(ms.maps); i++ {
+		delete(ms.maps[i], k)
+	}
+}
+
+// DeleteMaps deletes all maps and allocates new ones with the maxEntries defined in
+// ms.maxEntries.
+//
+// This function is safe for concurrent access.
+func (ms *NodesMapSlice) DeleteMaps() {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	for i := range ms.maxEntries {
+		ms.maps[i] = make(map[uint64]CachedLeaf, ms.maxEntries[i])
+	}
+}
+
+// ForEach loops through all the elements in the nodes map slice and calls fn with the key-value pairs.
+//
+// This function is safe for concurrent access.
+func (ms *NodesMapSlice) ForEach(fn func(uint64, CachedLeaf)) {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	for _, m := range ms.maps {
+		for k, v := range m {
+			fn(k, v)
+		}
+	}
+}
+
+// createMaps creates a slice of maps and returns the total count that the maps
+// can handle. maxEntries is also set along with the newly created maps.
+func (ms *NodesMapSlice) createMaps(maxMemoryUsage int64) int64 {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	if maxMemoryUsage <= 0 {
+		return 0
+	}
+
+	// Get the entry count for the maps we'll allocate.
+	var totalElemCount int
+	ms.maxEntries, totalElemCount = sizehelper.CalcNumEntries(nodesMapBucketSize, maxMemoryUsage)
+
+	// A maxMemoryUsage that's smaller than the minimum map size will return a totalElemCount
+	// that's equal to 0.
+	if totalElemCount <= 0 {
+		return 0
+	}
+
+	// Create the maps.
+	ms.maps = make([]map[uint64]CachedLeaf, len(ms.maxEntries))
+	for i := range ms.maxEntries {
+		ms.maps[i] = make(map[uint64]CachedLeaf, ms.maxEntries[i])
+	}
+
+	return int64(totalElemCount)
+}
+
+// NewNodesMapSlice returns a new NodesMapSlice and the total number of elements
+// that the map slice can accommodate.
+func NewNodesMapSlice(maxTotalMemoryUsage int64) (NodesMapSlice, int64) {
+	ms := NodesMapSlice{
+		mtx:                 new(sync.Mutex),
+		maxTotalMemoryUsage: uint64(maxTotalMemoryUsage),
+	}
+
+	totalCacheElem := ms.createMaps(maxTotalMemoryUsage)
+	return ms, totalCacheElem
+}
diff --git a/blockchain/internal/utreexobackends/nodesmap_test.go b/blockchain/internal/utreexobackends/nodesmap_test.go
new file mode 100644
index 00000000..de962924
--- /dev/null
+++ b/blockchain/internal/utreexobackends/nodesmap_test.go
@@ -0,0 +1,66 @@
+package utreexobackends
+
+import "testing"
+
+func TestNodesMapSliceMaxCacheElems(t *testing.T) {
+	_, maxCacheElems := NewNodesMapSlice(0)
+	if maxCacheElems != 0 {
+		t.Fatalf("expected %v got %v", 0, maxCacheElems)
+	}
+
+	_, maxCacheElems = NewNodesMapSlice(-1)
+	if maxCacheElems != 0 {
+		t.Fatalf("expected %v got %v", 0, maxCacheElems)
+	}
+
+	_, maxCacheElems = NewNodesMapSlice(8000)
+	if maxCacheElems <= 0 {
+		t.Fatalf("expected something bigger than 0 but got %v", maxCacheElems)
+	}
+}
+
+func TestNodesMapSliceDuplicates(t *testing.T) {
+	m, maxElems := NewNodesMapSlice(8000)
+	for i := 0; i < 10; i++ {
+		for j := int64(0); j < maxElems; j++ {
+			if !m.Put(uint64(j), CachedLeaf{}) {
+				t.Fatalf("unexpected failure on m.Put")
+			}
+		}
+	}
+
+	if m.Length() != int(maxElems) {
+		t.Fatalf("expected length of %v but got %v",
+			maxElems, m.Length())
+	}
+
+	// Try inserting x which should be unique. Should fail as the map is full.
+	x := uint64(0)
+	x -= 1
+	if m.Put(x, CachedLeaf{}) {
+		t.Fatalf("expected failure but successfully called Put")
+	}
+
+	// Remove the first element in the first map and then try inserting
+	// a duplicate element.
+	m.delete(0)
+	x = uint64(maxElems) - 1
+	if !m.Put(x, CachedLeaf{}) {
+		t.Fatalf("unexpected failure on Put")
+	}
+
+	// Make sure the length of the map is 1 less than the max elems.
+	if m.Length() != int(maxElems)-1 {
+		t.Fatalf("expected length of %v but got %v",
+			maxElems-1, m.Length())
+	}
+
+	// Put 0 back in and then compare the map.
+	if !m.Put(0, CachedLeaf{}) {
+		t.Fatalf("unexpected failure on Put")
+	}
+	if m.Length() != int(maxElems) {
+		t.Fatalf("expected length of %v but got %v",
+			maxElems, m.Length())
+	}
+}
diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go
index 38b124d1..e0062125 100644
--- a/blockchain/utreexoio.go
+++ b/blockchain/utreexoio.go
@@ -11,6 +11,7 @@ import (
 	"github.com/syndtr/goleveldb/leveldb"
 	"github.com/utreexo/utreexo"
 	"github.com/utreexo/utreexod/blockchain/internal/sizehelper"
+	"github.com/utreexo/utreexod/blockchain/internal/utreexobackends"
 	"github.com/utreexo/utreexod/chaincfg/chainhash"
 )
 
@@ -40,214 +41,18 @@ func deserializeLeaf(serialized [leafLength]byte) utreexo.Leaf {
 	return leaf
 }
 
-// cachedFlag is the status of each of the cached elements in the NodesBackEnd.
-type cachedFlag uint8
-
 const (
-	// fresh means it's never been in the database
-	fresh cachedFlag = 1 << iota
-
-	// modified means it's been in the database and has been modified in the cache.
-	modified
-
-	// removed means that the key it belongs to has been removed but it's still
-	// in the cache.
- removed -) - -// cachedLeaf has the leaf and a flag for the status in the cache. -type cachedLeaf struct { - leaf utreexo.Leaf - flags cachedFlag -} - -// isFresh returns if the cached leaf has never been in the database. -func (c *cachedLeaf) isFresh() bool { - return c.flags&fresh == fresh -} - -// isModified returns if the cached leaf has been in the database and was modified in the cache. -func (c *cachedLeaf) isModified() bool { - return c.flags&modified == modified -} - -// isRemoved returns if the key for this cached leaf has been removed. -func (c *cachedLeaf) isRemoved() bool { - return c.flags&removed == removed -} - -const ( - // Calculated with unsafe.Sizeof(cachedLeaf{}). - cachedLeafSize = 34 - - // Bucket size for the node map. - nodesMapBucketSize = 16 + sizehelper.Uint64Size*sizehelper.Uint64Size + sizehelper.Uint64Size*cachedLeafSize - // Bucket size for the cached leaves map. cachedLeavesMapBucketSize = 16 + sizehelper.Uint64Size*chainhash.HashSize + sizehelper.Uint64Size*sizehelper.Uint64Size ) -// nodesMapSlice is a slice of maps for utxo entries. The slice of maps are needed to -// guarantee that the map will only take up N amount of bytes. As of v1.20, the -// go runtime will allocate 2^N + few extra buckets, meaning that for large N, we'll -// allocate a lot of extra memory if the amount of entries goes over the previously -// allocated buckets. A slice of maps allows us to have a better control of how much -// total memory gets allocated by all the maps. -type nodesMapSlice struct { - // mtx protects against concurrent access for the map slice. - mtx *sync.Mutex - - // maps are the underlying maps in the slice of maps. - maps []map[uint64]cachedLeaf - - // maxEntries is the maximum amount of elemnts that the map is allocated for. - maxEntries []int - - // maxTotalMemoryUsage is the maximum memory usage in bytes that the state - // should contain in normal circumstances. - maxTotalMemoryUsage uint64 -} - -// length returns the length of all the maps in the map slice added together. -// -// This function is safe for concurrent access. -func (ms *nodesMapSlice) length() int { - ms.mtx.Lock() - defer ms.mtx.Unlock() - - var l int - for _, m := range ms.maps { - l += len(m) - } - - return l -} - -// get looks for the outpoint in all the maps in the map slice and returns -// the entry. nil and false is returned if the outpoint is not found. -// -// This function is safe for concurrent access. -func (ms *nodesMapSlice) get(k uint64) (cachedLeaf, bool) { - ms.mtx.Lock() - defer ms.mtx.Unlock() - - var v cachedLeaf - var found bool - - for _, m := range ms.maps { - v, found = m[k] - if found { - return v, found - } - } - - return v, found -} - -// put puts the keys and the values into one of the maps in the map slice. If the -// existing maps are all full and it fails to put the entry in the cache, it will -// return false. -// -// This function is safe for concurrent access. -func (ms *nodesMapSlice) put(k uint64, v cachedLeaf) bool { - ms.mtx.Lock() - defer ms.mtx.Unlock() - - for i := range ms.maxEntries { - m := ms.maps[i] - _, found := m[k] - if found { - m[k] = v - return true - } - } - - for i, maxNum := range ms.maxEntries { - m := ms.maps[i] - if len(m) >= maxNum { - // Don't try to insert if the map already at max since - // that'll force the map to allocate double the memory it's - // currently taking up. - continue - } - - m[k] = v - return true // Return as we were successful in adding the entry. 
- } - - // We only reach this code if we've failed to insert into the map above as - // all the current maps were full. - return false -} - -// delete attempts to delete the given outpoint in all of the maps. No-op if the -// key doesn't exist. -// -// This function is safe for concurrent access. -func (ms *nodesMapSlice) delete(k uint64) { - ms.mtx.Lock() - defer ms.mtx.Unlock() - - for i := 0; i < len(ms.maps); i++ { - delete(ms.maps[i], k) - } -} - -// deleteMaps deletes all maps and allocate new ones with the maxEntries defined in -// ms.maxEntries. -// -// This function is safe for concurrent access. -func (ms *nodesMapSlice) deleteMaps() { - for i := range ms.maxEntries { - ms.maps[i] = make(map[uint64]cachedLeaf, ms.maxEntries[i]) - } -} - -// createMaps creates a slice of maps and returns the total count that the maps -// can handle. maxEntries are also set along with the newly created maps. -func (ms *nodesMapSlice) createMaps(maxMemoryUsage int64) int64 { - if maxMemoryUsage <= 0 { - return 0 - } - - // Get the entry count for the maps we'll allocate. - var totalElemCount int - ms.maxEntries, totalElemCount = sizehelper.CalcNumEntries(nodesMapBucketSize, maxMemoryUsage) - - // maxMemoryUsage that's smaller than the minimum map size will return a totalElemCount - // that's equal to 0. - if totalElemCount <= 0 { - return 0 - } - - // Create the maps. - ms.maps = make([]map[uint64]cachedLeaf, len(ms.maxEntries)) - for i := range ms.maxEntries { - ms.maps[i] = make(map[uint64]cachedLeaf, ms.maxEntries[i]) - } - - return int64(totalElemCount) -} - -// newNodesMapSlice returns a newNodesMapSlice and the total amount of elements -// that the map slice can accomodate. -func newNodesMapSlice(maxTotalMemoryUsage int64) (nodesMapSlice, int64) { - ms := nodesMapSlice{ - mtx: new(sync.Mutex), - maxTotalMemoryUsage: uint64(maxTotalMemoryUsage), - } - - totalCacheElem := ms.createMaps(maxTotalMemoryUsage) - return ms, totalCacheElem -} - var _ utreexo.NodesInterface = (*NodesBackEnd)(nil) // NodesBackEnd implements the NodesInterface interface. type NodesBackEnd struct { db *leveldb.DB maxCacheElem int64 - cache nodesMapSlice + cache utreexobackends.NodesMapSlice } // InitNodesBackEnd returns a newly initialized NodesBackEnd which implements @@ -258,7 +63,7 @@ func InitNodesBackEnd(datadir string, maxTotalMemoryUsage int64) (*NodesBackEnd, return nil, err } - cache, maxCacheElems := newNodesMapSlice(maxTotalMemoryUsage) + cache, maxCacheElems := utreexobackends.NewNodesMapSlice(maxTotalMemoryUsage) nb := NodesBackEnd{ db: db, maxCacheElem: maxCacheElems, @@ -313,22 +118,22 @@ func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { } // Look it up on the cache first. - cLeaf, found := m.cache.get(k) + cLeaf, found := m.cache.Get(k) if found { // The leaf might not have been cleaned up yet. - if cLeaf.isRemoved() { + if cLeaf.IsRemoved() { return utreexo.Leaf{}, false } - // If the cache is full, flush the cache then put + // If the cache is full, flush the cache then Put // the leaf in. - if !m.cache.put(k, cLeaf) { + if !m.cache.Put(k, cLeaf) { m.flush() - m.cache.put(k, cLeaf) + m.cache.Put(k, cLeaf) } // If we found it, return here. - return cLeaf.leaf, true + return cLeaf.Leaf, true } // Since it's not in the cache, look it up in the database. @@ -340,9 +145,9 @@ func (m *NodesBackEnd) Get(k uint64) (utreexo.Leaf, bool) { } // Cache the leaf before returning it. 
- if !m.cache.put(k, cachedLeaf{leaf: leaf}) { + if !m.cache.Put(k, utreexobackends.CachedLeaf{Leaf: leaf}) { m.flush() - m.cache.put(k, cachedLeaf{leaf: leaf}) + m.cache.Put(k, utreexobackends.CachedLeaf{Leaf: leaf}) } return leaf, true } @@ -358,34 +163,34 @@ func (m *NodesBackEnd) Put(k uint64, v utreexo.Leaf) { return } - if int64(m.cache.length()) > m.maxCacheElem { + if int64(m.cache.Length()) > m.maxCacheElem { m.flush() } - leaf, found := m.cache.get(k) + leaf, found := m.cache.Get(k) if found { - leaf.flags &^= removed - l := cachedLeaf{ - leaf: v, - flags: leaf.flags | modified, + leaf.Flags &^= utreexobackends.Removed + l := utreexobackends.CachedLeaf{ + Leaf: v, + Flags: leaf.Flags | utreexobackends.Modified, } // It shouldn't fail here but handle it anyways. - if !m.cache.put(k, l) { + if !m.cache.Put(k, l) { m.flush() - m.cache.put(k, l) + m.cache.Put(k, l) } } else { // If the key isn't found, mark it as fresh. - l := cachedLeaf{ - leaf: v, - flags: fresh, + l := utreexobackends.CachedLeaf{ + Leaf: v, + Flags: utreexobackends.Fresh, } // It shouldn't fail here but handle it anyways. - if !m.cache.put(k, l) { + if !m.cache.Put(k, l) { m.flush() - m.cache.put(k, l) + m.cache.Put(k, l) } } } @@ -402,19 +207,19 @@ func (m *NodesBackEnd) Delete(k uint64) { return } - leaf, found := m.cache.get(k) + leaf, found := m.cache.Get(k) if !found { - if int64(m.cache.length()) >= m.maxCacheElem { + if int64(m.cache.Length()) >= m.maxCacheElem { m.flush() } } - l := cachedLeaf{ - leaf: leaf.leaf, - flags: leaf.flags | removed, + l := utreexobackends.CachedLeaf{ + Leaf: leaf.Leaf, + Flags: leaf.Flags | utreexobackends.Removed, } - if !m.cache.put(k, l) { + if !m.cache.Put(k, l) { m.flush() - m.cache.put(k, l) + m.cache.Put(k, l) } } @@ -464,23 +269,21 @@ func (m *NodesBackEnd) flush() { return } - for _, mm := range m.cache.maps { - for k, v := range mm { - if v.isRemoved() { - err := m.dbDel(k) - if err != nil { - log.Warnf("NodesBackEnd flush error. %v", err) - } - } else if v.isFresh() || v.isModified() { - err := m.dbPut(k, v.leaf) - if err != nil { - log.Warnf("NodesBackEnd flush error. %v", err) - } + m.cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) { + if v.IsRemoved() { + err := m.dbDel(k) + if err != nil { + log.Warnf("NodesBackEnd flush error. %v", err) + } + } else if v.IsFresh() || v.IsModified() { + err := m.dbPut(k, v.Leaf) + if err != nil { + log.Warnf("NodesBackEnd flush error. %v", err) } } - } + }) - m.cache.deleteMaps() + m.cache.DeleteMaps() } // Close flushes the cache and closes the underlying database. 
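
The hunk above is the core of this patch: NodesBackEnd.flush no longer
ranges over m.cache.maps directly and instead drains the cache through
the exported, mutex-guarded ForEach and DeleteMaps methods. As a rough
sketch of the resulting pattern (not part of the patch; persist and
remove are hypothetical stand-ins for the real dbPut/dbDel helpers, and
the function is assumed to live in the blockchain package so the
internal utreexobackends import and the package-level log are
available):

	func flushSketch(cache *utreexobackends.NodesMapSlice,
		persist func(k uint64, l utreexo.Leaf) error,
		remove func(k uint64) error) {

		// Drain the cache through the lock-holding iterator; only
		// dirty entries need to touch the database.
		cache.ForEach(func(k uint64, v utreexobackends.CachedLeaf) {
			if v.IsRemoved() {
				// The key was deleted while cached; mirror
				// that in the database.
				if err := remove(k); err != nil {
					log.Warnf("flush delete error. %v", err)
				}
			} else if v.IsFresh() || v.IsModified() {
				if err := persist(k, v.Leaf); err != nil {
					log.Warnf("flush put error. %v", err)
				}
			}
		})

		// Throw away the old maps and reallocate them at their
		// original sizes so the memory bound still holds.
		cache.DeleteMaps()
	}

Note that ForEach holds the mutex for the entire iteration, so fn must
not call back into the map slice: a Put, Get, or Length call from inside
the callback would deadlock on the same mutex.
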
diff --git a/blockchain/utreexoio_test.go b/blockchain/utreexoio_test.go index 478eb89e..9611c908 100644 --- a/blockchain/utreexoio_test.go +++ b/blockchain/utreexoio_test.go @@ -9,25 +9,11 @@ import ( "testing" "github.com/utreexo/utreexo" + "github.com/utreexo/utreexod/blockchain/internal/utreexobackends" ) func TestNodesMapSliceMaxCacheElems(t *testing.T) { - _, maxCacheElems := newNodesMapSlice(0) - if maxCacheElems != 0 { - t.Fatalf("expected %v got %v", 0, maxCacheElems) - } - - _, maxCacheElems = newNodesMapSlice(-1) - if maxCacheElems != 0 { - t.Fatalf("expected %v got %v", 0, maxCacheElems) - } - - _, maxCacheElems = newNodesMapSlice(8000) - if maxCacheElems <= 0 { - t.Fatalf("expected something bigger than 0 but got %v", maxCacheElems) - } - - _, maxCacheElems = newCachedLeavesMapSlice(0) + _, maxCacheElems := newCachedLeavesMapSlice(0) if maxCacheElems != 0 { t.Fatalf("expected %v got %v", 0, maxCacheElems) } @@ -43,52 +29,6 @@ func TestNodesMapSliceMaxCacheElems(t *testing.T) { } } -func TestNodesMapSliceDuplicates(t *testing.T) { - m, maxElems := newNodesMapSlice(8000) - for i := 0; i < 10; i++ { - for j := int64(0); j < maxElems; j++ { - if !m.put(uint64(j), cachedLeaf{}) { - t.Fatalf("unexpected error on m.put") - } - } - } - - if m.length() != int(maxElems) { - t.Fatalf("expected length of %v but got %v", - maxElems, m.length()) - } - - // Try inserting x which should be unique. Should fail as the map is full. - x := uint64(0) - x -= 1 - if m.put(x, cachedLeaf{}) { - t.Fatalf("expected error but successfully called put") - } - - // Remove the first element in the first map and then try inserting - // a duplicate element. - m.delete(0) - x = uint64(maxElems) - 1 - if !m.put(x, cachedLeaf{}) { - t.Fatalf("unexpected failure on put") - } - - // Make sure the length of the map is 1 less than the max elems. - if m.length() != int(maxElems)-1 { - t.Fatalf("expected length of %v but got %v", - maxElems-1, m.length()) - } - - // Put 0 back in and then compare the map. 
-	if !m.put(0, cachedLeaf{}) {
-		t.Fatalf("didn't expect error but unsuccessfully called put")
-	}
-	if m.length() != int(maxElems) {
-		t.Fatalf("expected length of %v but got %v",
-			maxElems, m.length())
-	}
-}
-
 func uint64ToHash(v uint64) utreexo.Hash {
 	var buf [8]byte
 	binary.LittleEndian.PutUint64(buf[:], v)
 	return sha256.Sum256(buf[:])
 }
 
@@ -308,13 +248,13 @@ func TestNodesBackEnd(t *testing.T) {
 			defer os.RemoveAll(test.tmpDir)
 
 			count := uint64(1000)
-			compareMap := make(map[uint64]cachedLeaf)
+			compareMap := make(map[uint64]utreexobackends.CachedLeaf)
 			for i := uint64(0); i < count/2; i++ {
 				var buf [8]byte
 				binary.LittleEndian.PutUint64(buf[:], i)
 				hash := sha256.Sum256(buf[:])
 
-				compareMap[i] = cachedLeaf{leaf: utreexo.Leaf{Hash: hash}}
+				compareMap[i] = utreexobackends.CachedLeaf{Leaf: utreexo.Leaf{Hash: hash}}
 				nodesBackEnd.Put(i, utreexo.Leaf{Hash: hash})
 			}
 
@@ -371,7 +311,7 @@ func TestNodesBackEnd(t *testing.T) {
 				binary.LittleEndian.PutUint64(buf[:], i)
 				hash := sha256.Sum256(buf[:])
 
-				compareMap[i] = cachedLeaf{leaf: utreexo.Leaf{Hash: hash}}
+				compareMap[i] = utreexobackends.CachedLeaf{Leaf: utreexo.Leaf{Hash: hash}}
 			}
 
 			if nodesBackEnd.Length() != len(compareMap) {
@@ -386,9 +326,9 @@ func TestNodesBackEnd(t *testing.T) {
 				t.Fatalf("expected %v but it wasn't found", v)
 			}
 
-			if got.Hash != v.leaf.Hash {
-				if got.Hash != v.leaf.Hash {
-					t.Fatalf("for key %v, expected %v but got %v", k, v.leaf.Hash, got.Hash)
+			if got.Hash != v.Leaf.Hash {
+				if got.Hash != v.Leaf.Hash {
+					t.Fatalf("for key %v, expected %v but got %v", k, v.Leaf.Hash, got.Hash)
 				}
 			}
 		}

From 8144ed91543c61546a3c5a98c74e7b69d44aab70 Mon Sep 17 00:00:00 2001
From: Calvin Kim
Date: Mon, 22 Apr 2024 16:56:12 +0900
Subject: [PATCH 2/2] blockchain, utreexobackends: move cached leaves map to
 utreexobackends

Moving the cached leaves map to a separate package ensures that its
private fields cannot be accessed directly. This gives better
concurrency guarantees: the exported methods of the cached leaves map
are concurrency safe, while direct field access would bypass those
safety measures.
---
 .../utreexobackends/cachedleavesmap.go        | 189 ++++++++++++++++++
 .../utreexobackends/cachedleavesmap_test.go   |  78 ++++++++
 blockchain/utreexoio.go                       | 182 ++---------------
 blockchain/utreexoio_test.go                  |  69 -------
 4 files changed, 279 insertions(+), 239 deletions(-)
 create mode 100644 blockchain/internal/utreexobackends/cachedleavesmap.go
 create mode 100644 blockchain/internal/utreexobackends/cachedleavesmap_test.go

diff --git a/blockchain/internal/utreexobackends/cachedleavesmap.go b/blockchain/internal/utreexobackends/cachedleavesmap.go
new file mode 100644
index 00000000..cc46f26d
--- /dev/null
+++ b/blockchain/internal/utreexobackends/cachedleavesmap.go
@@ -0,0 +1,189 @@
+package utreexobackends
+
+import (
+	"sync"
+
+	"github.com/utreexo/utreexo"
+	"github.com/utreexo/utreexod/blockchain/internal/sizehelper"
+	"github.com/utreexo/utreexod/chaincfg/chainhash"
+)
+
+const (
+	// Bucket size for the cached leaves map.
+	cachedLeavesMapBucketSize = 16 + sizehelper.Uint64Size*chainhash.HashSize + sizehelper.Uint64Size*sizehelper.Uint64Size
+)
+
+// CachedLeavesMapSlice is a slice of maps for cached utreexo leaf positions. The slice of maps is needed to
+// guarantee that the maps will only take up N bytes. As of v1.20, the
+// Go runtime will allocate 2^N plus a few extra buckets, meaning that for large N, we'll
+// allocate a lot of extra memory if the amount of entries goes over the previously
+// allocated buckets. A slice of maps allows us to have better control of how much
+// total memory gets allocated by all the maps.
+type CachedLeavesMapSlice struct {
+	// mtx protects against concurrent access for the map slice.
+	mtx *sync.Mutex
+
+	// maps are the underlying maps in the slice of maps.
+	maps []map[utreexo.Hash]uint64
+
+	// maxEntries is the maximum number of elements that each map is allocated for.
+	maxEntries []int
+
+	// maxTotalMemoryUsage is the maximum memory usage in bytes that the state
+	// should contain in normal circumstances.
+	maxTotalMemoryUsage uint64
+}
+
+// Length returns the length of all the maps in the map slice added together.
+//
+// This function is safe for concurrent access.
+func (ms *CachedLeavesMapSlice) Length() int {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	var l int
+	for _, m := range ms.maps {
+		l += len(m)
+	}
+
+	return l
+}
+
+// Get looks for the key in all the maps in the map slice and returns
+// the entry. 0 and false are returned if the key is not found.
+//
+// This function is safe for concurrent access.
+func (ms *CachedLeavesMapSlice) Get(k utreexo.Hash) (uint64, bool) {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	var v uint64
+	var found bool
+
+	for _, m := range ms.maps {
+		v, found = m[k]
+		if found {
+			return v, found
+		}
+	}
+
+	return 0, false
+}
+
+// Put puts the key and the value into one of the maps in the map slice. If the
+// existing maps are all full and it fails to put the entry in the cache, it will
+// return false.
+//
+// This function is safe for concurrent access.
+func (ms *CachedLeavesMapSlice) Put(k utreexo.Hash, v uint64) bool {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	for i := range ms.maxEntries {
+		m := ms.maps[i]
+		_, found := m[k]
+		if found {
+			m[k] = v
+			return true
+		}
+	}
+
+	for i, maxNum := range ms.maxEntries {
+		m := ms.maps[i]
+		if len(m) >= maxNum {
+			// Don't try to insert if the map is already at max since
+			// that'll force the map to allocate double the memory it's
+			// currently taking up.
+			continue
+		}
+
+		m[k] = v
+		return true // Return as we were successful in adding the entry.
+	}
+
+	// We only reach this code if we've failed to insert into the map above as
+	// all the current maps were full.
+	return false
+}
+
+// Delete attempts to delete the given key in all of the maps. No-op if the
+// key doesn't exist.
+//
+// This function is safe for concurrent access.
+func (ms *CachedLeavesMapSlice) Delete(k utreexo.Hash) {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	for i := 0; i < len(ms.maps); i++ {
+		delete(ms.maps[i], k)
+	}
+}
+
+// DeleteMaps deletes all maps and allocates new ones with the maxEntries defined in
+// ms.maxEntries.
+//
+// This function is safe for concurrent access.
+func (ms *CachedLeavesMapSlice) DeleteMaps() {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	ms.maps = make([]map[utreexo.Hash]uint64, len(ms.maxEntries))
+	for i := range ms.maxEntries {
+		ms.maps[i] = make(map[utreexo.Hash]uint64, ms.maxEntries[i])
+	}
+}
+
+// ForEach loops through all the elements in the cached leaves map slice and calls fn with the key-value pairs.
+//
+// This function is safe for concurrent access.
+func (ms *CachedLeavesMapSlice) ForEach(fn func(utreexo.Hash, uint64)) {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	for _, m := range ms.maps {
+		for k, v := range m {
+			fn(k, v)
+		}
+	}
+}
+
+// createMaps creates a slice of maps and returns the total count that the maps
+// can handle. maxEntries is also set along with the newly created maps.
+func (ms *CachedLeavesMapSlice) createMaps(maxMemoryUsage int64) int64 {
+	ms.mtx.Lock()
+	defer ms.mtx.Unlock()
+
+	if maxMemoryUsage <= 0 {
+		return 0
+	}
+
+	// Get the entry count for the maps we'll allocate.
+	var totalElemCount int
+	ms.maxEntries, totalElemCount = sizehelper.CalcNumEntries(cachedLeavesMapBucketSize, maxMemoryUsage)
+
+	// A maxMemoryUsage that's smaller than the minimum map size will return a totalElemCount
+	// that's equal to 0.
+	if totalElemCount <= 0 {
+		return 0
+	}
+
+	// Create the maps.
+	ms.maps = make([]map[utreexo.Hash]uint64, len(ms.maxEntries))
+	for i := range ms.maxEntries {
+		ms.maps[i] = make(map[utreexo.Hash]uint64, ms.maxEntries[i])
+	}
+
+	return int64(totalElemCount)
+}
+
+// NewCachedLeavesMapSlice returns a new CachedLeavesMapSlice and the total number of elements
+// that the map slice can accommodate.
+func NewCachedLeavesMapSlice(maxTotalMemoryUsage int64) (CachedLeavesMapSlice, int64) {
+	ms := CachedLeavesMapSlice{
+		mtx:                 new(sync.Mutex),
+		maxTotalMemoryUsage: uint64(maxTotalMemoryUsage),
+	}
+
+	totalCacheElem := ms.createMaps(maxTotalMemoryUsage)
+	return ms, totalCacheElem
+}
diff --git a/blockchain/internal/utreexobackends/cachedleavesmap_test.go b/blockchain/internal/utreexobackends/cachedleavesmap_test.go
new file mode 100644
index 00000000..50735df0
--- /dev/null
+++ b/blockchain/internal/utreexobackends/cachedleavesmap_test.go
@@ -0,0 +1,78 @@
+package utreexobackends
+
+import (
+	"crypto/sha256"
+	"encoding/binary"
+	"testing"
+
+	"github.com/utreexo/utreexo"
+)
+
+func TestCachedLeavesMapSliceMaxCacheElems(t *testing.T) {
+	_, maxCacheElems := NewCachedLeavesMapSlice(0)
+	if maxCacheElems != 0 {
+		t.Fatalf("expected %v got %v", 0, maxCacheElems)
+	}
+
+	_, maxCacheElems = NewCachedLeavesMapSlice(-1)
+	if maxCacheElems != 0 {
+		t.Fatalf("expected %v got %v", 0, maxCacheElems)
+	}
+
+	_, maxCacheElems = NewCachedLeavesMapSlice(8000)
+	if maxCacheElems <= 0 {
+		t.Fatalf("expected something bigger than 0 but got %v", maxCacheElems)
+	}
+}
+
+func uint64ToHash(v uint64) utreexo.Hash {
+	var buf [8]byte
+	binary.LittleEndian.PutUint64(buf[:], v)
+	return sha256.Sum256(buf[:])
+}
+
+func TestCachedLeavesMapSliceDuplicates(t *testing.T) {
+	m, maxElems := NewCachedLeavesMapSlice(8000)
+	for i := 0; i < 10; i++ {
+		for j := int64(0); j < maxElems; j++ {
+			if !m.Put(uint64ToHash(uint64(j)), 0) {
+				t.Fatalf("unexpected failure on m.Put")
+			}
+		}
+	}
+
+	if m.Length() != int(maxElems) {
+		t.Fatalf("expected length of %v but got %v",
+			maxElems, m.Length())
+	}
+
+	// Try inserting x which should be unique. Should fail as the map is full.
+	x := uint64(0)
+	x -= 1
+	if m.Put(uint64ToHash(x), 0) {
+		t.Fatalf("expected failure but successfully called Put")
+	}
+
+	// Remove the first element in the first map and then try inserting
+	// a duplicate element.
+	m.Delete(uint64ToHash(0))
+	x = uint64(maxElems) - 1
+	if !m.Put(uint64ToHash(x), 0) {
+		t.Fatalf("unexpected failure on Put")
+	}
+
+	// Make sure the length of the map is 1 less than the max elems.
+	if m.Length() != int(maxElems)-1 {
+		t.Fatalf("expected length of %v but got %v",
+			maxElems-1, m.Length())
+	}
+
+	// Put 0 back in and then compare the map.
+	if !m.Put(uint64ToHash(0), 0) {
+		t.Fatalf("unexpected failure on Put")
+	}
+	if m.Length() != int(maxElems) {
+		t.Fatalf("expected length of %v but got %v",
+			maxElems, m.Length())
+	}
+}
diff --git a/blockchain/utreexoio.go b/blockchain/utreexoio.go
index e0062125..28b22d23 100644
--- a/blockchain/utreexoio.go
+++ b/blockchain/utreexoio.go
@@ -6,11 +6,9 @@ package blockchain
 
 import (
 	"fmt"
-	"sync"
 
 	"github.com/syndtr/goleveldb/leveldb"
 	"github.com/utreexo/utreexo"
-	"github.com/utreexo/utreexod/blockchain/internal/sizehelper"
 	"github.com/utreexo/utreexod/blockchain/internal/utreexobackends"
 	"github.com/utreexo/utreexod/chaincfg/chainhash"
 )
@@ -41,11 +39,6 @@ func deserializeLeaf(serialized [leafLength]byte) utreexo.Leaf {
 	return leaf
 }
 
-const (
-	// Bucket size for the cached leaves map.
-	cachedLeavesMapBucketSize = 16 + sizehelper.Uint64Size*chainhash.HashSize + sizehelper.Uint64Size*sizehelper.Uint64Size
-)
-
 var _ utreexo.NodesInterface = (*NodesBackEnd)(nil)
 
 // NodesBackEnd implements the NodesInterface interface.
@@ -293,150 +286,6 @@ func (m *NodesBackEnd) Close() error {
 	return m.db.Close()
 }
 
-// cachedLeavesMapSlice is a slice of maps for utxo entries. The slice of maps are needed to
-// guarantee that the map will only take up N amount of bytes. As of v1.20, the
-// go runtime will allocate 2^N + few extra buckets, meaning that for large N, we'll
-// allocate a lot of extra memory if the amount of entries goes over the previously
-// allocated buckets. A slice of maps allows us to have a better control of how much
-// total memory gets allocated by all the maps.
-type cachedLeavesMapSlice struct {
-	// mtx protects against concurrent access for the map slice.
-	mtx *sync.Mutex
-
-	// maps are the underlying maps in the slice of maps.
-	maps []map[utreexo.Hash]uint64
-
-	// maxEntries is the maximum amount of elemnts that the map is allocated for.
-	maxEntries []int
-
-	// maxTotalMemoryUsage is the maximum memory usage in bytes that the state
-	// should contain in normal circumstances.
-	maxTotalMemoryUsage uint64
-}
-
-// length returns the length of all the maps in the map slice added together.
-//
-// This function is safe for concurrent access.
-func (ms *cachedLeavesMapSlice) length() int {
-	ms.mtx.Lock()
-	defer ms.mtx.Unlock()
-
-	var l int
-	for _, m := range ms.maps {
-		l += len(m)
-	}
-
-	return l
-}
-
-// get looks for the outpoint in all the maps in the map slice and returns
-// the entry. nil and false is returned if the outpoint is not found.
-//
-// This function is safe for concurrent access.
-func (ms *cachedLeavesMapSlice) get(k utreexo.Hash) (uint64, bool) {
-	ms.mtx.Lock()
-	defer ms.mtx.Unlock()
-
-	var v uint64
-	var found bool
-
-	for _, m := range ms.maps {
-		v, found = m[k]
-		if found {
-			return v, found
-		}
-	}
-
-	return 0, false
-}
-
-// put puts the keys and the values into one of the maps in the map slice. If the
-// existing maps are all full and it fails to put the entry in the cache, it will
-// return false.
-//
-// This function is safe for concurrent access.
-func (ms *cachedLeavesMapSlice) put(k utreexo.Hash, v uint64) bool {
-	ms.mtx.Lock()
-	defer ms.mtx.Unlock()
-
-	for i := range ms.maxEntries {
-		m := ms.maps[i]
-		_, found := m[k]
-		if found {
-			m[k] = v
-			return true
-		}
-	}
-
-	for i, maxNum := range ms.maxEntries {
-		m := ms.maps[i]
-		if len(m) >= maxNum {
-			// Don't try to insert if the map already at max since
-			// that'll force the map to allocate double the memory it's
-			// currently taking up.
- continue - } - - m[k] = v - return true // Return as we were successful in adding the entry. - } - - // We only reach this code if we've failed to insert into the map above as - // all the current maps were full. - return false -} - -// delete attempts to delete the given outpoint in all of the maps. No-op if the -// outpoint doesn't exist. -// -// This function is safe for concurrent access. -func (ms *cachedLeavesMapSlice) delete(k utreexo.Hash) { - ms.mtx.Lock() - defer ms.mtx.Unlock() - - for i := 0; i < len(ms.maps); i++ { - delete(ms.maps[i], k) - } -} - -// createMaps creates a slice of maps and returns the total count that the maps -// can handle. maxEntries are also set along with the newly created maps. -func (ms *cachedLeavesMapSlice) createMaps(maxMemoryUsage int64) int64 { - if maxMemoryUsage <= 0 { - return 0 - } - - // Get the entry count for the maps we'll allocate. - var totalElemCount int - ms.maxEntries, totalElemCount = sizehelper.CalcNumEntries(cachedLeavesMapBucketSize, maxMemoryUsage) - - // maxMemoryUsage that's smaller than the minimum map size will return a totalElemCount - // that's equal to 0. - if totalElemCount <= 0 { - return 0 - } - - // Create the maps. - ms.maps = make([]map[utreexo.Hash]uint64, len(ms.maxEntries)) - for i := range ms.maxEntries { - ms.maps[i] = make(map[utreexo.Hash]uint64, ms.maxEntries[i]) - } - - return int64(totalElemCount) -} - -// newCachedLeavesMapSlice returns a newCachedLeavesMapSlice and the total amount of elements -// that the map slice can accomodate. -func newCachedLeavesMapSlice(maxTotalMemoryUsage int64) (cachedLeavesMapSlice, int64) { - ms := cachedLeavesMapSlice{ - mtx: new(sync.Mutex), - maxTotalMemoryUsage: uint64(maxTotalMemoryUsage), - } - - totalCacheElem := ms.createMaps(maxTotalMemoryUsage) - return ms, totalCacheElem -} - var _ utreexo.CachedLeavesInterface = (*CachedLeavesBackEnd)(nil) // CachedLeavesBackEnd implements the CachedLeavesInterface interface. The cache assumes @@ -444,7 +293,7 @@ var _ utreexo.CachedLeavesInterface = (*CachedLeavesBackEnd)(nil) type CachedLeavesBackEnd struct { db *leveldb.DB maxCacheElem int64 - cache cachedLeavesMapSlice + cache utreexobackends.CachedLeavesMapSlice } // dbPut serializes and puts the key and the value into the database. @@ -474,7 +323,7 @@ func InitCachedLeavesBackEnd(datadir string, maxMemoryUsage int64) (*CachedLeave return nil, err } - cache, maxCacheElem := newCachedLeavesMapSlice(maxMemoryUsage) + cache, maxCacheElem := utreexobackends.NewCachedLeavesMapSlice(maxMemoryUsage) return &CachedLeavesBackEnd{maxCacheElem: maxCacheElem, db: db, cache: cache}, nil } @@ -484,7 +333,7 @@ func (m *CachedLeavesBackEnd) Get(k utreexo.Hash) (uint64, bool) { return m.dbGet(k) } - pos, found := m.cache.get(k) + pos, found := m.cache.Get(k) if !found { return m.dbGet(k) } @@ -504,18 +353,18 @@ func (m *CachedLeavesBackEnd) Put(k utreexo.Hash, v uint64) { return } - length := m.cache.length() + length := m.cache.Length() if int64(length) >= m.maxCacheElem { m.flush() } - m.cache.put(k, v) + m.cache.Put(k, v) } // Delete removes the given key from the underlying map. No-op if the key // doesn't exist. func (m *CachedLeavesBackEnd) Delete(k utreexo.Hash) { - m.cache.delete(k) + m.cache.Delete(k) m.db.Delete(k[:], nil) } @@ -555,21 +404,14 @@ func (m *CachedLeavesBackEnd) ForEach(fn func(utreexo.Hash, uint64) error) error // Flush resets the cache and saves all the key values onto the database. 
func (m *CachedLeavesBackEnd) flush() { - for i := range m.cache.maxEntries { - mp := m.cache.maps[i] - for k, v := range mp { - err := m.dbPut(k, v) - if err != nil { - log.Warnf("CachedLeavesBackEnd dbPut fail. %v", err) - } + m.cache.ForEach(func(k utreexo.Hash, v uint64) { + err := m.dbPut(k, v) + if err != nil { + log.Warnf("CachedLeavesBackEnd dbPut fail. %v", err) } - } + }) - // Create the maps. - m.cache.maps = make([]map[utreexo.Hash]uint64, len(m.cache.maxEntries)) - for i := range m.cache.maxEntries { - m.cache.maps[i] = make(map[utreexo.Hash]uint64, m.cache.maxEntries[i]) - } + m.cache.DeleteMaps() } // Close flushes all the cached entries and then closes the underlying database. diff --git a/blockchain/utreexoio_test.go b/blockchain/utreexoio_test.go index 9611c908..33ced614 100644 --- a/blockchain/utreexoio_test.go +++ b/blockchain/utreexoio_test.go @@ -12,75 +12,6 @@ import ( "github.com/utreexo/utreexod/blockchain/internal/utreexobackends" ) -func TestNodesMapSliceMaxCacheElems(t *testing.T) { - _, maxCacheElems := newCachedLeavesMapSlice(0) - if maxCacheElems != 0 { - t.Fatalf("expected %v got %v", 0, maxCacheElems) - } - - _, maxCacheElems = newCachedLeavesMapSlice(-1) - if maxCacheElems != 0 { - t.Fatalf("expected %v got %v", 0, maxCacheElems) - } - - _, maxCacheElems = newCachedLeavesMapSlice(8000) - if maxCacheElems <= 0 { - t.Fatalf("expected something bigger than 0 but got %v", maxCacheElems) - } -} - -func uint64ToHash(v uint64) utreexo.Hash { - var buf [8]byte - binary.LittleEndian.PutUint64(buf[:], v) - return sha256.Sum256(buf[:]) -} - -func TestCachedLeaveMapSliceDuplicates(t *testing.T) { - m, maxElems := newCachedLeavesMapSlice(8000) - for i := 0; i < 10; i++ { - for j := int64(0); j < maxElems; j++ { - if !m.put(uint64ToHash(uint64(j)), 0) { - t.Fatalf("unexpected error on m.put") - } - } - } - - if m.length() != int(maxElems) { - t.Fatalf("expected length of %v but got %v", - maxElems, m.length()) - } - - // Try inserting x which should be unique. Should fail as the map is full. - x := uint64(0) - x -= 1 - if m.put(uint64ToHash(x), 0) { - t.Fatalf("expected error but successfully called put") - } - - // Remove the first element in the first map and then try inserting - // a duplicate element. - m.delete(uint64ToHash(0)) - x = uint64(maxElems) - 1 - if !m.put(uint64ToHash(x), 0) { - t.Fatalf("unexpected failure on put") - } - - // Make sure the length of the map is 1 less than the max elems. - if m.length() != int(maxElems)-1 { - t.Fatalf("expected length of %v but got %v", - maxElems-1, m.length()) - } - - // Put 0 back in and then compare the map. - if !m.put(uint64ToHash(0), 0) { - t.Fatalf("didn't expect error but unsuccessfully called put") - } - if m.length() != int(maxElems) { - t.Fatalf("expected length of %v but got %v", - maxElems, m.length()) - } -} - func TestCachedLeavesBackEnd(t *testing.T) { tests := []struct { tmpDir string
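
The same bounded-memory discipline carries over to CachedLeavesBackEnd.
Because CachedLeavesMapSlice.Put reports failure rather than letting a
map grow past its pre-allocated buckets, callers have to flush and retry
when the cache is full. A minimal sketch of that contract (putWithBound
is a hypothetical helper, not part of the patch, assumed to live in the
blockchain package):

	func putWithBound(cache *utreexobackends.CachedLeavesMapSlice,
		flush func(), k utreexo.Hash, v uint64) {

		if !cache.Put(k, v) {
			// Every map in the slice was at capacity; drain the
			// cache to the database and retry. The retry succeeds
			// because flush is expected to end with DeleteMaps,
			// which reallocates the maps empty at their original
			// sizes.
			flush()
			cache.Put(k, v)
		}
	}

Refusing the insert instead of growing is deliberate: once a Go map
exceeds its pre-sized buckets, the runtime doubles its bucket array,
which would blow past the memory budget the map slice was created with.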