Skip to content

Commit

Permalink
Add tools to scan and compute hash for IAVL db (#2059)
Browse files Browse the repository at this point in the history
* Add tools to scan and compute hash for IAVL db

* Fix lint

* Fix lint

* Fix

* Fix

* Fix

* Pebble scanner (#2061)

* Pebble scanner

* Update name

* Update command

---------

Co-authored-by: Kartik Bhat <[email protected]>
  • Loading branch information
yzang2019 and Kbhat1 authored Feb 1, 2025
1 parent f5e6716 commit 51f6572
Show file tree
Hide file tree
Showing 10 changed files with 335 additions and 4 deletions.
3 changes: 3 additions & 0 deletions tools/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package tools
import (
"github.com/spf13/cobra"

hasher "github.com/sei-protocol/sei-chain/tools/hash_verification/cmd"
migration "github.com/sei-protocol/sei-chain/tools/migration/cmd"
scanner "github.com/sei-protocol/sei-chain/tools/tx-scanner/cmd"
)
Expand All @@ -16,5 +17,7 @@ func ToolCmd() *cobra.Command {
toolsCmd.AddCommand(migration.MigrateCmd())
toolsCmd.AddCommand(migration.VerifyMigrationCmd())
toolsCmd.AddCommand(migration.GenerateStats())
toolsCmd.AddCommand(hasher.GenerateIavlHashCmd())
toolsCmd.AddCommand(hasher.GeneratePebbleHashCmd())
return toolsCmd
}
65 changes: 65 additions & 0 deletions tools/hash_verification/cmd/cmd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package cmd

import (
"path/filepath"

"github.com/spf13/cobra"

"github.com/sei-protocol/sei-chain/tools/hash_verification/iavl"
"github.com/sei-protocol/sei-chain/tools/hash_verification/pebbledb"
"github.com/sei-protocol/sei-db/config"
sstypes "github.com/sei-protocol/sei-db/ss"
"github.com/tendermint/tendermint/libs/log"
dbm "github.com/tendermint/tm-db"
)

// GenerateIavlHashCmd builds the "generate-iavl-hash" command, which scans the
// IAVL archive database and prints a hash per module for every N blocks.
//
// Flags:
//   - home-dir: Sei home directory (default "/root/.sei").
//   - blocks-interval: number of blocks covered by each hash (default 1,000,000).
func GenerateIavlHashCmd() *cobra.Command {
	iavlCmd := &cobra.Command{
		Use:   "generate-iavl-hash",
		Short: "A tool to scan full IAVL archive database and generate a hash for every N blocks per module",
		Run:   generateIavlHash,
	}
	flags := iavlCmd.PersistentFlags()
	flags.String("home-dir", "/root/.sei", "Sei home directory")
	flags.Int64("blocks-interval", 1_000_000, "Generate a hash every N blocks")
	return iavlCmd
}

// generateIavlHash is the Run handler of the generate-iavl-hash command.
//
// It opens the "application" LevelDB under <home-dir>/data, scans every module
// and prints the resulting hashes. Any failure aborts the tool with a panic,
// which is how the rest of this command reports fatal errors.
func generateIavlHash(cmd *cobra.Command, _ []string) {
	homeDir, err := cmd.Flags().GetString("home-dir")
	if err != nil {
		panic(err)
	}
	blocksInterval, err := cmd.Flags().GetInt64("blocks-interval")
	if err != nil {
		panic(err)
	}
	// The scanner divides the latest version by this interval to size its
	// buckets, so zero or negative values must be rejected up front instead of
	// surfacing later as a divide-by-zero or negative-length panic.
	if blocksInterval <= 0 {
		panic("blocks-interval must be a positive number of blocks")
	}

	dataDir := filepath.Join(homeDir, "data")
	db, err := dbm.NewGoLevelDB("application", dataDir)
	if err != nil {
		panic(err)
	}
	// The database is only read; a close error leaves nothing to recover.
	defer func() { _ = db.Close() }()

	scanner := iavl.NewHashScanner(db, blocksInterval)
	scanner.ScanAllModules()
}

// GeneratePebbleHashCmd builds the "generate-pebble-hash" command, which scans
// the Pebble archive database and prints a hash per module for every N blocks.
//
// Flags:
//   - home-dir: Sei home directory (default "/root/.sei").
//   - blocks-interval: number of blocks covered by each hash (default 1,000,000).
func GeneratePebbleHashCmd() *cobra.Command {
	pebbleCmd := &cobra.Command{
		Use:   "generate-pebble-hash",
		Short: "A tool to scan full Pebble archive database and generate a hash for every N blocks per module",
		Run:   generatePebbleHash,
	}
	flags := pebbleCmd.PersistentFlags()
	flags.String("home-dir", "/root/.sei", "Sei home directory")
	flags.Int64("blocks-interval", 1_000_000, "Generate a hash every N blocks")
	return pebbleCmd
}

// generatePebbleHash is the Run handler of the generate-pebble-hash command.
//
// It opens the state store rooted at home-dir with the default state-store
// configuration (enabled, KeepRecent set to 0), scans every module and prints
// the resulting hashes. Any failure aborts the tool with a panic.
func generatePebbleHash(cmd *cobra.Command, _ []string) {
	homeDir, err := cmd.Flags().GetString("home-dir")
	if err != nil {
		panic(err)
	}
	blocksInterval, err := cmd.Flags().GetInt64("blocks-interval")
	if err != nil {
		panic(err)
	}
	// The scanner divides the latest version by this interval to size its
	// buckets, so zero or negative values must be rejected up front instead of
	// surfacing later as a divide-by-zero or negative-length panic.
	if blocksInterval <= 0 {
		panic("blocks-interval must be a positive number of blocks")
	}

	ssConfig := config.DefaultStateStoreConfig()
	ssConfig.Enable = true
	// NOTE(review): KeepRecent = 0 presumably disables pruning so the full
	// archive is visible to the scan — confirm against the sei-db config docs.
	ssConfig.KeepRecent = 0
	stateStore, err := sstypes.NewStateStore(log.NewNopLogger(), homeDir, ssConfig)
	if err != nil {
		panic(err)
	}
	// The store is only read; a close error leaves nothing to recover.
	defer func() { _ = stateStore.Close() }()

	scanner := pebbledb.NewHashScanner(stateStore, blocksInterval)
	scanner.ScanAllModules()
}
8 changes: 8 additions & 0 deletions tools/hash_verification/hasher/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package hasher

// HashCalculator defines the interface for calculating chained state hash.
type HashCalculator interface {
// HashSingle returns the hash of a single data element.
HashSingle(data []byte) []byte
// HashTwo combines two data elements into a single hash value.
HashTwo(dataA []byte, dataB []byte) []byte
// ComputeHashes processes the calculator's input and returns the resulting
// list of hashes.
ComputeHashes() [][]byte
}
103 changes: 103 additions & 0 deletions tools/hash_verification/hasher/xor_hasher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package hasher

import (
"crypto/sha256"
"encoding/binary"
"sync"

"github.com/sei-protocol/sei-db/ss/types"
)

var _ HashCalculator = (*XorHashCalculator)(nil)

// XorHashCalculator is the hash calculator backed by XoR hash.
type XorHashCalculator struct {
// NumBlocksPerWorker is the number of versions (blocks) assigned to each
// worker; an entry's version divided by this value selects its worker.
NumBlocksPerWorker int64
// NumOfWorkers is the number of worker goroutines, and therefore the number
// of hashes returned by ComputeHashes.
NumOfWorkers int
// DataCh is the input stream of entries to hash; ComputeHashes reads from it
// until it is closed.
DataCh chan types.RawSnapshotNode
}

// NewXorHashCalculator creates a new XorHashCalculator.
//
// numBlocksPerWorker is the number of versions each worker covers, numWorkers
// is the number of workers (and resulting hashes), and data is the channel
// from which entries to hash are read.
func NewXorHashCalculator(numBlocksPerWorker int64, numWorkers int, data chan types.RawSnapshotNode) XorHashCalculator {
	var calculator XorHashCalculator
	calculator.NumBlocksPerWorker = numBlocksPerWorker
	calculator.NumOfWorkers = numWorkers
	calculator.DataCh = data
	return calculator
}

// HashSingle returns the 32-byte SHA-256 digest of data.
func (x XorHashCalculator) HashSingle(data []byte) []byte {
	digest := sha256.New()
	// hash.Hash.Write never returns an error.
	digest.Write(data)
	return digest.Sum(nil)
}

// HashTwo combines two data elements by XOR-ing them byte by byte and returns
// the result in a newly allocated slice. Both inputs must have the same
// length; otherwise the function panics.
func (x XorHashCalculator) HashTwo(dataA []byte, dataB []byte) []byte {
	size := len(dataA)
	if size != len(dataB) {
		panic("Expecting both data to have equal length for computing a XoR hash")
	}
	xored := make([]byte, size)
	for idx, b := range dataB {
		xored[idx] = dataA[idx] ^ b
	}
	return xored
}

// ComputeHashes drains DataCh and returns NumOfWorkers hashes, one per bucket
// of NumBlocksPerWorker versions.
//
// Each entry is routed to bucket Version / NumBlocksPerWorker, where a
// dedicated goroutine hashes the serialized entry with HashSingle and folds it
// into the bucket hash with HashTwo. After DataCh is closed and all workers
// finish, the bucket hashes are chained: every non-empty bucket is combined
// with the (already chained) hash of the previous bucket, and an empty bucket
// inherits the previous bucket's hash. Buckets that precede the first entry
// stay nil.
//
// It panics when NumBlocksPerWorker is not positive, when NumOfWorkers is
// negative, or when an entry's version falls outside the configured buckets.
func (x XorHashCalculator) ComputeHashes() [][]byte {
	// Validate up front so misconfiguration fails with a clear message rather
	// than a divide-by-zero or a bare index-out-of-range panic mid-scan.
	if x.NumBlocksPerWorker <= 0 {
		panic("XorHashCalculator: NumBlocksPerWorker must be positive")
	}
	if x.NumOfWorkers < 0 {
		panic("XorHashCalculator: NumOfWorkers must not be negative")
	}

	var wg sync.WaitGroup
	allChannels := make([]chan types.RawSnapshotNode, x.NumOfWorkers)
	allHashes := make([][]byte, x.NumOfWorkers)
	// First calculate each sub hash in a separate goroutine
	for i := 0; i < x.NumOfWorkers; i++ {
		subsetChan := make(chan types.RawSnapshotNode, 1000)
		allChannels[i] = subsetChan
		wg.Add(1)
		go func(index int, items <-chan types.RawSnapshotNode) {
			defer wg.Done()
			var hashResult []byte
			for item := range items {
				entryHash := x.HashSingle(Serialize(item))
				if hashResult == nil {
					hashResult = entryHash
				} else {
					hashResult = x.HashTwo(hashResult, entryHash)
				}
			}
			// Each worker writes only its own slot, so no locking is needed.
			allHashes[index] = hashResult
		}(i, subsetChan)
	}
	// Push all the data to its corresponding channel based on version
	for data := range x.DataCh {
		index := data.Version / x.NumBlocksPerWorker
		if index < 0 || index >= int64(len(allChannels)) {
			panic("XorHashCalculator: entry version is outside the range covered by the configured workers")
		}
		allChannels[index] <- data
	}
	// Close all sub channels so the workers terminate
	for _, subChan := range allChannels {
		close(subChan)
	}
	// Wait for all workers to complete
	wg.Wait()
	// Now modify sub hashes to hash again with previous hash
	for i := 1; i < len(allHashes); i++ {
		if len(allHashes[i-1]) > 0 && len(allHashes[i]) > 0 {
			allHashes[i] = x.HashTwo(allHashes[i-1], allHashes[i])
		} else if len(allHashes[i-1]) > 0 && len(allHashes[i]) == 0 {
			allHashes[i] = allHashes[i-1]
		}
	}
	return allHashes
}

// Serialize encodes a node as key bytes, followed by value bytes, followed by
// the version as an 8-byte little-endian unsigned integer. StoreKey is not
// part of the encoding.
//
// NOTE(review): key and value are concatenated without length prefixes, so
// ("ab", "c") and ("a", "bc") at the same version serialize identically.
// Changing the layout would change every hash produced by this tool.
func Serialize(node types.RawSnapshotNode) []byte {
	const versionSize = 8
	encoded := make([]byte, 0, len(node.Key)+len(node.Value)+versionSize)
	encoded = append(encoded, node.Key...)
	encoded = append(encoded, node.Value...)
	encoded = binary.LittleEndian.AppendUint64(encoded, uint64(node.Version))
	return encoded
}
82 changes: 82 additions & 0 deletions tools/hash_verification/iavl/scanner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package iavl

import (
"bytes"
"fmt"

"github.com/cosmos/cosmos-sdk/store/rootmulti"
"github.com/cosmos/iavl"
"github.com/sei-protocol/sei-chain/tools/hash_verification/hasher"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/ss/types"
dbm "github.com/tendermint/tm-db"
)

// HashScanner scans an IAVL database module by module and computes hashes of
// the stored leaf nodes, bucketed by block interval.
type HashScanner struct {
// db is the database that is scanned.
db dbm.DB
// latestVersion is the latest version found in db when the scanner was created.
latestVersion int64
// blocksInterval is the number of blocks covered by each hash bucket.
blocksInterval int64
// hashResult stores the computed hashes per module name.
hashResult map[string][][]byte
}

// NewHashScanner creates a HashScanner for the given IAVL database. It reads
// the latest version from db, prints it, and remembers blocksInterval as the
// number of blocks per hash bucket.
func NewHashScanner(db dbm.DB, blocksInterval int64) *HashScanner {
	version := rootmulti.GetLatestVersion(db)
	fmt.Printf("Detected IAVL latest version: %d\n", version)

	scanner := new(HashScanner)
	scanner.db = db
	scanner.latestVersion = version
	scanner.blocksInterval = blocksInterval
	scanner.hashResult = make(map[string][][]byte)
	return scanner
}

// ScanAllModules scans every module listed in utils.Modules and prints one
// line per hash bucket, labelled with the height at the end of that bucket.
func (s *HashScanner) ScanAllModules() {
	for _, moduleName := range utils.Modules {
		hashes := s.scanAllHeights(moduleName)
		for idx := range hashes {
			height := s.blocksInterval * (int64(idx) + 1)
			fmt.Printf("Module %s height %d hash is: %X\n", moduleName, height, hashes[idx])
		}
	}
}

// scanAllHeights computes the bucketed hashes for a single module.
//
// A producer goroutine iterates over every entry stored under the module's
// raw prefix, parses it as an IAVL node and forwards leaf nodes (height 0) to
// the hash calculator through a buffered channel. The calculator is sized
// with latestVersion/blocksInterval+1 buckets. The result is stored in
// s.hashResult under the module name and also returned.
//
// Any database, iteration or parsing failure panics, because a hash computed
// over partial data would be silently wrong.
func (s *HashScanner) scanAllHeights(module string) [][]byte {
	dataCh := make(chan types.RawSnapshotNode, 10000)
	hashCalculator := hasher.NewXorHashCalculator(s.blocksInterval, int(s.latestVersion/s.blocksInterval+1), dataCh)
	fmt.Printf("Starting to scan module: %s\n", module)
	go func() {
		prefixDB := dbm.NewPrefixDB(s.db, []byte(utils.BuildRawPrefix(module)))
		itr, err := prefixDB.Iterator(nil, nil)
		if err != nil {
			panic(fmt.Errorf("failed to create iterator: %w", err))
		}
		defer itr.Close()
		count := 0
		for ; itr.Valid(); itr.Next() {
			// Clone the value before parsing it into a node.
			value := bytes.Clone(itr.Value())
			node, err := iavl.MakeNode(value)
			if err != nil {
				panic(fmt.Errorf("failed to parse iavl node: %w", err))
			}

			// Only scan leaf nodes
			if node.GetHeight() != 0 {
				continue
			}
			snapshotNode := types.RawSnapshotNode{
				StoreKey: module,
				Key:      node.GetNodeKey(),
				Value:    node.GetValue(),
				Version:  node.GetVersion(),
			}
			dataCh <- snapshotNode
			count++
			if count%1000000 == 0 {
				fmt.Printf("Scanned %d items for module %s\n", count, module)
			}
		}
		// The loop also ends when the iterator fails; without this check a
		// truncated scan would be hashed as if it were complete.
		if err := itr.Error(); err != nil {
			panic(fmt.Errorf("failed to iterate module %s: %w", module, err))
		}
		// Closing the channel tells the calculator that all data was sent.
		close(dataCh)
	}()
	allHashes := hashCalculator.ComputeHashes()
	s.hashResult[module] = allHashes
	return allHashes
}
70 changes: 70 additions & 0 deletions tools/hash_verification/pebbledb/scanner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package pebbledb

import (
"fmt"

"github.com/sei-protocol/sei-chain/tools/hash_verification/hasher"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/ss/types"
)

// HashScanner scans a state store module by module and computes hashes of the
// stored entries, bucketed by block interval.
type HashScanner struct {
// db is the state store that is scanned.
db types.StateStore
// latestVersion is the latest version reported by db when the scanner was created.
latestVersion int64
// blocksInterval is the number of blocks covered by each hash bucket.
blocksInterval int64
// hashResult stores the computed hashes per module name.
hashResult map[string][][]byte
}

// NewHashScanner creates a HashScanner for the given state store. It reads
// the latest version from db (panicking if that fails), prints it, and
// remembers blocksInterval as the number of blocks per hash bucket.
func NewHashScanner(db types.StateStore, blocksInterval int64) *HashScanner {
	version, err := db.GetLatestVersion()
	if err != nil {
		panic(err)
	}
	fmt.Printf("Detected Pebbledb latest version: %d\n", version)

	scanner := new(HashScanner)
	scanner.db = db
	scanner.latestVersion = version
	scanner.blocksInterval = blocksInterval
	scanner.hashResult = make(map[string][][]byte)
	return scanner
}

// ScanAllModules scans every module listed in utils.Modules and prints one
// line per hash bucket, labelled with the height at the end of that bucket.
func (s *HashScanner) ScanAllModules() {
	for _, moduleName := range utils.Modules {
		hashes := s.scanAllHeights(moduleName)
		for idx := range hashes {
			height := s.blocksInterval * (int64(idx) + 1)
			fmt.Printf("Module %s height %d hash is: %X\n", moduleName, height, hashes[idx])
		}
	}
}

// scanAllHeights computes the bucketed hashes for a single module.
//
// A producer goroutine walks every raw entry of the module via RawIterate and
// forwards it to the hash calculator through a buffered channel. The
// calculator is sized with latestVersion/blocksInterval+1 buckets. The result
// is stored in s.hashResult under the module name and also returned.
//
// A RawIterate failure panics, because a hash computed over partial data
// would be silently wrong.
func (s *HashScanner) scanAllHeights(module string) [][]byte {
	dataCh := make(chan types.RawSnapshotNode, 10000)
	hashCalculator := hasher.NewXorHashCalculator(s.blocksInterval, int(s.latestVersion/s.blocksInterval+1), dataCh)
	fmt.Printf("Starting to scan module: %s\n", module)
	go func() {
		count := 0
		_, err := s.db.RawIterate(module, func(key, value []byte, version int64) bool {
			// The entry is hashed later by another goroutine. Copy key and
			// value because the callback's slices may be backed by iterator
			// buffers that are reused once the callback returns.
			// NOTE(review): confirm RawIterate's slice ownership; if it already
			// hands out private copies these two copies can be dropped.
			dataCh <- types.RawSnapshotNode{
				StoreKey: module,
				Key:      append([]byte(nil), key...),
				Value:    append([]byte(nil), value...),
				Version:  version,
			}

			count++
			if count%1000000 == 0 {
				fmt.Printf("Scanned %d items for module %s\n", count, module)
			}

			// Returning false keeps the iteration going.
			return false
		})
		if err != nil {
			panic(fmt.Errorf("RawIterate error: %w", err))
		}
		// Closing the channel tells the calculator that all data was sent.
		close(dataCh)
	}()
	allHashes := hashCalculator.ComputeHashes()
	s.hashResult[module] = allHashes
	return allHashes
}
2 changes: 1 addition & 1 deletion tools/migration/cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"github.com/cosmos/iavl"
"github.com/sei-protocol/sei-chain/tools/migration/sc"
"github.com/sei-protocol/sei-chain/tools/migration/ss"
"github.com/sei-protocol/sei-chain/tools/migration/utils"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/config"
sstypes "github.com/sei-protocol/sei-db/ss"
"github.com/spf13/cobra"
Expand Down
2 changes: 1 addition & 1 deletion tools/migration/sc/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
paramstypes "github.com/cosmos/cosmos-sdk/x/params/types"
stakingkeeper "github.com/cosmos/cosmos-sdk/x/staking/keeper"
"github.com/sei-protocol/sei-chain/app/params"
"github.com/sei-protocol/sei-chain/tools/migration/utils"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/config"
"github.com/tendermint/tendermint/libs/log"
dbm "github.com/tendermint/tm-db"
Expand Down
2 changes: 1 addition & 1 deletion tools/migration/ss/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (

"github.com/armon/go-metrics"
"github.com/cosmos/iavl"
"github.com/sei-protocol/sei-chain/tools/migration/utils"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/ss/types"
dbm "github.com/tendermint/tm-db"
)
Expand Down
2 changes: 1 addition & 1 deletion tools/migration/utils/helper.go → tools/utils/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ var ModuleKeys = sdk.NewKVStoreKeys(
)

var Modules = []string{
"acc",
"aclaccesscontrol",
"authz",
"acc",
"bank",
"capability",
"distribution",
Expand Down

0 comments on commit 51f6572

Please sign in to comment.