Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tools to scan and compute hash for IAVL db #2059

Merged
merged 8 commits into from
Feb 1, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions tools/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package tools
import (
"github.com/spf13/cobra"

hasher "github.com/sei-protocol/sei-chain/tools/hash_verification/cmd"
migration "github.com/sei-protocol/sei-chain/tools/migration/cmd"
scanner "github.com/sei-protocol/sei-chain/tools/tx-scanner/cmd"
)
Expand All @@ -16,5 +17,6 @@ func ToolCmd() *cobra.Command {
toolsCmd.AddCommand(migration.MigrateCmd())
toolsCmd.AddCommand(migration.VerifyMigrationCmd())
toolsCmd.AddCommand(migration.GenerateStats())
toolsCmd.AddCommand(hasher.GenerateIavlHashCmd())
return toolsCmd
}
33 changes: 33 additions & 0 deletions tools/hash_verification/cmd/cmd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package cmd

import (
"path/filepath"

"github.com/spf13/cobra"

"github.com/sei-protocol/sei-chain/tools/hash_verification/iavl"
dbm "github.com/tendermint/tm-db"
)

// GenerateIavlHashCmd builds the "generate-iavl-hash" command, which scans the
// IAVL application database and prints a hash per module for every N blocks.
//
// Flags:
//   - home-dir: Sei home directory (the database is opened from its "data" subdirectory).
//   - blocks-interval: number of blocks covered by each generated hash.
func GenerateIavlHashCmd() *cobra.Command {
	hashCmd := new(cobra.Command)
	hashCmd.Use = "generate-iavl-hash"
	hashCmd.Short = "A tool to scan full IAVL archive database and generate a hash for every N blocks per module"
	hashCmd.Run = generateIavlHash

	flags := hashCmd.PersistentFlags()
	flags.String("home-dir", "/root/.sei", "Sei home directory")
	flags.Int64("blocks-interval", 1_000_000, "Generate a hash every N blocks")
	return hashCmd
}

// generateIavlHash is the Run handler of the generate-iavl-hash command. It
// opens the "application" LevelDB under <home-dir>/data, scans every module
// and prints the resulting hashes.
//
// It panics when a flag cannot be read, when blocks-interval is not positive,
// or when the database cannot be opened.
func generateIavlHash(cmd *cobra.Command, _ []string) {
	homeDir, err := cmd.Flags().GetString("home-dir")
	if err != nil {
		panic(err)
	}
	blocksInterval, err := cmd.Flags().GetInt64("blocks-interval")
	if err != nil {
		panic(err)
	}
	// The scanner divides by the interval to size its buckets, so zero or a
	// negative value must be rejected before any work starts.
	if blocksInterval <= 0 {
		panic("blocks-interval must be a positive number of blocks")
	}

	dataDir := filepath.Join(homeDir, "data")
	db, err := dbm.NewGoLevelDB("application", dataDir)
	if err != nil {
		panic(err)
	}
	// Release the database handle once the scan is done. The scan only reads
	// from the database here, so a close error is not actionable and is dropped.
	defer func() { _ = db.Close() }()

	scanner := iavl.NewHashScanner(db, blocksInterval)
	scanner.ScanAllModules()
}
8 changes: 8 additions & 0 deletions tools/hash_verification/hasher/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package hasher

// HashCalculator defines the interface for calculating chained state hash.
type HashCalculator interface {
// HashSingle returns the hash of a single data element.
HashSingle(data []byte) []byte
// HashTwo combines two byte slices into a single result.
// NOTE(review): the XOR-based implementation panics when the two inputs differ in length.
HashTwo(dataA []byte, dataB []byte) []byte
// ComputeHashes runs the calculation and returns the resulting hashes.
ComputeHashes() [][]byte
}
103 changes: 103 additions & 0 deletions tools/hash_verification/hasher/xor_hasher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package hasher

import (
"crypto/sha256"
"encoding/binary"
"sync"

"github.com/sei-protocol/sei-db/ss/types"
)

// Compile-time check that *XorHashCalculator satisfies HashCalculator.
var _ HashCalculator = (*XorHashCalculator)(nil)

// XorHashCalculator is the hash calculator backed by XOR hash.
type XorHashCalculator struct {
// NumBlocksPerWorker is the divisor ComputeHashes applies to a node's Version
// to pick the worker that hashes it.
NumBlocksPerWorker int64
// NumOfWorkers is the number of worker goroutines ComputeHashes starts, and
// the length of the slice it returns.
NumOfWorkers int
// DataCh is the input stream of nodes; ComputeHashes reads from it until it is closed.
DataCh chan types.RawSnapshotNode
}

// NewXorHashCalculator returns an XorHashCalculator that reads nodes from data
// and spreads them over numWorkers workers, numBlocksPerWorker versions each.
func NewXorHashCalculator(numBlocksPerWorker int64, numWorkers int, data chan types.RawSnapshotNode) XorHashCalculator {
	var calculator XorHashCalculator
	calculator.NumBlocksPerWorker = numBlocksPerWorker
	calculator.NumOfWorkers = numWorkers
	calculator.DataCh = data
	return calculator
}

// HashSingle returns the SHA-256 digest of data as a newly allocated
// 32-byte slice.
func (x XorHashCalculator) HashSingle(data []byte) []byte {
	digest := sha256.Sum256(data)
	out := make([]byte, sha256.Size)
	copy(out, digest[:])
	return out
}

// HashTwo combines two equal-length byte slices by XOR-ing them byte by byte
// and returns the result in a new slice; neither input is modified.
//
// It panics when the two slices differ in length.
func (x XorHashCalculator) HashTwo(dataA []byte, dataB []byte) []byte {
	size := len(dataA)
	if size != len(dataB) {
		panic("Expecting both data to have equal length for computing a XoR hash")
	}
	combined := make([]byte, size)
	for pos, b := range dataA {
		combined[pos] = b ^ dataB[pos]
	}
	return combined
}

// ComputeHashes drains DataCh and returns one cumulative hash per worker.
//
// Each node is routed to the worker at index Version / NumBlocksPerWorker.
// A worker hashes every node it receives with HashSingle(Serialize(node)) and
// folds the digests together with HashTwo. After all workers finish, entry i
// is additionally combined with entry i-1, so entry i covers every node routed
// to workers 0..i. An entry stays nil when no node was seen at or before it.
//
// The call blocks until DataCh is closed. It panics when a node arrives while
// NumBlocksPerWorker is not positive, or when a node's version maps to a
// worker index outside [0, NumOfWorkers).
func (x XorHashCalculator) ComputeHashes() [][]byte {
	var wg sync.WaitGroup
	workerChans := make([]chan types.RawSnapshotNode, x.NumOfWorkers)
	allHashes := make([][]byte, x.NumOfWorkers)

	// First calculate each sub hash in a separate goroutine.
	for i := 0; i < x.NumOfWorkers; i++ {
		wg.Add(1)
		workerChans[i] = make(chan types.RawSnapshotNode, 1000)
		go func(index int, nodes <-chan types.RawSnapshotNode) {
			defer wg.Done()
			var hashResult []byte
			for item := range nodes {
				entryHash := x.HashSingle(Serialize(item))
				if hashResult == nil {
					hashResult = entryHash
				} else {
					hashResult = x.HashTwo(hashResult, entryHash)
				}
			}
			// Each worker writes only its own slot, so no locking is needed.
			allHashes[index] = hashResult
		}(i, workerChans[i])
	}

	// Push all the data to its corresponding channel based on version.
	for data := range x.DataCh {
		// Fail with a descriptive message instead of a bare divide-by-zero or
		// index-out-of-range runtime error.
		if x.NumBlocksPerWorker <= 0 {
			panic("NumBlocksPerWorker must be positive to assign nodes to workers")
		}
		index := data.Version / x.NumBlocksPerWorker
		if index < 0 || index >= int64(len(workerChans)) {
			panic("node version is outside the block range covered by the configured workers")
		}
		workerChans[index] <- data
	}

	// Close all sub channels so the workers' range loops terminate.
	for _, workerChan := range workerChans {
		close(workerChan)
	}
	// Wait for all workers to complete.
	wg.Wait()

	// Chain the sub hashes: each entry is combined with the entry before it.
	for i := 1; i < len(allHashes); i++ {
		prev, cur := allHashes[i-1], allHashes[i]
		switch {
		case len(prev) > 0 && len(cur) > 0:
			allHashes[i] = x.HashTwo(prev, cur)
		case len(prev) > 0 && len(cur) == 0:
			// Nothing new in this range: carry the previous hash forward.
			allHashes[i] = prev
		}
	}
	return allHashes
}

// Serialize encodes a node as the concatenation of its key, its value and its
// version written as an 8-byte little-endian unsigned integer.
func Serialize(node types.RawSnapshotNode) []byte {
	const versionSize = 8

	var versionBytes [versionSize]byte
	binary.LittleEndian.PutUint64(versionBytes[:], uint64(node.Version))

	encoded := make([]byte, 0, len(node.Key)+len(node.Value)+versionSize)
	encoded = append(encoded, node.Key...)
	encoded = append(encoded, node.Value...)
	encoded = append(encoded, versionBytes[:]...)
	return encoded
}
82 changes: 82 additions & 0 deletions tools/hash_verification/iavl/scanner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package iavl

import (
"bytes"
"fmt"

"github.com/cosmos/cosmos-sdk/store/rootmulti"
"github.com/cosmos/iavl"
"github.com/sei-protocol/sei-chain/tools/hash_verification/hasher"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/ss/types"
dbm "github.com/tendermint/tm-db"
)

// HashScanner scans the IAVL nodes stored in db and computes per-module hashes.
type HashScanner struct {
// db is the database the IAVL nodes are iterated from.
db dbm.DB
// latestVersion is the value rootmulti.GetLatestVersion reported for db when
// the scanner was created.
latestVersion int64
// blocksInterval is the number of blocks grouped into one hash bucket.
blocksInterval int64
// hashResult stores, per module name, the hashes returned by the most recent
// scan of that module.
hashResult map[string][][]byte
}

// NewHashScanner creates a HashScanner over db that produces one hash per
// blocksInterval blocks. It reads the latest committed version from db and
// prints it to stdout.
func NewHashScanner(db dbm.DB, blocksInterval int64) *HashScanner {
	scanner := new(HashScanner)
	scanner.db = db
	scanner.blocksInterval = blocksInterval
	scanner.hashResult = make(map[string][][]byte)

	scanner.latestVersion = rootmulti.GetLatestVersion(db)
	fmt.Printf("Detected IAVL latest version: %d\n", scanner.latestVersion)
	return scanner
}

// ScanAllModules scans every module listed in utils.Modules, in order, and
// prints each returned hash to stdout. The height printed for the hash at
// index i is blocksInterval * (i + 1).
func (s *HashScanner) ScanAllModules() {
	modules := utils.Modules
	for m := 0; m < len(modules); m++ {
		module := modules[m]
		hashes := s.scanAllHeights(module)
		for bucket := range hashes {
			height := s.blocksInterval * (int64(bucket) + 1)
			fmt.Printf("Module %s height %d hash is: %X\n", module, height, hashes[bucket])
		}
	}
}

func (s *HashScanner) scanAllHeights(module string) [][]byte {
dataCh := make(chan types.RawSnapshotNode, 10000)
hashCalculator := hasher.NewXorHashCalculator(s.blocksInterval, int(s.latestVersion/s.blocksInterval+1), dataCh)
fmt.Printf("Starting to scan module: %s\n", module)
go func() {
prefixDB := dbm.NewPrefixDB(s.db, []byte(utils.BuildRawPrefix(module)))
itr, err := prefixDB.Iterator(nil, nil)
count := 0
if err != nil {
panic(fmt.Errorf("failed to create iterator: %w", err))
}
defer itr.Close()
for ; itr.Valid(); itr.Next() {
value := bytes.Clone(itr.Value())
node, err := iavl.MakeNode(value)
if err != nil {
panic(fmt.Errorf("failed to parse iavl node: %w", err))
}

// Only scan leaf nodes
if node.GetHeight() != 0 {
continue
}
snapshotNode := types.RawSnapshotNode{
StoreKey: module,
Key: node.GetNodeKey(),
Value: node.GetValue(),
Version: node.GetVersion(),
}
dataCh <- snapshotNode
count++
if count%1000000 == 0 {
fmt.Printf("Scanned %d items for module %s\n", count, module)
}
}
close(dataCh)
}()
Comment on lines +46 to +78

Check notice

Code scanning / CodeQL

Spawning a Go routine Note

Spawning a Go routine may be a possible source of non-determinism
allHashes := hashCalculator.ComputeHashes()
s.hashResult[module] = allHashes
return allHashes
}
2 changes: 1 addition & 1 deletion tools/migration/cmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"github.com/cosmos/iavl"
"github.com/sei-protocol/sei-chain/tools/migration/sc"
"github.com/sei-protocol/sei-chain/tools/migration/ss"
"github.com/sei-protocol/sei-chain/tools/migration/utils"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/config"
sstypes "github.com/sei-protocol/sei-db/ss"
"github.com/spf13/cobra"
Expand Down
2 changes: 1 addition & 1 deletion tools/migration/sc/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
paramstypes "github.com/cosmos/cosmos-sdk/x/params/types"
stakingkeeper "github.com/cosmos/cosmos-sdk/x/staking/keeper"
"github.com/sei-protocol/sei-chain/app/params"
"github.com/sei-protocol/sei-chain/tools/migration/utils"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/config"
"github.com/tendermint/tendermint/libs/log"
dbm "github.com/tendermint/tm-db"
Expand Down
2 changes: 1 addition & 1 deletion tools/migration/ss/migrator.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (

"github.com/armon/go-metrics"
"github.com/cosmos/iavl"
"github.com/sei-protocol/sei-chain/tools/migration/utils"
"github.com/sei-protocol/sei-chain/tools/utils"
"github.com/sei-protocol/sei-db/ss/types"
dbm "github.com/tendermint/tm-db"
)
Expand Down
2 changes: 1 addition & 1 deletion tools/migration/utils/helper.go → tools/utils/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ var ModuleKeys = sdk.NewKVStoreKeys(
)

var Modules = []string{
"acc",
"aclaccesscontrol",
"authz",
"acc",
"bank",
"capability",
"distribution",
Expand Down
Loading