Skip to content

Commit

Permalink
[AV-1683] Cap triedb.Dirties Size in Acceptor (#785)
Browse files Browse the repository at this point in the history
* add TrieDirtyOptimisticLimit configs

* minimal impl

* fix size tracking

* add more comments

* cleanup

* nits

* remove TrieDirtyOptimisticLimit

* add TrieDirtyIdealLimit to tests

* add flush locktime

* update variable name and amount

* add target decrease over last 512

* cleanup comments

* don't flush below target

* nits

* better chunk flush writes

* nits

* add flush/commit meters

* expand flushWindow

* smooth writes
  • Loading branch information
patrick-ogrady authored Jun 2, 2022
1 parent d53c64a commit 7b28739
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 86 deletions.
16 changes: 9 additions & 7 deletions core/blockchain.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ const (
// that's resident in a blockchain.
type CacheConfig struct {
TrieCleanLimit int // Memory allowance (MB) to use for caching trie nodes in memory
TrieDirtyLimit int // Memory limit (MB) at which to start flushing dirty trie nodes to disk
TrieDirtyLimit int // Memory limit (MB) at which to block on insert and force a flush of dirty trie nodes to disk
TrieDirtyCommitTarget int // Memory limit (MB) to target for the dirties cache before invoking commit
CommitInterval uint64 // Commit the trie every [CommitInterval] blocks.
Pruning bool // Whether to disable trie write caching and GC altogether (archive node)
AcceptorQueueLimit int // Blocks to queue before blocking during acceptance
Expand All @@ -121,12 +122,13 @@ type CacheConfig struct {
}

var DefaultCacheConfig = &CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
Pruning: true,
CommitInterval: 4096,
AcceptorQueueLimit: 64, // Provides 2 minutes of buffer (2s block target) for a commit delay
SnapshotLimit: 256,
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20, // 20% overhead in memory counting (this targets 16 MB)
Pruning: true,
CommitInterval: 4096,
AcceptorQueueLimit: 64, // Provides 2 minutes of buffer (2s block target) for a commit delay
SnapshotLimit: 256,
}

// BlockChain represents the canonical chain given a database with a genesis
Expand Down
90 changes: 49 additions & 41 deletions core/blockchain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,22 @@ import (

var (
archiveConfig = &CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
Pruning: false, // Archive mode
SnapshotLimit: 256,
AcceptorQueueLimit: 64,
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20,
Pruning: false, // Archive mode
SnapshotLimit: 256,
AcceptorQueueLimit: 64,
}

pruningConfig = &CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
Pruning: true, // Enable pruning
CommitInterval: 4096,
SnapshotLimit: 256,
AcceptorQueueLimit: 64,
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20,
Pruning: true, // Enable pruning
CommitInterval: 4096,
SnapshotLimit: 256,
AcceptorQueueLimit: 64,
}
)

Expand Down Expand Up @@ -82,11 +84,12 @@ func TestArchiveBlockChainSnapsDisabled(t *testing.T) {
return createBlockChain(
db,
&CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
Pruning: false, // Archive mode
SnapshotLimit: 0, // Disable snapshots
AcceptorQueueLimit: 64,
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20,
Pruning: false, // Archive mode
SnapshotLimit: 0, // Disable snapshots
AcceptorQueueLimit: 64,
},
chainConfig,
lastAcceptedHash,
Expand Down Expand Up @@ -115,12 +118,13 @@ func TestPruningBlockChainSnapsDisabled(t *testing.T) {
return createBlockChain(
db,
&CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
Pruning: true, // Enable pruning
CommitInterval: 4096,
SnapshotLimit: 0, // Disable snapshots
AcceptorQueueLimit: 64,
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20,
Pruning: true, // Enable pruning
CommitInterval: 4096,
SnapshotLimit: 0, // Disable snapshots
AcceptorQueueLimit: 64,
},
chainConfig,
lastAcceptedHash,
Expand Down Expand Up @@ -163,12 +167,13 @@ func TestPruningBlockChainUngracefulShutdownSnapsDisabled(t *testing.T) {
blockchain, err := createBlockChain(
db,
&CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
Pruning: true, // Enable pruning
CommitInterval: 4096,
SnapshotLimit: 0, // Disable snapshots
AcceptorQueueLimit: 64,
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20,
Pruning: true, // Enable pruning
CommitInterval: 4096,
SnapshotLimit: 0, // Disable snapshots
AcceptorQueueLimit: 64,
},
chainConfig,
lastAcceptedHash,
Expand Down Expand Up @@ -197,12 +202,13 @@ func TestEnableSnapshots(t *testing.T) {
blockchain, err := createBlockChain(
db,
&CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
Pruning: true, // Enable pruning
CommitInterval: 4096,
SnapshotLimit: snapLimit,
AcceptorQueueLimit: 64,
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20,
Pruning: true, // Enable pruning
CommitInterval: 4096,
SnapshotLimit: snapLimit,
AcceptorQueueLimit: 64,
},
chainConfig,
lastAcceptedHash,
Expand Down Expand Up @@ -349,6 +355,7 @@ func testRepopulateMissingTriesParallel(t *testing.T, parallelism int) {
&CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20,
Pruning: false, // Archive mode
SnapshotLimit: 256,
PopulateMissingTries: &startHeight, // Starting point for re-populating.
Expand Down Expand Up @@ -380,13 +387,14 @@ func TestUngracefulAsyncShutdown(t *testing.T) {
var (
create = func(db ethdb.Database, chainConfig *params.ChainConfig, lastAcceptedHash common.Hash) (*BlockChain, error) {
blockchain, err := createBlockChain(db, &CacheConfig{
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
Pruning: true,
CommitInterval: 4096,
SnapshotLimit: 256,
SkipSnapshotRebuild: true, // Ensure the test errors if snapshot initialization fails
AcceptorQueueLimit: 1000, // ensure channel doesn't block
TrieCleanLimit: 256,
TrieDirtyLimit: 256,
TrieDirtyCommitTarget: 20,
Pruning: true,
CommitInterval: 4096,
SnapshotLimit: 256,
SkipSnapshotRebuild: true, // Ensure the test errors if snapshot initialization fails
AcceptorQueueLimit: 1000, // ensure channel doesn't block
}, chainConfig, lastAcceptedHash)
if err != nil {
return nil, err
Expand Down
72 changes: 62 additions & 10 deletions core/state_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,33 @@ package core

import (
"fmt"
"math/rand"
"time"

"github.com/ava-labs/coreth/core/types"
"github.com/ava-labs/coreth/ethdb"
"github.com/ethereum/go-ethereum/common"
)

// init seeds the package-global math/rand source from the current wall-clock
// time (nanosecond resolution) when the package is loaded.
func init() {
	seed := time.Now().UnixNano()
	rand.Seed(seed)
}

const (
	// tipBufferSize caps how many of the most recently accepted tries are
	// retained in the TrieDB dirties cache at tip. It only has an effect in
	// [pruning] mode.
	//
	// Holding on to a few extra tries at tip lets clients keep querying data
	// from recent trie roots.
	tipBufferSize = 32

	// flushWindow is the number of blocks before the [commitInterval] at which
	// optimistic flushing of trie nodes to disk begins. It only has an effect
	// in [pruning] mode.
	//
	// Flushing early spreads out database IO that would otherwise be
	// synchronized at the [commitInterval].
	flushWindow = 768
)

type TrieWriter interface {
Expand All @@ -55,13 +74,16 @@ type TrieDB interface {

func NewTrieWriter(db TrieDB, config *CacheConfig) TrieWriter {
if config.Pruning {
return &cappedMemoryTrieWriter{
TrieDB: db,
memoryCap: common.StorageSize(config.TrieDirtyLimit) * 1024 * 1024,
imageCap: 4 * 1024 * 1024,
commitInterval: config.CommitInterval,
tipBuffer: NewBoundedBuffer(tipBufferSize, db.Dereference),
cm := &cappedMemoryTrieWriter{
TrieDB: db,
memoryCap: common.StorageSize(config.TrieDirtyLimit) * 1024 * 1024,
targetCommitSize: common.StorageSize(config.TrieDirtyCommitTarget) * 1024 * 1024,
imageCap: 4 * 1024 * 1024,
commitInterval: config.CommitInterval,
tipBuffer: NewBoundedBuffer(tipBufferSize, db.Dereference),
}
cm.flushStepSize = (cm.memoryCap - cm.targetCommitSize) / common.StorageSize(flushWindow)
return cm
} else {
return &noPruningTrieWriter{
TrieDB: db,
Expand Down Expand Up @@ -95,16 +117,20 @@ func (np *noPruningTrieWriter) Shutdown() error { return nil }

type cappedMemoryTrieWriter struct {
TrieDB
memoryCap common.StorageSize
imageCap common.StorageSize
commitInterval uint64
memoryCap common.StorageSize
targetCommitSize common.StorageSize
flushStepSize common.StorageSize
imageCap common.StorageSize
commitInterval uint64

tipBuffer *BoundedBuffer
}

func (cm *cappedMemoryTrieWriter) InsertTrie(block *types.Block) error {
cm.TrieDB.Reference(block.Root(), common.Hash{})

// The use of [Cap] in [InsertTrie] prevents exceeding the configured memory
// limit (and OOM) in case there is a large backlog of processing (unaccepted) blocks.
nodes, imgs := cm.TrieDB.Size()
if nodes <= cm.memoryCap && imgs <= cm.imageCap {
return nil
Expand All @@ -127,12 +153,38 @@ func (cm *cappedMemoryTrieWriter) AcceptTrie(block *types.Block) error {
cm.tipBuffer.Insert(root)

// Commit this root if we have reached the [commitInterval].
if block.NumberU64()%cm.commitInterval == 0 {
modCommitInterval := block.NumberU64() % cm.commitInterval
if modCommitInterval == 0 {
if err := cm.TrieDB.Commit(root, true, nil); err != nil {
return fmt.Errorf("failed to commit trie for block %s: %w", block.Hash().Hex(), err)
}
return nil
}

// Write at least [flushStepSize] of the oldest nodes in the trie database
// dirty cache to disk as we approach the [commitInterval] to reduce the number of trie nodes
// that will need to be written at once on [Commit] (to roughly [targetCommitSize]).
//
// To reduce the number of useless trie nodes that are committed during this
// capping, we only optimistically flush within the [flushWindow]. During
// this period, the [targetMemory] decreases stepwise by [flushStepSize]
// as we get closer to the commit boundary.
//
// Most trie nodes are 300B, so we will write at least ~1000 trie nodes in
// a single optimistic flush (with the default [flushStepSize]≈315KB, i.e.
// (256MB-20MB)/768).
distanceFromCommit := cm.commitInterval - modCommitInterval // this cannot be 0
if distanceFromCommit > flushWindow {
return nil
}
targetMemory := cm.targetCommitSize + cm.flushStepSize*common.StorageSize(distanceFromCommit)
nodes, _ := cm.TrieDB.Size()
if nodes <= targetMemory {
return nil
}
targetCap := targetMemory - ethdb.IdealBatchSize
if err := cm.TrieDB.Cap(targetCap); err != nil {
return fmt.Errorf("failed to cap trie for block %s (target=%s): %w", block.Hash().Hex(), targetCap, err)
}
return nil
}

Expand Down
7 changes: 6 additions & 1 deletion eth/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,11 @@ func New(
config.TrieCleanCache = roundUpCacheSize(config.TrieCleanCache, 64)
config.SnapshotCache = roundUpCacheSize(config.SnapshotCache, 64)

log.Info("Allocated trie memory caches", "clean", common.StorageSize(config.TrieCleanCache)*1024*1024, "dirty", common.StorageSize(config.TrieDirtyCache)*1024*1024)
log.Info(
"Allocated trie memory caches",
"clean", common.StorageSize(config.TrieCleanCache)*1024*1024,
"dirty", common.StorageSize(config.TrieDirtyCache)*1024*1024,
)

chainConfig, genesisErr := core.SetupGenesisBlock(chainDb, config.Genesis)
if genesisErr != nil {
Expand Down Expand Up @@ -200,6 +204,7 @@ func New(
cacheConfig = &core.CacheConfig{
TrieCleanLimit: config.TrieCleanCache,
TrieDirtyLimit: config.TrieDirtyCache,
TrieDirtyCommitTarget: config.TrieDirtyCommitTarget,
Pruning: config.Pruning,
AcceptorQueueLimit: config.AcceptorQueueLimit,
CommitInterval: config.CommitInterval,
Expand Down
36 changes: 19 additions & 17 deletions eth/ethconfig/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,20 @@ var DefaultConfig = NewDefaultConfig()

func NewDefaultConfig() Config {
return Config{
NetworkId: 1,
LightPeers: 100,
UltraLightFraction: 75,
DatabaseCache: 512,
TrieCleanCache: 128,
TrieDirtyCache: 256,
SnapshotCache: 128,
Miner: miner.Config{},
TxPool: core.DefaultTxPoolConfig,
RPCGasCap: 25000000,
RPCEVMTimeout: 5 * time.Second,
GPO: DefaultFullGPOConfig,
RPCTxFeeCap: 1, // 1 AVAX
NetworkId: 1,
LightPeers: 100,
UltraLightFraction: 75,
DatabaseCache: 512,
TrieCleanCache: 256,
TrieDirtyCache: 256,
TrieDirtyCommitTarget: 20,
SnapshotCache: 128,
Miner: miner.Config{},
TxPool: core.DefaultTxPoolConfig,
RPCGasCap: 25000000,
RPCEVMTimeout: 5 * time.Second,
GPO: DefaultFullGPOConfig,
RPCTxFeeCap: 1, // 1 AVAX
}
}

Expand Down Expand Up @@ -114,10 +115,11 @@ type Config struct {
DatabaseCache int
// DatabaseFreezer string

TrieCleanCache int
TrieDirtyCache int
SnapshotCache int
Preimages bool
TrieCleanCache int
TrieDirtyCache int
TrieDirtyCommitTarget int
SnapshotCache int
Preimages bool

// Mining options
Miner miner.Config
Expand Down
Loading

0 comments on commit 7b28739

Please sign in to comment.