diff --git a/config.go b/config.go index e9ce4103542..5bbd5a4fef7 100644 --- a/config.go +++ b/config.go @@ -30,6 +30,7 @@ import ( "github.com/lightningnetwork/lnd/channeldb" "github.com/lightningnetwork/lnd/discovery" "github.com/lightningnetwork/lnd/funding" + graphdb "github.com/lightningnetwork/lnd/graph/db" "github.com/lightningnetwork/lnd/htlcswitch" "github.com/lightningnetwork/lnd/htlcswitch/hodl" "github.com/lightningnetwork/lnd/input" @@ -675,8 +676,9 @@ func DefaultConfig() Config { Sig: lncfg.DefaultSigWorkers, }, Caches: &lncfg.Caches{ - RejectCacheSize: channeldb.DefaultRejectCacheSize, - ChannelCacheSize: channeldb.DefaultChannelCacheSize, + RejectCacheSize: channeldb.DefaultRejectCacheSize, + ChannelCacheSize: channeldb.DefaultChannelCacheSize, + PublicNodeCacheSize: graphdb.DefaultPublicNodeCacheSize, }, Prometheus: lncfg.DefaultPrometheus(), Watchtower: lncfg.DefaultWatchtowerCfg(defaultTowerDir), diff --git a/config_builder.go b/config_builder.go index 7ce63041ee2..bab756ab3f6 100644 --- a/config_builder.go +++ b/config_builder.go @@ -1045,6 +1045,7 @@ func (d *DefaultDatabaseBuilder) BuildDatabase( graphDBOptions := []graphdb.StoreOptionModifier{ graphdb.WithRejectCacheSize(cfg.Caches.RejectCacheSize), graphdb.WithChannelCacheSize(cfg.Caches.ChannelCacheSize), + graphdb.WithPublicNodeCacheSize(cfg.Caches.PublicNodeCacheSize), graphdb.WithBatchCommitInterval(cfg.DB.BatchCommitInterval), } diff --git a/docs/release-notes/release-notes-0.20.1.md b/docs/release-notes/release-notes-0.20.1.md index 344894073f1..5d2f6a4b5b9 100644 --- a/docs/release-notes/release-notes-0.20.1.md +++ b/docs/release-notes/release-notes-0.20.1.md @@ -92,6 +92,12 @@ safe single-writer behavior until the wallet subsystem is fully concurrent-safe. +* [Add caching for](https://github.com/lightningnetwork/lnd/pull/10363) +`IsPublicNode` queries, which speeds up checks of a node's visibility status.
+This reduces the amount of time lnd needs to query the db to determine if a node +is public or not. Also added a new config `caches.public-node-cache-size` which +controls the max number of entries that the cache can accommodate. + ## Deprecations # Technical and Architectural Updates @@ -107,5 +113,6 @@ # Contributors (Alphabetical Order) +* Abdulkbk * bitromortac * Ziggie diff --git a/graph/db/graph_sql_test.go b/graph/db/graph_sql_test.go new file mode 100644 index 00000000000..b9f0ba7ffaa --- /dev/null +++ b/graph/db/graph_sql_test.go @@ -0,0 +1,118 @@ +//go:build test_db_postgres || test_db_sqlite + +package graphdb + +import ( + "testing" + + "github.com/btcsuite/btcd/chaincfg" + "github.com/lightningnetwork/lnd/sqldb" + "github.com/stretchr/testify/require" +) + +// TestNodeIsPublicCache verifies that once a node is observed as public, it +// remains public on cache hit even if it later has zero public channels in the +// DB. +func TestNodeIsPublicCache(t *testing.T) { + t.Parallel() + ctx := t.Context() + + graph := MakeTestGraph(t) + + alice := createTestVertex(t) + bob := createTestVertex(t) + carol := createTestVertex(t) + + require.NoError(t, graph.SetSourceNode(ctx, alice)) + + alice.LastUpdate = nextUpdateTime() + bob.LastUpdate = nextUpdateTime() + carol.LastUpdate = nextUpdateTime() + + require.NoError(t, graph.AddNode(ctx, alice)) + require.NoError(t, graph.AddNode(ctx, bob)) + require.NoError(t, graph.AddNode(ctx, carol)) + + // Carol has no public channels, so she should be private. + isPublic, err := graph.IsPublicNode(carol.PubKeyBytes) + require.NoError(t, err) + require.False(t, isPublic) + + // Add a public edge so Alice becomes public and is cached as such. + edge, _ := createEdge(10, 0, 0, 0, alice, bob) + require.NoError(t, graph.AddChannelEdge(ctx, &edge)) + + isPublic, err = graph.IsPublicNode(alice.PubKeyBytes) + require.NoError(t, err) + require.True(t, isPublic) + + // Delete Alice's only public edge. 
Since we're using a public node + // cache, Alice is still treated as public until she is evicted from + // the cache. + require.NoError(t, graph.DeleteChannelEdges(false, true, edge.ChannelID)) + + isPublic, err = graph.IsPublicNode(alice.PubKeyBytes) + require.NoError(t, err) + require.True(t, isPublic) +} + +// TestPublicNodeCacheEviction verifies that once a cached public entry is +// evicted, the next lookup re-queries the DB and can return private if the +// node no longer has any public channels. +func TestPublicNodeCacheEviction(t *testing.T) { + t.Parallel() + ctx := t.Context() + + // Create a SQL graph with a tiny public-node cache so we can force + // eviction. + store, err := NewSQLStore( + &SQLStoreConfig{ + ChainHash: *chaincfg.MainNetParams.GenesisHash, + QueryCfg: sqldb.DefaultSQLiteConfig(), + }, + newBatchQuerier(t), + // We set the public node cache size to 1 so we can force + // eviction. + WithPublicNodeCacheSize(1), + ) + require.NoError(t, err) + + graph, err := NewChannelGraph(store) + require.NoError(t, err) + require.NoError(t, graph.Start()) + t.Cleanup(func() { require.NoError(t, graph.Stop()) }) + + alice := createTestVertex(t) + bob := createTestVertex(t) + + require.NoError(t, graph.SetSourceNode(ctx, alice)) + + alice.LastUpdate = nextUpdateTime() + bob.LastUpdate = nextUpdateTime() + require.NoError(t, graph.AddNode(ctx, alice)) + require.NoError(t, graph.AddNode(ctx, bob)) + + edge, _ := createEdge(10, 0, 0, 0, alice, bob) + require.NoError(t, graph.AddChannelEdge(ctx, &edge)) + + // Call IsPublicNode to cache Alice as public. + isPublic, err := graph.IsPublicNode(alice.PubKeyBytes) + require.NoError(t, err) + require.True(t, isPublic) + + // Call IsPublicNode to cache Bob as public. With cache size 1, this + // should evict Alice. + isPublic, err = graph.IsPublicNode(bob.PubKeyBytes) + require.NoError(t, err) + require.True(t, isPublic) + + // Now delete the only public channel edge Alice has. 
+ require.NoError(t, graph.DeleteChannelEdges( + false, true, edge.ChannelID), + ) + + // Alice was evicted, so this should return private. + isPublic, err = graph.IsPublicNode(alice.PubKeyBytes) + require.NoError(t, err) + require.False(t, isPublic) +} diff --git a/graph/db/graph_test.go b/graph/db/graph_test.go index 590c077d608..c794e456b9e 100644 --- a/graph/db/graph_test.go +++ b/graph/db/graph_test.go @@ -1640,6 +1640,8 @@ func TestGraphCacheTraversal(t *testing.T) { require.Equal(t, numChannels*2*(numNodes-1), numNodeChans) } +// fillTestGraph fills the graph with a given number of nodes and creates a given +// number of channels between each node. func fillTestGraph(t testing.TB, graph *ChannelGraph, numNodes, numChannels int) (map[uint64]struct{}, []*models.Node) { @@ -3923,6 +3925,15 @@ func TestNodeIsPublic(t *testing.T) { // participant to replicate real-world scenarios (private edges being in // some graphs but not others, etc.). aliceGraph := MakeTestGraph(t) + + // SQL store caches public nodes and once a node is cached as public, it + // stays public until eviction/restart. This test asserts + // public<->private transitions, so it doesn't apply to SQL. + if _, ok := aliceGraph.V1Store.(*SQLStore); ok { + t.Skip("SQL backend uses public node cache, public status is " + + "sticky until eviction") + } + aliceNode := createTestVertex(t) if err := aliceGraph.SetSourceNode(ctx, aliceNode); err != nil { t.Fatalf("unable to set source node: %v", err) @@ -4042,6 +4053,29 @@ func TestNodeIsPublic(t *testing.T) { ) } +// BenchmarkIsPublicNode measures the performance of IsPublicNode when looking +// up a single node in a large graph. +func BenchmarkIsPublicNode(b *testing.B) { + graph := MakeTestGraph(b) + + // Create a graph with a reasonable number of nodes and channels. + numNodes := 8000 + numChans := 4 + _, nodes := fillTestGraph(b, graph, numNodes, numChans) + + // Pick any node to test with.
+ nodePub := nodes[len(nodes)/2].PubKeyBytes + + // Reset the timer to exclude setup time especially since + // `fillTestGraph` can take a while. + b.ResetTimer() + + for b.Loop() { + _, err := graph.IsPublicNode(nodePub) + require.NoError(b, err) + } +} + // TestDisabledChannelIDs ensures that the disabled channels within the // disabledEdgePolicyBucket are managed properly and the list returned from // DisabledChannelIDs is correct. diff --git a/graph/db/options.go b/graph/db/options.go index 15ea6f4ee85..ae818b6ae74 100644 --- a/graph/db/options.go +++ b/graph/db/options.go @@ -13,6 +13,11 @@ const ( // around 40MB. DefaultChannelCacheSize = 20000 + // DefaultPublicNodeCacheSize is the default number of node public + // status entries to cache. With 15k entries, this produces a cache of + // around 1-1.5MB (including map overhead and LRU bookkeeping). + DefaultPublicNodeCacheSize = 15000 + // DefaultPreAllocCacheNumNodes is the default number of channels we // assume for mainnet for pre-allocating the graph cache. As of // September 2021, there currently are 14k nodes in a strictly pruned @@ -125,6 +130,10 @@ type StoreOptions struct { // channel cache. ChannelCacheSize int + // PublicNodeCacheSize is the maximum number of node public status + // entries to hold in the cache. + PublicNodeCacheSize int + // BatchCommitInterval is the maximum duration the batch schedulers will // wait before attempting to commit a pending set of updates. BatchCommitInterval time.Duration @@ -138,9 +147,10 @@ type StoreOptions struct { // DefaultOptions returns a StoreOptions populated with default values. 
func DefaultOptions() *StoreOptions { return &StoreOptions{ - RejectCacheSize: DefaultRejectCacheSize, - ChannelCacheSize: DefaultChannelCacheSize, - NoMigration: false, + RejectCacheSize: DefaultRejectCacheSize, + ChannelCacheSize: DefaultChannelCacheSize, + PublicNodeCacheSize: DefaultPublicNodeCacheSize, + NoMigration: false, } } @@ -169,3 +179,10 @@ func WithBatchCommitInterval(interval time.Duration) StoreOptionModifier { o.BatchCommitInterval = interval } } + +// WithPublicNodeCacheSize sets the PublicNodeCacheSize to n. +func WithPublicNodeCacheSize(n int) StoreOptionModifier { + return func(o *StoreOptions) { + o.PublicNodeCacheSize = n + } +} diff --git a/graph/db/sql_store.go b/graph/db/sql_store.go index 3e2d74dc248..98351c25b4b 100644 --- a/graph/db/sql_store.go +++ b/graph/db/sql_store.go @@ -21,6 +21,8 @@ import ( "github.com/btcsuite/btcd/btcutil" "github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/btcsuite/btcd/wire" + "github.com/lightninglabs/neutrino/cache" + "github.com/lightninglabs/neutrino/cache/lru" "github.com/lightningnetwork/lnd/aliasmgr" "github.com/lightningnetwork/lnd/batch" "github.com/lightningnetwork/lnd/fn/v2" @@ -176,6 +178,8 @@ type SQLStore struct { rejectCache *rejectCache chanCache *channelCache + publicNodeCache *lru.Cache[[33]byte, *cachedPublicNode] + chanScheduler batch.Scheduler[SQLQueries] nodeScheduler batch.Scheduler[SQLQueries] @@ -183,6 +187,16 @@ type SQLStore struct { srcNodeMu sync.Mutex } +// cachedPublicNode represents a value that can be stored in an LRU cache. It +// has the Size() method which the lru cache requires. +type cachedPublicNode struct{} + +// Size returns the size of the cache entry. We return 1 as we just want to +// limit the number of entries rather than their actual memory size. +func (c *cachedPublicNode) Size() (uint64, error) { + return 1, nil +} + // A compile-time assertion to ensure that SQLStore implements the V1Store // interface. 
var _ V1Store = (*SQLStore)(nil) @@ -217,7 +231,10 @@ func NewSQLStore(cfg *SQLStoreConfig, db BatchedSQLQueries, db: db, rejectCache: newRejectCache(opts.RejectCacheSize), chanCache: newChannelCache(opts.ChannelCacheSize), - srcNodes: make(map[lnwire.GossipVersion]*srcNodeInfo), + publicNodeCache: lru.NewCache[[33]byte, *cachedPublicNode]( + uint64(opts.PublicNodeCacheSize), + ), + srcNodes: make(map[lnwire.GossipVersion]*srcNodeInfo), } s.chanScheduler = batch.NewTimeScheduler( @@ -2292,8 +2309,19 @@ func (s *SQLStore) ChannelID(chanPoint *wire.OutPoint) (uint64, error) { func (s *SQLStore) IsPublicNode(pubKey [33]byte) (bool, error) { ctx := context.TODO() + // Check the cache first and return early if there is a hit. + cached, err := s.publicNodeCache.Get(pubKey) + if err == nil && cached != nil { + return true, nil + } + + // Log any error other than NotFound. + if err != nil && !errors.Is(err, cache.ErrElementNotFound) { + log.Warnf("Unable to check cache if node is public: %v", err) + } + var isPublic bool - err := s.db.ExecTx(ctx, sqldb.ReadTxOpt(), func(db SQLQueries) error { + err = s.db.ExecTx(ctx, sqldb.ReadTxOpt(), func(db SQLQueries) error { var err error isPublic, err = db.IsPublicV1Node(ctx, pubKey[:]) @@ -2304,6 +2332,14 @@ func (s *SQLStore) IsPublicNode(pubKey [33]byte) (bool, error) { "public: %w", err) } + // Store the result in cache only if the node is public. + if isPublic { + _, err = s.publicNodeCache.Put(pubKey, &cachedPublicNode{}) + if err != nil { + log.Warnf("Unable to store node info in cache: %v", err) + } + } + return isPublic, nil } diff --git a/lncfg/caches.go b/lncfg/caches.go index 2457bb1d04a..e3e156ff876 100644 --- a/lncfg/caches.go +++ b/lncfg/caches.go @@ -14,6 +14,10 @@ const ( // channeldb's channel cache. This amounts to roughly 2 MB when full. MinChannelCacheSize = 1000 + // MinPublicNodeCacheSize is a floor on the maximum capacity allowed for + // the public node cache. This amounts to roughly 500 KB when full.
+ MinPublicNodeCacheSize = 5000 + // DefaultRPCGraphCacheDuration is the default interval that the RPC // response to DescribeGraph should be cached for. DefaultRPCGraphCacheDuration = time.Minute @@ -37,6 +41,11 @@ type Caches struct { // RPCGraphCacheDuration is used to control the flush interval of the // channel graph cache. RPCGraphCacheDuration time.Duration `long:"rpc-graph-cache-duration" description:"The period of time expressed as a duration (1s, 1m, 1h, etc) that the RPC response to DescribeGraph should be cached for."` + + // PublicNodeCacheSize is the maximum number of entries stored in lnd's + // public node cache, which is used to speed up checks for node + // visibility. Memory usage is roughly 100 bytes per entry. + PublicNodeCacheSize int `long:"public-node-cache-size" description:"Maximum number of entries contained in the public node cache, which is used to speed up checks for nodes visibility. Each entry requires roughly 100 bytes."` } // Validate checks the Caches configuration for values that are too small to be @@ -50,6 +59,11 @@ func (c *Caches) Validate() error { return fmt.Errorf("channel cache size %d is less than min: %d", c.ChannelCacheSize, MinChannelCacheSize) } + if c.PublicNodeCacheSize < MinPublicNodeCacheSize { + return fmt.Errorf("public node cache size %d is less than "+ + "min: %d", c.PublicNodeCacheSize, + MinPublicNodeCacheSize) + } return nil } diff --git a/sample-lnd.conf b/sample-lnd.conf index c9a2865b858..1566a7281b2 100644 --- a/sample-lnd.conf +++ b/sample-lnd.conf @@ -1388,6 +1388,10 @@ ; roughly 2Kb. ; caches.channel-cache-size=20000 +; Maximum number of entries contained in the public node cache, which is used to +; speed up checks for nodes visibility. Each entry requires roughly 100 bytes. +; caches.public-node-cache-size=15000 + ; The duration that the response to DescribeGraph should be cached for. Setting ; the value to zero disables the cache. ; Default: