diff --git a/app/docs/[...slug]/page.tsx b/app/docs/[...slug]/page.tsx index a1dfb58bde6..a69267f2b58 100644 --- a/app/docs/[...slug]/page.tsx +++ b/app/docs/[...slug]/page.tsx @@ -1,4 +1,5 @@ import Mermaid from "@/components/content-design/mermaid"; +import StateGrowthChart from "@/components/content-design/state-growth-chart"; import { AutoTypeTable } from "@/components/content-design/type-table"; import YouTube from "@/components/content-design/youtube"; import { BackToTop } from "@/components/ui/back-to-top"; @@ -116,6 +117,7 @@ export default async function Page(props: { Steps, YouTube, Mermaid, + StateGrowthChart, AddNetworkButtonInline, TypeTable, AutoTypeTable, diff --git a/components/content-design/state-growth-chart.tsx b/components/content-design/state-growth-chart.tsx new file mode 100644 index 00000000000..9449b5ec87f --- /dev/null +++ b/components/content-design/state-growth-chart.tsx @@ -0,0 +1,136 @@ +"use client"; +import React, { type JSX } from "react"; +import { + LineChart, + Line, + XAxis, + YAxis, + CartesianGrid, + Tooltip, + ResponsiveContainer, + Legend, + ReferenceLine, + Label, +} from "recharts"; + +const data = [ + { time: "t0", totalState: 50, activeState: 0, prunedState: 25, prunedTotal: null }, + { time: "t1", totalState: 250, activeState: 75, prunedState: 150, prunedTotal: null }, + { time: "t2", totalState: 450, activeState: 150, prunedState: 275, prunedTotal: null }, + { time: "t3", totalState: 650, activeState: 225, prunedState: 400, prunedTotal: 225 }, + { time: "t4", totalState: 850, activeState: 300, prunedState: 525, prunedTotal: 425 }, + { time: "t5", totalState: 1050, activeState: 375, prunedState: 650, prunedTotal: 625 }, + { time: "t6", totalState: 1250, activeState: 450, prunedState: 775, prunedTotal: 825 }, +]; + +const StateGrowthChart = (): JSX.Element => { + const renderLegend = (props: any) => { + const { payload } = props; + // Add custom legend entry for the arrow + const customPayload = [ + ...payload, + { value: "Resync/Offline Pruning", color: "#666", type: "arrow" } + ]; + + return ( +
<div style={{ display: "flex", justifyContent: "center", flexWrap: "wrap", gap: "16px" }}>
+        {customPayload.map((entry: any, index: number) => (
+          <div key={index} style={{ display: "flex", alignItems: "center", gap: "4px" }}>
+            {/* Dashed swatch for the custom arrow entry, solid swatch otherwise */}
+            <svg width="18" height="10">
+              <line
+                x1="0"
+                y1="5"
+                x2="18"
+                y2="5"
+                stroke={entry.color}
+                strokeWidth="2"
+                strokeDasharray={entry.type === "arrow" ? "3 3" : undefined}
+              />
+            </svg>
+            <span style={{ fontSize: "12px" }}>{entry.value}</span>
+          </div>
+        ))}
+      </div>
+    );
+  };
+
+  return (
+    <div style={{ width: "100%", height: "420px" }}>
+      <ResponsiveContainer width="100%" height={360}>
+        <LineChart data={data} margin={{ top: 20, right: 30, left: 20, bottom: 10 }}>
+          <CartesianGrid strokeDasharray="3 3" />
+          <XAxis dataKey="time" label={{ value: "Time", position: "insideBottom", offset: -5 }} />
+          <YAxis label={{ value: "Disk Usage (GB)", angle: -90, position: "insideLeft" }} />
+          <Tooltip />
+          <Legend content={renderLegend} />
+          <Line type="monotone" dataKey="totalState" name="Total State" stroke="#e74c3c" strokeWidth={2} dot={false} />
+          <Line type="monotone" dataKey="prunedState" name="Pruned State" stroke="#e67e22" strokeWidth={2} dot={false} />
+          <Line type="monotone" dataKey="activeState" name="Active State" stroke="#3498db" strokeWidth={2} dot={false} />
+          <Line type="monotone" dataKey="prunedTotal" name="Total State After Pruning / Resync" stroke="#e74c3c" strokeWidth={2} strokeDasharray="5 5" dot={false} />
+          {/* Vertical arrow at t3 */}
+          <ReferenceLine x="t3" stroke="#666" strokeDasharray="4 4">
+            <Label value="Resync/Offline Pruning" position="top" />
+          </ReferenceLine>
+        </LineChart>
+      </ResponsiveContainer>
+      <div style={{ textAlign: "center", fontSize: "12px", color: "#666", marginTop: "8px" }}>
+        Note: The dotted line shows how resync or offline pruning reduces total state or pruned state back to active state level.
+      </div>
+    </div>
+ ); +}; + +export default StateGrowthChart; diff --git a/content/docs/nodes/maintain/bootstrapping.mdx b/content/docs/nodes/maintain/bootstrapping.mdx deleted file mode 100644 index 490cb5d08a3..00000000000 --- a/content/docs/nodes/maintain/bootstrapping.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: Node Bootstrap ---- - -Node Bootstrap is the process where a node _securely_ downloads linear chain blocks to recreate the latest state of the chain locally. - -Bootstrap must guarantee that the local state of a node is in sync with the state of other valid nodes. Once bootstrap is completed, a node has the latest state of the chain and can verify new incoming transactions and reach consensus with other nodes, collectively moving forward the chains. - -Bootstrapping a node is a multi-step process which requires downloading the chains required by the Primary Network (that is, the C-Chain, P-Chain, and X-Chain), as well as the chains required by any additional Avalanche L1s that the node explicitly tracks. - -This document covers the high-level technical details of how bootstrapping works. This document glosses over some specifics, but the [AvalancheGo](https://github.com/ava-labs/avalanchego) codebase is open-source and is available for curious-minded readers to learn more. - -Validators and Where to Find Them[​](#validators-and-where-to-find-them "Direct link to heading") -------------------------------------------------------------------------------------------------- - -Bootstrapping is all about downloading all previously accepted containers _securely_ so a node can have the latest correct state of the chain. A node can't arbitrarily trust any source - a malicious actor could provide malicious blocks, corrupting the bootstrapping node's local state, and making it impossible for the node to correctly validate the network and reach consensus with other correct nodes. - -What's the most reliable source of information in the Avalanche ecosystem? It's a _large enough_ majority of validators. Therefore, the first step of bootstrapping is finding a sufficient amount of validators to download containers from. - -The P-Chain is responsible for all platform-level operations, including staking events that modify an Avalanche L1's validator set. Whenever any chain (aside from the P-Chain itself) bootstraps, it requests an up-to-date validator set for that Avalanche L1 (Primary Network is an Avalanche L1 too). Once the Avalanche L1's current validator set is known, the node can securely download containers from these validators to bootstrap the chain. - -There is a caveat here: the validator set must be _up-to-date_. If a bootstrapping node's validator set is stale, the node may incorrectly believe that some nodes are still validators when their validation period has already expired. A node might unknowingly end up requesting blocks from non-validators which respond with malicious blocks that aren't safe to download. - -**For this reason, every Avalanche node must fully bootstrap the P-chain first before moving on to the other Primary Network chains and other Avalanche L1s to guarantee that their validator sets are up-to-date**. - -What about the P-chain? The P-chain can't ever have an up-to-date validator set before completing its bootstrap. To solve this chicken-and-egg situation the Avalanche Foundation maintains a trusted default set of validators called beacons (but users are free to configure their own). 
Beacon Node-IDs and IP addresses are listed in the [AvalancheGo codebase](https://github.com/ava-labs/avalanchego/blob/master/genesis/bootstrappers.json). Every node has the beacon list available from the start and can reach out to them as soon as it starts.
-
-Validators are the only sources of truth for a blockchain. Validator availability is so key to the bootstrapping process that **bootstrapping is blocked until the node establishes a sufficient number of secure connections to validators**. If the node fails to reach a sufficient number within a given period of time, it shuts down, as no operation can be carried out safely.
-
-Bootstrapping the Blockchain[​](#bootstrapping-the-blockchain "Direct link to heading")
-----------------------------------------------------------------------------------------
-
-Once a node is able to discover and connect to validator and beacon nodes, it's able to start bootstrapping the blockchain by downloading the individual containers.
-
-One common misconception is that Avalanche blockchains are bootstrapped by retrieving containers starting at genesis and working up to the currently accepted frontier.
-
-Instead, containers are downloaded from the accepted frontier downwards to genesis, and then their corresponding state transitions are executed upwards from genesis to the accepted frontier. The accepted frontier is the last accepted block for linear chains.
-
-Why can't nodes simply download blocks in chronological order, starting from genesis upwards? The reason is efficiency: if nodes downloaded containers upwards, they would only get a safety guarantee by polling a majority of validators for every single container. That's a lot of network traffic for a single container, and a node would still need to do that for each container in the chain.
-
-Instead, if a node starts by securely retrieving the accepted frontier from a majority of honest nodes and then recursively fetches the parent containers from the accepted frontier down to genesis, it can cheaply check that containers are correct just by verifying their IDs. Each Avalanche container has the IDs of its parents (one block parent for linear chains) and an ID's integrity can be guaranteed cryptographically.
-
-Let's dive deeper into the two bootstrap phases - frontier retrieval and container execution.
-
-### Frontier Retrieval[​](#frontier-retrieval "Direct link to heading")
-
-The current frontier is retrieved by requesting it from validator or beacon nodes. Avalanche bootstrap is designed to be robust - it must be able to make progress even in the presence of slow validators or network failures. This process needs to be fault-tolerant to these types of failures, since bootstrapping may take quite some time to complete and network connections can be unreliable.
-
-Bootstrap starts when a node has connected to a sufficient majority of validator stake. A node is able to start bootstrapping when it has connected to at least 75% of total validator stake.
-
-Seeders are the first set of peers that a node reaches out to when trying to figure out the current frontier. A subset of seeders is randomly sampled from the validator set. Seeders might be slow and provide a stale frontier, or be malicious and return bogus container IDs, but they always provide an initial set of candidate frontiers to work with.
-
-Once a node has received the candidate frontiers from its seeders, it polls **every network validator** to vet the candidate frontiers.
It sends the list of candidate frontiers it received from the seeders to each validator, asking whether or not they know about these frontiers. Each validator responds with the subset of known candidates, regardless of how up-to-date or stale the containers are. Each validator returns containers irrespective of their age so that bootstrap works even in the presence of a stale frontier.
-
-Frontier retrieval is completed when at least one of the candidate frontiers is supported by at least 50% of total validator stake. Multiple candidate frontiers may be supported by a majority of stake, after which point the next phase, container fetching, starts.
-
-At any point in these steps a network issue may occur, preventing a node from retrieving or validating frontiers. If this occurs, bootstrap restarts by sampling a new set of seeders and repeating the bootstrapping process, optimistically assuming that the network issue will go away.
-
-### Containers Execution[​](#containers-execution "Direct link to heading")
-
-Once a node has at least one valid frontier, it starts downloading parent containers for each frontier. If it's the first time the node is running, it won't know about any containers and will try fetching all parent containers recursively from the accepted frontier down to genesis (unless [state sync](#state-sync) is enabled). If bootstrap had already run previously, some containers are already available locally and the node will stop as soon as it finds a known one.
-
-A node first just fetches and parses containers. Once the chain is complete, the node executes them in chronological order, starting from the earliest downloaded container to the accepted frontier. This allows the node to rebuild the full chain state and to eventually be in sync with the rest of the network.
-
-When Does Bootstrapping Finish?[​](#when-does-bootstrapping-finish "Direct link to heading")
----------------------------------------------------------------------------------------------
-
-You've seen how [bootstrap works](#bootstrapping-the-blockchain) for a single chain. However, a node must bootstrap the chains in the Primary Network as well as the chains in each Avalanche L1 it tracks. This raises the questions: when are these chains bootstrapped? When is a node done bootstrapping?
-
-The P-chain is always the first chain to bootstrap. Once the P-Chain has finished, all other chains start bootstrapping in parallel, connecting to their own validators independently of one another.
-
-A node completes bootstrapping an Avalanche L1 once all of its corresponding chains have completed bootstrapping. Because the Primary Network is a special case of Avalanche L1 that includes the entire network, this applies to it as well as any other manually tracked Avalanche L1s.
-
-Note that Avalanche L1s bootstrap independently of one another - so even if one Avalanche L1 has bootstrapped and is validating new transactions and adding new containers, other Avalanche L1s may still be bootstrapping in parallel.
-
-Within a single Avalanche L1, however, bootstrapping isn't done until the last chain completes. It's possible for a single chain to effectively stall a node from finishing the bootstrap for that Avalanche L1, if it has a sufficiently long history or each operation is complex and time-consuming.
Even worse, other Avalanche L1 validators are continuously accepting new transactions and adding new containers on top of the previously known frontier, so a node that's slow to bootstrap can continuously fall behind the rest of the network.
-
-Nodes mitigate this by restarting bootstrap for any chain that is blocked waiting for the remaining Avalanche L1 chains to finish bootstrapping. These chains repeat the frontier retrieval and container downloading phases to stay up-to-date with the Avalanche L1's ever-moving current frontier until the slowest chain has completed bootstrapping.
-
-Once this is complete, a node is finally ready to validate the network.
-
-State Sync[​](#state-sync "Direct link to heading")
-----------------------------------------------------
-
-The full node bootstrap process is long, and gets longer and longer over time as more and more containers are accepted. Nodes need to bootstrap a chain by reconstructing the full chain state locally - but downloading and executing each container isn't the only way to do this.
-
-Starting from [AvalancheGo version 1.7.11](https://github.com/ava-labs/avalanchego/releases/tag/v1.7.11), nodes can use state sync to drastically cut down bootstrapping time on the C-Chain. Instead of executing each block, state sync uses cryptographic techniques to download and verify just the state associated with the current frontier. State synced nodes can't serve every C-chain block ever historically accepted, but they can safely retrieve the full C-chain state needed to validate in a much shorter time. State sync also fetches the 256 blocks preceding the frontier to support the previous block hash operation code.
-
-State sync is currently only available for the C-chain. The P-chain and X-chain currently bootstrap by downloading all blocks. Note that irrespective of the bootstrap method used (including state sync), each chain is still blocked on all other chains in its Avalanche L1 completing their bootstrap before continuing into normal operation.
-
-There is no configuration to state sync an archival node. If you need all the historical state, then you must not use state sync and must instead configure the node as an archival node.
-
-Conclusions and FAQ[​](#conclusions-and-faq "Direct link to heading")
----------------------------------------------------------------------
-
-If you got this far, you've hopefully gotten a better idea of what's going on when your node bootstraps. Here are a few frequently asked questions about bootstrapping.
-
-### How Can I Get the ETA for Node Bootstrap?[​](#how-can-i-get-the-eta-for-node-bootstrap "Direct link to heading")
-
-Logs provide information about both container downloading and their execution for each chain. Here is an example:
-
-```bash
-[02-16|17:31:42.950] INFO bootstrap/bootstrapper.go:494 fetching blocks {"numFetchedBlocks": 5000, "numTotalBlocks": 101357, "eta": "2m52s"}
-[02-16|17:31:58.110] INFO bootstrap/bootstrapper.go:494 fetching blocks {"numFetchedBlocks": 10000, "numTotalBlocks": 101357, "eta": "3m40s"}
-[02-16|17:32:04.554] INFO bootstrap/bootstrapper.go:494 fetching blocks {"numFetchedBlocks": 15000, "numTotalBlocks": 101357, "eta": "2m56s"}
-...
-[02-16|17:36:52.404] INFO queue/jobs.go:203 executing operations {"numExecuted": 17881, "numToExecute": 101357, "eta": "2m20s"}
-[02-16|17:37:22.467] INFO queue/jobs.go:203 executing operations {"numExecuted": 35009, "numToExecute": 101357, "eta": "1m54s"}
-[02-16|17:37:52.468] INFO queue/jobs.go:203 executing operations {"numExecuted": 52713, "numToExecute": 101357, "eta": "1m23s"}
-```
-
-Similar logs are emitted for X and C chains and any chain in explicitly tracked Avalanche L1s.
-
-### Why Does the Chain Bootstrap ETA Keep Changing?[​](#why-chain-bootstrap-eta-keeps-on-changing "Direct link to heading")
-
-As you saw in the [bootstrap completion section](#when-does-bootstrapping-finish), an Avalanche L1 like the Primary Network completes once all of its chains finish bootstrapping. Some Avalanche L1 chains may have to wait for the slowest to finish. They'll restart bootstrapping in the meantime, to make sure they don't fall too far behind the network's accepted frontier.
-
-What Order Do The Chains Bootstrap?[​](#what-order-do-the-chains-bootstrap "Direct link to heading")
------------------------------------------------------------------------------------------------------
-
-The 3 chains will bootstrap in the following order: P-chain, X-chain, C-chain.
-
-### Why Are AvalancheGo APIs Disabled During Bootstrapping?[​](#why-are-avalanchego-apis-disabled-during-bootstrapping "Direct link to heading")
-
-AvalancheGo APIs are [explicitly disabled](https://github.com/ava-labs/avalanchego/blob/master/api/server/server.go#L367:L379) during bootstrapping. The reason is that if the node has not fully rebuilt the state of its Avalanche L1s, it can't provide accurate information. AvalancheGo APIs are activated once bootstrap completes and the node transitions into its normal operating mode, accepting and validating transactions.
diff --git a/content/docs/nodes/maintain/chain-state-management.mdx b/content/docs/nodes/maintain/chain-state-management.mdx
index b29f75be4ee..887677ed266 100644
--- a/content/docs/nodes/maintain/chain-state-management.mdx
+++ b/content/docs/nodes/maintain/chain-state-management.mdx
@@ -3,34 +3,34 @@ title: Chain State Management
 description: Understanding active state vs archive state in EVM chains, and how to manage disk usage through state sync and offline pruning.
 
-When running an EVM-based blockchain (C-Chain or Subnet-EVM L1s), your node stores blockchain state on disk. Understanding the difference between **active state** and **archive state** is crucial for managing disk space and choosing the right sync method.
+When running an EVM-based blockchain (C-Chain or Subnet-EVM L1s), your node stores blockchain state on disk. Understanding the difference between **active state**, **pruned state**, and **archive state** is crucial for managing disk space and choosing the right sync method.
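+
+Before choosing a sync or pruning strategy, it helps to check how much state your node currently holds on disk. A quick way to do this (assuming the default data directory) is:
+
+```bash
+du -sh ~/.avalanchego/db/
+```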
-## Active State vs Archive State +## State Types Comparison + +Your node's storage requirements depend on which type of state you're maintaining: + +| Property | Active State | Pruned State | Archive State (Total State) | +|----------|--------------|--------------|---------------------------| +| **Size (C-Chain)** | ~500 GB | ~750 GB - 1 TB | ~3 TB+ (and growing) | +| **Contents** | Current account balances, contract storage, code | Current state + recent historical trie nodes | Complete state history at every block | +| **Required for** | Validating, sending transactions, reading current state | Same as Active State, with some recent history | Historical queries at any block height, block explorers, analytics | +| **Sync method** | State sync (fast) | State sync, then grows over time | Full sync from genesis (slower) | +| **Maintenance** | Regular pruning or resync needed | Regular pruning or resync needed | None needed (intentional full history) | ### Active State -The **active state** represents the current state of the blockchain—all account balances, contract storage, and code as of the latest block. This is what your node needs to validate new transactions and participate in consensus. +The **active state** represents the current state of the blockchain—all account balances, contract storage, and code as of the latest block. This is what your node needs to validate new transactions and participate in consensus. When you bootstrap with state sync, you start with just the active state. -| Property | Details | -|----------|---------| -| **Size** | ~500 GB for C-Chain | -| **Contents** | Current account balances, contract storage, code | -| **Required for** | Validating, sending transactions, reading current state | -| **Sync method** | State sync (fast, downloads only current state) | +### Pruned State -### Archive State (Total State) +**Pruned state** is what your node accumulates over time after starting with active state. As blocks are processed, old trie nodes aren't immediately deleted, causing disk usage to grow beyond the active state size. Running offline pruning periodically removes these old nodes while keeping your node functional. Most long-running validators operate in this state. -The **archive state** includes the complete history of all state changes since genesis. This allows querying historical state at any block height (e.g., "What was this account's balance at block 1,000,000?"). +### Archive State (Total State) -| Property | Details | -|----------|---------| -| **Size** | ~3 TB+ for C-Chain (and growing) | -| **Contents** | Complete state history at every block | -| **Required for** | Historical queries, block explorers, analytics | -| **Sync method** | Full sync from genesis (slower, replays all blocks) | +The **archive state** includes the complete history of all state changes since genesis. This allows querying historical state at any block height (e.g., "What was this account's balance at block 1,000,000?"). Archive nodes are typically only required for block explorers, indexers, and specialized analytics applications. -Most validators and RPC nodes only need the **active state**. Archive nodes are typically only required for block explorers, indexers, and specialized analytics applications. +Most validators and RPC nodes only need **active state** or can operate with **pruned state**. Archive nodes are specialized infrastructure for historical data access. 
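+
+A quick way to tell whether a given node retains archive state is to query a balance at an old block height; a node holding only active or pruned state will typically return a "missing trie node" error for heights it has discarded. The address and block number below are illustrative:
+
+```bash
+curl -X POST --data '{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "method": "eth_getBalance",
+  "params": ["0x0100000000000000000000000000000000000000", "0x186A0"]
+}' -H 'content-type:application/json;' 127.0.0.1:9650/ext/bc/C/rpc
+```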
 ## Why State Grows Over Time
@@ -43,11 +43,58 @@ Even if you start with just the active state, your node's disk usage will grow o
 This means a node that started with 500 GB via state sync might grow to 1 TB+ over months of operation, even though the "current" active state is still ~500 GB.
+
+### State Growth Visualization
+
+<StateGrowthChart />
+
+**Understanding the Chart:**
+
+- **Total State** (solid red line): Grows fastest over time as historical state data accumulates
+  - Includes both current state and all historical intermediate trie nodes
+  - Always larger than active state due to storage overhead
+
+- **Pruned State** (solid orange line): Represents typical validator disk usage over time
+  - Grows at a moderate rate between active and total state
+  - This is what most long-running nodes experience without maintenance
+  - Results from accumulating recent trie nodes that haven't been cleaned up
+
+- **Active State** (solid blue line): Grows at a slower, steady rate
+  - Represents only the current blockchain state needed for validation
+  - This is your starting point after state sync
+
+- **Total State After Pruning / Resync** (dotted red line): Shows state growth after performing maintenance
+  - Starts fresh from active state level after pruning or resync
+  - Continues to grow at the same rate as total state
+
+- **Resync/Offline Pruning** (vertical grey dotted line): The maintenance operation that reduces disk usage
+  - Removes accumulated historical data
+  - Brings total state back down close to active state level
+
+<Callout>
+[Firewood](https://github.com/ava-labs/firewood) is an upcoming database upgrade that will address the issue of total state growing too large. This next-generation storage layer is designed to efficiently manage state growth and reduce disk space requirements for node operators.
+</Callout>
+
 ## Managing Disk Usage
 
 ### Option 1: State Sync (Re-sync)
 
-The simplest way to reclaim disk space is to delete your node's data and re-sync using state sync. This downloads only the current active state.
+The simplest way to reclaim disk space is to delete your node's data and re-sync using state sync. Instead of replaying the entire blockchain history to reconstruct the current state, state sync allows nodes to download only the current state directly from network peers. This shortens the bootstrap process from multiple days to just a couple of hours.
+
+State sync is ideal for:
+- Validator nodes that don't need full transaction history
+- RPC nodes focused on current state queries
+- Any node where historical data queries are not required
+
+<Callout>
+State sync is available for the C-Chain and Avalanche L1s, but not for P-Chain or X-Chain. Since the bulk of transactions and state growth occur on the C-Chain, state sync still provides significant benefits for bootstrap time and disk usage management.
+</Callout>
+
+#### Configuring State Sync
+
+State sync is enabled by default for the C-Chain.
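+
+If you want to make the default explicit, a minimal C-Chain config (created at `~/.avalanchego/configs/chains/C/config.json`, which does not exist by default) looks like this:
+
+```json
+{
+  "state-sync-enabled": true
+}
+```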
For Avalanche L1s, you can configure it per-chain: + +- **C-Chain configuration**: See [C-Chain Config](/docs/nodes/chain-configs/primary-network/c-chain#state-sync-enabled) +- **Avalanche L1 configuration**: See [Subnet-EVM Config](/docs/nodes/chain-configs/avalanche-l1s/subnet-evm#state-sync-enabled) ```bash # Stop your node first @@ -65,23 +112,296 @@ sudo systemctl start avalanchego | Simple, no configuration needed | Several hours of downtime | | Guarantees minimal disk usage | Loses any local transaction index | | Fresh database with no fragmentation | Must re-sync from scratch | +| Fast bootstrap (hours vs days) | Not available for P-Chain or X-Chain | -To avoid validator downtime, spin up a fresh node and let it state sync completely. Once synced, stop both nodes, copy the `~/.avalanchego/staking/` folder from your current validator to the new node, then start the new node. Your validator identity (staking keys) transfers instantly with no missed uptime. +To avoid validator downtime, you can transfer your validator identity to a fresh node. This works because your validator identity is determined by cryptographic keys in the staking directory, not the database. + +**How It Works:** + +Your validator identity consists of three key files in `~/.avalanchego/staking/`: +- **staker.crt** - TLS certificate (determines your Node ID) +- **staker.key** - TLS private key (for encrypted P2P communication) +- **signer.key** - BLS signing key (for consensus signatures) + +These files define your validator identity. The Node ID shown on the P-Chain is cryptographically derived from `staker.crt`, so copying these files transfers your complete validator identity. + +**Step-by-Step Process:** + +1. **Provision a new server** with the same or better specs than your current validator + +2. **Install and configure AvalancheGo** on the new server, but don't start it yet: + ```bash + # Install AvalancheGo on new server + # Follow: /docs/nodes/run-a-node/using-install-script/installing-avalanche-go + ``` + +3. **Let the new node sync completely** (start it normally to download the blockchain): + ```bash + # On new server + sudo systemctl start avalanchego + + # Monitor sync progress (wait until fully synced) + # This may take several hours + curl -X POST --data '{ + "jsonrpc":"2.0", + "id" :1, + "method" :"info.isBootstrapped", + "params": { + "chain":"C" + } + }' -H 'content-type:application/json;' 127.0.0.1:9650/ext/info + ``` + +4. **Stop both nodes** simultaneously to prepare for the identity transfer: + ```bash + # On old validator + sudo systemctl stop avalanchego + + # On new server + sudo systemctl stop avalanchego + ``` + +5. **Backup the new server's auto-generated keys** (optional but recommended): + ```bash + # On new server + mv ~/.avalanchego/staking ~/.avalanchego/staking.backup + ``` + +6. **Copy the staking directory** from your old validator to the new server: + ```bash + # From your old validator, copy to new server + scp -r ~/.avalanchego/staking/ user@new-server:~/.avalanchego/ + + # Or use rsync for better control: + rsync -avz ~/.avalanchego/staking/ user@new-server:~/.avalanchego/staking/ + ``` + +7. **Verify file permissions** on the new server: + ```bash + # On new server + chmod 700 ~/.avalanchego/staking + chmod 400 ~/.avalanchego/staking/staker.key + chmod 400 ~/.avalanchego/staking/staker.crt + chmod 400 ~/.avalanchego/staking/signer.key + chown -R avalanche:avalanche ~/.avalanchego/staking # If using avalanche user + ``` + +8. 
**Start the new node** with your validator identity: + ```bash + # On new server + sudo systemctl start avalanchego + ``` + +9. **Verify the Node ID matches**: + ```bash + # On new server - confirm this matches your registered validator Node ID + curl -X POST --data '{ + "jsonrpc":"2.0", + "id" :1, + "method" :"info.getNodeID" + }' -H 'content-type:application/json;' 127.0.0.1:9650/ext/info + ``` + +10. **Monitor for successful validation**: + ```bash + # Check if you're validating + curl -X POST --data '{ + "jsonrpc":"2.0", + "id" :1, + "method" :"platform.getCurrentValidators", + "params": { + "subnetID": null + } + }' -H 'content-type:application/json;' 127.0.0.1:9650/ext/P + ``` + +**Important Notes:** + +- **Timing**: The entire stop → transfer → start process typically takes 5-15 minutes. Your validator will miss some blocks during this window, but won't be penalized as long as you're back online before your uptime drops below 80%. + +- **Don't run both nodes simultaneously**: Running two nodes with the same staking keys simultaneously can cause network issues and potential penalties. Always stop the old node before starting the new one. + +- **Database not needed**: You don't need to copy `~/.avalanchego/db/` - the new node already has a fresh, synced database from step 3. + +- **Config files**: If you have custom configuration in `~/.avalanchego/configs/`, copy those as well to maintain the same node behavior. + +**Alternative: Update node in place** + +If your server supports it, you can also achieve zero downtime by: +1. Attaching additional storage to your existing server +2. Syncing a new database to the new disk in a separate directory +3. Stopping AvalancheGo briefly +4. Updating the data directory config to point to the new disk +5. Starting AvalancheGo with the fresh database + +This avoids transferring staking keys but requires more complex storage management. ### Option 2: Offline Pruning Offline pruning removes old state trie nodes while keeping your node's database intact. This is faster than a full re-sync but requires temporary additional disk space. -See the [Reduce Disk Usage](/docs/nodes/maintain/reduce-disk-usage) guide for detailed instructions. - | Pros | Cons | |------|------| | Faster than full re-sync | Requires ~30-60 minutes downtime | | Preserves transaction index | Needs temporary disk space for bloom filter | | No network bandwidth required | Slightly more complex setup | +#### How Offline Pruning Works + +Offline Pruning is ported from `go-ethereum` to reduce the amount of disk space taken up by the TrieDB (storage for the Merkle Forest). + +Offline pruning creates a bloom filter and adds all trie nodes in the active state to the bloom filter to mark the data as protected. This ensures that any part of the active state will not be removed during offline pruning. + +After generating the bloom filter, offline pruning iterates over the database and searches for trie nodes that are safe to be removed from disk. + +A bloom filter is a probabilistic data structure that reports whether an item is definitely not in a set or possibly in a set. Therefore, for each key we iterate, we check if it is in the bloom filter. If the key is definitely not in the bloom filter, then it is not in the active state and we can safely delete it. If the key is possibly in the set, then we skip over it to ensure we do not delete any active state. + +During iteration, the underlying database (LevelDB) writes deletion markers, causing a temporary increase in disk usage. 
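+
+Conceptually, the pruning pass looks like the sketch below. This is illustrative pseudocode only - the interfaces and function names are invented for clarity and do not match the actual coreth/go-ethereum source:
+
+```go
+// Minimal interfaces, defined here only for the sake of the example.
+type Iterator interface {
+	Next() bool
+	Key() []byte
+	Release()
+}
+
+type KeyValueStore interface {
+	NewIterator() Iterator
+	Delete(key []byte) error
+}
+
+type BloomFilter interface {
+	// MayContain reports "possibly in set" (true) or "definitely not in set" (false).
+	MayContain(key []byte) bool
+}
+
+// prune deletes every key that is definitely not part of the active state.
+func prune(db KeyValueStore, active BloomFilter) error {
+	it := db.NewIterator()
+	defer it.Release()
+	for it.Next() {
+		// Bloom filters have no false negatives: "definitely not present"
+		// means the key cannot belong to the active state, so it is safe to delete.
+		if !active.MayContain(it.Key()) {
+			if err := db.Delete(it.Key()); err != nil { // LevelDB records a tombstone
+				return err
+			}
+		}
+		// "Possibly present" keys are skipped: false positives may leave some
+		// stale nodes behind, but active state is never deleted.
+	}
+	return nil
+}
+```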
+ +After iterating over the database and deleting any old trie nodes that it can, offline pruning then runs compaction to minimize the DB size after the potentially large number of delete operations. + +#### Finding the C-Chain Config File + +In order to enable offline pruning, you need to update the C-Chain config file to include the parameters `offline-pruning-enabled` and `offline-pruning-data-directory`. + +The default location of the C-Chain config file is `~/.avalanchego/configs/chains/C/config.json`. **Please note that by default, this file does not exist. You would need to create it manually.** You can update the directory for chain configs by passing in the directory of your choice via the CLI argument: `chain-config-dir`. See [this](/docs/nodes/configure/configs-flags) for more info. For example, if you start your node with: + +```bash +./build/avalanchego --chain-config-dir=/home/ubuntu/chain-configs +``` + +The chain config directory will be updated to `/home/ubuntu/chain-configs` and the corresponding C-Chain config file will be: `/home/ubuntu/chain-configs/C/config.json`. + +#### Running Offline Pruning + +In order to enable offline pruning, update the C-Chain config file to include the following parameters: + +```json +{ + "offline-pruning-enabled": true, + "offline-pruning-data-directory": "/home/ubuntu/offline-pruning" +} +``` + +This will set `/home/ubuntu/offline-pruning` as the directory to be used by the offline pruner. Offline pruning will store the bloom filter in this location, so you must ensure that the path exists. + +Now that the C-Chain config file has been updated, you can start your node with the command (no CLI arguments are necessary if using the default chain config directory): + +Once AvalancheGo starts the C-Chain, you can expect to see update logs from the offline pruner: + +```bash +INFO [02-09|00:20:15.625] Iterating state snapshot accounts=297,231 slots=6,669,708 elapsed=16.001s eta=1m29.03s +INFO [02-09|00:20:23.626] Iterating state snapshot accounts=401,907 slots=10,698,094 elapsed=24.001s eta=1m32.522s +INFO [02-09|00:20:31.626] Iterating state snapshot accounts=606,544 slots=13,891,948 elapsed=32.002s eta=1m10.927s +INFO [02-09|00:20:39.626] Iterating state snapshot accounts=760,948 slots=18,025,523 elapsed=40.002s eta=1m2.603s +INFO [02-09|00:20:47.626] Iterating state snapshot accounts=886,583 slots=21,769,199 elapsed=48.002s eta=1m8.834s +INFO [02-09|00:20:55.626] Iterating state snapshot accounts=1,046,295 slots=26,120,100 elapsed=56.002s eta=57.401s +INFO [02-09|00:21:03.626] Iterating state snapshot accounts=1,229,257 slots=30,241,391 elapsed=1m4.002s eta=47.674s +INFO [02-09|00:21:11.626] Iterating state snapshot accounts=1,344,091 slots=34,128,835 elapsed=1m12.002s eta=45.185s +INFO [02-09|00:21:19.626] Iterating state snapshot accounts=1,538,009 slots=37,791,218 elapsed=1m20.002s eta=34.59s +INFO [02-09|00:21:27.627] Iterating state snapshot accounts=1,729,564 slots=41,694,303 elapsed=1m28.002s eta=25.006s +INFO [02-09|00:21:35.627] Iterating state snapshot accounts=1,847,617 slots=45,646,011 elapsed=1m36.003s eta=20.052s +INFO [02-09|00:21:43.627] Iterating state snapshot accounts=1,950,875 slots=48,832,722 elapsed=1m44.003s eta=9.299s +INFO [02-09|00:21:47.342] Iterated snapshot accounts=1,950,875 slots=49,667,870 elapsed=1m47.718s +INFO [02-09|00:21:47.351] Writing state bloom to disk name=/home/ubuntu/offline-pruning/statebloom.0xd6fca36db4b60b34330377040ef6566f6033ed8464731cbb06dc35c8401fa38e.bf.gz +INFO [02-09|00:23:04.421] State 
bloom filter committed name=/home/ubuntu/offline-pruning/statebloom.0xd6fca36db4b60b34330377040ef6566f6033ed8464731cbb06dc35c8401fa38e.bf.gz +``` + +The bloom filter should be populated and committed to disk after about 5 minutes. At this point, if the node shuts down, it will resume the offline pruning session when it restarts (note: this operation cannot be cancelled). + +In order to ensure that users do not mistakenly leave offline pruning enabled for the long term (which could result in an hour of downtime on each restart), we have added a manual protection which requires that after an offline pruning session, the node must be started with offline pruning disabled at least once before it will start with offline pruning enabled again. Therefore, once the bloom filter has been committed to disk, you should update the C-Chain config file to include the following parameters: + +```json +{ + "offline-pruning-enabled": false, + "offline-pruning-data-directory": "/home/ubuntu/offline-pruning" +} +``` + +It is important to keep the same data directory in the config file, so that the node knows where to look for the bloom filter on a restart if offline pruning has not finished. + +Now if your node restarts, it will be marked as having correctly disabled offline pruning after the run and be allowed to resume normal operation once offline pruning has finished running. + +You will see progress logs throughout the offline pruning run which will indicate the session's progress: + +```bash +INFO [02-09|00:31:51.920] Pruning state data nodes=40,116,759 size=10.08GiB elapsed=8m47.499s eta=12m50.961s +INFO [02-09|00:31:59.921] Pruning state data nodes=41,659,059 size=10.47GiB elapsed=8m55.499s eta=12m13.822s +INFO [02-09|00:32:07.921] Pruning state data nodes=41,687,047 size=10.48GiB elapsed=9m3.499s eta=12m23.915s +INFO [02-09|00:32:15.921] Pruning state data nodes=41,715,823 size=10.48GiB elapsed=9m11.499s eta=12m33.965s +INFO [02-09|00:32:23.921] Pruning state data nodes=41,744,167 size=10.49GiB elapsed=9m19.500s eta=12m44.004s +INFO [02-09|00:32:31.921] Pruning state data nodes=41,772,613 size=10.50GiB elapsed=9m27.500s eta=12m54.01s +INFO [02-09|00:32:39.921] Pruning state data nodes=41,801,267 size=10.50GiB elapsed=9m35.500s eta=13m3.992s +INFO [02-09|00:32:47.922] Pruning state data nodes=41,829,714 size=10.51GiB elapsed=9m43.500s eta=13m13.951s +INFO [02-09|00:32:55.922] Pruning state data nodes=41,858,400 size=10.52GiB elapsed=9m51.501s eta=13m23.885s +INFO [02-09|00:33:03.923] Pruning state data nodes=41,887,131 size=10.53GiB elapsed=9m59.501s eta=13m33.79s +INFO [02-09|00:33:11.923] Pruning state data nodes=41,915,583 size=10.53GiB elapsed=10m7.502s eta=13m43.678s +INFO [02-09|00:33:19.924] Pruning state data nodes=41,943,891 size=10.54GiB elapsed=10m15.502s eta=13m53.551s +INFO [02-09|00:33:27.924] Pruning state data nodes=41,972,281 size=10.55GiB elapsed=10m23.502s eta=14m3.389s +INFO [02-09|00:33:35.924] Pruning state data nodes=42,001,414 size=10.55GiB elapsed=10m31.503s eta=14m13.192s +INFO [02-09|00:33:43.925] Pruning state data nodes=42,029,987 size=10.56GiB elapsed=10m39.504s eta=14m22.976s +INFO [02-09|00:33:51.925] Pruning state data nodes=42,777,042 size=10.75GiB elapsed=10m47.504s eta=14m7.245s +INFO [02-09|00:34:00.950] Pruning state data nodes=42,865,413 size=10.77GiB elapsed=10m56.529s eta=14m15.927s +INFO [02-09|00:34:08.956] Pruning state data nodes=42,918,719 size=10.79GiB elapsed=11m4.534s eta=14m24.453s +INFO [02-09|00:34:22.816] Pruning state data nodes=42,952,925 
size=10.79GiB elapsed=11m18.394s eta=14m41.243s +INFO [02-09|00:34:30.818] Pruning state data nodes=42,998,715 size=10.81GiB elapsed=11m26.397s eta=14m49.961s +INFO [02-09|00:34:38.828] Pruning state data nodes=43,046,476 size=10.82GiB elapsed=11m34.407s eta=14m58.572s +INFO [02-09|00:34:46.893] Pruning state data nodes=43,107,656 size=10.83GiB elapsed=11m42.472s eta=15m6.729s +INFO [02-09|00:34:55.038] Pruning state data nodes=43,168,834 size=10.85GiB elapsed=11m50.616s eta=15m14.934s +INFO [02-09|00:35:03.039] Pruning state data nodes=43,446,900 size=10.92GiB elapsed=11m58.618s eta=15m14.705s +``` + +When the node completes, it will emit the following log and resume normal operation: + +```bash +INFO [02-09|00:42:16.009] Pruning state data nodes=93,649,812 size=23.53GiB elapsed=19m11.588s eta=1m2.658s +INFO [02-09|00:42:24.009] Pruning state data nodes=95,045,956 size=23.89GiB elapsed=19m19.588s eta=45.149s +INFO [02-09|00:42:32.009] Pruning state data nodes=96,429,410 size=24.23GiB elapsed=19m27.588s eta=28.041s +INFO [02-09|00:42:40.009] Pruning state data nodes=97,811,804 size=24.58GiB elapsed=19m35.588s eta=11.204s +INFO [02-09|00:42:45.359] Pruned state data nodes=98,744,430 size=24.82GiB elapsed=19m40.938s +INFO [02-09|00:42:45.360] Compacting database range=0x00-0x10 elapsed="2.157µs" +INFO [02-09|00:43:12.311] Compacting database range=0x10-0x20 elapsed=26.951s +INFO [02-09|00:43:38.763] Compacting database range=0x20-0x30 elapsed=53.402s +INFO [02-09|00:44:04.847] Compacting database range=0x30-0x40 elapsed=1m19.486s +INFO [02-09|00:44:31.194] Compacting database range=0x40-0x50 elapsed=1m45.834s +INFO [02-09|00:45:31.580] Compacting database range=0x50-0x60 elapsed=2m46.220s +INFO [02-09|00:45:58.465] Compacting database range=0x60-0x70 elapsed=3m13.104s +INFO [02-09|00:51:17.593] Compacting database range=0x70-0x80 elapsed=8m32.233s +INFO [02-09|00:56:19.679] Compacting database range=0x80-0x90 elapsed=13m34.319s +INFO [02-09|00:56:46.011] Compacting database range=0x90-0xa0 elapsed=14m0.651s +INFO [02-09|00:57:12.370] Compacting database range=0xa0-0xb0 elapsed=14m27.010s +INFO [02-09|00:57:38.600] Compacting database range=0xb0-0xc0 elapsed=14m53.239s +INFO [02-09|00:58:06.311] Compacting database range=0xc0-0xd0 elapsed=15m20.951s +INFO [02-09|00:58:35.484] Compacting database range=0xd0-0xe0 elapsed=15m50.123s +INFO [02-09|00:59:05.449] Compacting database range=0xe0-0xf0 elapsed=16m20.089s +INFO [02-09|00:59:34.365] Compacting database range=0xf0- elapsed=16m49.005s +INFO [02-09|00:59:34.367] Database compaction finished elapsed=16m49.006s +INFO [02-09|00:59:34.367] State pruning successful pruned=24.82GiB elapsed=39m34.749s +INFO [02-09|00:59:34.367] Completed offline pruning. Re-initializing blockchain. 
+INFO [02-09|00:59:34.387] Loaded most recent local header number=10,671,401 hash=b52d0a..7bd166 age=40m29s +INFO [02-09|00:59:34.387] Loaded most recent local full block number=10,671,401 hash=b52d0a..7bd166 age=40m29s +INFO [02-09|00:59:34.387] Initializing snapshots async=true +DEBUG[02-09|00:59:34.390] Reinjecting stale transactions count=0 +INFO [02-09|00:59:34.395] Transaction pool price threshold updated price=470,000,000,000 +INFO [02-09|00:59:34.396] Transaction pool price threshold updated price=225,000,000,000 +INFO [02-09|00:59:34.396] Transaction pool price threshold updated price=0 +INFO [02-09|00:59:34.396] lastAccepted = 0xb52d0a1302e4055b487c3a0243106b5e13a915c6e178da9f8491cebf017bd166 +INFO [02-09|00:59:34] snow/engine/snowman/transitive.go#67: initializing consensus engine +INFO [02-09|00:59:34] snow/engine/snowman/bootstrap/bootstrapper.go#220: Starting bootstrap... +``` + +At this point, the node will go into bootstrapping and (once bootstrapping completes) resume consensus and operate as normal. + +#### Disk Space Considerations for Offline Pruning + +To ensure the node does not enter an inconsistent state, the bloom filter used for pruning is persisted to `offline-pruning-data-directory` for the duration of the operation. This directory should have `offline-pruning-bloom-filter-size` available in disk space (default 512 MB). + +The underlying database (LevelDB) uses deletion markers (tombstones) to identify newly deleted keys. These markers are temporarily persisted to disk until they are removed during a process known as compaction. This will lead to an increase in disk usage during pruning. If your node runs out of disk space during pruning, you may safely restart the pruning operation. This may succeed as restarting the node triggers compaction. + +If restarting the pruning operation does not succeed, additional disk space should be provisioned. + ## Choosing the Right Approach | Scenario | Recommended Approach | @@ -105,11 +425,25 @@ df -h / Consider setting up alerts when disk usage exceeds 80% to give yourself time to plan maintenance. +## P-Chain and X-Chain State + +The P-Chain and X-Chain have significantly smaller state footprints compared to the C-Chain: + +- **P-Chain**: Stores validator metadata, Avalanche L1 definitions, and staking transactions. State size is typically < 10 GB and grows very slowly. +- **X-Chain**: Handles AVAX transfers using the UTXO model. State size is typically < 50 GB and grows slowly. 
+
+**Important limitations:**
+- State sync is **not available** for P-Chain or X-Chain
+- These chains always sync from genesis by replaying all transactions
+- Bootstrap is still fast (typically < 1 hour): despite the lack of state sync, their much smaller state makes bootstrap quicker than the C-Chain's
+- Disk space management is rarely needed for these chains
+
 ## L1-Specific Considerations
 
 For Avalanche L1s running Subnet-EVM:
 
 - **State size scales with usage**: High-throughput chains accumulate state faster
 - **Same pruning tools apply**: Offline pruning works identically to C-Chain
+- **State sync available**: Configure via [Subnet-EVM chain config](/docs/nodes/chain-configs/avalanche-l1s/subnet-evm#state-sync-enabled)
 - **Plan storage accordingly**: Reference the [system requirements](/docs/nodes/system-requirements) for your throughput tier
diff --git a/content/docs/nodes/maintain/reduce-disk-usage.mdx b/content/docs/nodes/maintain/reduce-disk-usage.mdx
deleted file mode 100644
index def01776fb6..00000000000
--- a/content/docs/nodes/maintain/reduce-disk-usage.mdx
+++ /dev/null
@@ -1,182 +0,0 @@
----
-title: Reduce Disk Usage
----
-
-Offline Pruning is ported from `go-ethereum` to reduce the amount of disk space taken up by the TrieDB (storage for the Merkle Forest).
-
-Offline pruning creates a bloom filter and adds all trie nodes in the active state to the bloom filter to mark the data as protected. This ensures that any part of the active state will not be removed during offline pruning.
-
-After generating the bloom filter, offline pruning iterates over the database and searches for trie nodes that are safe to be removed from disk.
-
-A bloom filter is a probabilistic data structure that reports whether an item is definitely not in a set or possibly in a set. Therefore, for each key we iterate, we check if it is in the bloom filter. If the key is definitely not in the bloom filter, then it is not in the active state and we can safely delete it. If the key is possibly in the set, then we skip over it to ensure we do not delete any active state.
-
-During iteration, the underlying database (LevelDB) writes deletion markers, causing a temporary increase in disk usage.
-
-After iterating over the database and deleting any old trie nodes that it can, offline pruning then runs compaction to minimize the DB size after the potentially large number of delete operations.
-
-Finding the C-Chain Config File[​](#finding-the-c-chain-config-file "Direct link to heading")
-----------------------------------------------------------------------------------------------
-
-In order to enable offline pruning, you need to update the C-Chain config file to include the parameters `offline-pruning-enabled` and `offline-pruning-data-directory`.
-
-The default location of the C-Chain config file is `~/.avalanchego/configs/chains/C/config.json`. **Please note that by default, this file does not exist. You would need to create it manually.** You can update the directory for chain configs by passing in the directory of your choice via the CLI argument: `chain-config-dir`. See [this](/docs/nodes/configure/configs-flags) for more info. For example, if you start your node with:
-
-```bash
-./build/avalanchego --chain-config-dir=/home/ubuntu/chain-configs
-```
-
-The chain config directory will be updated to `/home/ubuntu/chain-configs` and the corresponding C-Chain config file will be: `/home/ubuntu/chain-configs/C/config.json`.
- -Running Offline Pruning[​](#running-offline-pruning "Direct link to heading") ------------------------------------------------------------------------------ - -In order to enable offline pruning, update the C-Chain config file to include the following parameters: - -```json -{ - "offline-pruning-enabled": true, - "offline-pruning-data-directory": "/home/ubuntu/offline-pruning" -} -``` - -This will set `/home/ubuntu/offline-pruning` as the directory to be used by the offline pruner. Offline pruning will store the bloom filter in this location, so you must ensure that the path exists. - -Now that the C-Chain config file has been updated, you can start your node with the command (no CLI arguments are necessary if using the default chain config directory): - -Once AvalancheGo starts the C-Chain, you can expect to see update logs from the offline pruner: - -```bash -INFO [02-09|00:20:15.625] Iterating state snapshot accounts=297,231 slots=6,669,708 elapsed=16.001s eta=1m29.03s -INFO [02-09|00:20:23.626] Iterating state snapshot accounts=401,907 slots=10,698,094 elapsed=24.001s eta=1m32.522s -INFO [02-09|00:20:31.626] Iterating state snapshot accounts=606,544 slots=13,891,948 elapsed=32.002s eta=1m10.927s -INFO [02-09|00:20:39.626] Iterating state snapshot accounts=760,948 slots=18,025,523 elapsed=40.002s eta=1m2.603s -INFO [02-09|00:20:47.626] Iterating state snapshot accounts=886,583 slots=21,769,199 elapsed=48.002s eta=1m8.834s -INFO [02-09|00:20:55.626] Iterating state snapshot accounts=1,046,295 slots=26,120,100 elapsed=56.002s eta=57.401s -INFO [02-09|00:21:03.626] Iterating state snapshot accounts=1,229,257 slots=30,241,391 elapsed=1m4.002s eta=47.674s -INFO [02-09|00:21:11.626] Iterating state snapshot accounts=1,344,091 slots=34,128,835 elapsed=1m12.002s eta=45.185s -INFO [02-09|00:21:19.626] Iterating state snapshot accounts=1,538,009 slots=37,791,218 elapsed=1m20.002s eta=34.59s -INFO [02-09|00:21:27.627] Iterating state snapshot accounts=1,729,564 slots=41,694,303 elapsed=1m28.002s eta=25.006s -INFO [02-09|00:21:35.627] Iterating state snapshot accounts=1,847,617 slots=45,646,011 elapsed=1m36.003s eta=20.052s -INFO [02-09|00:21:43.627] Iterating state snapshot accounts=1,950,875 slots=48,832,722 elapsed=1m44.003s eta=9.299s -INFO [02-09|00:21:47.342] Iterated snapshot accounts=1,950,875 slots=49,667,870 elapsed=1m47.718s -INFO [02-09|00:21:47.351] Writing state bloom to disk name=/home/ubuntu/offline-pruning/statebloom.0xd6fca36db4b60b34330377040ef6566f6033ed8464731cbb06dc35c8401fa38e.bf.gz -INFO [02-09|00:23:04.421] State bloom filter committed name=/home/ubuntu/offline-pruning/statebloom.0xd6fca36db4b60b34330377040ef6566f6033ed8464731cbb06dc35c8401fa38e.bf.gz -``` - -The bloom filter should be populated and committed to disk after about 5 minutes. At this point, if the node shuts down, it will resume the offline pruning session when it restarts (note: this operation cannot be cancelled). - -In order to ensure that users do not mistakenly leave offline pruning enabled for the long term (which could result in an hour of downtime on each restart), we have added a manual protection which requires that after an offline pruning session, the node must be started with offline pruning disabled at least once before it will start with offline pruning enabled again. 
Therefore, once the bloom filter has been committed to disk, you should update the C-Chain config file to include the following parameters: - -```json -{ - "offline-pruning-enabled": false, - "offline-pruning-data-directory": "/home/ubuntu/offline-pruning" -} -``` - -It is important to keep the same data directory in the config file, so that the node knows where to look for the bloom filter on a restart if offline pruning has not finished. - -Now if your node restarts, it will be marked as having correctly disabled offline pruning after the run and be allowed to resume normal operation once offline pruning has finished running. - -You will see progress logs throughout the offline pruning run which will indicate the session's progress: - -```bash -INFO [02-09|00:31:51.920] Pruning state data nodes=40,116,759 size=10.08GiB elapsed=8m47.499s eta=12m50.961s -INFO [02-09|00:31:59.921] Pruning state data nodes=41,659,059 size=10.47GiB elapsed=8m55.499s eta=12m13.822s -INFO [02-09|00:32:07.921] Pruning state data nodes=41,687,047 size=10.48GiB elapsed=9m3.499s eta=12m23.915s -INFO [02-09|00:32:15.921] Pruning state data nodes=41,715,823 size=10.48GiB elapsed=9m11.499s eta=12m33.965s -INFO [02-09|00:32:23.921] Pruning state data nodes=41,744,167 size=10.49GiB elapsed=9m19.500s eta=12m44.004s -INFO [02-09|00:32:31.921] Pruning state data nodes=41,772,613 size=10.50GiB elapsed=9m27.500s eta=12m54.01s -INFO [02-09|00:32:39.921] Pruning state data nodes=41,801,267 size=10.50GiB elapsed=9m35.500s eta=13m3.992s -INFO [02-09|00:32:47.922] Pruning state data nodes=41,829,714 size=10.51GiB elapsed=9m43.500s eta=13m13.951s -INFO [02-09|00:32:55.922] Pruning state data nodes=41,858,400 size=10.52GiB elapsed=9m51.501s eta=13m23.885s -INFO [02-09|00:33:03.923] Pruning state data nodes=41,887,131 size=10.53GiB elapsed=9m59.501s eta=13m33.79s -INFO [02-09|00:33:11.923] Pruning state data nodes=41,915,583 size=10.53GiB elapsed=10m7.502s eta=13m43.678s -INFO [02-09|00:33:19.924] Pruning state data nodes=41,943,891 size=10.54GiB elapsed=10m15.502s eta=13m53.551s -INFO [02-09|00:33:27.924] Pruning state data nodes=41,972,281 size=10.55GiB elapsed=10m23.502s eta=14m3.389s -INFO [02-09|00:33:35.924] Pruning state data nodes=42,001,414 size=10.55GiB elapsed=10m31.503s eta=14m13.192s -INFO [02-09|00:33:43.925] Pruning state data nodes=42,029,987 size=10.56GiB elapsed=10m39.504s eta=14m22.976s -INFO [02-09|00:33:51.925] Pruning state data nodes=42,777,042 size=10.75GiB elapsed=10m47.504s eta=14m7.245s -INFO [02-09|00:34:00.950] Pruning state data nodes=42,865,413 size=10.77GiB elapsed=10m56.529s eta=14m15.927s -INFO [02-09|00:34:08.956] Pruning state data nodes=42,918,719 size=10.79GiB elapsed=11m4.534s eta=14m24.453s -INFO [02-09|00:34:22.816] Pruning state data nodes=42,952,925 size=10.79GiB elapsed=11m18.394s eta=14m41.243s -INFO [02-09|00:34:30.818] Pruning state data nodes=42,998,715 size=10.81GiB elapsed=11m26.397s eta=14m49.961s -INFO [02-09|00:34:38.828] Pruning state data nodes=43,046,476 size=10.82GiB elapsed=11m34.407s eta=14m58.572s -INFO [02-09|00:34:46.893] Pruning state data nodes=43,107,656 size=10.83GiB elapsed=11m42.472s eta=15m6.729s -INFO [02-09|00:34:55.038] Pruning state data nodes=43,168,834 size=10.85GiB elapsed=11m50.616s eta=15m14.934s -INFO [02-09|00:35:03.039] Pruning state data nodes=43,446,900 size=10.92GiB elapsed=11m58.618s eta=15m14.705s -``` - -When the node completes, it will emit the following log and resume normal operation: - -```bash -INFO [02-09|00:42:16.009] Pruning state data 
nodes=93,649,812 size=23.53GiB elapsed=19m11.588s eta=1m2.658s -INFO [02-09|00:42:24.009] Pruning state data nodes=95,045,956 size=23.89GiB elapsed=19m19.588s eta=45.149s -INFO [02-09|00:42:32.009] Pruning state data nodes=96,429,410 size=24.23GiB elapsed=19m27.588s eta=28.041s -INFO [02-09|00:42:40.009] Pruning state data nodes=97,811,804 size=24.58GiB elapsed=19m35.588s eta=11.204s -INFO [02-09|00:42:45.359] Pruned state data nodes=98,744,430 size=24.82GiB elapsed=19m40.938s -INFO [02-09|00:42:45.360] Compacting database range=0x00-0x10 elapsed="2.157µs" -INFO [02-09|00:43:12.311] Compacting database range=0x10-0x20 elapsed=26.951s -INFO [02-09|00:43:38.763] Compacting database range=0x20-0x30 elapsed=53.402s -INFO [02-09|00:44:04.847] Compacting database range=0x30-0x40 elapsed=1m19.486s -INFO [02-09|00:44:31.194] Compacting database range=0x40-0x50 elapsed=1m45.834s -INFO [02-09|00:45:31.580] Compacting database range=0x50-0x60 elapsed=2m46.220s -INFO [02-09|00:45:58.465] Compacting database range=0x60-0x70 elapsed=3m13.104s -INFO [02-09|00:51:17.593] Compacting database range=0x70-0x80 elapsed=8m32.233s -INFO [02-09|00:56:19.679] Compacting database range=0x80-0x90 elapsed=13m34.319s -INFO [02-09|00:56:46.011] Compacting database range=0x90-0xa0 elapsed=14m0.651s -INFO [02-09|00:57:12.370] Compacting database range=0xa0-0xb0 elapsed=14m27.010s -INFO [02-09|00:57:38.600] Compacting database range=0xb0-0xc0 elapsed=14m53.239s -INFO [02-09|00:58:06.311] Compacting database range=0xc0-0xd0 elapsed=15m20.951s -INFO [02-09|00:58:35.484] Compacting database range=0xd0-0xe0 elapsed=15m50.123s -INFO [02-09|00:59:05.449] Compacting database range=0xe0-0xf0 elapsed=16m20.089s -INFO [02-09|00:59:34.365] Compacting database range=0xf0- elapsed=16m49.005s -INFO [02-09|00:59:34.367] Database compaction finished elapsed=16m49.006s -INFO [02-09|00:59:34.367] State pruning successful pruned=24.82GiB elapsed=39m34.749s -INFO [02-09|00:59:34.367] Completed offline pruning. Re-initializing blockchain. -INFO [02-09|00:59:34.387] Loaded most recent local header number=10,671,401 hash=b52d0a..7bd166 age=40m29s -INFO [02-09|00:59:34.387] Loaded most recent local full block number=10,671,401 hash=b52d0a..7bd166 age=40m29s -INFO [02-09|00:59:34.387] Initializing snapshots async=true -DEBUG[02-09|00:59:34.390] Reinjecting stale transactions count=0 -INFO [02-09|00:59:34.395] Transaction pool price threshold updated price=470,000,000,000 -INFO [02-09|00:59:34.396] Transaction pool price threshold updated price=225,000,000,000 -INFO [02-09|00:59:34.396] Transaction pool price threshold updated price=0 -INFO [02-09|00:59:34.396] lastAccepted = 0xb52d0a1302e4055b487c3a0243106b5e13a915c6e178da9f8491cebf017bd166 -INFO [02-09|00:59:34] snow/engine/snowman/transitive.go#67: initializing consensus engine -INFO [02-09|00:59:34] snow/engine/snowman/bootstrap/bootstrapper.go#220: Starting bootstrap... 
-INFO [02-09|00:59:34] chains/manager.go#246: creating chain: - ID: 2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM - VMID:jvYyfQTxGMJLuGWa55kdP2p2zSUYsQ5Raupu4TW34ZAUBAbtq -INFO [02-09|00:59:34.425] Enabled APIs: eth, eth-filter, net, web3, internal-eth, internal-blockchain, internal-transaction, avax -DEBUG[02-09|00:59:34.425] Allowed origin(s) for WS RPC interface [*] -INFO [02-09|00:59:34] api/server/server.go#203: adding route /ext/bc/2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5/avax -INFO [02-09|00:59:34] api/server/server.go#203: adding route /ext/bc/2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5/rpc -INFO [02-09|00:59:34] api/server/server.go#203: adding route /ext/bc/2q9e4r6Mu3U68nU1fYjgbR6JvwrRx36CohpAX5UQxse55x1Q5/ws -INFO [02-09|00:59:34] vms/avm/vm.go#437: Fee payments are using Asset with Alias: AVAX, AssetID: FvwEAhmxKfeiG8SnEvq42hc6whRyY3EFYAvebMqDNDGCgxN5Z -INFO [02-09|00:59:34] vms/avm/vm.go#229: address transaction indexing is disabled -INFO [02-09|00:59:34] snow/engine/avalanche/transitive.go#71: initializing consensus engine -INFO [02-09|00:59:34] snow/engine/avalanche/bootstrap/bootstrapper.go#258: Starting bootstrap... -INFO [02-09|00:59:34] api/server/server.go#203: adding route /ext/bc/2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM -INFO [02-09|00:59:34] snow/engine/snowman/bootstrap/bootstrapper.go#445: waiting for the remaining chains in this subnet to finish syncing -INFO [02-09|00:59:34] api/server/server.go#203: adding route /ext/bc/2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM/wallet -INFO [02-09|00:59:34] api/server/server.go#203: adding route /ext/bc/2oYMBNV4eNHyqk2fjjV5nVQLDbtmNJzq5s3qs3Lo6ftnC6FByM/events -INFO [02-09|00:59:34]
snow/engine/common/bootstrapper.go#235: Bootstrapping started syncing with 1 vertices in the accepted frontier -INFO [02-09|00:59:46] snow/engine/common/bootstrapper.go#235: Bootstrapping started syncing with 2 vertices in the accepted frontier -INFO [02-09|00:59:49] snow/engine/common/bootstrapper.go#235: Bootstrapping started syncing with 1 vertices in the accepted frontier -INFO [02-09|00:59:49] snow/engine/avalanche/bootstrap/bootstrapper.go#473: bootstrapping fetched 55 vertices. Executing transaction state transitions... -INFO [02-09|00:59:49] snow/engine/common/queue/jobs.go#171: executed 55 operations -INFO [02-09|00:59:49] snow/engine/avalanche/bootstrap/bootstrapper.go#484: executing vertex state transitions... -INFO [02-09|00:59:49] snow/engine/common/queue/jobs.go#171: executed 55 operations -INFO [02-09|01:00:07] snow/engine/snowman/bootstrap/bootstrapper.go#406: bootstrapping fetched 1241 blocks. Executing state transitions... -``` - -At this point, the node will go into bootstrapping and (once bootstrapping completes) resume consensus and operate as normal. - -Disk Space Considerations[​](#disk-space-considerations "Direct link to heading") ---------------------------------------------------------------------------------- - -To ensure the node does not enter an inconsistent state, the bloom filter used for pruning is persisted to `offline-pruning-data-directory` for the duration of the operation. This directory should have `offline-pruning-bloom-filter-size` available in disk space (default 512 MB). - -The underlying database (LevelDB) uses deletion markers (tombstones) to identify newly deleted keys. These markers are temporarily persisted to disk until they are removed during a process known as compaction. This will lead to an increase in disk usage during pruning. If your node runs out of disk space during pruning, you may safely restart the pruning operation. This may succeed as restarting the node triggers compaction. - -If restarting the pruning operation does not succeed, additional disk space should be provisioned. \ No newline at end of file diff --git a/content/docs/nodes/run-a-node/common-errors.mdx b/content/docs/nodes/run-a-node/common-errors.mdx index 8700b3741e3..bbfd11ce177 100644 --- a/content/docs/nodes/run-a-node/common-errors.mdx +++ b/content/docs/nodes/run-a-node/common-errors.mdx @@ -3,16 +3,149 @@ title: Common Errors description: Common errors while running a node and their solutions. --- -If you experience any issues building your node, here are some common errors and possible solutions. +If you experience any issues running your node, here are common errors and their solutions. -### Failed to Connect to Bootstrap Nodes[​](#failed-to-connect-to-bootstrap-nodes "Direct link to heading") +## Bootstrap and Initialization Errors -Error: `WARN node/node.go:291 failed to connect to bootstrap nodes` +| Error | Cause | Solution | +|-------|-------|----------| +| `failed to connect to bootstrap nodes` | • No internet access
• NodeID already in use<br/>• Old instance still running<br/>• Firewall blocking outbound connections | • Check internet connection<br/>• Ensure only one node instance is running<br/>• Verify firewall allows outbound connections<br/>• Confirm staking port (9651) is configured |
+| `subnets not bootstrapped` | • Node still syncing with network<br/>• Health checks called too early<br/>• Network connectivity issues | • Wait for bootstrap to complete (can take hours)<br/>• Monitor the `/ext/health` endpoint<br/>• Ensure stable network connection<br/>• Check logs for progress |
+| `db contains invalid genesis hash` | • Database from different network<br/>• Database corruption<br/>• Incompatible database | • Delete database and resync from scratch<br/>• Verify correct network connection<br/>• Check `--network-id` flag matches database |
-This error can occur when the node doesn't have access to the Internet or if the NodeID is already being used by a different node in the network. This can occur when an old instance is running and not terminated.
-### Cannot Query Unfinalized Data[](#cannot-query-unfinalized-data "Direct link to heading")
-Error: `err="cannot query unfinalized data"`
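+
+A stalled bootstrap is often just a long one. Before assuming an error, you can ask the node directly which chains have finished; a minimal sketch using the `info.isBootstrapped` API (it assumes the node listens on the default API port 9650):
+
+```bash
+# Ask the node whether each Primary Network chain has finished bootstrapping
+for chain in P X C; do
+  curl -s -X POST --data "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"info.isBootstrapped\",\"params\":{\"chain\":\"$chain\"}}" \
+    -H 'content-type:application/json;' http://127.0.0.1:9650/ext/info
+  echo
+done
+```
+
+## Network and Connectivity Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|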
+| `cannot query unfinalized data` | • Not connected to other validators<br/>• Wrong public IP configured<br/>• Port 9651 closed/blocked<br/>• Insufficient validator connections | • Configure public IP with `--public-ip`<br/>• Open port 9651 to internet<br/>• Allow inbound connections in firewall<br/>• Set up port forwarding if behind NAT<br/>• Verify peers: `curl -X POST --data '{"jsonrpc":"2.0","id":1,"method":"info.peers"}' -H 'content-type:application/json;' http://127.0.0.1:9650/ext/info` |
+| `primary network validator has no inbound connections` | • Firewall blocking inbound traffic<br/>• NAT/router not configured<br/>• Wrong public IP advertised<br/>• ISP blocking connections | • Configure port forwarding for 9651<br/>• Verify firewall allows inbound<br/>• Check public IP: `curl ifconfig.me`<br/>• Test port with online checkers<br/>• Use VPS if ISP blocks ports |
+| `not connected to enough stake` | • Insufficient validator connections<br/>• Network partitioning<br/>• Node isolated from network<br/>• Bootstrap incomplete | • Check network connectivity<br/>• Verify firewall rules<br/>• Wait for more connections<br/>• Synchronize system time (NTP) |
+| `throttled` (Code: -4) | • Too many connection attempts<br/>• Rate limiting by peers<br/>• Network congestion | • Wait before retrying<br/>• Check for connection loops<br/>• Reduce connection rate |
-There may be a number of reasons for this issue, but it is likely that the node is not connected properly to other validators, which is usually caused by networking misconfiguration (wrong public IP, closed p2p port 9651).
\ No newline at end of file
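+
+When connectivity is in doubt, the peer count is the fastest signal. A small sketch using `info.peers` (assumes `jq` is installed and the node is on the default API port):
+
+```bash
+# Count active peer connections; a healthy node should hold many peers
+curl -s -X POST --data '{"jsonrpc":"2.0","id":1,"method":"info.peers"}' \
+  -H 'content-type:application/json;' http://127.0.0.1:9650/ext/info | jq '.result.numPeers'
+```
+
+## Database and Storage Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|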
+| `closed` | • Database accessed after shutdown<br/>• Ungraceful termination<br/>• Connection lost | • Restart the node<br/>• Check for disk errors or full disk<br/>• Verify database files not corrupted |
+| `blockdb: unrecoverable corruption detected` | • Ungraceful shutdown (power loss, kill -9)<br/>• Disk errors during writes<br/>• Hardware failure | • Delete database and resync<br/>• Run SMART diagnostics on disk<br/>• Ensure 10+ GiB free space<br/>• Use UPS for power protection<br/>• Maintain regular backups |
+| Disk space warnings | • Usage exceeds threshold<br/>• Database growth without cleanup<br/>• Log accumulation | • Keep at least 10 GiB free (20+ GiB recommended)<br/>• Monitor disk usage regularly<br/>• Clean up old logs<br/>• Set up low-space alerts |
+| `blockdb: invalid block height` | • Database corruption<br/>• Querying non-existent block<br/>• Index corruption | • Verify block height is valid<br/>• Resync if corrupted<br/>• Check database integrity |
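+
+Most database trouble starts as disk trouble, so check capacity and the logs first. A sketch assuming the default data directory `~/.avalanchego`:
+
+```bash
+# Free space on the volume holding the database
+df -h ~/.avalanchego
+
+# Scan recent logs for corruption-related messages
+grep -iE "corrupt|disk" ~/.avalanchego/logs/*.log | tail -n 20
+```
+
+## Configuration Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|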
+| `invalid TLS key` | • TLS key without certificate<br/>• Certificate without key<br/>• Invalid key format<br/>• Corrupted certificate files | • Provide both key and certificate together<br/>• Regenerate credentials if corrupted<br/>• Verify file permissions<br/>• Check certificate format |
+| `minimum validator stake can't be greater than maximum` | • Invalid stake configuration<br/>• Conflicting parameters<br/>• Configuration typos | • Review configuration file<br/>• Ensure min < max stake<br/>• Check for typos |
+| `uptime requirement must be in the range [0, 1]` | • Out-of-range uptime value | • Set uptime requirement between 0 and 1 |
+| `delegation fee must be in the range [0, 1,000,000]` | • Invalid delegation fee | • Set fee between 0 and 1,000,000 |
+| `min stake duration must be > 0` | • Invalid stake duration<br/>• Min > max duration | • Set min duration > 0 and < max |
+| `sybil protection disabled on public network` | • Disabling protection on mainnet/testnet<br/>• Security misconfiguration | • Only disable on private networks<br/>• Verify network configuration<br/>• Remove override for public networks |
+| `plugin dir is not a directory` | • Path points to file not directory<br/>• Directory doesn't exist<br/>• Permission issues | • Create plugin directory<br/>• Verify path points to directory<br/>• Check read/execute permissions |
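+
+For a corrupted TLS key/certificate pair, the simplest recovery is to let the node regenerate both. A sketch assuming the default staking paths; note that fresh credentials mean a **new NodeID**, so back up the originals first:
+
+```bash
+# Back up the existing (possibly corrupted) staking credentials
+cp -r ~/.avalanchego/staking ~/staking-backup
+
+# Remove the broken pair; AvalancheGo creates a fresh key and certificate
+# on the next startup, which also assigns the node a NEW NodeID
+rm ~/.avalanchego/staking/staker.key ~/.avalanchego/staking/staker.crt
+```
+
+## Resource and Capacity Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|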
+| `insufficient funds` | • Insufficient balance for fees<br/>• Transaction exceeds balance<br/>• Gas estimation too low | • Ensure sufficient balance<br/>• Account for transaction fees<br/>• Verify balance before submitting |
+| `insufficient gas capacity to build block` | • Mempool exceeds block gas limit<br/>• Complex transactions<br/>• Network congestion | • Wait for congestion to clear<br/>• Break into smaller transactions<br/>• Increase gas limits if possible |
+| `insufficient history to generate proof` | • Partial sync mode<br/>• Pruned historical data<br/>• Incomplete state sync | • Use full sync for complete history<br/>• Wait for state sync to finish<br/>• Use archival node for historical data |
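+
+To rule out `insufficient funds` before resubmitting, query the balance directly. A sketch against the C-Chain RPC; the address is a placeholder and the result is hex-encoded wei:
+
+```bash
+# Balance of a C-Chain account (placeholder address), hex-encoded wei
+curl -s -X POST --data '{"jsonrpc":"2.0","id":1,"method":"eth_getBalance","params":["0xYourAddressHere","latest"]}' \
+  -H 'content-type:application/json;' http://127.0.0.1:9650/ext/bc/C/rpc
+```
+
+## Validator and Consensus Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|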
+| `not a validator` (Code: -3) | • Validator-only operation on non-validator<br/>• Stake expired or not active<br/>• Not registered as validator | • Verify registration status<br/>• Check stake is active<br/>• Wait for validation period<br/>• Use correct API for node type |
+| `unknown validator` | • Not in current validator set<br/>• NodeID mismatch<br/>• Validator expired/removed | • Verify validator is active<br/>• Check end time hasn't passed<br/>• Confirm correct NodeID<br/>• Query validator set |
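+
+Validator-set questions can be answered from the P-Chain API. A sketch using `platform.getCurrentValidators` (the NodeID is a placeholder; omit `nodeIDs` to list the entire set):
+
+```bash
+# Is this NodeID (placeholder) in the current Primary Network validator set?
+curl -s -X POST --data '{"jsonrpc":"2.0","id":1,"method":"platform.getCurrentValidators","params":{"nodeIDs":["NodeID-YourNodeIDHere"]}}' \
+  -H 'content-type:application/json;' http://127.0.0.1:9650/ext/bc/P
+```
+
+## Version and Upgrade Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|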
+| `unknown network upgrade detected` | • Outdated node version<br/>• Network upgrade scheduled/active<br/>• Incompatible protocol | • **Update immediately** to latest version<br/>• Monitor upgrade announcements<br/>• Enable automatic updates<br/>• Check version: `avalanchego --version` |
+| `unknown network upgrade - update as soon as possible` | • Network upgrade approaching<br/>• Node version outdated | • Update within the day<br/>• Check GitHub releases<br/>• Plan for maintenance window |
+| `imminent network upgrade - update immediately` | • Network upgrade imminent (within hour) | • **Critical: Update immediately**<br/>• Risk of network disconnection |
+| `invalid upgrade configuration` | • Upgrade times not chronological<br/>• Conflicting schedules<br/>• Invalid precompile config | • Review upgrade config files<br/>• Ensure sequential timing<br/>• Validate precompile settings<br/>• Consult upgrade documentation |
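+
+To compare the running version against the latest release, a sketch using the `info.getNodeVersion` API and GitHub's releases feed:
+
+```bash
+# Version reported by the running node
+curl -s -X POST --data '{"jsonrpc":"2.0","id":1,"method":"info.getNodeVersion"}' \
+  -H 'content-type:application/json;' http://127.0.0.1:9650/ext/info
+
+# Latest published release tag on GitHub
+curl -s https://api.github.com/repos/ava-labs/avalanchego/releases/latest | grep '"tag_name"'
+```
+
+## API and RPC Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|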
+| Health check: `not yet run` | • Node still initializing<br/>• Bootstrap incomplete<br/>• Subnet sync in progress<br/>• Network issues | • Wait for initialization<br/>• Monitor `/ext/health` for updates<br/>• Check individual health checks<br/>• Ensure subnets are synced |
+| `timed out` (Code: -1) | • Request exceeded timeout<br/>• Node overloaded<br/>• Network latency | • Increase timeout settings<br/>• Check resource usage (CPU/memory/disk)<br/>• Reduce request complexity<br/>• Use retry with exponential backoff |
+| Invalid content-type | • Wrong Content-Type header<br/>• Missing header | • Add `Content-Type: application/json`<br/>• Verify API client config<br/>• Example: `curl -H 'content-type:application/json;' ...` |
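+
+For transient `timed out` responses, a plain retry with exponential backoff is usually enough. A sketch against the health endpoint (assumes it is reachable on the default port):
+
+```bash
+# Retry up to 5 times, doubling the wait after each failure
+for i in 1 2 3 4 5; do
+  curl -sf --max-time 10 http://127.0.0.1:9650/ext/health && break
+  sleep $((2 ** i))
+done
+```
+
+## State Sync Errors
+
+| Error | Cause | Solution |
+|-------|-------|----------|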
+| `proof obtained an invalid root ID` | • State changed during sync<br/>• Corrupted merkle proof<br/>• Network issues | • Restart state sync<br/>• Ensure stable connection<br/>• Wait for state to stabilize |
+| `vm does not implement StateSyncableVM interface` | • Unsupported VM<br/>• Outdated VM version | • Update VM to support state sync<br/>• Use full bootstrap instead<br/>• Check VM compatibility docs |
+
+---
+
+## Monitoring and Prevention
+
+### Key Metrics to Monitor
+
+| Metric | Threshold | How to Check |
+|--------|-----------|--------------|
+| **Disk Space** | Keep 10+ GiB free (20+ GiB recommended) | `df -h` |
+| **Network Connectivity** | Inbound/outbound connections active | Check firewall, use port scanners |
+| **Bootstrap Status** | Should be `bootstrapped` | `/ext/health` |
+| **Validator Connections** | Connected to sufficient stake | `/ext/info` API, check peer count |
+| **Database Health** | No corruption warnings in logs | Monitor `~/.avalanchego/logs/` |
+| **Node Version** | Current with latest release | `avalanchego --version` |
+
+### Best Practices
+
+| Practice | Benefit |
+|----------|---------|
+| Use a UPS (uninterruptible power supply) | Prevents database corruption from power loss |
+| Enable automatic updates | Stay current with security patches |
+| Monitor logs regularly | Early detection of issues |
+| Keep adequate disk space | Prevent database write failures |
+| Configure port forwarding properly | Ensure validator connectivity |
+| Synchronize system time with NTP | Prevent consensus issues |
+| Back up critical files | Quick recovery from failures |
+| Test changes on testnet first | Avoid production issues |
+
+### Health Check Endpoints
+
+| Endpoint | Purpose | What It Checks |
+|----------|---------|----------------|
+| `/ext/health/liveness` | Basic process health | Is the node process running? |
+| `/ext/health/readiness` | Ready to serve traffic | Is bootstrapping complete? |
+| `/ext/health` | Comprehensive status | All health checks and details |
+
+### Getting Help
+
+If you encounter errors not listed here:
+
+1. **Check Logs**: Review `~/.avalanchego/logs/` for detailed error messages
+2. **Search Forum**: [Avalanche Forum](https://forum.avax.network/)
+3. **Join Discord**: [Avalanche Discord](https://chat.avax.network/)
+4. **GitHub Issues**: [Review existing issues](https://github.com/ava-labs/avalanchego/issues)
+5. **Provide Context**: Include specific error messages, logs, and configuration when asking for help
+
+### Quick Diagnostic Commands
+
+```bash
+# Check node version
+avalanchego --version
+
+# Check disk space
+df -h
+
+# Check if port 9651 is open locally
+nc -zv 127.0.0.1 9651
+
+# Check node health
+curl -X POST --data '{"jsonrpc":"2.0","id":1,"method":"health.health"}' -H 'content-type:application/json;' http://127.0.0.1:9650/ext/health
+
+# Check peers
+curl -X POST --data '{"jsonrpc":"2.0","id":1,"method":"info.peers"}' -H 'content-type:application/json;' http://127.0.0.1:9650/ext/info
+
+# Check bootstrap status
+curl -X POST --data '{"jsonrpc":"2.0","id":1,"method":"info.isBootstrapped","params":{"chain":"X"}}' -H 'content-type:application/json;' http://127.0.0.1:9650/ext/info
+```
\ No newline at end of file
diff --git a/content/docs/nodes/run-a-node/using-docker.mdx b/content/docs/nodes/run-a-node/using-docker.mdx
index 9cd27c1a715..fd20485b0af 100644
--- a/content/docs/nodes/run-a-node/using-docker.mdx
+++ b/content/docs/nodes/run-a-node/using-docker.mdx
@@ -1,8 +1,12 @@
 ---
-title: Using Docker
-description: Learn how to run an Avalanche node using Docker.
+title: Build AvalancheGo Docker Image
+description: Learn how to build a Docker image for AvalancheGo.
 ---
+
+For an easier way to set up and run a node, try the [Avalanche Console Node Setup Tool](/console/primary-network/node-setup).
+ + ## Prerequisites Before beginning, you must ensure that: diff --git a/content/docs/nodes/system-requirements.mdx b/content/docs/nodes/system-requirements.mdx index 7a1f8c4f54e..ee9e8e2ec27 100644 --- a/content/docs/nodes/system-requirements.mdx +++ b/content/docs/nodes/system-requirements.mdx @@ -10,45 +10,34 @@ Running a Primary Network validator requires careful consideration of your stake ### Storage Requirements -You **must** use a local NVMe SSD attached directly to your hardware. Cloud block storage (AWS EBS, GCP Persistent Disk, Azure Managed Disks) introduces latency that causes poor performance, missed blocks, and potential benching. If running in the cloud, use instance types with local NVMe storage (e.g., AWS i3/i4 instances, GCP N2 with local SSD). +You **must** use a local NVMe SSD attached directly to your hardware with **minimum 3000 IOPS**. Cloud block storage (AWS EBS, GCP Persistent Disk, Azure Managed Disks) introduces latency that causes poor performance, missed blocks, and potential benching. If running in the cloud, use instance types with local NVMe storage (e.g., AWS i3/i4i instances, GCP N2 with local SSD). New validators should use **state sync** to bootstrap. While full sync from genesis is still possible, state sync is significantly faster—downloading only the active state (~500 GB) rather than replaying all historical blocks. -| Storage Type | Size | Description | -|--------------|------|-------------| +| Storage Type | Initial Size | Description | +|--------------|--------------|-------------| | Active State | ~500 GB | Current state required to validate. Downloaded via state sync. | | Full Archive | ~3 TB+ | Complete historical state. Only needed for archive nodes or block explorers. | + +Even with state sync, your node's storage usage will grow over time as new blocks are added and old state accumulates. A node starting at 500 GB can grow to 1 TB+ over months of operation. Plan for this growth when provisioning storage, or schedule periodic maintenance using [state management strategies](/docs/nodes/maintain/chain-state-management). + + ### Hardware Requirements Resource requirements scale with your stake weight. Higher stake means more validator duties and network traffic. -#### Low Stake Validators - -For validators with modest stake delegations who want reliable operation without over-provisioning. - -| Component | Requirement | -|-----------|-------------| -| **CPU** | 4 cores / 8 threads (e.g., AMD Ryzen 5, Intel i5) | -| **RAM** | 16 GB | -| **Storage** | 1 TB NVMe SSD (local, not network-attached) | -| **Network** | 100 Mbps symmetric, stable connection | -| **OS** | Ubuntu 22.04 LTS or macOS ≥ 12 | - -#### High Stake Validators - -For validators with significant stake who will handle proportionally more network traffic and validation duties. 
- -| Component | Requirement | -|-----------|-------------| -| **CPU** | 8+ cores / 16 threads (e.g., AMD Ryzen 7/9, Intel i7/i9) | -| **RAM** | 32 GB | -| **Storage** | 2 TB NVMe SSD (local, not network-attached) | -| **Network** | 1 Gbps symmetric, low-latency connection | -| **OS** | Ubuntu 22.04 LTS or macOS ≥ 12 | +| Component | Low Stake Validators | High Stake Validators | +|-----------|---------------------|----------------------| +| **Use Case** | Validators with modest stake delegations who want reliable operation without over-provisioning | Validators with significant stake who handle proportionally more network traffic and validation duties | +| **CPU** | 4 cores / 8 threads (e.g., AMD Ryzen 5, Intel i5) | 8+ cores / 16 threads (e.g., AMD Ryzen 7/9, Intel i7/i9) | +| **RAM** | 16 GB | 32 GB | +| **Storage** | 1 TB NVMe SSD (local, not network-attached) | 2 TB NVMe SSD (local, not network-attached) | +| **Network** | 100 Mbps symmetric, stable connection | 1 Gbps symmetric, low-latency connection | +| **OS** | Ubuntu 22.04 LTS or macOS ≥ 12 | Ubuntu 22.04 LTS or macOS ≥ 12 | If you're unsure which tier applies to you: start with low-stake specs and monitor performance. If you see high CPU usage, memory pressure, or network saturation, upgrade accordingly. @@ -60,41 +49,14 @@ If you're unsure which tier applies to you: start with low-stake specs and monit L1 validators run your own blockchain with custom parameters. Hardware requirements depend on your chain's transaction throughput and state size. -### Low Throughput - -Suitable for testnets, development chains, or production L1s with minimal traffic (< 10 TPS average). - -| Component | Requirement | -|-----------|-------------| -| **CPU** | 2 cores | -| **RAM** | 4 GB | -| **Storage** | 100 GB SSD | -| **Network** | 25 Mbps | -| **OS** | Ubuntu 22.04 LTS or macOS ≥ 12 | - -### Medium Throughput - -For production L1s with moderate activity (10–100 TPS average), gaming chains, or DeFi applications. - -| Component | Requirement | -|-----------|-------------| -| **CPU** | 4 cores | -| **RAM** | 8 GB | -| **Storage** | 500 GB SSD | -| **Network** | 100 Mbps | -| **OS** | Ubuntu 22.04 LTS or macOS ≥ 12 | - -### High Throughput - -For high-performance L1s with heavy transaction volume (100+ TPS), large state, or complex smart contracts. - -| Component | Requirement | -|-----------|-------------| -| **CPU** | 8+ cores | -| **RAM** | 16 GB+ | -| **Storage** | 1 TB+ NVMe SSD | -| **Network** | 1 Gbps | -| **OS** | Ubuntu 22.04 LTS or macOS ≥ 12 | +| Component | Low Throughput | Medium Throughput | High Throughput | +|-----------|----------------|-------------------|-----------------| +| **Use Case** | Testnets, development chains, or production L1s with minimal traffic (< 10 TPS) | Production L1s with moderate activity (10–100 TPS), gaming chains, or DeFi applications | High-performance L1s with heavy transaction volume (100+ TPS), large state, or complex smart contracts | +| **CPU** | 2 cores | 4 cores | 8+ cores | +| **RAM** | 4 GB | 8 GB | 16 GB+ | +| **Storage** | 100 GB (SSD optional) | 500 GB SSD | 1 TB+ NVMe SSD | +| **Network** | 25 Mbps | 100 Mbps | 1 Gbps | +| **OS** | Ubuntu 22.04 LTS or macOS ≥ 12 | Ubuntu 22.04 LTS or macOS ≥ 12 | Ubuntu 22.04 LTS or macOS ≥ 12 | L1 validators sync the P-Chain to track validator sets and cross-chain messages. This adds minimal overhead to the requirements above. @@ -106,10 +68,19 @@ L1 validators sync the P-Chain to track validator sets and cross-chain messages. 
AvalancheGo requires inbound connections on port `9651`. Before installation, ensure your networking environment is properly configured. +### IPv4 and IPv6 Support + +AvalancheGo supports both IPv4 and IPv6: +- **IPv4**: Fully supported and most common +- **IPv6**: Fully supported - your node can operate exclusively on IPv6 or dual-stack +- **Dual-stack**: You can run both IPv4 and IPv6 simultaneously + +If using IPv6, ensure your firewall and network configuration properly allow inbound IPv6 connections on port `9651`. + ### Cloud Providers Cloud instances have static IPs by default. Ensure your security group or firewall allows: -- **Inbound**: TCP port 9651 +- **Inbound**: TCP port 9651 (IPv4 and/or IPv6) - **Outbound**: All traffic ### Home Connections @@ -124,7 +95,25 @@ A fully connected Avalanche node maintains thousands of live TCP connections. Un --- +## Monitoring Thresholds + +Set up monitoring and alerts to catch resource issues before they impact your validator: + +| Resource | Warning Threshold | Critical Threshold | Action Required | +|----------|------------------|-------------------|-----------------| +| **Disk Usage** | 80% | 90% | Run [offline pruning](/docs/nodes/maintain/reduce-disk-usage) or [state sync](/docs/nodes/maintain/chain-state-management) | +| **CPU Usage** | 70% sustained | 90% sustained | Upgrade to higher-tier instance or optimize workload | +| **Memory Usage** | 80% | 90% | Upgrade RAM or investigate memory leaks | +| **Network Bandwidth** | 80% of capacity | 95% of capacity | Upgrade network tier or reduce other network traffic | +| **Disk IOPS** | 80% of available | 95% of available | Upgrade to higher IOPS storage | + + +**Disk usage** is the most common issue for validators. Consider setting up automated alerts at 80% to give yourself time to plan maintenance before your node runs out of space. + + +--- + ## Next Steps - Learn about [Active State vs Archive State](/docs/nodes/maintain/chain-state-management) to understand storage requirements -- Set up [node monitoring](/docs/nodes/maintain/monitoring) to track resource usage +- Set up [node monitoring](/docs/nodes/maintain/monitoring) to track resource usage and configure alerts diff --git a/mdx-components.tsx b/mdx-components.tsx index f495800cd60..a37e8e92234 100644 --- a/mdx-components.tsx +++ b/mdx-components.tsx @@ -26,6 +26,10 @@ const Mermaid = dynamic(() => import("@/components/content-design/mermaid"), { ssr: false, }); +const StateGrowthChart = dynamic(() => import("@/components/content-design/state-growth-chart"), { + ssr: false, +}); + export function useMDXComponents(components: MDXComponents): MDXComponents { // Exclude heading and img components from defaultComponents to avoid conflicts const { h1, h2, h3, h4, h5, h6, img, ...restDefaultComponents } = defaultComponents; @@ -73,6 +77,7 @@ export function useMDXComponents(components: MDXComponents): MDXComponents { YouTube, Gallery, Mermaid, + StateGrowthChart, InstallTabs: ({ items, children, diff --git a/next.config.mjs b/next.config.mjs index 3e9bbd506f3..cc170e3bb5c 100644 --- a/next.config.mjs +++ b/next.config.mjs @@ -1350,6 +1350,11 @@ const config = { destination: "/academy/avalanche-l1/avalanche-fundamentals/04-creating-an-l1", permanent: true, }, + { + source: "/docs/nodes/maintain/reduce-disk-usage", + destination: "/docs/nodes/maintain/chain-state-management", + permanent: true, + }, // 404 fixes - December 2025 { source: "/docs/build",