Skip to content

Commit

Permalink
Issue tenure extend if incoming miner fails to mine
Browse files Browse the repository at this point in the history
Signed-off-by: Jacinta Ferrant <[email protected]>
  • Loading branch information
jferrant committed Feb 5, 2025
1 parent 91a1398 commit 2f4f2dd
Show file tree
Hide file tree
Showing 7 changed files with 996 additions and 59 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
### Added

- Add miner configuration option `tenure_extend_cost_threshold` to specify the percentage of the tenure budget that must be spent before a time-based tenure extend is attempted
- Add miner configuration option `tenure_extend_wait_timeout_ms` to specify the time to wait to try to continue a tenure if a BlockFound is expected

### Changed

- Miner will include other transactions in blocks with tenure extend transactions (#5760)
- Add `block_rejection_timeout_steps` to miner configuration for defining rejections-based timeouts while waiting for signers response (#5705)
- Miner will not issue a tenure extend until at least half of the block budget has been spent (#5757)
- Miner will issue a tenure extend if the incoming miner has failed to produce a block (#5729)

### Fixed

Expand Down
1 change: 1 addition & 0 deletions stacks-signer/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ and this project adheres to the versioning scheme outlined in the [README.md](RE
`StackerDB` messages, it logs `INFO` messages. Other interactions with the `stacks-node`
behave normally (e.g., submitting validation requests, submitting finished blocks). A
dry run signer will error out if the supplied key is actually a registered signer.
- Reduce default value of `block_proposal_timeout_ms` to 120_000

## [3.1.0.0.4.0]

Expand Down
2 changes: 1 addition & 1 deletion stacks-signer/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use stacks_common::util::hash::Hash160;
use crate::client::SignerSlotID;

const EVENT_TIMEOUT_MS: u64 = 5000;
const BLOCK_PROPOSAL_TIMEOUT_MS: u64 = 600_000;
const BLOCK_PROPOSAL_TIMEOUT_MS: u64 = 120_000;
const BLOCK_PROPOSAL_VALIDATION_TIMEOUT_MS: u64 = 120_000;
const DEFAULT_FIRST_PROPOSAL_BURN_BLOCK_TIMING_SECS: u64 = 60;
const DEFAULT_TENURE_LAST_BLOCK_PROPOSAL_TIMEOUT_SECS: u64 = 30;
Expand Down
15 changes: 11 additions & 4 deletions stackslib/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ const DEFAULT_TENURE_COST_LIMIT_PER_BLOCK_PERCENTAGE: u8 = 25;
/// see if we need to extend the ongoing tenure (e.g. because the current
/// sortition is empty or invalid).
const DEFAULT_TENURE_EXTEND_POLL_SECS: u64 = 1;
/// Default number of millis to wait to try to continue a tenure if a BlockFound is expected
const DEFAULT_TENURE_EXTEND_WAIT_MS: u64 = 120_000;
/// Default duration to wait before attempting to issue a tenure extend.
/// This should be greater than the signers' timeout. This is used for issuing
/// fallback tenure extends
Expand Down Expand Up @@ -2177,9 +2179,11 @@ pub struct MinerConfig {
pub block_commit_delay: Duration,
/// The percentage of the remaining tenure cost limit to consume each block.
pub tenure_cost_limit_per_block_percentage: Option<u8>,
/// The number of seconds to wait in-between polling the sortition DB to see if we need to
/// Duration to wait in-between polling the sortition DB to see if we need to
/// extend the ongoing tenure (e.g. because the current sortition is empty or invalid).
pub tenure_extend_poll_secs: Duration,
pub tenure_extend_poll_timeout: Duration,
/// Duration to wait to try to continue a tenure if a BlockFound is expected
pub tenure_extend_wait_timeout: Duration,
/// Duration to wait before attempting to issue a tenure extend
pub tenure_timeout: Duration,
/// Percentage of block budget that must be used before attempting a time-based tenure extend
Expand Down Expand Up @@ -2222,7 +2226,8 @@ impl Default for MinerConfig {
tenure_cost_limit_per_block_percentage: Some(
DEFAULT_TENURE_COST_LIMIT_PER_BLOCK_PERCENTAGE,
),
tenure_extend_poll_secs: Duration::from_secs(DEFAULT_TENURE_EXTEND_POLL_SECS),
tenure_extend_poll_timeout: Duration::from_secs(DEFAULT_TENURE_EXTEND_POLL_SECS),
tenure_extend_wait_timeout: Duration::from_millis(DEFAULT_TENURE_EXTEND_WAIT_MS),
tenure_timeout: Duration::from_secs(DEFAULT_TENURE_TIMEOUT_SECS),
tenure_extend_cost_threshold: DEFAULT_TENURE_EXTEND_COST_THRESHOLD,

Expand Down Expand Up @@ -2629,6 +2634,7 @@ pub struct MinerConfigFile {
pub block_commit_delay_ms: Option<u64>,
pub tenure_cost_limit_per_block_percentage: Option<u8>,
pub tenure_extend_poll_secs: Option<u64>,
pub tenure_extend_wait_timeout_ms: Option<u64>,
pub tenure_timeout_secs: Option<u64>,
pub tenure_extend_cost_threshold: Option<u64>,
pub block_rejection_timeout_steps: Option<HashMap<String, u64>>,
Expand Down Expand Up @@ -2772,7 +2778,8 @@ impl MinerConfigFile {
subsequent_rejection_pause_ms: self.subsequent_rejection_pause_ms.unwrap_or(miner_default_config.subsequent_rejection_pause_ms),
block_commit_delay: self.block_commit_delay_ms.map(Duration::from_millis).unwrap_or(miner_default_config.block_commit_delay),
tenure_cost_limit_per_block_percentage,
tenure_extend_poll_secs: self.tenure_extend_poll_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_extend_poll_secs),
tenure_extend_poll_timeout: self.tenure_extend_poll_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_extend_poll_timeout),
tenure_extend_wait_timeout: self.tenure_extend_wait_timeout_ms.map(Duration::from_millis).unwrap_or(miner_default_config.tenure_extend_wait_timeout),
tenure_timeout: self.tenure_timeout_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_timeout),
tenure_extend_cost_threshold: self.tenure_extend_cost_threshold.unwrap_or(miner_default_config.tenure_extend_cost_threshold),

Expand Down
152 changes: 99 additions & 53 deletions testnet/stacks-node/src/nakamoto_node/relayer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,48 @@ impl MinerStopHandle {
}
}

/// Information necessary to determine when to extend a tenure
pub struct TenureExtendTime {
/// The time at which we determined that we should tenure-extend
time: Instant,
/// The amount of time we should wait before tenure-extending
timeout: Duration,
}

impl TenureExtendTime {
/// Create a new `TenureExtendTime` with a delayed `timeout`
pub fn delayed(timeout: Duration) -> Self {
Self {
time: Instant::now(),
timeout,
}
}

/// Create a new `TenureExtendTime` with no `timeout`
pub fn immediate() -> Self {
Self {
time: Instant::now(),
timeout: Duration::from_secs(0),
}
}

/// Should we attempt to tenure-extend?
pub fn should_extend(&self) -> bool {
// We set the time, but have we waited long enough?
self.time.elapsed() > self.timeout
}

// Amount of time elapsed since we decided to tenure-extend
pub fn elapsed(&self) -> Duration {
self.time.elapsed()
}

// The timeout specified when we decided to tenure-extend
pub fn timeout(&self) -> Duration {
self.timeout
}
}

/// Relayer thread
/// * accepts network results and stores blocks and microblocks
/// * forwards new blocks, microblocks, and transactions to the p2p thread
Expand Down Expand Up @@ -319,8 +361,8 @@ pub struct RelayerThread {
last_committed: Option<LastCommit>,
/// Timeout for waiting for the first block in a tenure before submitting a block commit
new_tenure_timeout: Option<Instant>,
/// Timeout for waiting for a BlockFound in a subsequent tenure before trying to extend our own
tenure_extend_timeout: Option<Instant>,
/// Time to wait before attempting a tenure extend
tenure_extend_time: Option<TenureExtendTime>,
}

impl RelayerThread {
Expand Down Expand Up @@ -380,7 +422,7 @@ impl RelayerThread {
next_initiative: Instant::now() + Duration::from_millis(next_initiative_delay),
last_committed: None,
new_tenure_timeout: None,
tenure_extend_timeout: None,
tenure_extend_time: None,
}
}

Expand Down Expand Up @@ -505,7 +547,7 @@ impl RelayerThread {
SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn())
.expect("FATAL: failed to query sortition DB for stacks tip");

self.tenure_extend_timeout = None;
self.tenure_extend_time = None;

if sn.sortition {
// a sortition happened
Expand Down Expand Up @@ -535,11 +577,18 @@ impl RelayerThread {
sn.consensus_hash,
mining_pkh_opt,
) {
Ok(Some(_)) => {
Ok(Some((_, wait_for_miner))) => {
// we can continue our ongoing tenure, but we should give the new winning miner
// a chance to send their BlockFound first.
debug!("Relayer: Did not win sortition, but am mining the ongoing tenure. Allowing the new miner some time to come online before trying to continue.");
self.tenure_extend_timeout = Some(Instant::now());
if wait_for_miner {
debug!("Relayer: Did not win sortition, but am mining the ongoing tenure. Allowing the new miner some time to come online before trying to continue.");
self.tenure_extend_time = Some(TenureExtendTime::delayed(
self.config.miner.tenure_extend_wait_timeout,
));
} else {
debug!("Relayer: Did not win sortition, but am mining the ongoing tenure. Will try to continue tenure immediately.");
self.tenure_extend_time = Some(TenureExtendTime::immediate());
}
return Some(MinerDirective::StopTenure);
}
Ok(None) => {
Expand Down Expand Up @@ -611,7 +660,7 @@ impl RelayerThread {
"Relayer: ongoing tenure {} already represents last-winning snapshot",
&stacks_tip_sn.consensus_hash
);
self.tenure_extend_timeout = Some(Instant::now());
self.tenure_extend_time = Some(TenureExtendTime::immediate());
false
} else {
// stacks tip's snapshot may be an ancestor of the last-won sortition.
Expand Down Expand Up @@ -654,7 +703,7 @@ impl RelayerThread {
&last_winning_snapshot.consensus_hash
);
// prepare to extend after our BlockFound gets mined.
self.tenure_extend_timeout = Some(Instant::now());
self.tenure_extend_time = Some(TenureExtendTime::immediate());
return Some(MinerDirective::BeginTenure {
parent_tenure_start: StacksBlockId(
last_winning_snapshot.winning_stacks_block_hash.clone().0,
Expand All @@ -675,7 +724,9 @@ impl RelayerThread {
// by someone else -- there's a chance that this other miner will produce a
// BlockFound in the interim.
debug!("Relayer: Did not win last winning snapshot despite mining the ongoing tenure, so allowing the new miner some time to come online.");
self.tenure_extend_timeout = Some(Instant::now());
self.tenure_extend_time = Some(TenureExtendTime::delayed(
self.config.miner.tenure_extend_wait_timeout,
));
return None;
}
return Some(MinerDirective::ContinueTenure {
Expand Down Expand Up @@ -1348,10 +1399,10 @@ impl RelayerThread {
/// Assumes that the caller has already checked that the given miner has _not_ won the new
/// sortition.
///
/// Returns Ok(Some(stacks-tip-election-snapshot)) if the last-winning miner needs to extend.
/// For now, this only happens if the miner's election snapshot was the last-known valid and
/// non-empty snapshot. In the future, this function may return Ok(Some(..)) if the node
/// determines that a subsequent miner won sortition, but never came online.
/// Returns Ok(Some(stacks-tip-election-snapshot, wait-for-miner) if the last-winning miner should attempt to extend
/// This can happen for two seperate reasons:
/// - the miner's election snapshot was the last-known valid and non-empty snapshot and therefore should extend immediately
/// - the node determines that a subsequent miner won sortition, but has not yet produced a valid block and should wait-for-miner before extending
///
/// Returns OK(None) if the last-winning miner should not extend its tenure.
///
Expand All @@ -1361,7 +1412,7 @@ impl RelayerThread {
chain_state: &mut StacksChainState,
new_burn_view: ConsensusHash,
mining_key_opt: Option<Hash160>,
) -> Result<Option<BlockSnapshot>, NakamotoNodeError> {
) -> Result<Option<(BlockSnapshot, bool)>, NakamotoNodeError> {
let Some(mining_pkh) = mining_key_opt else {
return Ok(None);
};
Expand Down Expand Up @@ -1407,31 +1458,34 @@ impl RelayerThread {
return Ok(None);
}

// For now, only allow the miner to extend its tenure if won the highest valid sortition.
// There cannot be any higher sortitions that are valid (as defined above).
//
// In the future, the miner will be able to extend its tenure even if there are higher
// valid sortitions, but only if it determines that the miners of those sortitions are
// offline.
// Allow the miner to extend its tenure if won the highest valid sortition IFF
// it determines that the miners of the sortition fails to produce a block
// by the required timeout.
if let Some(highest_valid_sortition) = Self::find_highest_valid_sortition(
sortdb,
chain_state,
&sort_tip,
&canonical_stacks_snapshot.consensus_hash,
)? {
info!("Relayer: will not extend tenure -- we won sortition {}, but the highest valid sortition is {}", &canonical_stacks_snapshot.consensus_hash, &highest_valid_sortition.consensus_hash);
return Ok(None);
// TODO: I don't understand why this works? HELP???
if sort_tip.consensus_hash != highest_valid_sortition.consensus_hash {
info!("Relayer: will not extend tenure -- we won sortition {}, but the highest valid sortition is {}", &canonical_stacks_snapshot.consensus_hash, &highest_valid_sortition.consensus_hash);
return Ok(None);
}
info!("Relayer: MAY extend tenure -- we won sortition {}, but must give miner time to produce a valid block for the highest valid sortition {}", &canonical_stacks_snapshot.consensus_hash, &highest_valid_sortition.consensus_hash);
return Ok(Some((canonical_stacks_snapshot, true)));
}

Ok(Some(canonical_stacks_snapshot))
// There cannot be any higher sortitions that are valid (as defined above).
Ok(Some((canonical_stacks_snapshot, false)))
}

/// Attempt to continue a miner's tenure into the next burn block.
/// This is allowed if the miner won the last good sortition -- that is, the sortition which
/// elected the local view of the canonical Stacks fork's ongoing tenure.
///
/// This function assumes that the caller has checked that the sortition referred to by
/// `new_burn_view` does not have a sortition winner.
/// `new_burn_view` does not have a sortition winner or that the winner has not produced a
/// valid block yet.
fn continue_tenure(&mut self, new_burn_view: ConsensusHash) -> Result<(), NakamotoNodeError> {
if let Err(e) = self.stop_tenure() {
error!("Relayer: Failed to stop tenure: {e:?}");
Expand All @@ -1440,7 +1494,7 @@ impl RelayerThread {
debug!("Relayer: successfully stopped tenure; will try to continue.");

let mining_pkh_opt = self.get_mining_key_pkh();
let Some(canonical_stacks_tip_election_snapshot) = Self::can_continue_tenure(
let Some((canonical_stacks_tip_election_snapshot, _)) = Self::can_continue_tenure(
&self.sortdb,
&mut self.chainstate,
new_burn_view.clone(),
Expand Down Expand Up @@ -1758,38 +1812,30 @@ impl RelayerThread {
}

/// Try to start up a tenure-extend.
/// Only do this if the miner won the highest valid sortition but the burn view has changed.
/// In the future, the miner will also try to extend its tenure if a subsequent miner appears
/// to be offline.
/// Only do this if:
/// - the miner won the highest valid sortition but the burn view has changed.
/// - the subsequent miner appears to be offline.
fn try_continue_tenure(&mut self) {
if self.tenure_extend_timeout.is_none() {
return;
}

// time to poll to see if we should begin a tenure-extend?
let deadline_passed = self
.tenure_extend_timeout
.map(|tenure_extend_timeout| {
let deadline_passed =
tenure_extend_timeout.elapsed() > self.config.miner.tenure_extend_poll_secs;
if !deadline_passed {
test_debug!(
"Relayer: will not try to tenure-extend yet ({} <= {})",
tenure_extend_timeout.elapsed().as_secs(),
self.config.miner.tenure_extend_poll_secs.as_secs()
);
}
deadline_passed
})
.unwrap_or(false);

if !deadline_passed {
// Should begin a tenure-extend?
if let Some(tenure_extend_time) = &self.tenure_extend_time {
if !tenure_extend_time.should_extend() {
test_debug!(
"Relayer: will not try to tenure-extend yet ({} <= {})",
tenure_extend_time.elapsed().as_secs(),
tenure_extend_time.timeout().as_secs()
);
return;
}
} else {
// No tenure extend time set, so nothing to do.
return;
}

// reset timer so we can try again if for some reason a miner was already running (e.g. a
// blockfound from earlier).
self.tenure_extend_timeout = Some(Instant::now());
self.tenure_extend_time = Some(TenureExtendTime::delayed(
self.config.miner.tenure_extend_poll_timeout,
));

// try to extend, but only if we aren't already running a thread for the current or newer
// burnchain view
Expand Down
3 changes: 2 additions & 1 deletion testnet/stacks-node/src/tests/nakamoto_integrations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10897,14 +10897,15 @@ fn test_tenure_extend_from_flashblocks() {
assert_ne!(sort_tip.consensus_hash, election_tip.consensus_hash);

// we can, however, continue the tenure
let canonical_stacks_tip = RelayerThread::can_continue_tenure(
let (canonical_stacks_tip, wait) = RelayerThread::can_continue_tenure(
&sortdb,
&mut chainstate,
sort_tip.consensus_hash.clone(),
Some(mining_key_pkh.clone()),
)
.unwrap()
.unwrap();
assert!(!wait);
assert_eq!(canonical_stacks_tip, election_tip);

// if we didn't win the last block -- tantamount to the sortition winner miner key being
Expand Down
Loading

0 comments on commit 2f4f2dd

Please sign in to comment.