diff --git a/chain/client/src/chunk_inclusion_tracker.rs b/chain/client/src/chunk_inclusion_tracker.rs index ac80a720f21..46cec82cf2d 100644 --- a/chain/client/src/chunk_inclusion_tracker.rs +++ b/chain/client/src/chunk_inclusion_tracker.rs @@ -42,6 +42,20 @@ pub struct ChunkInclusionTracker { banned_chunk_producers: LruCache<(EpochId, AccountId), ()>, } +enum ChunkExclusionReason { + ChunkUnavailable, + InsufficientEndorsement, +} + +impl ChunkExclusionReason { + fn prometheus_label_value(&self) -> &'static str { + match self { + Self::ChunkUnavailable => "chunk_unavailable", + Self::InsufficientEndorsement => "insufficient_endorsement", + } + } +} + impl ChunkInclusionTracker { pub fn new() -> Self { Self { @@ -208,12 +222,13 @@ impl ChunkInclusionTracker { Ok((chunk_info.chunk_producer.clone(), chunk_info.received_time)) } - pub fn record_endorsement_metrics(&self, prev_block_hash: &CryptoHash) { - let Some(entry) = self.prev_block_to_chunk_hash_ready.peek(prev_block_hash) else { - return; - }; - - for (shard_id, chunk_hash) in entry { + pub fn record_endorsement_metrics(&self, prev_block_hash: &CryptoHash, all_shards: &[ShardId]) { + let maybe_entry = self.prev_block_to_chunk_hash_ready.peek(prev_block_hash); + for shard_id in all_shards { + let Some(chunk_hash) = maybe_entry.and_then(|entry| entry.get(shard_id)) else { + record_chunk_excluded_metric(ChunkExclusionReason::ChunkUnavailable, *shard_id); + continue; + }; let Some(chunk_info) = self.chunk_hash_to_chunk_info.get(chunk_hash) else { log_assert_fail!("Chunk info is missing for shard {shard_id} chunk {chunk_hash:?}"); continue; @@ -234,10 +249,17 @@ impl ChunkInclusionTracker { as f64, ); if !stats.is_endorsed { - metrics::BLOCK_PRODUCER_INSUFFICIENT_ENDORSEMENT_CHUNK_COUNT - .with_label_values(label_values) - .inc(); + record_chunk_excluded_metric( + ChunkExclusionReason::InsufficientEndorsement, + *shard_id, + ); } } } } + +fn record_chunk_excluded_metric(reason: ChunkExclusionReason, shard_id: 
ShardId) { + metrics::BLOCK_PRODUCER_EXCLUDED_CHUNKS_COUNT + .with_label_values(&[&shard_id.to_string(), reason.prometheus_label_value()]) + .inc(); +} diff --git a/chain/client/src/client_actor.rs b/chain/client/src/client_actor.rs index 10a60a3be3a..8952a65be65 100644 --- a/chain/client/src/client_actor.rs +++ b/chain/client/src/client_actor.rs @@ -1132,8 +1132,8 @@ impl ClientActorInner { .client .chunk_inclusion_tracker .num_chunk_headers_ready_for_inclusion(&epoch_id, &head.last_block_hash); - let have_all_chunks = head.height == 0 - || num_chunks == self.client.epoch_manager.shard_ids(&epoch_id).unwrap().len(); + let shard_ids = self.client.epoch_manager.shard_ids(&epoch_id).unwrap(); + let have_all_chunks = head.height == 0 || num_chunks == shard_ids.len(); if self.client.doomslug.ready_to_produce_block( height, @@ -1142,7 +1142,7 @@ impl ClientActorInner { ) { self.client .chunk_inclusion_tracker - .record_endorsement_metrics(&head.last_block_hash); + .record_endorsement_metrics(&head.last_block_hash, &shard_ids); if let Err(err) = self.produce_block(height, signer) { // If there is an error, report it and let it retry on the next loop step. error!(target: "client", height, "Block production failed: {}", err); diff --git a/chain/client/src/metrics.rs b/chain/client/src/metrics.rs index c774a3eab8c..971225cc790 100644 --- a/chain/client/src/metrics.rs +++ b/chain/client/src/metrics.rs @@ -602,12 +602,12 @@ pub(crate) static ORPHAN_CHUNK_STATE_WITNESS_POOL_MEMORY_USED: LazyLock = +pub(crate) static BLOCK_PRODUCER_EXCLUDED_CHUNKS_COUNT: LazyLock = LazyLock::new(|| { try_create_counter_vec( - "near_block_producer_insufficient_endorsement_chunk_count", + "near_block_producer_excluded_chunks_count", - "Number of chunks excluded from the block due to insufficient chunk endorsements", + "Number of chunks excluded from the block, by reason (chunk unavailable or insufficient chunk endorsements)", - &["shard_id"], + &["shard_id", "reason"], ) .unwrap() });