test(resharding): Adjust State mapping check for single shard tracking (#12706)

Unblocks #12691

**Changes**
* Adjust `check_state_shard_uid_mapping_after_resharding` so that it can be run for a client that does not track all shards.
* Run `check_state_shard_uid_mapping_after_resharding` for each client.
* Slightly refactor (simplify) resharding test loop.
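
As illustration, a minimal sketch of the new call site inside the test-loop success condition; it mirrors the `resharding_v3.rs` hunk below, where the recorded `resharding_block_hash` is what lets the check determine which shards each client tracked around the split:

```rust
// Sketch only; names and arguments are taken from the diff below.
// Once the success condition has observed the new shard layout, the mapping
// check runs for every client, including clients that track a single shard.
for client in clients {
    check_state_shard_uid_mapping_after_resharding(
        client,
        &resharding_block_hash.get().unwrap(),
        parent_shard_uid,
        params.allow_negative_refcount,
    );
}
```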
staffik authored Jan 10, 2025
1 parent a497a91 commit 1885afa
Showing 3 changed files with 182 additions and 47 deletions.
125 changes: 91 additions & 34 deletions integration-tests/src/test_loop/tests/resharding_v3.rs
@@ -135,7 +135,7 @@ struct TestReshardingParameters {
impl TestReshardingParametersBuilder {
fn build(self) -> TestReshardingParameters {
// Give enough time for GC to kick in after resharding.
assert!(GC_NUM_EPOCHS_TO_KEEP + 2 < TESTLOOP_NUM_EPOCHS_TO_WAIT);
assert!(GC_NUM_EPOCHS_TO_KEEP + 3 < TESTLOOP_NUM_EPOCHS_TO_WAIT);
let epoch_length = self.epoch_length.unwrap_or(DEFAULT_EPOCH_LENGTH);
let tracked_shard_schedule = self.tracked_shard_schedule.unwrap_or(None);

@@ -459,10 +459,8 @@ fn test_resharding_v3_base(params: TestReshardingParameters) {
TrieSanityCheck::new(&clients, params.load_mem_tries_for_tracked_shards);

let latest_block_height = Cell::new(0u64);
// Height of a block after resharding.
let new_layout_block_height = Cell::new(None);
// Height of an epoch after resharding.
let new_layout_epoch_height = Cell::new(None);
let resharding_block_hash = Cell::new(None);
let epoch_height_after_resharding = Cell::new(None);
let success_condition = |test_loop_data: &mut TestLoopData| -> bool {
params
.loop_actions
@@ -486,55 +484,52 @@ fn test_resharding_v3_base(params: TestReshardingParameters) {
let client = clients[client_index];
let block_header = client.chain.get_block_header(&tip.last_block_hash).unwrap();
let shard_layout = client.epoch_manager.get_shard_layout(&tip.epoch_id).unwrap();
println!("Block: {:?} {} {:?}", tip.last_block_hash, tip.height, block_header.chunk_mask());
println!("Shard IDs: {:?}", shard_layout.shard_ids().collect_vec());

// Check that all chunks are included.
if params.all_chunks_expected && params.chunk_ranges_to_drop.is_empty() {
assert!(block_header.chunk_mask().iter().all(|chunk_bit| *chunk_bit));
}

let shard_layout = client.epoch_manager.get_shard_layout(&tip.epoch_id).unwrap();
println!(
"new block #{} shards: {:?} chunk mask {:?}",
"new block #{} shards: {:?} chunk mask {:?} block hash {} epoch id {:?}",
tip.height,
shard_layout.shard_ids().collect_vec(),
block_header.chunk_mask().to_vec()
block_header.chunk_mask().to_vec(),
tip.last_block_hash,
tip.epoch_id.0,
);

// Check that all chunks are included.
if params.all_chunks_expected && params.chunk_ranges_to_drop.is_empty() {
assert!(block_header.chunk_mask().iter().all(|chunk_bit| *chunk_bit));
}

trie_sanity_check.assert_state_sanity(&clients, expected_num_shards);

let epoch_height =
client.epoch_manager.get_epoch_height_from_prev_block(&tip.prev_block_hash).unwrap();

// Return false if we have not yet passed an epoch with increased number of shards.
if new_layout_epoch_height.get().is_none() {
assert!(epoch_height < 6);
let prev_epoch_id = client
.epoch_manager
.get_prev_epoch_id_from_prev_block(&tip.prev_block_hash)
.unwrap();
let epoch_config = client.epoch_manager.get_epoch_config(&prev_epoch_id).unwrap();
if epoch_config.shard_layout.num_shards() != expected_num_shards {
// Return false if we have not resharded yet.
if epoch_height_after_resharding.get().is_none() {
assert!(epoch_height < 5);
if shard_layout.num_shards() != expected_num_shards {
return false;
}
// Just passed an epoch with increased number of shards.
new_layout_block_height.set(Some(latest_block_height.get()));
new_layout_epoch_height.set(Some(epoch_height));
// Just resharded.
resharding_block_hash.set(Some(tip.prev_block_hash));
epoch_height_after_resharding.set(Some(epoch_height));
// Assert that we will have a chance for gc to kick in before the test is over.
assert!(epoch_height + GC_NUM_EPOCHS_TO_KEEP < TESTLOOP_NUM_EPOCHS_TO_WAIT);
println!("State after resharding:");
print_and_assert_shard_accounts(&clients, &tip);
}

check_state_shard_uid_mapping_after_resharding(
&client,
parent_shard_uid,
params.allow_negative_refcount,
);
for client in clients {
check_state_shard_uid_mapping_after_resharding(
client,
&resharding_block_hash.get().unwrap(),
parent_shard_uid,
params.allow_negative_refcount,
);
}

// Return false if garbage collection window has not passed yet since resharding.
if epoch_height <= new_layout_epoch_height.get().unwrap() + GC_NUM_EPOCHS_TO_KEEP {
if epoch_height <= TESTLOOP_NUM_EPOCHS_TO_WAIT {
return false;
}
for loop_action in &params.loop_actions {
@@ -545,7 +540,7 @@ fn test_resharding_v3_base(params: TestReshardingParameters) {

env.test_loop.run_until(
success_condition,
// Give enough time to produce ~TESTLOOP_NUM_EPOCHS_TO_WAIT epochs.
// Give enough time to produce TESTLOOP_NUM_EPOCHS_TO_WAIT epochs.
Duration::seconds((TESTLOOP_NUM_EPOCHS_TO_WAIT * params.epoch_length) as i64),
);
let client = &env.test_loop.data.get(&client_handles[client_index]).client;
@@ -601,6 +596,68 @@ fn test_resharding_v3_state_cleanup() {
);
}

#[test]
fn test_resharding_v3_do_not_track_children_after_resharding() {
// Track parent shard before resharding, but do not track any child shard after resharding.
let account_in_stable_shard: AccountId = "account0".parse().unwrap();
let split_boundary_account: AccountId = NEW_BOUNDARY_ACCOUNT.parse().unwrap();
let base_shard_layout = get_base_shard_layout(DEFAULT_SHARD_LAYOUT_VERSION);
let new_shard_layout =
ShardLayout::derive_shard_layout(&base_shard_layout, split_boundary_account.clone());
let parent_shard_id = base_shard_layout.account_id_to_shard_id(&split_boundary_account);
let unrelated_shard_id = new_shard_layout.account_id_to_shard_id(&account_in_stable_shard);

let tracked_shard_sequence =
vec![parent_shard_id, parent_shard_id, unrelated_shard_id, unrelated_shard_id];
let num_clients = 8;
let tracked_shard_schedule = TrackedShardSchedule {
client_index: (num_clients - 1) as usize,
schedule: shard_sequence_to_schedule(tracked_shard_sequence),
};
test_resharding_v3_base(
TestReshardingParametersBuilder::default()
.num_clients(num_clients)
.tracked_shard_schedule(Some(tracked_shard_schedule))
.build(),
);
}

#[test]
// TODO(resharding): Increase `TESTLOOP_NUM_EPOCHS_TO_WAIT` to 10, fix nearcore, and un-ignore this test
#[ignore]
fn test_resharding_v3_stop_track_child_for_2_epochs() {
// Track parent shard before resharding, and a child shard after resharding.
// Then do not track the child for 2 epochs and start tracking it again.
let account_in_stable_shard: AccountId = "account0".parse().unwrap();
let split_boundary_account: AccountId = NEW_BOUNDARY_ACCOUNT.parse().unwrap();
let base_shard_layout = get_base_shard_layout(DEFAULT_SHARD_LAYOUT_VERSION);
let new_shard_layout =
ShardLayout::derive_shard_layout(&base_shard_layout, split_boundary_account.clone());
let parent_shard_id = base_shard_layout.account_id_to_shard_id(&split_boundary_account);
let child_shard_id = new_shard_layout.account_id_to_shard_id(&split_boundary_account);
let unrelated_shard_id = new_shard_layout.account_id_to_shard_id(&account_in_stable_shard);

let tracked_shard_sequence = vec![
parent_shard_id,
parent_shard_id,
child_shard_id,
unrelated_shard_id,
unrelated_shard_id,
child_shard_id,
];
let num_clients = 8;
let tracked_shard_schedule = TrackedShardSchedule {
client_index: (num_clients - 1) as usize,
schedule: shard_sequence_to_schedule(tracked_shard_sequence),
};
test_resharding_v3_base(
TestReshardingParametersBuilder::default()
.num_clients(num_clients)
.tracked_shard_schedule(Some(tracked_shard_schedule))
.build(),
);
}

#[test]
fn test_resharding_v3_track_all_shards() {
test_resharding_v3_base(
27 changes: 27 additions & 0 deletions integration-tests/src/test_loop/utils/sharding.rs
@@ -118,3 +118,30 @@ pub fn shard_was_split(shard_layout: &ShardLayout, shard_id: ShardId) -> bool {
};
parent != shard_id
}

pub fn get_tracked_shards_from_prev_block(
client: &Client,
prev_block_hash: &CryptoHash,
) -> Vec<ShardUId> {
let signer = client.validator_signer.get();
let account_id = signer.as_ref().map(|s| s.validator_id());
let shard_layout =
client.epoch_manager.get_shard_layout_from_prev_block(prev_block_hash).unwrap();
let mut tracked_shards = vec![];
for shard_uid in shard_layout.shard_uids() {
if client.shard_tracker.care_about_shard(
account_id,
prev_block_hash,
shard_uid.shard_id(),
true,
) {
tracked_shards.push(shard_uid);
}
}
tracked_shards
}

pub fn get_tracked_shards(client: &Client, block_hash: &CryptoHash) -> Vec<ShardUId> {
let block_header = client.chain.get_block_header(block_hash).unwrap();
get_tracked_shards_from_prev_block(client, block_header.prev_hash())
}
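
The two helpers above differ only in which block anchors the shard layout; a hedged usage sketch at the resharding boundary (the helper names come from this diff, while `client` and `resharding_block_hash` are assumed context):

```rust
// Assumed context: `client: &Client`, `resharding_block_hash: CryptoHash`
// (the last block produced under the old shard layout, as recorded by the test).
//
// Shards tracked *at* that block, i.e. under the pre-resharding layout
// (get_tracked_shards resolves the header and uses its prev hash internally).
let tracked_before = get_tracked_shards(client, &resharding_block_hash);
// Shards tracked in the block built *on top of* that block, i.e. under the
// new, post-split layout.
let tracked_after = get_tracked_shards_from_prev_block(client, &resharding_block_hash);
```

This mirrors how `check_state_shard_uid_mapping_after_resharding` below distinguishes shards tracked before and after resharding.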
77 changes: 64 additions & 13 deletions integration-tests/src/test_loop/utils/trie_sanity.rs
@@ -1,5 +1,8 @@
use super::sharding::shard_was_split;
use crate::test_loop::utils::sharding::{client_tracking_shard, get_memtrie_for_shard};
use crate::test_loop::utils::sharding::{
client_tracking_shard, get_memtrie_for_shard, get_tracked_shards,
get_tracked_shards_from_prev_block,
};
use borsh::BorshDeserialize;
use itertools::Itertools;
use near_chain::types::Tip;
@@ -10,6 +13,7 @@ use near_primitives::shard_layout::ShardLayout;
use near_primitives::state::FlatStateValue;
use near_primitives::types::{AccountId, EpochId, NumShards};
use near_primitives::version::PROTOCOL_VERSION;
use near_store::adapter::trie_store::get_shard_uid_mapping;
use near_store::adapter::StoreAdapter;
use near_store::db::refcount::decode_value_with_rc;
use near_store::flat::FlatStorageStatus;
@@ -340,27 +344,46 @@ fn should_assert_state_sanity(
/// Asserts that all parent shard State is accessible via parent and children shards.
pub fn check_state_shard_uid_mapping_after_resharding(
client: &Client,
resharding_block_hash: &CryptoHash,
parent_shard_uid: ShardUId,
allow_negative_refcount: bool,
) {
let tip = client.chain.head().unwrap();
let epoch_id = tip.epoch_id;
let epoch_config = client.epoch_manager.get_epoch_config(&epoch_id).unwrap();
let shard_layout = client.epoch_manager.get_shard_layout(&epoch_id).unwrap();
let children_shard_uids =
epoch_config.shard_layout.get_children_shards_uids(parent_shard_uid.shard_id()).unwrap();
shard_layout.get_children_shards_uids(parent_shard_uid.shard_id()).unwrap();
assert_eq!(children_shard_uids.len(), 2);

let store = client.chain.chain_store.store().trie_store();
let mut checked_any = false;
for kv in store.store().iter_raw_bytes(DBCol::State) {
// Currently tracked shards.
let tracked_shards = get_tracked_shards_from_prev_block(client, &tip.prev_block_hash);
// ShardUId mappings (other than mapping to itself) that we have stored in DB.
let mut shard_uid_mapping = HashMap::new();
// Currently tracked children shards that are mapped to an ancestor.
let mut tracked_mapped_children = vec![];
let store = client.chain.chain_store.store();
for child_shard_uid in &children_shard_uids {
let mapped_shard_uid = get_shard_uid_mapping(store, *child_shard_uid);
if &mapped_shard_uid == child_shard_uid {
continue;
}
shard_uid_mapping.insert(child_shard_uid, mapped_shard_uid);
if tracked_shards.contains(child_shard_uid) {
tracked_mapped_children.push(*child_shard_uid);
}
}

// Whether we found any value in DB for which we could test the mapping.
let mut checked_any_key = false;
let trie_store = store.trie_store();
for kv in store.iter_raw_bytes(DBCol::State) {
let (key, value) = kv.unwrap();
let shard_uid = ShardUId::try_from_slice(&key[0..8]).unwrap();
// Just after resharding, no State data must be keyed using children ShardUIds.
assert!(!children_shard_uids.contains(&shard_uid));
assert!(!shard_uid_mapping.contains_key(&shard_uid));
if shard_uid != parent_shard_uid {
continue;
}
checked_any = true;
let node_hash = CryptoHash::try_from_slice(&key[8..]).unwrap();
let (value, rc) = decode_value_with_rc(&value);
// It is possible we have delayed receipts leftovers on disk,
@@ -374,14 +397,42 @@ pub fn check_state_shard_uid_mapping_after_resharding(
assert!(value.is_none());
continue;
}
let parent_value = store.get(parent_shard_uid, &node_hash);
// Parent shard data must still be accessible using parent ShardUId.
let parent_value = trie_store.get(parent_shard_uid, &node_hash);
// Sanity check: parent shard data must still be accessible using Trie interface and parent ShardUId.
assert_eq!(&parent_value.unwrap()[..], value.unwrap());

// All parent shard data is available via both children shards.
for child_shard_uid in &children_shard_uids {
let child_value = store.get(*child_shard_uid, &node_hash);
for child_shard_uid in &tracked_mapped_children {
let child_value = trie_store.get(*child_shard_uid, &node_hash);
assert_eq!(&child_value.unwrap()[..], value.unwrap());
}
checked_any_key = true;
}
assert!(checked_any_key);

let shards_tracked_before_resharding = get_tracked_shards(client, resharding_block_hash);
let tracked_parent_before_resharding =
shards_tracked_before_resharding.contains(&parent_shard_uid);
let shards_tracked_after_resharding =
get_tracked_shards_from_prev_block(client, resharding_block_hash);

// Sanity checks if the node tracks all shards (e.g. it is an RPC node).
if !client.config.tracked_shards.is_empty() {
assert_eq!(tracked_mapped_children.len(), 2);
assert_eq!(shards_tracked_after_resharding.len(), shard_layout.num_shards() as usize,);
}
// If any child shard was tracked after resharding, it means the node had to split the parent shard.
if children_shard_uids
.iter()
.any(|child_shard_uid| shards_tracked_after_resharding.contains(child_shard_uid))
{
assert_eq!(shard_uid_mapping.len(), 2);
} else if tracked_parent_before_resharding {
// Parent was tracked before resharding, but no child was tracked after resharding.
// TODO(resharding) Consider not resharding in such case. If fixed, the assert below should change from 2 to 0.
assert_eq!(shard_uid_mapping.len(), 2);
} else {
// Otherwise, no mapping was set and no shard State would be mapped.
assert!(shard_uid_mapping.is_empty());
}
assert!(checked_any);
}
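
For context, a hedged sketch of the State mapping semantics this check relies on; only `get_shard_uid_mapping` and the ShardUId values come from the diff, the control flow here is illustrative:

```rust
// After a split, a child's State is not re-keyed. Instead the store keeps a
// child -> ancestor ShardUId mapping, and get_shard_uid_mapping returns that
// ancestor, or the queried ShardUId itself when no mapping is stored.
let mapped_shard_uid = get_shard_uid_mapping(store, child_shard_uid);
if mapped_shard_uid == child_shard_uid {
    // No mapping: any State for this shard is keyed under its own ShardUId.
} else {
    // Mapped: trie reads for the child are served from keys prefixed with the
    // ancestor (here the resharding parent), which is what the check asserts.
    assert_eq!(mapped_shard_uid, parent_shard_uid);
}
```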
