diff --git a/integration-tests/src/test_loop/tests/resharding_v3.rs b/integration-tests/src/test_loop/tests/resharding_v3.rs
index 95dacbf9e47..c9413ba5841 100644
--- a/integration-tests/src/test_loop/tests/resharding_v3.rs
+++ b/integration-tests/src/test_loop/tests/resharding_v3.rs
@@ -135,7 +135,7 @@ struct TestReshardingParameters {
 impl TestReshardingParametersBuilder {
     fn build(self) -> TestReshardingParameters {
         // Give enough time for GC to kick in after resharding.
-        assert!(GC_NUM_EPOCHS_TO_KEEP + 2 < TESTLOOP_NUM_EPOCHS_TO_WAIT);
+        assert!(GC_NUM_EPOCHS_TO_KEEP + 3 < TESTLOOP_NUM_EPOCHS_TO_WAIT);
         let epoch_length = self.epoch_length.unwrap_or(DEFAULT_EPOCH_LENGTH);
         let tracked_shard_schedule = self.tracked_shard_schedule.unwrap_or(None);
 
@@ -459,10 +459,8 @@ fn test_resharding_v3_base(params: TestReshardingParameters) {
         TrieSanityCheck::new(&clients, params.load_mem_tries_for_tracked_shards);
 
     let latest_block_height = Cell::new(0u64);
-    // Height of a block after resharding.
-    let new_layout_block_height = Cell::new(None);
-    // Height of an epoch after resharding.
-    let new_layout_epoch_height = Cell::new(None);
+    let resharding_block_hash = Cell::new(None);
+    let epoch_height_after_resharding = Cell::new(None);
     let success_condition = |test_loop_data: &mut TestLoopData| -> bool {
         params
             .loop_actions
@@ -486,55 +484,52 @@ fn test_resharding_v3_base(params: TestReshardingParameters) {
         let client = clients[client_index];
         let block_header = client.chain.get_block_header(&tip.last_block_hash).unwrap();
         let shard_layout = client.epoch_manager.get_shard_layout(&tip.epoch_id).unwrap();
-        println!("Block: {:?} {} {:?}", tip.last_block_hash, tip.height, block_header.chunk_mask());
-        println!("Shard IDs: {:?}", shard_layout.shard_ids().collect_vec());
-        // Check that all chunks are included.
-        if params.all_chunks_expected && params.chunk_ranges_to_drop.is_empty() {
-            assert!(block_header.chunk_mask().iter().all(|chunk_bit| *chunk_bit));
-        }
-
-        let shard_layout = client.epoch_manager.get_shard_layout(&tip.epoch_id).unwrap();
         println!(
-            "new block #{} shards: {:?} chunk mask {:?}",
+            "new block #{} shards: {:?} chunk mask {:?} block hash {} epoch id {:?}",
             tip.height,
             shard_layout.shard_ids().collect_vec(),
-            block_header.chunk_mask().to_vec()
+            block_header.chunk_mask().to_vec(),
+            tip.last_block_hash,
+            tip.epoch_id.0,
         );
+        // Check that all chunks are included.
+        if params.all_chunks_expected && params.chunk_ranges_to_drop.is_empty() {
+            assert!(block_header.chunk_mask().iter().all(|chunk_bit| *chunk_bit));
+        }
+
         trie_sanity_check.assert_state_sanity(&clients, expected_num_shards);
 
         let epoch_height =
             client.epoch_manager.get_epoch_height_from_prev_block(&tip.prev_block_hash).unwrap();
 
-        // Return false if we have not yet passed an epoch with increased number of shards.
-        if new_layout_epoch_height.get().is_none() {
-            assert!(epoch_height < 6);
-            let prev_epoch_id = client
-                .epoch_manager
-                .get_prev_epoch_id_from_prev_block(&tip.prev_block_hash)
-                .unwrap();
-            let epoch_config = client.epoch_manager.get_epoch_config(&prev_epoch_id).unwrap();
-            if epoch_config.shard_layout.num_shards() != expected_num_shards {
+        // Return false if we have not resharded yet.
+        if epoch_height_after_resharding.get().is_none() {
+            assert!(epoch_height < 5);
+            if shard_layout.num_shards() != expected_num_shards {
                 return false;
             }
-            // Just passed an epoch with increased number of shards.
-            new_layout_block_height.set(Some(latest_block_height.get()));
-            new_layout_epoch_height.set(Some(epoch_height));
+            // Just resharded.
+            resharding_block_hash.set(Some(tip.prev_block_hash));
+            epoch_height_after_resharding.set(Some(epoch_height));
             // Assert that we will have a chance for gc to kick in before the test is over.
             assert!(epoch_height + GC_NUM_EPOCHS_TO_KEEP < TESTLOOP_NUM_EPOCHS_TO_WAIT);
             println!("State after resharding:");
             print_and_assert_shard_accounts(&clients, &tip);
         }
 
-        check_state_shard_uid_mapping_after_resharding(
-            &client,
-            parent_shard_uid,
-            params.allow_negative_refcount,
-        );
+        for client in clients {
+            check_state_shard_uid_mapping_after_resharding(
+                client,
+                &resharding_block_hash.get().unwrap(),
+                parent_shard_uid,
+                params.allow_negative_refcount,
+            );
+        }
 
         // Return false if garbage collection window has not passed yet since resharding.
-        if epoch_height <= new_layout_epoch_height.get().unwrap() + GC_NUM_EPOCHS_TO_KEEP {
+        if epoch_height <= TESTLOOP_NUM_EPOCHS_TO_WAIT {
            return false;
         }
         for loop_action in &params.loop_actions {
@@ -545,7 +540,7 @@ fn test_resharding_v3_base(params: TestReshardingParameters) {
 
     env.test_loop.run_until(
         success_condition,
-        // Give enough time to produce ~TESTLOOP_NUM_EPOCHS_TO_WAIT epochs.
+        // Give enough time to produce TESTLOOP_NUM_EPOCHS_TO_WAIT epochs.
         Duration::seconds((TESTLOOP_NUM_EPOCHS_TO_WAIT * params.epoch_length) as i64),
     );
     let client = &env.test_loop.data.get(&client_handles[client_index]).client;
@@ -601,6 +596,68 @@ fn test_resharding_v3_state_cleanup() {
     );
 }
 
+#[test]
+fn test_resharding_v3_do_not_track_children_after_resharding() {
+    // Track parent shard before resharding, but do not track any child shard after resharding.
+    let account_in_stable_shard: AccountId = "account0".parse().unwrap();
+    let split_boundary_account: AccountId = NEW_BOUNDARY_ACCOUNT.parse().unwrap();
+    let base_shard_layout = get_base_shard_layout(DEFAULT_SHARD_LAYOUT_VERSION);
+    let new_shard_layout =
+        ShardLayout::derive_shard_layout(&base_shard_layout, split_boundary_account.clone());
+    let parent_shard_id = base_shard_layout.account_id_to_shard_id(&split_boundary_account);
+    let unrelated_shard_id = new_shard_layout.account_id_to_shard_id(&account_in_stable_shard);
+
+    let tracked_shard_sequence =
+        vec![parent_shard_id, parent_shard_id, unrelated_shard_id, unrelated_shard_id];
+    let num_clients = 8;
+    let tracked_shard_schedule = TrackedShardSchedule {
+        client_index: (num_clients - 1) as usize,
+        schedule: shard_sequence_to_schedule(tracked_shard_sequence),
+    };
+    test_resharding_v3_base(
+        TestReshardingParametersBuilder::default()
+            .num_clients(num_clients)
+            .tracked_shard_schedule(Some(tracked_shard_schedule))
+            .build(),
+    );
+}
+
+#[test]
+// TODO(resharding): Increase `TESTLOOP_NUM_EPOCHS_TO_WAIT` to 10, fix nearcore, and un-ignore this test
+#[ignore]
+fn test_resharding_v3_stop_track_child_for_2_epochs() {
+    // Track parent shard before resharding, and a child shard after resharding.
+    // Then do not track the child for 2 epochs and start tracking it again.
+    let account_in_stable_shard: AccountId = "account0".parse().unwrap();
+    let split_boundary_account: AccountId = NEW_BOUNDARY_ACCOUNT.parse().unwrap();
+    let base_shard_layout = get_base_shard_layout(DEFAULT_SHARD_LAYOUT_VERSION);
+    let new_shard_layout =
+        ShardLayout::derive_shard_layout(&base_shard_layout, split_boundary_account.clone());
+    let parent_shard_id = base_shard_layout.account_id_to_shard_id(&split_boundary_account);
+    let child_shard_id = new_shard_layout.account_id_to_shard_id(&split_boundary_account);
+    let unrelated_shard_id = new_shard_layout.account_id_to_shard_id(&account_in_stable_shard);
+
+    let tracked_shard_sequence = vec![
+        parent_shard_id,
+        parent_shard_id,
+        child_shard_id,
+        unrelated_shard_id,
+        unrelated_shard_id,
+        child_shard_id,
+    ];
+    let num_clients = 8;
+    let tracked_shard_schedule = TrackedShardSchedule {
+        client_index: (num_clients - 1) as usize,
+        schedule: shard_sequence_to_schedule(tracked_shard_sequence),
+    };
+    test_resharding_v3_base(
+        TestReshardingParametersBuilder::default()
+            .num_clients(num_clients)
+            .tracked_shard_schedule(Some(tracked_shard_schedule))
+            .build(),
+    );
+}
+
 #[test]
 fn test_resharding_v3_track_all_shards() {
     test_resharding_v3_base(
diff --git a/integration-tests/src/test_loop/utils/sharding.rs b/integration-tests/src/test_loop/utils/sharding.rs
index 8f377f122b1..0da5f7a23c9 100644
--- a/integration-tests/src/test_loop/utils/sharding.rs
+++ b/integration-tests/src/test_loop/utils/sharding.rs
@@ -118,3 +118,30 @@ pub fn shard_was_split(shard_layout: &ShardLayout, shard_id: ShardId) -> bool {
     };
     parent != shard_id
 }
+
+pub fn get_tracked_shards_from_prev_block(
+    client: &Client,
+    prev_block_hash: &CryptoHash,
+) -> Vec<ShardUId> {
+    let signer = client.validator_signer.get();
+    let account_id = signer.as_ref().map(|s| s.validator_id());
+    let shard_layout =
+        client.epoch_manager.get_shard_layout_from_prev_block(prev_block_hash).unwrap();
+    let mut tracked_shards = vec![];
+    for shard_uid in shard_layout.shard_uids() {
+        if client.shard_tracker.care_about_shard(
+            account_id,
+            prev_block_hash,
+            shard_uid.shard_id(),
+            true,
+        ) {
+            tracked_shards.push(shard_uid);
+        }
+    }
+    tracked_shards
+}
+
+pub fn get_tracked_shards(client: &Client, block_hash: &CryptoHash) -> Vec<ShardUId> {
+    let block_header = client.chain.get_block_header(block_hash).unwrap();
+    get_tracked_shards_from_prev_block(client, block_header.prev_hash())
+}
diff --git a/integration-tests/src/test_loop/utils/trie_sanity.rs b/integration-tests/src/test_loop/utils/trie_sanity.rs
index 31acd1e32cb..1a9d76949ac 100644
--- a/integration-tests/src/test_loop/utils/trie_sanity.rs
+++ b/integration-tests/src/test_loop/utils/trie_sanity.rs
@@ -1,5 +1,8 @@
 use super::sharding::shard_was_split;
-use crate::test_loop::utils::sharding::{client_tracking_shard, get_memtrie_for_shard};
+use crate::test_loop::utils::sharding::{
+    client_tracking_shard, get_memtrie_for_shard, get_tracked_shards,
+    get_tracked_shards_from_prev_block,
+};
 use borsh::BorshDeserialize;
 use itertools::Itertools;
 use near_chain::types::Tip;
@@ -10,6 +13,7 @@ use near_primitives::shard_layout::ShardLayout;
 use near_primitives::state::FlatStateValue;
 use near_primitives::types::{AccountId, EpochId, NumShards};
 use near_primitives::version::PROTOCOL_VERSION;
+use near_store::adapter::trie_store::get_shard_uid_mapping;
 use near_store::adapter::StoreAdapter;
 use near_store::db::refcount::decode_value_with_rc;
 use near_store::flat::FlatStorageStatus;
@@ -340,27 +344,46 @@ fn should_assert_state_sanity(
 /// Asserts that all parent shard State is accessible via parent and children shards.
 pub fn check_state_shard_uid_mapping_after_resharding(
     client: &Client,
+    resharding_block_hash: &CryptoHash,
     parent_shard_uid: ShardUId,
     allow_negative_refcount: bool,
 ) {
     let tip = client.chain.head().unwrap();
     let epoch_id = tip.epoch_id;
-    let epoch_config = client.epoch_manager.get_epoch_config(&epoch_id).unwrap();
+    let shard_layout = client.epoch_manager.get_shard_layout(&epoch_id).unwrap();
     let children_shard_uids =
-        epoch_config.shard_layout.get_children_shards_uids(parent_shard_uid.shard_id()).unwrap();
+        shard_layout.get_children_shards_uids(parent_shard_uid.shard_id()).unwrap();
     assert_eq!(children_shard_uids.len(), 2);
 
-    let store = client.chain.chain_store.store().trie_store();
-    let mut checked_any = false;
-    for kv in store.store().iter_raw_bytes(DBCol::State) {
+    // Currently tracked shards.
+    let tracked_shards = get_tracked_shards_from_prev_block(client, &tip.prev_block_hash);
+    // ShardUId mappings (different than map to itself) that we have stored in DB.
+    let mut shard_uid_mapping = HashMap::new();
+    // Currently tracked children shards that are mapped to an ancestor.
+    let mut tracked_mapped_children = vec![];
+    let store = client.chain.chain_store.store();
+    for child_shard_uid in &children_shard_uids {
+        let mapped_shard_uid = get_shard_uid_mapping(store, *child_shard_uid);
+        if &mapped_shard_uid == child_shard_uid {
+            continue;
+        }
+        shard_uid_mapping.insert(child_shard_uid, mapped_shard_uid);
+        if tracked_shards.contains(child_shard_uid) {
+            tracked_mapped_children.push(*child_shard_uid);
+        }
+    }
+
+    // Whether we found any value in DB for which we could test the mapping.
+    let mut checked_any_key = false;
+    let trie_store = store.trie_store();
+    for kv in store.iter_raw_bytes(DBCol::State) {
         let (key, value) = kv.unwrap();
         let shard_uid = ShardUId::try_from_slice(&key[0..8]).unwrap();
         // Just after resharding, no State data must be keyed using children ShardUIds.
-        assert!(!children_shard_uids.contains(&shard_uid));
+        assert!(!shard_uid_mapping.contains_key(&shard_uid));
         if shard_uid != parent_shard_uid {
             continue;
         }
-        checked_any = true;
         let node_hash = CryptoHash::try_from_slice(&key[8..]).unwrap();
         let (value, rc) = decode_value_with_rc(&value);
         // It is possible we have delayed receipts leftovers on disk,
@@ -374,14 +397,42 @@ pub fn check_state_shard_uid_mapping_after_resharding(
             assert!(value.is_none());
             continue;
         }
-        let parent_value = store.get(parent_shard_uid, &node_hash);
-        // Parent shard data must still be accessible using parent ShardUId.
+        let parent_value = trie_store.get(parent_shard_uid, &node_hash);
+        // Sanity check: parent shard data must still be accessible using Trie interface and parent ShardUId.
         assert_eq!(&parent_value.unwrap()[..], value.unwrap());
+        // All parent shard data is available via both children shards.
-        for child_shard_uid in &children_shard_uids {
-            let child_value = store.get(*child_shard_uid, &node_hash);
+        for child_shard_uid in &tracked_mapped_children {
+            let child_value = trie_store.get(*child_shard_uid, &node_hash);
             assert_eq!(&child_value.unwrap()[..], value.unwrap());
         }
+        checked_any_key = true;
+    }
+    assert!(checked_any_key);
+
+    let shards_tracked_before_resharding = get_tracked_shards(client, resharding_block_hash);
+    let tracked_parent_before_resharding =
+        shards_tracked_before_resharding.contains(&parent_shard_uid);
+    let shards_tracked_after_resharding =
+        get_tracked_shards_from_prev_block(client, resharding_block_hash);
+
+    // Sanity checks if the node tracks all shards (e.g. it is RPC node).
+    if !client.config.tracked_shards.is_empty() {
+        assert_eq!(tracked_mapped_children.len(), 2);
+        assert_eq!(shards_tracked_after_resharding.len(), shard_layout.num_shards() as usize,);
+    }
+    // If any child shard was tracked after resharding, it means the node had to split the parent shard.
+    if children_shard_uids
+        .iter()
+        .any(|child_shard_uid| shards_tracked_after_resharding.contains(child_shard_uid))
+    {
+        assert_eq!(shard_uid_mapping.len(), 2);
+    } else if tracked_parent_before_resharding {
+        // Parent was tracked before resharding, but no child was tracked after resharding.
+        // TODO(resharding) Consider not resharding in such case. If fixed, the assert below should change from 2 to 0.
+        assert_eq!(shard_uid_mapping.len(), 2);
+    } else {
+        // Otherwise, no mapping was set and no shard State would be mapped.
+        assert!(shard_uid_mapping.is_empty());
     }
-    assert!(checked_any);
 }