From 8fd6fb0ac15501c73eb154f71ad181b201c4890b Mon Sep 17 00:00:00 2001 From: Bhaskar Bora Date: Mon, 21 Oct 2024 14:21:33 -0400 Subject: [PATCH 1/6] save debug changes --- src/llfs/page_allocator_state.cpp | 18 +++++++++++------- src/llfs/page_allocator_state.hpp | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/llfs/page_allocator_state.cpp b/src/llfs/page_allocator_state.cpp index 578fc77..c8d0e49 100644 --- a/src/llfs/page_allocator_state.cpp +++ b/src/llfs/page_allocator_state.cpp @@ -458,11 +458,14 @@ PageAllocatorState::ProposalStatus PageAllocatorState::propose(PackedPageAllocat // If this is a valid proposal that will cause state change, go through and change the deltas to // the new ref count values. // + LOG(INFO) << "propose (start): txn-ref-count= " << txn->ref_counts.size(); if (status == ProposalStatus::kValid) { for (PackedPageRefCount& prc : txn->ref_counts) { - prc.ref_count = this->calculate_new_ref_count(prc); + LOG(INFO) << "prc loop: " << prc; + prc.ref_count = this->calculate_new_ref_count(prc, *user_index); } } + LOG(INFO) << "propose (end): txn-ref-count= " << txn->ref_counts.size(); return status; } @@ -556,23 +559,23 @@ namespace { void run_ref_count_update_sanity_checks(const PageIdFactory& id_factory, const PackedPageRefCount& delta, const PageAllocatorRefCount& obj, i32 old_count, - i32 new_count) + i32 new_count, const u32 index) { const auto debug_info = [&](std::ostream& out) { const page_generation_int delta_generation = id_factory.get_generation(delta.page_id.unpack()); out << "(" << BATT_INSPECT(delta) << BATT_INSPECT(obj) << BATT_INSPECT(old_count) - << BATT_INSPECT(new_count) << ")" << BATT_INSPECT(delta_generation); + << BATT_INSPECT(new_count) << ")" << BATT_INSPECT(delta_generation) << BATT_INSPECT(index); }; - LLFS_VLOG(2) << debug_info; + LLFS_VLOG(2) << "rrcusc: " << debug_info; BATT_CHECK_GE(old_count, 0) << "ref counts must be non-negative" << debug_info; BATT_CHECK_GE(new_count, 0) << "ref counts must be non-negative" << debug_info; if (old_count == new_count) { BATT_CHECK_EQ(delta.ref_count, 0) - << "delta was non-zero but count did not change" << debug_info; + << "delta was non-zero but count did not change (bbora)" << debug_info; return; } @@ -639,7 +642,8 @@ void run_ref_count_update_sanity_checks(const PageIdFactory& id_factory, //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - - // -i32 PageAllocatorState::calculate_new_ref_count(const PackedPageRefCount& delta) const +i32 PageAllocatorState::calculate_new_ref_count(const PackedPageRefCount& delta, + const u32 index) const { const PageId page_id = delta.page_id.unpack(); const page_id_int physical_page = this->page_ids_.get_physical_page(page_id); @@ -662,7 +666,7 @@ i32 PageAllocatorState::calculate_new_ref_count(const PackedPageRefCount& delta) new_count = old_count + delta.ref_count; } - run_ref_count_update_sanity_checks(this->page_ids_, delta, obj, old_count, new_count); + run_ref_count_update_sanity_checks(this->page_ids_, delta, obj, old_count, new_count, index); return new_count; } diff --git a/src/llfs/page_allocator_state.hpp b/src/llfs/page_allocator_state.hpp index c139891..4c1e867 100644 --- a/src/llfs/page_allocator_state.hpp +++ b/src/llfs/page_allocator_state.hpp @@ -242,7 +242,7 @@ class PageAllocatorState : public PageAllocatorStateNoLock // Returns the new ref count that will result from applying the delta to the passed obj. 
  //
-  i32 calculate_new_ref_count(const PackedPageRefCount& delta) const;
+  i32 calculate_new_ref_count(const PackedPageRefCount& delta, const u32 index) const;
 
   /** \brief If the given ref count object has a positive ref count but *is* in the free pool, then
    * this function removes it; otherwise if the object has a zero ref count but is *not* in the free

From f94af8da8115ecafd246fcf6f02264f5eac4329f Mon Sep 17 00:00:00 2001
From: Bhaskar Bora
Date: Wed, 18 Dec 2024 09:49:30 -0500
Subject: [PATCH 2/6] ref count changes

---
 src/llfs/committable_page_cache_job.cpp     | 19 +++--
 src/llfs/page_recycler.cpp                  | 78 +++++++++++++++++----
 src/llfs/page_recycler.hpp                  | 12 +++-
 src/llfs/page_recycler.test.cpp             |  4 +-
 src/llfs/page_recycler_events.hpp           |  8 ++-
 src/llfs/page_recycler_recovery_visitor.cpp | 15 ++++
 src/llfs/page_recycler_recovery_visitor.hpp |  4 ++
 src/llfs/volume_trimmer.cpp                 |  5 +-
 8 files changed, 121 insertions(+), 24 deletions(-)

diff --git a/src/llfs/committable_page_cache_job.cpp b/src/llfs/committable_page_cache_job.cpp
index 7170943..4635826 100644
--- a/src/llfs/committable_page_cache_job.cpp
+++ b/src/llfs/committable_page_cache_job.cpp
@@ -185,7 +185,8 @@ Status commit(std::unique_ptr<PageCacheJob> job, const JobCommitParams& params,
 Status commit(CommittablePageCacheJob committable_job, const JobCommitParams& params, u64 callers,
               slot_offset_type prev_caller_slot, batt::Watch<slot_offset_type>* durable_caller_slot)
 {
-  return committable_job.commit_impl(params, callers, prev_caller_slot, durable_caller_slot);
+  auto stat = committable_job.commit_impl(params, callers, prev_caller_slot, durable_caller_slot);
+  return stat;
 }
 
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
@@ -211,7 +212,12 @@ Status CommittablePageCacheJob::commit_impl(const JobCommitParams& params, u64 c
   const PageCacheJob* job = this->job_.get();
   BATT_CHECK_NOT_NULLPTR(job);
 
-  LLFS_VLOG(1) << "commit(PageCacheJob): entered";
+  if (durable_caller_slot) {
+    LLFS_VLOG(1) << "commit(PageCacheJob): entered" << BATT_INSPECT(prev_caller_slot)
+                 << BATT_INSPECT(*durable_caller_slot);
+  } else {
+    LLFS_VLOG(1) << "commit(PageCacheJob): entered" << BATT_INSPECT(prev_caller_slot);
+  }
 
   // Make sure the job is pruned!
  //
@@ -429,7 +435,7 @@ auto CommittablePageCacheJob::start_ref_count_updates(const JobCommitParams& par
                                                       PageRefCountUpdates& updates, u64 /*callers*/)
     -> StatusOr<DeadPages>
 {
-  LLFS_VLOG(1) << "commit(PageCacheJob): updating ref counts";
+  LLFS_VLOG(1) << "commit(PageCacheJob): updating ref counts" << BATT_INSPECT(params.caller_slot);
 
   DeadPages dead_pages;
 
@@ -579,10 +585,11 @@ Status CommittablePageCacheJob::recycle_dead_pages(const JobCommitParams& params
 
   BATT_ASSIGN_OK_RESULT(
       slot_offset_type recycler_sync_point,
-      params.recycler.recycle_pages(as_slice(dead_pages.ids), params.recycle_grant,
-                                    params.recycle_depth + 1));
+      params.recycler.recycle_pages(as_slice(dead_pages.ids), params.caller_slot,
+                                    params.recycle_grant, params.recycle_depth + 1));
 
-  LLFS_VLOG(1) << "commit(PageCacheJob): waiting for PageRecycler sync point";
+  LLFS_VLOG(1) << "commit(PageCacheJob): waiting for PageRecycler sync point"
+               << BATT_INSPECT(params.caller_slot);
 
   return params.recycler.await_flush(recycler_sync_point);
   //
diff --git a/src/llfs/page_recycler.cpp b/src/llfs/page_recycler.cpp
index 5132b33..0616f90 100644
--- a/src/llfs/page_recycler.cpp
+++ b/src/llfs/page_recycler.cpp
@@ -107,6 +107,7 @@ StatusOr refresh_recycler_info_slot(TypedSlotWriter
 {
   initialize_status_codes();
+  LLFS_VLOG(1) << "PageRecycler::recover() start" << BATT_INSPECT(name);
   PageRecyclerRecoveryVisitor visitor{default_options};
 
   // Read the log, scanning its contents.
@@ -153,9 +154,11 @@ StatusOr refresh_recycler_info_slot(TypedSlotWriter
 
   state->bulk_load(as_slice(visitor.recovered_pages()));
 
-  return std::unique_ptr<PageRecycler>{new PageRecycler(scheduler, std::string{name}, page_deleter,
-                                                        std::move(*recovered_log),
-                                                        std::move(latest_batch), std::move(state))};
+  LLFS_VLOG(1) << "PageRecycler::recover() end" << BATT_INSPECT(name);
+
+  return std::unique_ptr<PageRecycler>{
+      new PageRecycler(scheduler, std::string{name}, page_deleter, std::move(*recovered_log),
+                       std::move(latest_batch), std::move(state), visitor.largest_offset())};
 }
 
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
 //
@@ -163,7 +166,8 @@ StatusOr refresh_recycler_info_slot(TypedSlotWriter
 PageRecycler::PageRecycler(batt::TaskScheduler& scheduler, const std::string& name,
                            PageDeleter& page_deleter, std::unique_ptr<LogDevice>&& wal_device,
                            Optional<Batch>&& recovered_batch,
-                           std::unique_ptr<State>&& state) noexcept
+                           std::unique_ptr<State>&& state,
+                           u64 largest_offset_as_unique_identifier_init) noexcept
     : scheduler_{scheduler}
     , name_{name}
     , page_deleter_{page_deleter}
@@ -177,6 +181,7 @@ PageRecycler::PageRecycler(batt::TaskScheduler& scheduler, const std::string& na
     , recycle_task_{}
     , metrics_{}
     , prepared_batch_{std::move(recovered_batch)}
+    , largest_offset_as_unique_identifier_{largest_offset_as_unique_identifier_init}
 {
   const PageRecyclerOptions& options = this->state_.no_lock().options;
 
@@ -202,6 +207,34 @@ PageRecycler::PageRecycler(batt::TaskScheduler& scheduler, const std::string& na
 
 #undef ADD_METRIC_
 }
 
+//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
+/** \brief This infrastructure collects metrics for the PageRecycler submodule.
+ * This metric collection is currently used by the test suite to assess the execution behavior of
+ * internal flows. This is a static metrics infrastructure so that any user-level code can access
+ * it.
+ */
+auto PageRecycler::metrics_export() -> MetricsExported&
+{
+  static MetricsExported& metrics_ = [&]() -> MetricsExported& {
+    static MetricsExported metrics_;
+
+    LOG(INFO) << "Registering PageRecycler metrics...";
+    const auto metric_name = [](std::string_view property) {
+      return batt::to_string("PageRecycler_", property);
+    };
+
+#define ADD_METRIC_(n) global_metric_registry().add(metric_name(#n), metrics_.n)
+
+    ADD_METRIC_(page_id_deletion_reissue);
+
+#undef ADD_METRIC_
+
+    return metrics_;
+  }();
+
+  return metrics_;
+}
+
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
 //
 PageRecycler::~PageRecycler() noexcept
@@ -270,20 +303,33 @@ void PageRecycler::join()
 
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
 //
-StatusOr<slot_offset_type> PageRecycler::recycle_pages(const Slice<const PageId>& page_ids,
-                                                       batt::Grant* grant, i32 depth)
+StatusOr<slot_offset_type> PageRecycler::recycle_pages(
+    const Slice<const PageId>& page_ids, llfs::slot_offset_type offset_as_unique_identifier,
+    batt::Grant* grant, i32 depth)
 {
   BATT_CHECK_GE(depth, 0);
 
   LLFS_VLOG(1) << "PageRecycler::recycle_pages(page_ids=" << batt::dump_range(page_ids) << "["
                << page_ids.size() << "]"
               << ", grant=[" << (grant ? grant->size() : usize{0}) << "], depth=" << depth << ") "
-               << this->name_;
+               << this->name_ << BATT_INSPECT(offset_as_unique_identifier);
 
   if (page_ids.empty()) {
     return this->wal_device_->slot_range(LogReadMode::kDurable).upper_bound;
   }
 
+  // Check to see if we have already seen this or a newer request.
+  //
+  if (this->largest_offset_as_unique_identifier_ >= offset_as_unique_identifier) {
+    this->metrics_export().page_id_deletion_reissue.fetch_add(1);
+
+    return this->wal_device_->slot_range(LogReadMode::kDurable).upper_bound;
+  }
+
+  // Update the largest unique identifier.
+  //
+  this->largest_offset_as_unique_identifier_ = offset_as_unique_identifier;
+
   Optional<slot_offset_type> sync_point = None;
 
   if (depth == 0) {
@@ -297,6 +343,7 @@ StatusOr PageRecycler::recycle_pages(const Slice
 
     const PageRecyclerOptions& options = this->state_.no_lock().options;
 
+    LLFS_VLOG(1) << "recycle_pages entered grant==NULL case";
     for (PageId page_id : page_ids) {
       StatusOr<batt::Grant> local_grant = [&] {
         const usize needed_size = options.insert_grant_size();
@@ -319,8 +366,9 @@ StatusOr PageRecycler::recycle_pages(const Slice
       BATT_REQUIRE_OK(local_grant);
       {
         auto locked_state = this->state_.lock();
-        StatusOr<slot_offset_type> append_slot =
-            this->insert_to_log(*local_grant, page_id, depth, locked_state);
+        // Writing to recycler log.
+        StatusOr<slot_offset_type> append_slot = this->insert_to_log(
+            *local_grant, page_id, depth, offset_as_unique_identifier, locked_state);
 
         BATT_REQUIRE_OK(append_slot);
 
         clamp_min_slot(&sync_point, *append_slot);
       }
     }
   } else {
     BATT_CHECK_LT(depth, (i32)kMaxPageRefDepth) << BATT_INSPECT_RANGE(page_ids);
 
+    LLFS_VLOG(1) << "recycle_pages entered the valid grant case";
+
     auto locked_state = this->state_.lock();
     for (PageId page_id : page_ids) {
+      // Writing to recycler log.
       StatusOr<slot_offset_type> append_slot =
-          this->insert_to_log(*grant, page_id, depth, locked_state);
+          this->insert_to_log(*grant, page_id, depth, offset_as_unique_identifier, locked_state);
 
       BATT_REQUIRE_OK(append_slot);
 
       clamp_min_slot(&sync_point, *append_slot);
@@ -348,13 +399,13 @@ StatusOr PageRecycler::recycle_pages(const Slice
 StatusOr<slot_offset_type> PageRecycler::recycle_page(PageId page_id, batt::Grant* grant, i32 depth)
 {
   std::array page_ids{page_id};
-  return this->recycle_pages(batt::as_slice(page_ids), grant, depth);
+  return this->recycle_pages(batt::as_slice(page_ids), 0, grant, depth);
 }
 
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
 //
 StatusOr<slot_offset_type> PageRecycler::insert_to_log(
-    batt::Grant& grant, PageId page_id, i32 depth,
+    batt::Grant& grant, PageId page_id, i32 depth, slot_offset_type offset_as_unique_identifier,
     batt::Mutex<std::unique_ptr<State>>::Lock& locked_state)
 {
   BATT_CHECK(locked_state.is_held());
@@ -370,6 +421,7 @@ StatusOr PageRecycler::insert_to_log(
           .refresh_slot = None,
           .batch_slot = None,
           .depth = depth,
+          .offset_as_unique_identifier = offset_as_unique_identifier,
       },
       [&](const batt::SmallVecBase<PageToRecycle*>& to_append) -> StatusOr<slot_offset_type> {
         if (to_append.empty()) {
@@ -387,7 +439,7 @@ StatusOr PageRecycler::insert_to_log(
           BATT_REQUIRE_OK(append_slot);
           item->refresh_slot = append_slot->lower_bound;
           last_slot = slot_max(last_slot, append_slot->upper_bound);
-          LLFS_VLOG(1) << "Write " << item << " to the log; last_slot=" << last_slot;
+          LLFS_VLOG(1) << "Write " << *item << " to the log; last_slot=" << last_slot;
         }
         BATT_CHECK_NE(this->slot_writer_.slot_offset(), current_slot);
diff --git a/src/llfs/page_recycler.hpp b/src/llfs/page_recycler.hpp
index c0ba830..529bbc6 100644
--- a/src/llfs/page_recycler.hpp
+++ b/src/llfs/page_recycler.hpp
@@ -55,6 +55,11 @@ class PageRecycler
     CountMetric<u64> page_drop_error_count{0};
   };
 
+  struct MetricsExported {
+    CountMetric<u64> page_id_deletion_reissue{0};
+  };
+  static MetricsExported& metrics_export();
+
   //+++++++++++-+-+--+----- --- -- - - - -
 
   static PageCount default_max_buffered_page_count(const PageRecyclerOptions& options);
@@ -105,6 +110,7 @@ class PageRecycler
   // necessarily flushed (see `await_flush`).
   //
   StatusOr<slot_offset_type> recycle_pages(const Slice<const PageId>& page_ids,
+                                           llfs::slot_offset_type offset,
                                            batt::Grant* grant = nullptr, i32 depth = 0);
 
   // Schedule a single page to be recycled. \see recycle_pages
@@ -198,7 +204,8 @@ class PageRecycler
   explicit PageRecycler(batt::TaskScheduler& scheduler, const std::string& name,
                         PageDeleter& page_deleter, std::unique_ptr<LogDevice>&& wal_device,
                         Optional<Batch>&& recovered_batch,
-                        std::unique_ptr<State>&& state) noexcept;
+                        std::unique_ptr<State>&& state,
+                        u64 largest_offset_as_unique_identifier_init) noexcept;
 
   void start_recycle_task();
 
@@ -209,6 +216,7 @@ class PageRecycler
   void refresh_grants();
 
   StatusOr<slot_offset_type> insert_to_log(batt::Grant& grant, PageId page_id, i32 depth,
+                                           slot_offset_type offset_as_unique_identifier,
                                            batt::Mutex<std::unique_ptr<State>>::Lock& locked_state);
 
   StatusOr<Batch> prepare_batch(std::vector<PageToRecycle>&& to_recycle);
@@ -250,6 +258,8 @@ class PageRecycler
 
   Optional<Batch> prepared_batch_;
   Optional<slot_offset_type> latest_batch_upper_bound_;
+
+  slot_offset_type largest_offset_as_unique_identifier_;
 };
 
 inline std::ostream& operator<<(std::ostream& out, const PageRecycler::Batch& t)
diff --git a/src/llfs/page_recycler.test.cpp b/src/llfs/page_recycler.test.cpp
index 41268ed..36eb5b1 100644
--- a/src/llfs/page_recycler.test.cpp
+++ b/src/llfs/page_recycler.test.cpp
@@ -366,7 +366,7 @@ class FakePageDeleter : public PageDeleter
     // Recursively recycle any newly dead pages.  If we try to recycle the same page multiple
     // times, that is OK, since PageIds are never reused.
     //
-    result = this->test_->recycler_->recycle_pages(as_slice(dead_pages),  //
+    result = this->test_->recycler_->recycle_pages(as_slice(dead_pages), caller_slot,  //
                                                    &recycle_grant, depth + 1);
     BATT_REQUIRE_OK(result);
 
@@ -500,7 +500,7 @@ void PageRecyclerTest::run_crash_recovery_test()
     const std::array to_recycle = {root_id};
 
     BATT_DEBUG_INFO("Test - recycle_pages");
-    StatusOr<slot_offset_type> recycle_status = recycler.recycle_pages(to_recycle);
+    StatusOr<slot_offset_type> recycle_status = recycler.recycle_pages(to_recycle, 0);
     if (!recycle_status.ok()) {
       failed = true;
       break;
diff --git a/src/llfs/page_recycler_events.hpp b/src/llfs/page_recycler_events.hpp
index 8fe0e8b..60a79bb 100644
--- a/src/llfs/page_recycler_events.hpp
+++ b/src/llfs/page_recycler_events.hpp
@@ -36,6 +36,8 @@ struct PageToRecycle {
   //
   i32 depth;
 
+  slot_offset_type offset_as_unique_identifier;
+
   //+++++++++++-+-+--+----- --- -- - - - -
 
   static PageToRecycle make_invalid()
@@ -45,6 +47,7 @@ struct PageToRecycle {
         .refresh_slot = None,
         .batch_slot = None,
         .depth = 0,
+        .offset_as_unique_identifier = 0,
     };
   }
 
@@ -129,12 +132,13 @@ struct PackedPageToRecycle {
   little_page_id_int page_id;
   little_u64 batch_slot;
+  u64 offset_as_unique_identifier;
   little_i32 depth;
   u8 flags;
   u8 reserved_[3];
 };
 
-BATT_STATIC_ASSERT_EQ(24, sizeof(PackedPageToRecycle));
+BATT_STATIC_ASSERT_EQ(32, sizeof(PackedPageToRecycle));
 
 inline std::size_t packed_sizeof(const PackedPageToRecycle&)
 {
@@ -161,6 +165,7 @@ inline bool pack_object_to(const PageToRecycle& from, PackedPageToRecycle* to, D
   } else {
     to->batch_slot = 0;
   }
+  to->offset_as_unique_identifier = from.offset_as_unique_identifier;
   return true;
 }
 
@@ -176,6 +181,7 @@ inline StatusOr unpack_object(const PackedPageToRecycle& packed,
       return None;
     }(),
     .depth = packed.depth,
+    .offset_as_unique_identifier = packed.offset_as_unique_identifier,
   };
 }
 
diff --git a/src/llfs/page_recycler_recovery_visitor.cpp b/src/llfs/page_recycler_recovery_visitor.cpp
index 8bd3ecd..2d68768 100644
--- a/src/llfs/page_recycler_recovery_visitor.cpp
+++ b/src/llfs/page_recycler_recovery_visitor.cpp
@@ -29,6 +29,7 @@ namespace llfs {
     , latest_batch_{}
     , recycler_uuid_{boost::uuids::random_generator{}()}
     , latest_info_refresh_slot_{}
+    , largest_offset_as_unique_identifier_{0}
 {
   initialize_status_codes();
   LLFS_VLOG(1) << "PageRecyclerRecoveryVisitor()";
@@ -111,6 +112,13 @@ Optional PageRecyclerRecoveryVisitor::latest_info_refresh_slot() cons
   return this->latest_info_refresh_slot_;
 }
 
+//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
+//
+u64 PageRecyclerRecoveryVisitor::largest_offset() const
+{
+  return this->largest_offset_as_unique_identifier_;
+}
+
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
 //
 Status PageRecyclerRecoveryVisitor::operator()(const SlotParse& slot,
@@ -140,6 +148,13 @@ Status PageRecyclerRecoveryVisitor::operator()(const SlotParse& slot,
 
     existing_record.batch_slot = recovered.batch_slot;
   }
+
+  // Save the largest unique offset identifier.
+  //
+  if (this->largest_offset_as_unique_identifier_ < recovered.offset_as_unique_identifier) {
+    this->largest_offset_as_unique_identifier_ = recovered.offset_as_unique_identifier;
+  }
+
   PageToRecycle& to_recycle = iter->second;
 
   to_recycle.refresh_slot = slot.offset.lower_bound;
diff --git a/src/llfs/page_recycler_recovery_visitor.hpp b/src/llfs/page_recycler_recovery_visitor.hpp
index a274e93..5d81d16 100644
--- a/src/llfs/page_recycler_recovery_visitor.hpp
+++ b/src/llfs/page_recycler_recovery_visitor.hpp
@@ -45,6 +45,8 @@ class PageRecyclerRecoveryVisitor
 
   Optional latest_info_refresh_slot() const;
 
+  u64 largest_offset() const;
+
   //+++++++++++-+-+--+----- --- -- - - - -
 
   Status operator()(const SlotParse&, const PageToRecycle& to_recycle);
@@ -79,6 +81,8 @@ class PageRecyclerRecoveryVisitor
 
   /** \brief The most recent slot at which recycler info was refreshed. */
   Optional latest_info_refresh_slot_;
+
+  u64 largest_offset_as_unique_identifier_;
 };
 
 } // namespace llfs
diff --git a/src/llfs/volume_trimmer.cpp b/src/llfs/volume_trimmer.cpp
index 72b3e64..04ff3a2 100644
--- a/src/llfs/volume_trimmer.cpp
+++ b/src/llfs/volume_trimmer.cpp
@@ -136,6 +136,7 @@ const usize kTrimEventGrantSize =
 
   // Finish the partially completed trim.
   //
+  LLFS_VLOG(1) << "finishing partial trim...";
   BATT_REQUIRE_OK(trim_volume_log(trimmer_uuid, slot_writer, grant, std::move(*trim_event_info),
                                   std::move(*trimmed_region_info), metadata_refresher, drop_roots,
                                   batt::LogLevel::kWarning));
@@ -143,6 +144,7 @@ const usize kTrimEventGrantSize =
 
   // The trim state is now clean!  Create and return the VolumeTrimmer.
   //
+  LLFS_VLOG(1) << "creating the VolumeTrimmer object...";
   return std::make_unique<VolumeTrimmer>(trimmer_uuid, std::move(name), trim_control, trim_delay,
                                          std::move(log_reader), slot_writer, std::move(drop_roots),
                                          metadata_refresher);
@@ -404,7 +406,8 @@ Status trim_volume_log(const boost::uuids::uuid& trimmer_uuid,
                        VolumeMetadataRefresher& metadata_refresher,
                        const VolumeDropRootsFn& drop_roots, batt::LogLevel error_log_level)
 {
-  LLFS_VLOG(1) << "trim_volume_log()" << BATT_INSPECT(trimmed_region.slot_range);
+  LLFS_VLOG(1) << "trim_volume_log()" << BATT_INSPECT(trimmed_region.slot_range)
+               << BATT_INSPECT(trim_event);
 
   auto on_scope_exit = batt::finally([&] {
     BATT_CHECK_LE(grant.size(), kTrimEventGrantSize);

From f6d43607aa110433998ab21e53b19c33f68ceb3f Mon Sep 17 00:00:00 2001
From: Bhaskar Bora
Date: Wed, 18 Dec 2024 11:04:53 -0500
Subject: [PATCH 3/6] removed death-test

---
 script                            |  2 +-
 src/llfs/page_allocator_state.cpp |  6 ++---
 src/llfs/volume.test.cpp          | 38 ++++++++++++++-----------------
 3 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/script b/script
index 27ed130..6d266c2 160000
--- a/script
+++ b/script
@@ -1 +1 @@
-Subproject commit 27ed13046d0d81974032a4d830565870cb07965a
+Subproject commit 6d266c2de572dde8fcf70085b8fa960084aae663
diff --git a/src/llfs/page_allocator_state.cpp b/src/llfs/page_allocator_state.cpp
index c8d0e49..9ba8ddc 100644
--- a/src/llfs/page_allocator_state.cpp
+++ b/src/llfs/page_allocator_state.cpp
@@ -458,14 +458,14 @@ PageAllocatorState::ProposalStatus PageAllocatorState::propose(PackedPageAllocat
   // If this is a valid proposal that will cause state change, go through and change the deltas to
   // the new ref count values.
   //
-  LOG(INFO) << "propose (start): txn-ref-count= " << txn->ref_counts.size();
+  LLFS_VLOG(1) << "propose (start): txn-ref-count= " << txn->ref_counts.size();
   if (status == ProposalStatus::kValid) {
     for (PackedPageRefCount& prc : txn->ref_counts) {
-      LOG(INFO) << "prc loop: " << prc;
+      LLFS_VLOG(1) << "reference count: " << prc;
       prc.ref_count = this->calculate_new_ref_count(prc, *user_index);
     }
   }
-  LOG(INFO) << "propose (end): txn-ref-count= " << txn->ref_counts.size();
+  LLFS_VLOG(1) << "propose (end): txn-ref-count= " << txn->ref_counts.size();
 
   return status;
 }
diff --git a/src/llfs/volume.test.cpp b/src/llfs/volume.test.cpp
index d82dd11..aece135 100644
--- a/src/llfs/volume.test.cpp
+++ b/src/llfs/volume.test.cpp
@@ -2137,11 +2137,9 @@ TEST_F(VolumeSimTest, DeadPageRefCountSimulation)
   }
 }
 
-using DeathTestVolumeSimTest = VolumeSimTest;
-
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
 //
-TEST_F(DeathTestVolumeSimTest, DeadPageRefCountVariantSimulation)
+TEST_F(VolumeSimTest, DeadPageRefCountVariantSimulation)
 {
   const u32 max_seeds = batt::getenv_as<u32>("MAX_SEEDS").value_or(1000);
@@ -2149,8 +2147,6 @@ TEST_F(DeathTestVolumeSimTest, DeadPageRefCountVariantSimulation)
   // 50 and 40 respectively. To create a perfectly failing test use 50 and 40 as the pre_halt and
   // post_halt values respectively.
 
-  LOG(INFO) << "Starting DeadPageRefCountVariant test for " << max_seeds << " iterations...";
-
   // We will set up two ranges to pick yield counts from.
   std::vector<std::pair<usize, usize>> ranges{{30, 50}, {30, 100}};
   std::mt19937 rng{0};
   std::uniform_int_distribution<usize> pick_value1{ranges[0].first, ranges[0].second};
   std::uniform_int_distribution<usize> pick_value2{ranges[1].first, ranges[1].second};
   usize yield_pre_halt = 0, yield_post_halt = 0;
 
-  auto main_test_block = [&]() {
-    for (u32 current_seed = 0; current_seed < max_seeds; ++current_seed) {
-      LOG_EVERY_N(INFO, 100) << BATT_INSPECT(current_seed) << BATT_INSPECT(max_seeds);
+  for (u32 current_seed = 0; current_seed < max_seeds; ++current_seed) {
+    LOG_EVERY_N(INFO, 100) << BATT_INSPECT(current_seed) << BATT_INSPECT(max_seeds);
 
-      yield_pre_halt = (yield_pre_halt % 2) ? pick_value1(rng) : pick_value2(rng);
-      yield_post_halt = (yield_post_halt % 2) ? pick_value1(rng) : pick_value2(rng);
+    yield_pre_halt = (yield_pre_halt % 2) ? pick_value1(rng) : pick_value2(rng);
+    yield_post_halt = (yield_post_halt % 2) ? pick_value1(rng) : pick_value2(rng);
 
-      ASSERT_NO_FATAL_FAILURE(
-          this->run_dead_page_recovery_test_variant(current_seed, yield_pre_halt, yield_post_halt));
-    }
-  };
-  // Note that we are enabling thread-safe mode for Death-Test.
+    ASSERT_NO_FATAL_FAILURE(
+        this->run_dead_page_recovery_test_variant(current_seed, yield_pre_halt, yield_post_halt));
+  }
+
+  LOG(INFO) << "Ran DeadPageRefCountVariant test for " << max_seeds << " iterations..."
+            << BATT_INSPECT(llfs::PageRecycler::metrics_export().page_id_deletion_reissue);
+
+  // We need to have at least one iteration hitting the issue.
   //
-  ::testing::FLAGS_gtest_death_test_style = "threadsafe";
-  EXPECT_EXIT(main_test_block(), testing::ExitedWithCode(6), "FATAL.*");
-  ::testing::FLAGS_gtest_death_test_style = "fast";
+  ASSERT_GE(llfs::PageRecycler::metrics_export().page_id_deletion_reissue, 1);
 }
 
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
@@ -2330,9 +2326,9 @@ void VolumeSimTest::post_halt_processing(RecoverySimState& state, llfs::StorageS
 void VolumeSimTest::run_dead_page_recovery_test(u32 seed, u32 yield_count)
 {
   RecoverySimState state;
-  state.seed = seed;
+  std::mt19937 rng{seed};
 
-  std::mt19937 rng{state.seed};
+  state.seed = seed;
 
   llfs::StorageSimulation sim{batt::StateMachineEntropySource{
       /*entropy_fn=*/[&rng](usize min_value, usize max_value) -> usize {
@@ -2417,7 +2413,7 @@ void VolumeSimTest::run_dead_page_recovery_test_variant(u32 seed, u32 yield_coun
                                                         u32 yield_count_post_halt)
 {
   RecoverySimState state;
-  std::mt19937 rng{state.seed};
+  std::mt19937 rng{seed};
 
   state.seed = seed;
 
From 315d8baac0295781bbfca223daefcc5705990a5d Mon Sep 17 00:00:00 2001
From: Bhaskar Bora
Date: Wed, 18 Dec 2024 11:44:20 -0500
Subject: [PATCH 4/6] added a histogram plot for the bug hits

---
 src/llfs/committable_page_cache_job.cpp |  3 +--
 src/llfs/volume.test.cpp                | 27 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/src/llfs/committable_page_cache_job.cpp b/src/llfs/committable_page_cache_job.cpp
index 788e9db..3cfa6d1 100644
--- a/src/llfs/committable_page_cache_job.cpp
+++ b/src/llfs/committable_page_cache_job.cpp
@@ -182,8 +182,7 @@ Status commit(std::unique_ptr<PageCacheJob> job, const JobCommitParams& params,
 Status commit(CommittablePageCacheJob committable_job, const JobCommitParams& params, u64 callers,
               slot_offset_type prev_caller_slot, batt::Watch<slot_offset_type>* durable_caller_slot)
 {
-  auto stat = committable_job.commit_impl(params, callers, prev_caller_slot, durable_caller_slot);
-  return stat;
+  return committable_job.commit_impl(params, callers, prev_caller_slot, durable_caller_slot);
 }
 
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
diff --git a/src/llfs/volume.test.cpp b/src/llfs/volume.test.cpp
index aece135..ab80b5b 100644
--- a/src/llfs/volume.test.cpp
+++ b/src/llfs/volume.test.cpp
@@ -2137,6 +2137,22 @@ TEST_F(VolumeSimTest, DeadPageRefCountSimulation)
   }
 }
 
+//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
+//
+void display_histogram(u32 max_seeds, const std::vector<usize>& histogram)
+{
+  std::string hist_str;
+  usize current_seed = 0;
+  const usize div = max_seeds / histogram.size();
+
+  for (const auto entry : histogram) {
+    hist_str += std::to_string(current_seed) + ":" + std::to_string(entry) + " ";
+    current_seed += div;
+  }
+
+  LOG(INFO) << "Histogram: " << hist_str;
+}
+
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
 //
 TEST_F(VolumeSimTest, DeadPageRefCountVariantSimulation)
@@ -2153,6 +2169,10 @@ TEST_F(VolumeSimTest, DeadPageRefCountVariantSimulation)
   std::uniform_int_distribution<usize> pick_value1{ranges[0].first, ranges[0].second};
   std::uniform_int_distribution<usize> pick_value2{ranges[1].first, ranges[1].second};
   usize yield_pre_halt = 0, yield_post_halt = 0;
+  std::vector<usize> histogram(10);
+  const usize div = max_seeds / histogram.size();
+
+  llfs::PageRecycler::metrics_export().page_id_deletion_reissue.set(0);
 
   for (u32 current_seed = 0; current_seed < max_seeds; ++current_seed) {
     LOG_EVERY_N(INFO, 100) << BATT_INSPECT(current_seed) << BATT_INSPECT(max_seeds);
     yield_pre_halt = (yield_pre_halt % 2) ? pick_value1(rng) : pick_value2(rng);
     yield_post_halt = (yield_post_halt % 2) ? pick_value1(rng) : pick_value2(rng);
 
+    usize last_value = llfs::PageRecycler::metrics_export().page_id_deletion_reissue;
+
     ASSERT_NO_FATAL_FAILURE(
         this->run_dead_page_recovery_test_variant(current_seed, yield_pre_halt, yield_post_halt));
+
+    last_value = llfs::PageRecycler::metrics_export().page_id_deletion_reissue - last_value;
+    histogram[current_seed / div] += last_value;
   }
 
   LOG(INFO) << "Ran DeadPageRefCountVariant test for " << max_seeds << " iterations..."
             << BATT_INSPECT(llfs::PageRecycler::metrics_export().page_id_deletion_reissue);
 
+  display_histogram(max_seeds, histogram);
+
   // We need to have at least one iteration hitting the issue.
   //
   ASSERT_GE(llfs::PageRecycler::metrics_export().page_id_deletion_reissue, 1);
 
From 8f9264591aff2d8a57a21aca1774f672a4181c6f Mon Sep 17 00:00:00 2001
From: Bhaskar Bora
Date: Tue, 7 Jan 2025 15:04:58 -0500
Subject: [PATCH 5/6] temp fix for the test failure

---
 src/llfs/page_recycler_options.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llfs/page_recycler_options.hpp b/src/llfs/page_recycler_options.hpp
index 32b52d9..72902c9 100644
--- a/src/llfs/page_recycler_options.hpp
+++ b/src/llfs/page_recycler_options.hpp
@@ -22,7 +22,7 @@ class PageRecyclerOptions
  public:
   static constexpr usize kDefaultInfoRefreshRate = 4;
   static constexpr usize kDefaultMaxRefsPerPage = 1 * kMiB;
-  static constexpr usize kDefaultBatchSize = 24;
+  static constexpr usize kDefaultBatchSize = 20;
   static constexpr usize kDefaultRefreshFactor = 2;
 
   //+++++++++++-+-+--+----- --- -- - - - -

From 4b68e888dea8e418546d9a9ed2bde672e5fbd0d7 Mon Sep 17 00:00:00 2001
From: Bhaskar Bora
Date: Wed, 8 Jan 2025 14:19:14 -0500
Subject: [PATCH 6/6] page_tracer test case fix

---
 src/llfs/page_recycler.cpp         | 31 +++++++++++++++++++++++-------
 src/llfs/page_recycler.hpp         |  3 +++
 src/llfs/page_recycler_options.hpp |  2 +-
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/src/llfs/page_recycler.cpp b/src/llfs/page_recycler.cpp
index 0616f90..232fb58 100644
--- a/src/llfs/page_recycler.cpp
+++ b/src/llfs/page_recycler.cpp
@@ -72,15 +72,32 @@ StatusOr refresh_recycler_info_slot(TypedSlotWriter
 //
 /*static*/ u64 PageRecycler::calculate_log_size(const PageRecyclerOptions& options,
                                                 Optional<PageCount> max_buffered_page_count)
+{
+  const usize log_size_raw = calculate_log_size_no_padding(options, max_buffered_page_count);
+  const usize padding_bytes = 1 * kKiB;
+
+  return round_up_to_page_size_multiple(log_size_raw + padding_bytes);
+}
+
+//==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
+//
+/*static*/ u64 PageRecycler::calculate_log_size_no_padding(
+    const PageRecyclerOptions& options, Optional<PageCount> max_buffered_page_count)
 {
   static const PackedPageRecyclerInfo info = {};
 
-  return round_up_to_page_size_multiple(
-      options.total_page_grant_size() *
-          (1 + max_buffered_page_count.value_or(
-                   PageRecycler::default_max_buffered_page_count(options))) +
-      options.recycle_task_target() + packed_sizeof_slot(info) * (options.info_refresh_rate() + 1) +
-      1 * kKiB);
+  const usize bytes_per_buffered_page = options.total_page_grant_size();
+
+  const usize minimum_required_buffered_pages =
+      1 + max_buffered_page_count.value_or(PageRecycler::default_max_buffered_page_count(options));
+
+  const usize bytes_for_minimum_required_buffered_pages =
+      bytes_per_buffered_page * minimum_required_buffered_pages;
+
+  const usize fixed_size_overhead_bytes =
+      options.recycle_task_target() + packed_sizeof_slot(info) * (options.info_refresh_rate() + 1);
+
+  return bytes_for_minimum_required_buffered_pages + fixed_size_overhead_bytes;
 }
 
 //==#==========+==+=+=++=+++++++++++-+-+--+----- --- -- - - - -
 //
 /*static*/ PageCount PageRecycler::calculate_max_buffered_page_count(
     const PageRecyclerOptions& options, u64 log_size)
 {
-  const u64 required_log_size = PageRecycler::calculate_log_size(options, PageCount{0});
+  const u64 required_log_size = PageRecycler::calculate_log_size_no_padding(options, PageCount{0});
   if (log_size <= required_log_size) {
     return PageCount{0};
   }
diff --git a/src/llfs/page_recycler.hpp b/src/llfs/page_recycler.hpp
index 529bbc6..c0c5a5e 100644
--- a/src/llfs/page_recycler.hpp
+++ b/src/llfs/page_recycler.hpp
@@ -67,6 +67,9 @@ class PageRecycler
   static u64 calculate_log_size(const PageRecyclerOptions& options,
                                 Optional<PageCount> max_buffered_page_count = None);
 
+  static u64 calculate_log_size_no_padding(const PageRecyclerOptions& options,
+                                           Optional<PageCount> max_buffered_page_count = None);
+
   static PageCount calculate_max_buffered_page_count(const PageRecyclerOptions& options,
                                                      u64 log_size);
 
diff --git a/src/llfs/page_recycler_options.hpp b/src/llfs/page_recycler_options.hpp
index 72902c9..32b52d9 100644
--- a/src/llfs/page_recycler_options.hpp
+++ b/src/llfs/page_recycler_options.hpp
@@ -22,7 +22,7 @@ class PageRecyclerOptions
  public:
   static constexpr usize kDefaultInfoRefreshRate = 4;
   static constexpr usize kDefaultMaxRefsPerPage = 1 * kMiB;
-  static constexpr usize kDefaultBatchSize = 20;
+  static constexpr usize kDefaultBatchSize = 24;
   static constexpr usize kDefaultRefreshFactor = 2;
 
   //+++++++++++-+-+--+----- --- -- - - - -
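
Note on the dedupe mechanism introduced in PATCH 2/6: PageRecycler::recycle_pages() now takes the caller's slot offset and treats it as a monotonically increasing unique identifier for the whole deletion request. A request whose offset is not strictly greater than the largest offset already seen -- including the value that PageRecyclerRecoveryVisitor::largest_offset() recovers by scanning the recycler log -- is treated as a re-issued duplicate (for example, a crash-and-retry of the same job commit): the page_id_deletion_reissue metric is incremented and the pages are not inserted again. The standalone sketch below models just that guard; RecyclerModel, the int page ids, and the bool return convention are illustrative assumptions, not llfs code:

#include <cstdint>
#include <iostream>
#include <vector>

using slot_offset_type = std::uint64_t;

class RecyclerModel
{
 public:
  // `recovered_largest_offset` plays the role of
  // PageRecyclerRecoveryVisitor::largest_offset(): the maximum
  // offset_as_unique_identifier found while scanning the recycler log.
  explicit RecyclerModel(slot_offset_type recovered_largest_offset)
      : largest_offset_{recovered_largest_offset}
  {
  }

  // Models PageRecycler::recycle_pages(page_ids, offset, ...): returns false
  // (and counts a reissue) when the request offset is not strictly newer than
  // the largest offset already seen.
  bool recycle_pages(const std::vector<int>& page_ids, slot_offset_type offset)
  {
    if (page_ids.empty()) {
      return true;  // Nothing to do; the real code returns the durable upper bound.
    }
    if (largest_offset_ >= offset) {
      ++page_id_deletion_reissue_;  // Duplicate (e.g., crash/retry of the same commit).
      return false;
    }
    largest_offset_ = offset;  // Accept the request and advance the watermark.
    // ... the real implementation appends one PageToRecycle slot per page here ...
    return true;
  }

  std::uint64_t reissue_count() const
  {
    return page_id_deletion_reissue_;
  }

 private:
  slot_offset_type largest_offset_;
  std::uint64_t page_id_deletion_reissue_ = 0;
};

int main()
{
  RecyclerModel recycler{/*recovered_largest_offset=*/100};

  std::cout << recycler.recycle_pages({1, 2}, 150) << "\n";  // 1: accepted (150 > 100)
  std::cout << recycler.recycle_pages({1, 2}, 150) << "\n";  // 0: duplicate (150 >= 150)
  std::cout << recycler.recycle_pages({3}, 120) << "\n";     // 0: older than largest seen
  std::cout << "reissues: " << recycler.reissue_count() << "\n";  // reissues: 2
  return 0;
}

Because the comparison is >=, a retried call carrying the same caller_slot is absorbed instead of double-counting page deletions; only a genuinely newer slot offset advances the watermark.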
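
Note on the sizing change in PATCH 6/6: calculate_log_size() is split so that the 1 KiB padding and the round-up to a page-size multiple are applied once, on top of an exact calculate_log_size_no_padding() requirement, and calculate_max_buffered_page_count() now inverts the un-padded formula (previously the padded size appeared on both sides of the round trip). A minimal worked sketch of that round trip follows; the constants standing in for options.total_page_grant_size() and the fixed overhead, and the division step that the hunk does not show, are illustrative assumptions rather than llfs's actual numbers:

#include <cstdint>
#include <iostream>

using u64 = std::uint64_t;

constexpr u64 kKiB = 1024;
constexpr u64 kPageSize = 4 * kKiB;  // assumed page-size multiple

// Illustrative stand-ins for the PageRecyclerOptions-derived quantities.
constexpr u64 kBytesPerBufferedPage = 512;     // options.total_page_grant_size()
constexpr u64 kFixedOverheadBytes = 8 * kKiB;  // recycle task target + info slots

u64 round_up_to_page_size_multiple(u64 n)
{
  return (n + kPageSize - 1) / kPageSize * kPageSize;
}

u64 calculate_log_size_no_padding(u64 max_buffered_page_count)
{
  // (1 + N) buffered pages' worth of grants plus the fixed overhead.
  return kBytesPerBufferedPage * (1 + max_buffered_page_count) + kFixedOverheadBytes;
}

u64 calculate_log_size(u64 max_buffered_page_count)
{
  // Padding is added once, on top of the exact requirement, then rounded up.
  return round_up_to_page_size_multiple(calculate_log_size_no_padding(max_buffered_page_count) +
                                        1 * kKiB);
}

u64 calculate_max_buffered_page_count(u64 log_size)
{
  // Inverts the *un-padded* formula, so padding cannot shrink the answer.
  const u64 required = calculate_log_size_no_padding(0);
  if (log_size <= required) {
    return 0;
  }
  return (log_size - required) / kBytesPerBufferedPage;  // assumed division step
}

int main()
{
  const u64 log_size = calculate_log_size(/*max_buffered_page_count=*/100);
  std::cout << log_size << " bytes -> " << calculate_max_buffered_page_count(log_size)
            << " buffered pages\n";  // 61440 bytes -> 103 buffered pages
  return 0;
}

With these example numbers, a log sized for 100 buffered pages reports back at least 100 when inverted, which appears to be the property the page_tracer test fix relies on.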