From 2f42518998d985e41d5d0bf2feaa6be623917356 Mon Sep 17 00:00:00 2001 From: Andrew Wong Date: Thu, 20 Jun 2024 01:07:45 -0700 Subject: [PATCH] spillover_manifest: remove get_manifest_path() --- src/v/cloud_storage/partition_manifest.cc | 34 -------------------- src/v/cloud_storage/partition_manifest.h | 3 -- src/v/cloud_storage/spillover_manifest.h | 38 +---------------------- 3 files changed, 1 insertion(+), 74 deletions(-) diff --git a/src/v/cloud_storage/partition_manifest.cc b/src/v/cloud_storage/partition_manifest.cc index 8e70e5e46c15b..2196a37df90ec 100644 --- a/src/v/cloud_storage/partition_manifest.cc +++ b/src/v/cloud_storage/partition_manifest.cc @@ -250,40 +250,6 @@ partition_manifest::partition_manifest( , _segments() , _last_offset(0) {} -// NOTE: the methods that generate remote paths use the xxhash function -// to randomize the prefix. S3 groups the objects into chunks based on -// these prefixes. It also applies rate limit to chunks so if all segments -// and manifests will have the same prefix we will be able to do around -// 3000-5000 req/sec. AWS doc mentions that having only two prefix -// characters should be enough for most workloads -// (https://aws.amazon.com/blogs/aws/amazon-s3-performance-tips-tricks-seattle-hiring-event/) -// We're using eight because it's free and because AWS S3 is not the only -// backend and other S3 API implementations might benefit from that. - -remote_manifest_path generate_partition_manifest_path( - const model::ntp& ntp, - model::initial_revision_id rev, - manifest_format format) { - // NOTE: the idea here is to split all possible hash values into - // 16 bins. Every bin should have lowest 28-bits set to 0. - // As result, for segment names all prefixes are possible, but - // for manifests, only 0x00000000, 0x10000000, ... 0xf0000000 - // are used. This will allow us to quickly find all manifests - // that S3 bucket contains. - constexpr uint32_t bitmask = 0xF0000000; - auto path = ssx::sformat("{}_{}", ntp.path(), rev()); - uint32_t hash = bitmask & xxhash_32(path.data(), path.size()); - return remote_manifest_path(fmt::format( - "{:08x}/meta/{}_{}/manifest.{}", hash, ntp.path(), rev(), [&] { - switch (format) { - case manifest_format::json: - return "json"; - case manifest_format::serde: - return "bin"; - } - }())); -} - remote_manifest_path partition_manifest::get_manifest_path( const remote_path_provider& path_provider) const { return remote_manifest_path{path_provider.partition_manifest_path(*this)}; diff --git a/src/v/cloud_storage/partition_manifest.h b/src/v/cloud_storage/partition_manifest.h index 4062cdf950502..9717f01a28a32 100644 --- a/src/v/cloud_storage/partition_manifest.h +++ b/src/v/cloud_storage/partition_manifest.h @@ -71,9 +71,6 @@ remote_segment_path generate_remote_segment_path( /// Generate correct S3 segment name based on term and base offset segment_name generate_local_segment_name(model::offset o, model::term_id t); -remote_manifest_path generate_partition_manifest_path( - const model::ntp&, model::initial_revision_id, manifest_format); - // This structure can be impelenented // to allow access to private fields of the manifest. struct partition_manifest_accessor; diff --git a/src/v/cloud_storage/spillover_manifest.h b/src/v/cloud_storage/spillover_manifest.h index 1502b22b8be03..a79b91397badb 100644 --- a/src/v/cloud_storage/spillover_manifest.h +++ b/src/v/cloud_storage/spillover_manifest.h @@ -18,28 +18,6 @@ namespace cloud_storage { -namespace { - -remote_manifest_path generate_spillover_manifest_path( - const model::ntp& ntp, - model::initial_revision_id rev, - const spillover_manifest_path_components& c) { - auto path = generate_partition_manifest_path( - ntp, rev, manifest_format::serde); - // Given the topic name size limit the name should fit into - // the AWS S3 size limit. - return remote_manifest_path(fmt::format( - "{}.{}.{}.{}.{}.{}.{}", - path().string(), - c.base(), - c.last(), - c.base_kafka(), - c.next_kafka(), - c.base_ts.value(), - c.last_ts.value())); -} -} // namespace - /// The section of the partition manifest /// /// The only purpose of this class is to provide different implementation of the @@ -73,21 +51,7 @@ class spillover_manifest final : public partition_manifest { c.base_ts.value(), c.last_ts.value()); } - remote_manifest_path get_manifest_path() const { - const auto ls = last_segment(); - vassert(ls.has_value(), "Spillover manifest can't be empty"); - const auto fs = *begin(); - spillover_manifest_path_components smc{ - .base = fs.base_offset, - .last = ls->committed_offset, - .base_kafka = fs.base_kafka_offset(), - .next_kafka = ls->next_kafka_offset(), - .base_ts = fs.base_timestamp, - .last_ts = ls->max_timestamp, - }; - return generate_spillover_manifest_path( - get_ntp(), get_revision_id(), smc); - } + remote_manifest_path get_manifest_path(const remote_path_provider& path_provider) const { return remote_manifest_path{