Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/meta-srv/src/gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,16 @@ use store_api::storage::RegionId;

mod candidate;
mod ctx;
mod handler;
#[cfg(test)]
mod mock;
mod options;
mod procedure;
mod scheduler;
mod tracker;

pub(crate) use options::GcSchedulerOptions;
pub(crate) use scheduler::{GcScheduler, GcTickerRef};

/// Maps each [`RegionId`] to a `(Peer, Vec<Peer>)` pair.
///
/// NOTE(review): presumably the single `Peer` is the region's leader and the
/// `Vec<Peer>` holds its followers — confirm against the code that builds this map.
pub(crate) type Region2Peers = HashMap<RegionId, (Peer, Vec<Peer>)>;

Expand Down
85 changes: 85 additions & 0 deletions src/meta-srv/src/gc/candidate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use store_api::storage::RegionId;
use table::metadata::TableId;

use crate::error::Result;
use crate::gc::scheduler::GcScheduler;

/// Represents a region candidate for GC with its priority score.
#[derive(Debug, Clone, PartialEq, Eq)]
Expand All @@ -46,3 +47,87 @@ impl GcCandidate {
self.score.into_inner()
}
}

impl GcScheduler {
    /// Calculate the GC priority score of a region.
    ///
    /// The score is the sum of two weighted terms:
    /// - `sst_num * sst_count_weight`
    /// - `file_removed_cnt * file_removed_cnt_weight` (Mito regions only; `0.0` otherwise)
    ///
    /// A higher score means the region is a better candidate for GC.
    fn calculate_gc_score(&self, region_stat: &RegionStat) -> f64 {
        let sst_count_score = region_stat.sst_num as f64 * self.config.sst_count_weight;

        let file_remove_cnt_score = match &region_stat.region_manifest {
            RegionManifestInfo::Mito {
                file_removed_cnt, ..
            } => *file_removed_cnt as f64 * self.config.file_removed_cnt_weight,
            // Metric regions carry no removed-file count, so they contribute nothing here.
            // This arm is expected to be unreachable because the metric engine doesn't
            // support GC.
            RegionManifestInfo::Metric { .. } => 0.0,
        };

        sst_count_score + file_remove_cnt_score
    }

    /// Filter and score regions that are candidates for GC, grouped by table.
    ///
    /// A region qualifies when all of the following hold:
    /// - its role is [`RegionRole::Leader`],
    /// - its `approximate_bytes` is at least `min_region_size_threshold`,
    /// - it is not tracked as having been GC'ed within `gc_cooldown_period`,
    /// - its score from `calculate_gc_score` is strictly positive.
    ///
    /// For every table, qualifying regions are ordered by score (highest first, ties keep
    /// their input order) and at most `regions_per_table_threshold` of them are returned.
    /// Tables without any selected region are omitted from the result.
    pub(crate) async fn select_gc_candidates(
        &self,
        table_to_region_stats: &HashMap<TableId, Vec<RegionStat>>,
    ) -> Result<HashMap<TableId, Vec<GcCandidate>>> {
        let mut table_candidates: HashMap<TableId, Vec<GcCandidate>> = HashMap::new();
        let now = Instant::now();

        // Acquire the tracker once for the whole selection pass instead of once per region.
        // There is no other await point below, so the guard is never held across a suspension.
        let gc_tracker = self.region_gc_tracker.lock().await;

        for (table_id, region_stats) in table_to_region_stats {
            // Keep borrowed stats while ranking; only the regions that survive the
            // top-N cut are cloned into `GcCandidate`s.
            let mut scored_regions: Vec<(f64, &RegionStat)> = Vec::new();

            for region_stat in region_stats {
                if region_stat.role != RegionRole::Leader {
                    continue;
                }

                // Skip regions that are too small
                if region_stat.approximate_bytes < self.config.min_region_size_threshold {
                    continue;
                }

                // Skip regions that are in cooldown period
                if let Some(gc_info) = gc_tracker.get(&region_stat.id)
                    && now.duration_since(gc_info.last_gc_time) < self.config.gc_cooldown_period
                {
                    debug!("Skipping region {} due to cooldown", region_stat.id);
                    continue;
                }

                let score = self.calculate_gc_score(region_stat);

                debug!(
                    "Region {} (table {}) has GC score {:.4}",
                    region_stat.id, table_id, score
                );

                // Only consider regions with a meaningful score. This also rejects NaN,
                // so every retained score is a positive, totally ordered value.
                if score > 0.0 {
                    scored_regions.push((score, region_stat));
                }
            }

            // Sort by score in descending order (stable, so ties keep input order) and
            // materialize only the top N candidates.
            scored_regions.sort_by(|a, b| b.0.total_cmp(&a.0));
            let top_candidates: Vec<GcCandidate> = scored_regions
                .into_iter()
                .take(self.config.regions_per_table_threshold)
                .map(|(score, region_stat)| {
                    GcCandidate::new(region_stat.id, score, region_stat.clone())
                })
                .collect();

            if !top_candidates.is_empty() {
                info!(
                    "Selected {} GC candidates for table {} (top {} out of all qualified)",
                    top_candidates.len(),
                    table_id,
                    self.config.regions_per_table_threshold
                );
                table_candidates.insert(*table_id, top_candidates);
            }
        }

        info!(
            "Selected GC candidates for {} tables",
            table_candidates.len()
        );
        Ok(table_candidates)
    }
}
3 changes: 0 additions & 3 deletions src/meta-srv/src/gc/ctx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// TODO(discord9): remove this once gc scheduler is fully merged
#![allow(unused)]

use std::collections::{HashMap, HashSet};
use std::time::Duration;

Expand Down
Loading