From 0c4cd3158f492901271364da0899156505574075 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 20 May 2026 14:44:51 -0400 Subject: [PATCH 01/26] Fix fallback stampede protection tests --- crates/cachet/src/builder/cache.rs | 2 +- crates/cachet/tests/fallback.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/cachet/src/builder/cache.rs b/crates/cachet/src/builder/cache.rs index f9d05b583..90ab97492 100644 --- a/crates/cachet/src/builder/cache.rs +++ b/crates/cachet/src/builder/cache.rs @@ -450,8 +450,8 @@ mod tests { let fb = Cache::builder::(clock.clone()).storage(cachet_tier::MockCache::new()); let cache = Cache::builder::(clock) .storage(cachet_tier::MockCache::new()) - .fallback(fb) .stampede_protection() + .fallback(fb) .build(); assert!(!cache.name().is_empty()); } diff --git a/crates/cachet/tests/fallback.rs b/crates/cachet/tests/fallback.rs index 33d78b860..c7752768f 100644 --- a/crates/cachet/tests/fallback.rs +++ b/crates/cachet/tests/fallback.rs @@ -310,8 +310,8 @@ async fn fallback_builder_stampede_protection() { let cache = Cache::builder::(clock) .memory() - .fallback(fallback) .stampede_protection() + .fallback(fallback) .build(); let key = "key".to_string(); From 3b4c24000cf2dd88d00a63fe742f96578d44a1ab Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 20 May 2026 14:53:10 -0400 Subject: [PATCH 02/26] Revert "Fix fallback stampede protection tests" This reverts commit 0c4cd3158f492901271364da0899156505574075. --- crates/cachet/src/builder/cache.rs | 2 +- crates/cachet/tests/fallback.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/cachet/src/builder/cache.rs b/crates/cachet/src/builder/cache.rs index 90ab97492..f9d05b583 100644 --- a/crates/cachet/src/builder/cache.rs +++ b/crates/cachet/src/builder/cache.rs @@ -450,8 +450,8 @@ mod tests { let fb = Cache::builder::(clock.clone()).storage(cachet_tier::MockCache::new()); let cache = Cache::builder::(clock) .storage(cachet_tier::MockCache::new()) - .stampede_protection() .fallback(fb) + .stampede_protection() .build(); assert!(!cache.name().is_empty()); } diff --git a/crates/cachet/tests/fallback.rs b/crates/cachet/tests/fallback.rs index c7752768f..33d78b860 100644 --- a/crates/cachet/tests/fallback.rs +++ b/crates/cachet/tests/fallback.rs @@ -310,8 +310,8 @@ async fn fallback_builder_stampede_protection() { let cache = Cache::builder::(clock) .memory() - .stampede_protection() .fallback(fallback) + .stampede_protection() .build(); let key = "key".to_string(); From 43a388113aaf1b4d17dea60a3d112b70d99d2e32 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Thu, 28 May 2026 14:35:32 -0400 Subject: [PATCH 03/26] Request coalescing telemetry --- Cargo.lock | 2 +- crates/cachet/Cargo.toml | 12 +- crates/cachet/README.md | 9 +- crates/cachet/benches/operations.rs | 26 + .../cachet/examples/telemetry_accumulator.rs | 142 ++++ .../cachet/examples/telemetry_subscriber.rs | 89 +- crates/cachet/src/builder/buildable.rs | 28 +- crates/cachet/src/builder/cache.rs | 11 +- crates/cachet/src/builder/transform.rs | 10 +- crates/cachet/src/cache.rs | 283 +++++-- crates/cachet/src/fallback.rs | 30 +- crates/cachet/src/lib.rs | 9 +- crates/cachet/src/refresh.rs | 33 +- crates/cachet/src/telemetry/attributes.rs | 18 +- crates/cachet/src/telemetry/cache.rs | 797 ++++++++++++++---- crates/cachet/src/telemetry/ext.rs | 130 --- crates/cachet/src/telemetry/handler.rs | 69 ++ crates/cachet/src/telemetry/mod.rs | 10 +- crates/cachet/src/wrapper.rs | 143 ++-- 19 files changed, 1310 insertions(+), 541 deletions(-) create mode 100644 crates/cachet/examples/telemetry_accumulator.rs delete mode 100644 crates/cachet/src/telemetry/ext.rs create mode 100644 crates/cachet/src/telemetry/handler.rs diff --git a/Cargo.lock b/Cargo.lock index f67637ef7..bca0960ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -442,6 +442,7 @@ dependencies = [ "cachet_service", "cachet_tier", "criterion", + "dashmap", "dynosaur", "futures", "layered", @@ -455,7 +456,6 @@ dependencies = [ "seatbelt", "serde", "testing_aids", - "thread_aware", "tick", "tokio", "tracing", diff --git a/crates/cachet/Cargo.toml b/crates/cachet/Cargo.toml index 1b9b64f84..e08eab8cf 100644 --- a/crates/cachet/Cargo.toml +++ b/crates/cachet/Cargo.toml @@ -36,7 +36,7 @@ allowed_external_types = [ [features] default = ["memory"] -logs = ["dep:tracing"] +logs = [] test-util = ["cachet_tier/test-util", "tick/test-util"] memory = ["dep:cachet_memory"] service = ["dep:cachet_service", "dep:layered"] @@ -56,9 +56,8 @@ parking_lot = { workspace = true } pin-project-lite = { workspace = true } postcard = { workspace = true, optional = true } serde = { workspace = true, optional = true, features = ["derive"] } -thread_aware = { workspace = true } tick = { workspace = true, features = [] } -tracing = { workspace = true, optional = true } +tracing = { workspace = true } uniflight = { workspace = true } [dev-dependencies] @@ -67,6 +66,7 @@ bytesbuf = { workspace = true } cachet_memory = { workspace = true } cachet_tier = { workspace = true, features = ["test-util"] } criterion = { workspace = true } +dashmap = { workspace = true } dynosaur = { workspace = true } opentelemetry = { workspace = true, features = [ "metrics", @@ -81,7 +81,7 @@ testing_aids = { path = "../testing_aids" } tick = { workspace = true, features = ["test-util", "tokio"] } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } tracing = { workspace = true, features = ["std"] } -tracing-subscriber = { workspace = true } +tracing-subscriber = { workspace = true, features = ["registry"] } tracing-test = { workspace = true, features = ["no-env-filter"] } [[bench]] @@ -155,6 +155,10 @@ required-features = ["memory", "test-util"] name = "telemetry_subscriber" required-features = ["memory", "logs"] +[[example]] +name = "telemetry_accumulator" +required-features = ["memory", "logs"] + [[example]] name = "serialization" required-features = ["memory", "serialize"] diff --git a/crates/cachet/README.md b/crates/cachet/README.md index bca66ac12..ec237a15e 100644 --- a/crates/cachet/README.md +++ b/crates/cachet/README.md @@ -230,8 +230,9 @@ cache.insert("key".to_string(), "value".to_string()).await?; Enable with the `logs` feature and `.enable_logs()` on the cache builder. -Each cache operation emits a structured [`tracing`][__link20] event with fields -`cache.name`, `cache.event`, and `cache.duration_ns`. +Each cache operation creates a structured [`tracing`][__link20] span and emits events inside +that span. Span fields include `cache.name`, `cache.event`, and `cache.duration_ns`, +with `cache.coalesced` and `cache.fallback` recorded when applicable. ### Subscribing to events @@ -256,7 +257,7 @@ See the `telemetry_subscriber` example for a complete demonstration. |Level|Events| |-----|------| |ERROR|`cache.get_error`, `cache.insert_error`, `cache.invalidate_error`, `cache.clear_error`| -|INFO|`cache.expired`, `cache.refresh_miss`, `cache.inserted`, `cache.insert_rejected`, `cache.invalidated`, `cache.fallback`| +|INFO|`cache.expired`, `cache.refresh_miss`, `cache.inserted`, `cache.insert_rejected`, `cache.invalidated`| |DEBUG|`cache.hit`, `cache.miss`, `cache.refresh_hit`, `cache.cleared`| @@ -265,7 +266,7 @@ See the `telemetry_subscriber` example for a complete demonstration. This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG-n6c1asXb8uG6CeIkUqNVu-GxjETKbvQrZwG4I5xAXRHmeEYWSIgmhieXRlc2J1ZmUwLjUuMIJmY2FjaGV0ZTAuNS4wgm1jYWNoZXRfbWVtb3J5ZTAuMi4wgm5jYWNoZXRfc2VydmljZWUwLjEuMIJrY2FjaGV0X3RpZXJlMC4xLjCCZHRpY2tlMC4zLjCCZ3RyYWNpbmdmMC4xLjQ0gml1bmlmbGlnaHRlMC4yLjA + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG-oOSdNzEYacG_QJDULSptgGG0JW5KX3QM-kG0OU4DwdpoIBYWSIgmhieXRlc2J1ZmUwLjUuMIJmY2FjaGV0ZTAuNS4wgm1jYWNoZXRfbWVtb3J5ZTAuMi4wgm5jYWNoZXRfc2VydmljZWUwLjEuMIJrY2FjaGV0X3RpZXJlMC4xLjCCZHRpY2tlMC4zLjCCZ3RyYWNpbmdmMC4xLjQ0gml1bmlmbGlnaHRlMC4yLjA [__link0]: https://docs.rs/cachet/0.5.0/cachet/?search=TimeToRefresh [__link1]: https://crates.io/crates/uniflight/0.2.0 [__link10]: https://docs.rs/cachet_tier/0.1.0/cachet_tier/?search=CacheTier diff --git a/crates/cachet/benches/operations.rs b/crates/cachet/benches/operations.rs index c3598580f..bcacdbe56 100644 --- a/crates/cachet/benches/operations.rs +++ b/crates/cachet/benches/operations.rs @@ -15,6 +15,7 @@ use cachet_tier::MockCache; use criterion::{Criterion, criterion_group, criterion_main}; use tick::Clock; use tokio::runtime::Runtime; +use tracing_subscriber::layer::SubscriberExt; fn rt() -> Runtime { Runtime::new().expect("failed to create runtime") @@ -148,6 +149,31 @@ fn bench_wrapper_overhead(c: &mut Criterion) { }); }); + // With telemetry + active subscriber (measures span processing overhead) + group.bench_function("with_telemetry_subscriber", |b| { + let subscriber = tracing_subscriber::registry().with(tracing_subscriber::fmt::layer().with_writer(std::io::sink).with_ansi(false)); + let _guard = tracing::subscriber::set_default(subscriber); + + let cache = rt.block_on(async { + let clock = Clock::new_tokio(); + Cache::builder(clock) + .storage(MockCache::::new()) + .enable_logs() + .build() + }); + let key = "key".to_string(); + + b.iter_custom(|iters| { + rt.block_on(async { + let start = Instant::now(); + for _ in 0..iters { + let _ = black_box(cache.get(black_box(&key)).await); + } + start.elapsed() + }) + }); + }); + // With fallback tier group.bench_function("with_fallback", |b| { let cache = rt.block_on(async { diff --git a/crates/cachet/examples/telemetry_accumulator.rs b/crates/cachet/examples/telemetry_accumulator.rs new file mode 100644 index 000000000..057ee07fe --- /dev/null +++ b/crates/cachet/examples/telemetry_accumulator.rs @@ -0,0 +1,142 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Demonstrates accumulating cachet telemetry into a single summary +//! per cache operation, correlated by `request_id`. +//! +//! This pattern mirrors how a TVS-style consumer would collect tier +//! outcomes, latencies, and flags into one log row per request. +//! +//! Uses `DashMap` for lock-free concurrent accumulation — safe across +//! all async runtimes, including work-stealing (tokio) and thread-per-core +//! (oxidizer), even if a task migrates between cores mid-operation. +//! +//! Run with: `cargo run --example telemetry_accumulator --features "memory,logs"` + +use std::time::Duration; + +use cachet::telemetry::handler::{CacheEventHandler, CacheOperationEvent, CacheTierEvent, RequestId}; +use cachet::{Cache, CacheEntry}; +use dashmap::DashMap; +use tick::Clock; + +// --------------------------------------------------------------------------- +// Accumulated state — one entry per in-flight operation, keyed by request_id +// --------------------------------------------------------------------------- + +#[derive(Debug)] +struct TierRecord { + tier_name: String, + outcome: String, + duration_us: u64, + fallback: bool, +} + +/// Handler that accumulates tier events per `request_id` and prints a +/// one-line summary when the operation completes. +/// +/// `DashMap` shards the map internally so concurrent operations on +/// different cores rarely contend. +struct AccumulatingHandler { + pending: DashMap>, +} + +impl AccumulatingHandler { + fn new() -> Self { + Self { pending: DashMap::new() } + } +} + +impl CacheEventHandler for AccumulatingHandler { + fn on_tier_event(&self, event: &CacheTierEvent<'_>) { + self.pending.entry(event.request_id).or_default().push(TierRecord { + tier_name: event.tier_name.to_owned(), + outcome: event.outcome.to_owned(), + duration_us: event.duration.as_micros() as u64, + fallback: event.fallback, + }); + } + + fn on_operation_complete(&self, event: &CacheOperationEvent<'_>) { + let tiers = self.pending.remove(&event.request_id).map(|(_, v)| v).unwrap_or_default(); + + // --- Build the summary line --- + // A TVS consumer would pack these into a bitfield here. + + let mut flags = Vec::new(); + if event.coalesced { + flags.push("COALESCED"); + } + if tiers.iter().any(|t| t.fallback) { + flags.push("FALLBACK"); + } + let flags_str = if flags.is_empty() { + String::new() + } else { + format!(" [{}]", flags.join(", ")) + }; + + // Final outcome = last tier's outcome + let outcome = tiers.last().map_or("?", |t| t.outcome.as_str()); + + print!( + "[{}] {} -> {} ({}us total){flags_str}", + event.cache_name, + event.operation, + outcome, + event.duration.as_micros(), + ); + + // Per-tier breakdown for multi-tier caches + if tiers.len() > 1 { + print!(" | "); + for (i, tier) in tiers.iter().enumerate() { + if i > 0 { + print!(", "); + } + print!("{}={} ({}us)", tier.tier_name, tier.outcome, tier.duration_us); + } + } + + println!(); + } +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +#[tokio::main] +async fn main() { + let clock = Clock::new_tokio(); + + // Single-tier cache + println!("=== Single-tier cache ==="); + let cache: Cache = Cache::builder(clock.clone()) + .memory() + .name("single") + .event_handler(AccumulatingHandler::new()) + .build(); + + cache.insert("key".to_string(), CacheEntry::new("value".to_string())).await.unwrap(); + let _ = cache.get(&"key".to_string()).await; + let _ = cache.get(&"missing".to_string()).await; + + // Two-tier cache with fallback + println!("\n=== Two-tier cache (L1 -> L2) ==="); + let l2 = Cache::builder::(clock.clone()).memory().name("l2"); + let cache2: Cache = Cache::builder(clock) + .memory() + .name("l1") + .ttl(Duration::from_secs(30)) + .event_handler(AccumulatingHandler::new()) + .fallback(l2) + .build(); + + cache2 + .insert("user:1".to_string(), CacheEntry::new("Alice".to_string())) + .await + .unwrap(); + let _ = cache2.get(&"user:1".to_string()).await; + let _ = cache2.get(&"nobody".to_string()).await; +} diff --git a/crates/cachet/examples/telemetry_subscriber.rs b/crates/cachet/examples/telemetry_subscriber.rs index 9ce1d914b..30dcbae9d 100644 --- a/crates/cachet/examples/telemetry_subscriber.rs +++ b/crates/cachet/examples/telemetry_subscriber.rs @@ -1,96 +1,41 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! Demonstrates subscribing to cachet telemetry events using a custom tracing Layer. +//! Demonstrates cachet telemetry: spans for traces, events for logs. //! -//! This example shows how to use the public constants in `cachet::telemetry::attributes` -//! to build a Layer that reacts to specific cache events. +//! Run with: `cargo run --example telemetry_subscriber --features "memory,logs"` use std::time::Duration; -use cachet::telemetry::attributes; use cachet::{Cache, CacheEntry}; use tick::Clock; -use tracing::field::{Field, Visit}; -use tracing_subscriber::Layer; -use tracing_subscriber::layer::{Context, SubscriberExt}; - -/// A simple Layer that prints cache events to stdout. -struct CacheEventPrinter; - -/// Visitor that extracts cache telemetry fields from a tracing event. -#[derive(Default)] -struct CacheFieldVisitor { - cache_name: Option, - event: Option, - duration_ns: Option, -} - -impl Visit for CacheFieldVisitor { - fn record_str(&mut self, field: &Field, value: &str) { - match field.name() { - attributes::FIELD_NAME => self.cache_name = Some(value.to_owned()), - attributes::FIELD_EVENT => self.event = Some(value.to_owned()), - _ => {} - } - } - - fn record_u128(&mut self, field: &Field, value: u128) { - if field.name() == attributes::FIELD_DURATION_NS { - self.duration_ns = Some(value); - } - } - - fn record_debug(&mut self, _field: &Field, _value: &dyn std::fmt::Debug) {} -} - -impl Layer for CacheEventPrinter { - fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) { - // Only process cachet events - if !event.metadata().target().starts_with(attributes::TARGET) { - return; - } - - let mut visitor = CacheFieldVisitor::default(); - event.record(&mut visitor); - - let cache_name = visitor.cache_name.as_deref().unwrap_or("unknown"); - let event_type = visitor.event.as_deref().unwrap_or("unknown"); - let duration_us = visitor.duration_ns.unwrap_or(0) / 1000; - - // React to specific events using the public constants - match event_type { - attributes::EVENT_HIT => println!("HIT on {cache_name} ({duration_us}µs)"), - attributes::EVENT_MISS => println!("MISS on {cache_name} ({duration_us}µs)"), - attributes::EVENT_INSERTED => println!("INSERT on {cache_name} ({duration_us}µs)"), - attributes::EVENT_EXPIRED => println!("EXPIRED on {cache_name} ({duration_us}µs)"), - other => println!("{other} on {cache_name} ({duration_us}µs)"), - } - } -} +use tracing_subscriber::fmt::format::FmtSpan; +use tracing_subscriber::layer::SubscriberExt; #[tokio::main] async fn main() { - // Set up the subscriber with our custom cache event layer - let subscriber = tracing_subscriber::registry().with(CacheEventPrinter); - let _guard = tracing::subscriber::set_default(subscriber); + // Set up a subscriber that shows both events and span open/close. + // This demonstrates what cachet telemetry looks like out of the box. + let subscriber = tracing_subscriber::registry().with( + tracing_subscriber::fmt::layer() + .with_ansi(true) + .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) + .with_target(false), + ); + tracing::subscriber::set_global_default(subscriber).expect("subscriber already set"); let clock = Clock::new_tokio(); - let cache = Cache::builder::(clock) - .memory() - .enable_logs() - .ttl(Duration::from_secs(30)) - .build(); + let cache: Cache = Cache::builder(clock).memory().enable_logs().ttl(Duration::from_secs(30)).build(); - println!("Inserting entry..."); + println!("--- Insert ---"); cache .insert("user:1".to_string(), CacheEntry::new("Alice".to_string())) .await .expect("insert failed"); - println!("Getting existing key..."); + println!("\n--- Get (hit) ---"); let _ = cache.get(&"user:1".to_string()).await; - println!("Getting missing key..."); + println!("\n--- Get (miss) ---"); let _ = cache.get(&"user:999".to_string()).await; } diff --git a/crates/cachet/src/builder/buildable.rs b/crates/cachet/src/builder/buildable.rs index 7c405b710..55c16926b 100644 --- a/crates/cachet/src/builder/buildable.rs +++ b/crates/cachet/src/builder/buildable.rs @@ -19,7 +19,7 @@ pub(crate) trait Buildable { fn build(self) -> Cache; - fn build_tier(self, clock: Clock, telemetry: CacheTelemetry) -> Self::TierOutput; + fn build_tier(self, clock: Clock, telemetry: CacheTelemetry, fallback: bool) -> Self::TierOutput; } impl Buildable for CacheBuilder @@ -36,13 +36,21 @@ where let telemetry = self.telemetry.clone(); let stampede_protection = self.stampede_protection; - let tier = DynamicCache::new(self.build_tier(clock.clone(), telemetry)); + let tier = DynamicCache::new(self.build_tier(clock.clone(), telemetry.clone(), false)); - Cache::new(type_name::(name), tier, clock, stampede_protection) + Cache::new(type_name::(name), tier, clock, telemetry, stampede_protection) } - fn build_tier(self, clock: Clock, telemetry: CacheTelemetry) -> Self::TierOutput { - CacheWrapper::new(type_name::(self.name), self.storage, clock, self.ttl, telemetry, self.policy) + fn build_tier(self, clock: Clock, telemetry: CacheTelemetry, fallback: bool) -> Self::TierOutput { + CacheWrapper::new( + type_name::(self.name), + self.storage, + clock, + self.ttl, + telemetry, + self.policy, + fallback, + ) } } @@ -61,14 +69,14 @@ where let telemetry = self.telemetry.clone(); let stampede_protection = self.stampede_protection; - let tier = DynamicCache::new(self.build_tier(clock.clone(), telemetry)); + let tier = DynamicCache::new(self.build_tier(clock.clone(), telemetry.clone(), false)); - Cache::new(type_name::(name), tier, clock, stampede_protection) + Cache::new(type_name::(name), tier, clock, telemetry, stampede_protection) } - fn build_tier(self, clock: Clock, telemetry: CacheTelemetry) -> Self::TierOutput { - let primary = self.primary_builder.build_tier(clock.clone(), telemetry.clone()); - let fallback = self.fallback_builder.build_tier(clock.clone(), telemetry.clone()); + fn build_tier(self, clock: Clock, telemetry: CacheTelemetry, fallback: bool) -> Self::TierOutput { + let primary = self.primary_builder.build_tier(clock.clone(), telemetry.clone(), fallback); + let fallback = self.fallback_builder.build_tier(clock.clone(), telemetry.clone(), true); FallbackCache::new( type_name::(self.name), diff --git a/crates/cachet/src/builder/cache.rs b/crates/cachet/src/builder/cache.rs index f9d05b583..f695d59eb 100644 --- a/crates/cachet/src/builder/cache.rs +++ b/crates/cachet/src/builder/cache.rs @@ -3,6 +3,7 @@ use std::hash::Hash; use std::marker::PhantomData; +use std::sync::Arc; use std::time::Duration; #[cfg(feature = "memory")] @@ -14,6 +15,7 @@ use super::fallback::FallbackBuilder; use super::sealed::{CacheTierBuilder, Sealed}; use crate::policy::InsertPolicy; use crate::telemetry::CacheTelemetry; +use crate::telemetry::handler::CacheEventHandler; use crate::{Cache, CacheTier}; /// Builder for constructing a cache with a single tier. @@ -168,7 +170,7 @@ impl CacheBuilder { #[cfg(any(feature = "logs", test))] #[must_use] pub fn enable_logs(mut self) -> Self { - self.telemetry = CacheTelemetry::with_logging(); + self.telemetry = self.telemetry.enable_logging(); self } @@ -198,6 +200,13 @@ impl CacheBuilder { self } + /// Registers a callback for structured cache events. + #[must_use] + pub fn event_handler(mut self, handler: impl CacheEventHandler + 'static) -> Self { + self.telemetry = self.telemetry.with_handler(Arc::new(handler)); + self + } + /// Sets the time-to-live (TTL) for entries in this cache tier. /// /// Entries older than the TTL will be considered expired and won't be diff --git a/crates/cachet/src/builder/transform.rs b/crates/cachet/src/builder/transform.rs index 990b1e9d2..97a48e6f7 100644 --- a/crates/cachet/src/builder/transform.rs +++ b/crates/cachet/src/builder/transform.rs @@ -245,17 +245,17 @@ where let clock = self.clock.clone(); let telemetry = self.telemetry.clone(); let stampede_protection = self.stampede_protection; - let tier = self.build_tier(clock.clone(), telemetry); + let tier = self.build_tier(clock.clone(), telemetry.clone(), false); - crate::Cache::new(type_name::(None), tier, clock, stampede_protection) + crate::Cache::new(type_name::(None), tier, clock, telemetry, stampede_protection) } - fn build_tier(self, clock: Clock, telemetry: CacheTelemetry) -> Self::TierOutput { + fn build_tier(self, clock: Clock, telemetry: CacheTelemetry, fallback: bool) -> Self::TierOutput { // Build pre-transform tier - let pre_tier = self.pre.build_tier(clock.clone(), telemetry.clone()); + let pre_tier = self.pre.build_tier(clock.clone(), telemetry.clone(), fallback); // Build post-transform tier, wrap in TransformAdapter - let post_tier = self.post.build_tier(clock.clone(), telemetry.clone()); + let post_tier = self.post.build_tier(clock.clone(), telemetry.clone(), true); let adapted = TransformAdapter::from_boxed(post_tier, self.key_encoder, self.value_codec); // Combine: pre is primary, adapted is fallback diff --git a/crates/cachet/src/cache.rs b/crates/cachet/src/cache.rs index 61cc9d1e7..dbb7d5a94 100644 --- a/crates/cachet/src/cache.rs +++ b/crates/cachet/src/cache.rs @@ -9,10 +9,13 @@ use std::hash::Hash; use cachet_tier::{CacheEntry, CacheTier, DynamicCache, SizeError}; use tick::Clock; +use tracing::Instrument; use uniflight::Merger; use crate::Error; use crate::builder::CacheBuilder; +use crate::telemetry::CacheTelemetry; +use crate::telemetry::cache::{WithRequestIdExt, next_request_id}; /// Type alias for cache names used in telemetry. /// @@ -109,6 +112,7 @@ pub struct Cache { pub(crate) name: CacheName, pub(crate) storage: DynamicCache, pub(crate) clock: Clock, + pub(crate) telemetry: CacheTelemetry, /// Mergers for stampede protection on all operations. /// Only present when `stampede_protection` is enabled. mergers: Option>, @@ -145,11 +149,18 @@ where K: Clone + Eq + Hash + Send + Sync + 'static, V: Clone + Send + Sync + 'static, { - pub(crate) fn new(name: CacheName, storage: DynamicCache, clock: Clock, stampede_protection: bool) -> Self { + pub(crate) fn new( + name: CacheName, + storage: DynamicCache, + clock: Clock, + telemetry: CacheTelemetry, + stampede_protection: bool, + ) -> Self { Self { name, storage, clock, + telemetry, mergers: stampede_protection.then(Mergers::new), } } @@ -216,18 +227,30 @@ where K: Borrow, Q: Hash + Eq + ToOwned + ?Sized + Send + Sync, { - if let Some(mergers) = &self.mergers { - let owned = key.to_owned(); - let storage = &self.storage; - mergers - .get - .execute(key, move || async move { storage.get(&owned).await }) - .await - .unwrap_or_else(|panicked| Err(Error::from_source(panicked))) - } else { - let owned = key.to_owned(); - self.storage.get(&owned).await + let request_id = next_request_id(); + let span = self.telemetry.get_span(self.name); + let watch = self.clock.stopwatch(); + async { + let (result, coalesced) = if let Some(mergers) = &self.mergers { + let owned = key.to_owned(); + let storage = &self.storage; + let result = mergers + .get + .execute(key, move || async move { storage.get(&owned).await }) + .await + .unwrap_or_else(|panicked| Err(Error::from_source(panicked))); + (result, true) + } else { + let owned = key.to_owned(); + (self.storage.get(&owned).await, false) + }; + self.telemetry + .complete_operation(request_id, self.name, "cache.get", watch.elapsed(), coalesced); + result } + .with_request_id(request_id) + .instrument(span) + .await } /// Inserts a value into the cache. @@ -256,7 +279,18 @@ where /// # }; /// ``` pub async fn insert(&self, key: K, entry: impl Into>) -> Result<(), Error> { - self.storage.insert(key, entry.into()).await + let request_id = next_request_id(); + let span = self.telemetry.insert_span(self.name); + let watch = self.clock.stopwatch(); + async { + let result = self.storage.insert(key, entry.into()).await; + self.telemetry + .complete_operation(request_id, self.name, "cache.insert", watch.elapsed(), false); + result + } + .with_request_id(request_id) + .instrument(span) + .await } /// Invalidates (removes) a value from the cache. @@ -278,18 +312,30 @@ where K: Borrow, Q: Hash + Eq + ToOwned + ?Sized + Send + Sync, { - if let Some(mergers) = &self.mergers { - let owned = key.to_owned(); - let storage = &self.storage; - mergers - .invalidate - .execute(key, move || async move { storage.invalidate(&owned).await }) - .await - .unwrap_or_else(|panicked| Err(Error::from_source(panicked))) - } else { - let owned = key.to_owned(); - self.storage.invalidate(&owned).await + let request_id = next_request_id(); + let span = self.telemetry.invalidate_span(self.name); + let watch = self.clock.stopwatch(); + async { + let (result, coalesced) = if let Some(mergers) = &self.mergers { + let owned = key.to_owned(); + let storage = &self.storage; + let result = mergers + .invalidate + .execute(key, move || async move { storage.invalidate(&owned).await }) + .await + .unwrap_or_else(|panicked| Err(Error::from_source(panicked))); + (result, true) + } else { + let owned = key.to_owned(); + (self.storage.invalidate(&owned).await, false) + }; + self.telemetry + .complete_operation(request_id, self.name, "cache.invalidate", watch.elapsed(), coalesced); + result } + .with_request_id(request_id) + .instrument(span) + .await } /// Returns true if the cache contains a value for the given key. @@ -311,7 +357,18 @@ where /// /// Returns an error if the underlying cache tier operation fails. pub async fn clear(&self) -> Result<(), Error> { - self.storage.clear().await + let request_id = next_request_id(); + let span = self.telemetry.clear_span(self.name); + let watch = self.clock.stopwatch(); + async { + let result = self.storage.clear().await; + self.telemetry + .complete_operation(request_id, self.name, "cache.clear", watch.elapsed(), false); + result + } + .with_request_id(request_id) + .instrument(span) + .await } /// Returns an **approximate** count of entries, if supported by the underlying storage. @@ -408,16 +465,28 @@ where Q: Hash + Eq + ToOwned + ?Sized + Send + Sync, Fut: Future + Send, { - let owned = key.to_owned(); - if let Some(mergers) = &self.mergers { - mergers - .get_or_insert - .execute(key, move || async move { self.do_get_or_insert(&owned, f).await }) - .await - .unwrap_or_else(|panicked| Err(Error::from_source(panicked))) - } else { - self.do_get_or_insert(&owned, f).await + let request_id = next_request_id(); + let span = self.telemetry.get_or_insert_span(self.name); + let watch = self.clock.stopwatch(); + async { + let owned = key.to_owned(); + let (result, coalesced) = if let Some(mergers) = &self.mergers { + let result = mergers + .get_or_insert + .execute(key, move || async move { self.do_get_or_insert(&owned, f).await }) + .await + .unwrap_or_else(|panicked| Err(Error::from_source(panicked))); + (result, true) + } else { + (self.do_get_or_insert(&owned, f).await, false) + }; + self.telemetry + .complete_operation(request_id, self.name, "cache.get_or_insert", watch.elapsed(), coalesced); + result } + .with_request_id(request_id) + .instrument(span) + .await } async fn do_get_or_insert(&self, key: &K, f: impl FnOnce() -> Fut) -> Result, Error> @@ -481,16 +550,28 @@ where E: std::error::Error + Send + Sync + 'static, Fut: Future> + Send, { - let owned = key.to_owned(); - if let Some(mergers) = &self.mergers { - mergers - .try_get_or_insert - .execute(key, move || async move { self.do_try_get_or_insert(&owned, f).await }) - .await - .unwrap_or_else(|panicked| Err(Error::from_source(panicked))) - } else { - self.do_try_get_or_insert(&owned, f).await + let request_id = next_request_id(); + let span = self.telemetry.try_get_or_insert_span(self.name); + let watch = self.clock.stopwatch(); + async { + let owned = key.to_owned(); + let (result, coalesced) = if let Some(mergers) = &self.mergers { + let result = mergers + .try_get_or_insert + .execute(key, move || async move { self.do_try_get_or_insert(&owned, f).await }) + .await + .unwrap_or_else(|panicked| Err(Error::from_source(panicked))); + (result, true) + } else { + (self.do_try_get_or_insert(&owned, f).await, false) + }; + self.telemetry + .complete_operation(request_id, self.name, "cache.try_get_or_insert", watch.elapsed(), coalesced); + result } + .with_request_id(request_id) + .instrument(span) + .await } async fn do_try_get_or_insert(&self, key: &K, f: impl FnOnce() -> Fut) -> Result, Error> @@ -558,16 +639,28 @@ where Q: Hash + Eq + ToOwned + ?Sized + Send + Sync, Fut: Future> + Send, { - let owned = key.to_owned(); - if let Some(mergers) = &self.mergers { - mergers - .optionally_get_or_insert - .execute(key, move || async move { self.do_optionally_get_or_insert(&owned, f).await }) - .await - .unwrap_or_else(|panicked| Err(Error::from_source(panicked))) - } else { - self.do_optionally_get_or_insert(&owned, f).await + let request_id = next_request_id(); + let span = self.telemetry.optionally_get_or_insert_span(self.name); + let watch = self.clock.stopwatch(); + async { + let owned = key.to_owned(); + let (result, coalesced) = if let Some(mergers) = &self.mergers { + let result = mergers + .optionally_get_or_insert + .execute(key, move || async move { self.do_optionally_get_or_insert(&owned, f).await }) + .await + .unwrap_or_else(|panicked| Err(Error::from_source(panicked))); + (result, true) + } else { + (self.do_optionally_get_or_insert(&owned, f).await, false) + }; + self.telemetry + .complete_operation(request_id, self.name, "cache.optionally_get_or_insert", watch.elapsed(), coalesced); + result } + .with_request_id(request_id) + .instrument(span) + .await } async fn do_optionally_get_or_insert(&self, key: &K, f: impl FnOnce() -> Fut) -> Result>, Error> @@ -620,9 +713,13 @@ where #[cfg(test)] mod tests { + use std::sync::{Arc, Mutex}; + use cachet_tier::MockCache; use super::*; + use crate::telemetry::handler::RequestId; + use crate::{CacheEventHandler, CacheOperationEvent, CacheTierEvent}; fn block_on(f: F) -> F::Output { futures::executor::block_on(f) @@ -670,6 +767,88 @@ mod tests { }); } + #[test] + fn cache_event_handler_receives_fallback_tier_events() { + type EventRecord = Vec<(RequestId, String, String, bool)>; + + #[derive(Clone)] + struct RecordingHandler { + tier_events: Arc>, + operation_events: Arc>, + } + + impl CacheEventHandler for RecordingHandler { + fn on_tier_event(&self, event: &CacheTierEvent<'_>) { + self.tier_events.lock().expect("test handler mutex should not be poisoned").push(( + event.request_id, + event.tier_name.to_string(), + event.outcome.to_string(), + event.fallback, + )); + } + + fn on_operation_complete(&self, event: &CacheOperationEvent<'_>) { + self.operation_events + .lock() + .expect("test handler mutex should not be poisoned") + .push(( + event.request_id, + event.cache_name.to_string(), + event.operation.to_string(), + event.coalesced, + )); + } + } + + let tier_events = Arc::new(Mutex::new(Vec::new())); + let operation_events = Arc::new(Mutex::new(Vec::new())); + + block_on(async { + let clock = Clock::new_frozen(); + let handler = RecordingHandler { + tier_events: Arc::clone(&tier_events), + operation_events: Arc::clone(&operation_events), + }; + + let l2 = Cache::builder::(clock.clone()).storage(MockCache::new()).name("l2"); + let cache = Cache::builder::(clock) + .storage(MockCache::new()) + .name("l1") + .event_handler(handler) + .fallback(l2) + .build(); + + let result = cache.get("missing").await.unwrap(); + assert!(result.is_none()); + }); + + let tier_events = tier_events.lock().expect("test handler mutex should not be poisoned").clone(); + let operation_events = operation_events.lock().expect("test handler mutex should not be poisoned").clone(); + let request_id = operation_events[0].0; + + assert_eq!( + tier_events, + vec![ + ( + request_id, + "l1".to_string(), + crate::telemetry::attributes::EVENT_MISS.to_string(), + false + ), + ( + request_id, + "l2".to_string(), + crate::telemetry::attributes::EVENT_MISS.to_string(), + true + ), + ] + ); + assert_eq!( + operation_events, + vec![(request_id, "l1".to_string(), "cache.get".to_string(), false)] + ); + } + #[test] fn cache_insert_and_get() { block_on(async { diff --git a/crates/cachet/src/fallback.rs b/crates/cachet/src/fallback.rs index 4a8d9ae52..198979287 100644 --- a/crates/cachet/src/fallback.rs +++ b/crates/cachet/src/fallback.rs @@ -13,12 +13,12 @@ use std::sync::Arc; use cachet_tier::{CacheEntry, CacheTier, SizeError}; use futures::join; use tick::Clock; +use tracing::Instrument; use crate::Error; use crate::cache::CacheName; use crate::refresh::TimeToRefresh; use crate::telemetry::CacheTelemetry; -use crate::telemetry::ext::ClockExt; pub(crate) struct FallbackCacheInner { pub(crate) name: CacheName, @@ -102,20 +102,21 @@ where /// /// Separated from [`get`](Self::get) to keep the hot path (primary hits) small. async fn get_from_fallback(&self, key: &K) -> Result>, Error> { - let timed = self.inner.clock.timed_async(self.inner.fallback.get(key)).await; - self.inner.telemetry.cache_fallback(self.inner.name, timed.duration); - - // Propagate any error from fallback - let fallback_value = timed.result?; + async { + self.inner.telemetry.record_fallback(); + let fallback_value = self.inner.fallback.get(key).await?; + + if let Some(ref v) = fallback_value { + // Insert errors are intentionally swallowed - a failed promotion should not + // fail the overall get. The CacheWrapper around the primary tier already + // records telemetry for the insert (Inserted, Rejected, or Error). + let _ = self.inner.primary.insert(key.clone(), v.clone()).await; + } - if let Some(ref v) = fallback_value { - // Insert errors are intentionally swallowed - a failed promotion should not - // fail the overall get. The CacheWrapper around the primary tier already - // records telemetry for the insert (Inserted, Rejected, or Error). - let _ = self.inner.primary.insert(key.clone(), v.clone()).await; + Ok(fallback_value) } - - Ok(fallback_value) + .instrument(tracing::Span::current()) + .await } } @@ -193,7 +194,7 @@ mod tests { fn make_primary() -> TestPrimary { let clock = Clock::new_frozen(); let telemetry = CacheTelemetry::new(); - CacheWrapper::new("primary", MockCache::new(), clock, None, telemetry, InsertPolicy::default()) + CacheWrapper::new("primary", MockCache::new(), clock, None, telemetry, InsertPolicy::default(), false) } fn make_fallback_cache() -> TestFallbackCache { @@ -446,6 +447,7 @@ mod tests { None, telemetry.clone(), InsertPolicy::default(), + false, ); let fc = FallbackCache::new("test", primary, fallback_mock, clock, Some(refresh), telemetry); diff --git a/crates/cachet/src/lib.rs b/crates/cachet/src/lib.rs index 61f8239c2..a300d6338 100644 --- a/crates/cachet/src/lib.rs +++ b/crates/cachet/src/lib.rs @@ -228,8 +228,9 @@ //! //! Enable with the `logs` feature and `.enable_logs()` on the cache builder. //! -//! Each cache operation emits a structured [`tracing`] event with fields -//! `cache.name`, `cache.event`, and `cache.duration_ns`. +//! Each cache operation creates a structured [`tracing`] span and emits events inside +//! that span. Span fields include `cache.name`, `cache.event`, and `cache.duration_ns`, +//! with `cache.coalesced` and `cache.fallback` recorded when applicable. //! //! ## Subscribing to events //! @@ -254,7 +255,7 @@ //! | Level | Events | //! |-------|--------| //! | ERROR | `cache.get_error`, `cache.insert_error`, `cache.invalidate_error`, `cache.clear_error` | -//! | INFO | `cache.expired`, `cache.refresh_miss`, `cache.inserted`, `cache.insert_rejected`, `cache.invalidated`, `cache.fallback` | +//! | INFO | `cache.expired`, `cache.refresh_miss`, `cache.inserted`, `cache.insert_rejected`, `cache.invalidated` | //! | DEBUG | `cache.hit`, `cache.miss`, `cache.refresh_hit`, `cache.cleared` | mod builder; @@ -290,4 +291,6 @@ pub use policy::InsertPolicy; #[doc(inline)] pub use refresh::TimeToRefresh; #[doc(inline)] +pub use telemetry::handler::{CacheEventHandler, CacheOperationEvent, CacheTierEvent}; +#[doc(inline)] pub use transform::{Codec, DecodeOutcome, Encoder, TransformCodec, TransformEncoder, infallible, infallible_owned}; diff --git a/crates/cachet/src/refresh.rs b/crates/cachet/src/refresh.rs index d1eba0010..ba5b40e06 100644 --- a/crates/cachet/src/refresh.rs +++ b/crates/cachet/src/refresh.rs @@ -16,9 +16,9 @@ use std::time::{Duration, SystemTime}; use anyspawn::Spawner; use cachet_tier::{CacheEntry, CacheTier}; use parking_lot::Mutex; +use tracing::Instrument; use crate::fallback::{FallbackCache, FallbackCacheInner}; -use crate::telemetry::ext::ClockExt; /// Configuration for background cache refresh. /// @@ -145,16 +145,20 @@ where F: CacheTier + Send + Sync + 'static, { pub(crate) async fn fetch_and_promote(&self, key: K) { - let timed = self.clock.timed_async(self.fallback.get(&key)).await; - - match timed.result { - Ok(Some(value)) => self.handle_fallback_hit(key, value, timed.duration).await, - Ok(None) | Err(_) => self.handle_fallback_miss(timed.duration), + let span = self.telemetry.get_span(self.name); + async { + let watch = self.clock.stopwatch(); + match self.fallback.get(&key).await { + Ok(Some(value)) => self.handle_fallback_hit(key, value, watch.elapsed()).await, + Ok(None) | Err(_) => self.handle_fallback_miss(watch.elapsed()), + } } + .instrument(span) + .await; } async fn handle_fallback_hit(&self, key: K, value: CacheEntry, fetch_duration: Duration) { - self.telemetry.refresh_hit(self.name, fetch_duration); + self.telemetry.record_refresh_hit(fetch_duration); self.promote_to_primary(key, value).await; } @@ -167,7 +171,7 @@ where } fn handle_fallback_miss(&self, duration: Duration) { - self.telemetry.refresh_miss(self.name, duration); + self.telemetry.record_refresh_miss(duration); } } @@ -385,6 +389,7 @@ mod fetch_and_promote_tests { None, telemetry.clone(), InsertPolicy::never(), + false, ); let fc = FallbackCache::new("test", primary, fallback, clock, None, telemetry); @@ -475,7 +480,7 @@ mod fetch_and_promote_tests { fn make_wrapper(mock: MockCache) -> MockWrapper { let clock = Clock::new_frozen(); let telemetry = CacheTelemetry::new(); - CacheWrapper::new("test_primary", mock, clock, None, telemetry, InsertPolicy::default()) + CacheWrapper::new("test_primary", mock, clock, None, telemetry, InsertPolicy::default(), false) } fn build_mock_fallback_cache( @@ -507,7 +512,15 @@ mod fetch_and_promote_tests { let telemetry = CacheTelemetry::new(); let refresh = TimeToRefresh::new(Duration::from_secs(60), Spawner::new_tokio()); - let primary_wrapper = CacheWrapper::new("primary", primary, clock.clone(), None, telemetry.clone(), InsertPolicy::default()); + let primary_wrapper = CacheWrapper::new( + "primary", + primary, + clock.clone(), + None, + telemetry.clone(), + InsertPolicy::default(), + false, + ); let fc = FallbackCache::new("test", primary_wrapper, fallback, clock, Some(refresh), telemetry); let key = "key".to_string(); diff --git a/crates/cachet/src/telemetry/attributes.rs b/crates/cachet/src/telemetry/attributes.rs index f7f7733b7..2f8422bf7 100644 --- a/crates/cachet/src/telemetry/attributes.rs +++ b/crates/cachet/src/telemetry/attributes.rs @@ -3,9 +3,9 @@ //! Public constants for cachet telemetry field names and event values. //! -//! Use these constants to filter or match cachet events in a custom -//! `tracing_subscriber::Layer`. All cachet events are emitted with -//! `FIELD_NAME`, `FIELD_EVENT`, and `FIELD_DURATION_NS` fields. +//! Use these constants to filter or match cachet spans and events in a custom +//! `tracing_subscriber::Layer`. Cache operation spans record `FIELD_NAME`, +//! `FIELD_EVENT`, and `FIELD_DURATION_NS`, with additional flag fields when applicable. //! //! # Example //! @@ -41,6 +41,12 @@ pub const FIELD_EVENT: &str = "cache.event"; /// Field name for the operation duration in nanoseconds. pub const FIELD_DURATION_NS: &str = "cache.duration_ns"; +/// Field name recording whether request coalescing was used. +pub const FIELD_COALESCED: &str = "cache.coalesced"; + +/// Field name recording whether a fallback tier was consulted. +pub const FIELD_FALLBACK: &str = "cache.fallback"; + // -- Event values (emitted in the `cache.event` field) -- /// Cache entry was found and valid. @@ -55,9 +61,6 @@ pub const EVENT_EXPIRED: &str = "cache.expired"; /// An error occurred during a get operation. pub const EVENT_GET_ERROR: &str = "cache.get_error"; -/// A fallback tier was consulted. -pub const EVENT_FALLBACK: &str = "cache.fallback"; - /// An entry was successfully inserted. pub const EVENT_INSERTED: &str = "cache.inserted"; @@ -95,6 +98,8 @@ mod tests { assert_eq!(FIELD_NAME, "cache.name"); assert_eq!(FIELD_EVENT, "cache.event"); assert_eq!(FIELD_DURATION_NS, "cache.duration_ns"); + assert_eq!(FIELD_COALESCED, "cache.coalesced"); + assert_eq!(FIELD_FALLBACK, "cache.fallback"); } #[test] @@ -104,7 +109,6 @@ mod tests { EVENT_MISS, EVENT_EXPIRED, EVENT_GET_ERROR, - EVENT_FALLBACK, EVENT_INSERTED, EVENT_INSERT_REJECTED, EVENT_INSERT_ERROR, diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 2179f6a09..9a187f717 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -3,233 +3,536 @@ //! Cache telemetry types and recording. +use std::cell::Cell; +use std::pin::Pin; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::task::{Context, Poll}; use std::time::Duration; -#[cfg(any(feature = "logs", test))] -use thread_aware::{Arc, PerCore}; +use pin_project_lite::pin_project; +use tracing::Span; use crate::cache::CacheName; use crate::telemetry::attributes; +use crate::telemetry::handler::{CacheEventHandler, CacheOperationEvent, CacheTierEvent, RequestId}; -/// Internal state for cache telemetry when features are enabled. -#[cfg(any(feature = "logs", test))] -#[derive(Clone, Debug)] -pub(crate) struct CacheTelemetryInner { - pub(crate) logging_enabled: bool, +/// Process-wide counter for generating unique request IDs. +static NEXT_REQUEST_ID: AtomicU64 = AtomicU64::new(1); + +std::thread_local! { + static CURRENT_REQUEST_ID: Cell = const { Cell::new(0) }; } -#[cfg(any(feature = "logs", test))] -impl CacheTelemetryInner { - #[inline] - fn debug(&self, cache_name: CacheName, event: &'static str, duration: Duration) { - if self.logging_enabled { - tracing::debug!( - cache.name = cache_name, - cache.event = event, - cache.duration_ns = duration.as_nanos() - ); - } - } +/// Generates a unique request ID for correlating tier events with their parent operation. +pub(crate) fn next_request_id() -> RequestId { + NEXT_REQUEST_ID.fetch_add(1, Ordering::Relaxed) +} - #[inline] - fn info(&self, cache_name: CacheName, event: &'static str, duration: Duration) { - if self.logging_enabled { - tracing::info!( - cache.name = cache_name, - cache.event = event, - cache.duration_ns = duration.as_nanos() - ); - } +pin_project! { + /// A future wrapper that restores the request ID into the thread-local + /// on every poll. This ensures the correct request ID is available + /// even if the task migrates to a different thread between polls. + /// + /// Same pattern as `tracing::Instrument` which re-enters the span per poll. + pub(crate) struct WithRequestId { + #[pin] + inner: F, + request_id: RequestId, } +} - #[inline] - fn error(&self, cache_name: CacheName, event: &'static str, duration: Duration) { - if self.logging_enabled { - tracing::error!( - cache.name = cache_name, - cache.event = event, - cache.duration_ns = duration.as_nanos() - ); - } +impl Future for WithRequestId { + type Output = F::Output; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.project(); + CURRENT_REQUEST_ID.with(|cell| cell.set(*this.request_id)); + this.inner.poll(cx) } } -/// Internal state for cache telemetry when no features are enabled (no-op). -#[cfg(not(any(feature = "logs", test)))] -#[derive(Clone, Debug, Default)] -pub(crate) struct CacheTelemetryInner; - -#[cfg(not(any(feature = "logs", test)))] -#[expect(clippy::unused_self, reason = "Methods must match the logs-enabled impl signature")] -impl CacheTelemetryInner { - #[inline] - fn debug(&self, _: CacheName, _: &'static str, _: Duration) {} +/// Extension trait for wrapping a future with a request ID. +pub(crate) trait WithRequestIdExt: Sized { + /// Wraps this future so that `request_id` is set in the thread-local + /// on every poll, surviving task migration across threads. + fn with_request_id(self, request_id: RequestId) -> WithRequestId; +} - #[inline] - fn info(&self, _: CacheName, _: &'static str, _: Duration) {} +impl WithRequestIdExt for F { + fn with_request_id(self, request_id: RequestId) -> WithRequestId { + WithRequestId { inner: self, request_id } + } +} - #[inline] - fn error(&self, _: CacheName, _: &'static str, _: Duration) {} +/// Converts a `Duration` to nanoseconds as `u64`, saturating at `u64::MAX`. +/// A `u64` of nanoseconds covers ~584 years — overflow is not a practical concern. +fn saturating_nanos(duration: Duration) -> u64 { + u64::try_from(duration.as_nanos()).unwrap_or(u64::MAX) } /// Cache telemetry provider. /// /// This type is created internally by the cache builder and handles -/// recording cache operations as structured tracing events. -#[derive(Clone, Debug)] +/// creating cache operation spans and recording structured tracing events. +#[derive(Clone, Default)] pub struct CacheTelemetry { #[cfg(any(feature = "logs", test))] - pub(crate) inner: Arc, - #[cfg(not(any(feature = "logs", test)))] - pub(crate) inner: CacheTelemetryInner, + logging_enabled: bool, + handler: Option>, +} + +impl std::fmt::Debug for CacheTelemetry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CacheTelemetry") + .field("logging_enabled", &{ + #[cfg(any(feature = "logs", test))] + { + self.logging_enabled + } + #[cfg(not(any(feature = "logs", test)))] + { + false + } + }) + .field("has_handler", &self.handler.is_some()) + .finish() + } } impl CacheTelemetry { /// Creates a new `CacheTelemetry` with logging disabled. #[must_use] pub(crate) fn new() -> Self { - #[cfg(any(feature = "logs", test))] - { - Self { - inner: Arc::from_unaware(CacheTelemetryInner { logging_enabled: false }), - } - } - #[cfg(not(any(feature = "logs", test)))] - { - Self { - inner: CacheTelemetryInner, - } + Self { + #[cfg(any(feature = "logs", test))] + logging_enabled: false, + handler: None, } } - /// Creates a new `CacheTelemetry` with logging enabled. - #[cfg(any(feature = "logs", test))] #[must_use] - pub(crate) fn with_logging() -> Self { - Self { - inner: Arc::from_unaware(CacheTelemetryInner { logging_enabled: true }), + pub(crate) fn with_handler(mut self, handler: Arc) -> Self { + self.handler = Some(handler); + self + } + + pub(crate) fn current_request_id() -> RequestId { + CURRENT_REQUEST_ID.with(Cell::get) + } + + fn emit_tier_event(&self, request_id: RequestId, tier_name: CacheName, outcome: &'static str, duration: Duration, fallback: bool) { + if let Some(handler) = &self.handler { + handler.on_tier_event(&CacheTierEvent { + request_id, + tier_name, + outcome, + duration, + fallback, + }); } } - // -- Get -- + #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + fn record_debug_with_duration(&self, event: &'static str, duration: Duration) { + let span = Span::current(); + if !span.is_disabled() { + let duration_ns = saturating_nanos(duration); + span.record(attributes::FIELD_EVENT, event); + span.record(attributes::FIELD_DURATION_NS, duration_ns); + tracing::debug!(cache.event = event, cache.duration_ns = duration_ns); + } + } - /// Records a cache hit (key found and not expired). - #[inline] - pub(crate) fn cache_hit(&self, cache_name: CacheName, duration: Duration) { - self.inner.debug(cache_name, attributes::EVENT_HIT, duration); + #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + fn record_info_with_duration(&self, event: &'static str, duration: Duration) { + let span = Span::current(); + if !span.is_disabled() { + let duration_ns = saturating_nanos(duration); + span.record(attributes::FIELD_EVENT, event); + span.record(attributes::FIELD_DURATION_NS, duration_ns); + tracing::info!(cache.event = event, cache.duration_ns = duration_ns); + } } - /// Records a cache miss (key not found). - #[inline] - pub(crate) fn cache_miss(&self, cache_name: CacheName, duration: Duration) { - self.inner.debug(cache_name, attributes::EVENT_MISS, duration); + #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + fn record_error_with_duration(&self, event: &'static str, duration: Duration) { + let span = Span::current(); + if !span.is_disabled() { + let duration_ns = saturating_nanos(duration); + span.record(attributes::FIELD_EVENT, event); + span.record(attributes::FIELD_DURATION_NS, duration_ns); + tracing::error!(cache.event = event, cache.duration_ns = duration_ns); + } } - /// Records a cache entry that was found but expired. - #[inline] - pub(crate) fn cache_expired(&self, cache_name: CacheName, duration: Duration) { - self.inner.info(cache_name, attributes::EVENT_EXPIRED, duration); + #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + fn record_info_event(&self, event: &'static str) { + let span = Span::current(); + if !span.is_disabled() { + span.record(attributes::FIELD_EVENT, event); + tracing::info!(cache.event = event); + } } - /// Records an error during a get operation. - #[inline] - pub(crate) fn get_error(&self, cache_name: CacheName, duration: Duration) { - self.inner.error(cache_name, attributes::EVENT_GET_ERROR, duration); + pub(crate) fn record_hit(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_debug_with_duration(attributes::EVENT_HIT, duration); + self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_HIT, duration, fallback); } - /// Records a fallback tier lookup. - #[inline] - pub(crate) fn cache_fallback(&self, cache_name: CacheName, duration: Duration) { - self.inner.info(cache_name, attributes::EVENT_FALLBACK, duration); + pub(crate) fn record_miss(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_debug_with_duration(attributes::EVENT_MISS, duration); + self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_MISS, duration, fallback); } - // -- Refresh -- + pub(crate) fn record_expired(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_info_with_duration(attributes::EVENT_EXPIRED, duration); + self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_EXPIRED, duration, fallback); + } - /// Records a successful background refresh from fallback. - #[inline] - pub(crate) fn refresh_hit(&self, cache_name: CacheName, duration: Duration) { - self.inner.debug(cache_name, attributes::EVENT_REFRESH_HIT, duration); + pub(crate) fn record_get_error(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_error_with_duration(attributes::EVENT_GET_ERROR, duration); + self.emit_tier_event( + Self::current_request_id(), + tier_name, + attributes::EVENT_GET_ERROR, + duration, + fallback, + ); } - /// Records a background refresh miss (fallback had no data or returned error). - #[inline] - pub(crate) fn refresh_miss(&self, cache_name: CacheName, duration: Duration) { - self.inner.info(cache_name, attributes::EVENT_REFRESH_MISS, duration); + pub(crate) fn record_inserted(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_info_with_duration(attributes::EVENT_INSERTED, duration); + self.emit_tier_event( + Self::current_request_id(), + tier_name, + attributes::EVENT_INSERTED, + duration, + fallback, + ); } - // -- Insert -- + pub(crate) fn record_insert_error(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_error_with_duration(attributes::EVENT_INSERT_ERROR, duration); + self.emit_tier_event( + Self::current_request_id(), + tier_name, + attributes::EVENT_INSERT_ERROR, + duration, + fallback, + ); + } - /// Records a successful cache insert. - #[inline] - pub(crate) fn cache_inserted(&self, cache_name: CacheName, duration: Duration) { - self.inner.info(cache_name, attributes::EVENT_INSERTED, duration); + pub(crate) fn record_invalidated(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_info_with_duration(attributes::EVENT_INVALIDATED, duration); + self.emit_tier_event( + Self::current_request_id(), + tier_name, + attributes::EVENT_INVALIDATED, + duration, + fallback, + ); } - /// Records a cache insert that was rejected by the insert policy. - #[inline] - pub(crate) fn insert_rejected(&self, cache_name: CacheName, duration: Duration) { - self.inner.info(cache_name, attributes::EVENT_INSERT_REJECTED, duration); + pub(crate) fn record_invalidate_error(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_error_with_duration(attributes::EVENT_INVALIDATE_ERROR, duration); + self.emit_tier_event( + Self::current_request_id(), + tier_name, + attributes::EVENT_INVALIDATE_ERROR, + duration, + fallback, + ); } - /// Records an error during an insert operation. - #[inline] - pub(crate) fn insert_error(&self, cache_name: CacheName, duration: Duration) { - self.inner.error(cache_name, attributes::EVENT_INSERT_ERROR, duration); + pub(crate) fn record_cleared(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_debug_with_duration(attributes::EVENT_CLEARED, duration); + self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_CLEARED, duration, fallback); } - // -- Invalidate -- + pub(crate) fn record_clear_error(&self, tier_name: CacheName, duration: Duration, fallback: bool) { + self.record_error_with_duration(attributes::EVENT_CLEAR_ERROR, duration); + self.emit_tier_event( + Self::current_request_id(), + tier_name, + attributes::EVENT_CLEAR_ERROR, + duration, + fallback, + ); + } - /// Records a successful cache invalidation. - #[inline] - pub(crate) fn cache_invalidated(&self, cache_name: CacheName, duration: Duration) { - self.inner.info(cache_name, attributes::EVENT_INVALIDATED, duration); + pub(crate) fn record_refresh_hit(&self, duration: Duration) { + self.record_debug_with_duration(attributes::EVENT_REFRESH_HIT, duration); } - /// Records an error during an invalidate operation. - #[inline] - pub(crate) fn invalidate_error(&self, cache_name: CacheName, duration: Duration) { - self.inner.error(cache_name, attributes::EVENT_INVALIDATE_ERROR, duration); + pub(crate) fn record_refresh_miss(&self, duration: Duration) { + self.record_info_with_duration(attributes::EVENT_REFRESH_MISS, duration); } - // -- Clear -- + pub(crate) fn record_insert_rejected(&self, tier_name: CacheName, fallback: bool) { + self.record_info_event(attributes::EVENT_INSERT_REJECTED); + self.emit_tier_event( + Self::current_request_id(), + tier_name, + attributes::EVENT_INSERT_REJECTED, + Duration::ZERO, + fallback, + ); + } - /// Records a successful cache clear. - #[inline] - pub(crate) fn cache_cleared(&self, cache_name: CacheName, duration: Duration) { - self.inner.debug(cache_name, attributes::EVENT_CLEARED, duration); + #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + pub(crate) fn record_fallback(&self) { + let span = Span::current(); + if !span.is_disabled() { + span.record(attributes::FIELD_FALLBACK, true); + } } - /// Records an error during a clear operation. - #[inline] - pub(crate) fn clear_error(&self, cache_name: CacheName, duration: Duration) { - self.inner.error(cache_name, attributes::EVENT_CLEAR_ERROR, duration); + pub(crate) fn complete_operation( + &self, + request_id: RequestId, + cache_name: CacheName, + operation: &'static str, + duration: Duration, + coalesced: bool, + ) { + let span = Span::current(); + if !span.is_disabled() { + span.record(attributes::FIELD_DURATION_NS, saturating_nanos(duration)); + if coalesced { + span.record(attributes::FIELD_COALESCED, true); + } + } + + if let Some(handler) = &self.handler { + handler.on_operation_complete(&CacheOperationEvent { + request_id, + cache_name, + operation, + duration, + coalesced, + }); + } + } +} + +#[cfg(any(feature = "logs", test))] +impl CacheTelemetry { + #[must_use] + pub(crate) fn with_logging() -> Self { + Self::new().enable_logging() + } + + #[must_use] + pub(crate) fn enable_logging(mut self) -> Self { + self.logging_enabled = true; + self + } + + pub(crate) fn get_span(&self, name: CacheName) -> Span { + if self.logging_enabled { + tracing::span!( + tracing::Level::DEBUG, + "cache.get", + cache.name = name, + cache.event = tracing::field::Empty, + cache.duration_ns = tracing::field::Empty, + cache.coalesced = tracing::field::Empty, + cache.fallback = tracing::field::Empty + ) + } else { + Span::none() + } + } + + pub(crate) fn insert_span(&self, name: CacheName) -> Span { + if self.logging_enabled { + tracing::span!( + tracing::Level::DEBUG, + "cache.insert", + cache.name = name, + cache.event = tracing::field::Empty, + cache.duration_ns = tracing::field::Empty + ) + } else { + Span::none() + } + } + + pub(crate) fn invalidate_span(&self, name: CacheName) -> Span { + if self.logging_enabled { + tracing::span!( + tracing::Level::DEBUG, + "cache.invalidate", + cache.name = name, + cache.event = tracing::field::Empty, + cache.duration_ns = tracing::field::Empty + ) + } else { + Span::none() + } + } + + pub(crate) fn clear_span(&self, name: CacheName) -> Span { + if self.logging_enabled { + tracing::span!( + tracing::Level::DEBUG, + "cache.clear", + cache.name = name, + cache.event = tracing::field::Empty, + cache.duration_ns = tracing::field::Empty + ) + } else { + Span::none() + } + } + + pub(crate) fn get_or_insert_span(&self, name: CacheName) -> Span { + if self.logging_enabled { + tracing::span!( + tracing::Level::DEBUG, + "cache.get_or_insert", + cache.name = name, + cache.event = tracing::field::Empty, + cache.duration_ns = tracing::field::Empty, + cache.coalesced = tracing::field::Empty, + cache.fallback = tracing::field::Empty + ) + } else { + Span::none() + } + } + + pub(crate) fn try_get_or_insert_span(&self, name: CacheName) -> Span { + if self.logging_enabled { + tracing::span!( + tracing::Level::DEBUG, + "cache.try_get_or_insert", + cache.name = name, + cache.event = tracing::field::Empty, + cache.duration_ns = tracing::field::Empty, + cache.coalesced = tracing::field::Empty, + cache.fallback = tracing::field::Empty + ) + } else { + Span::none() + } + } + + pub(crate) fn optionally_get_or_insert_span(&self, name: CacheName) -> Span { + if self.logging_enabled { + tracing::span!( + tracing::Level::DEBUG, + "cache.optionally_get_or_insert", + cache.name = name, + cache.event = tracing::field::Empty, + cache.duration_ns = tracing::field::Empty, + cache.coalesced = tracing::field::Empty, + cache.fallback = tracing::field::Empty + ) + } else { + Span::none() + } + } + + pub(crate) fn tier_span(&self, name: CacheName) -> Span { + if self.logging_enabled { + tracing::span!( + tracing::Level::DEBUG, + "cache.tier", + cache.name = name, + cache.event = tracing::field::Empty, + cache.duration_ns = tracing::field::Empty + ) + } else { + Span::none() + } + } +} + +#[cfg(not(any(feature = "logs", test)))] +#[expect(clippy::unused_self, reason = "Span factories are no-ops when logs are disabled")] +impl CacheTelemetry { + pub(crate) fn get_span(&self, _: CacheName) -> Span { + Span::none() + } + + pub(crate) fn insert_span(&self, _: CacheName) -> Span { + Span::none() + } + + pub(crate) fn invalidate_span(&self, _: CacheName) -> Span { + Span::none() + } + + pub(crate) fn clear_span(&self, _: CacheName) -> Span { + Span::none() + } + + pub(crate) fn get_or_insert_span(&self, _: CacheName) -> Span { + Span::none() + } + + pub(crate) fn try_get_or_insert_span(&self, _: CacheName) -> Span { + Span::none() + } + + pub(crate) fn optionally_get_or_insert_span(&self, _: CacheName) -> Span { + Span::none() + } + + pub(crate) fn tier_span(&self, _: CacheName) -> Span { + Span::none() } } #[cfg(test)] mod tests { + use std::sync::Mutex; + use testing_aids::LogCapture; + use tracing::Instrument; + use tracing_subscriber::fmt::format::FmtSpan; + use tracing_subscriber::layer::SubscriberExt; use super::*; + fn subscriber(capture: &LogCapture) -> impl tracing::Subscriber { + tracing_subscriber::registry().with( + tracing_subscriber::fmt::layer() + .with_writer(capture.clone()) + .with_ansi(false) + .with_span_events(FmtSpan::CLOSE), + ) + } + #[cfg_attr(miri, ignore)] #[test] fn logs_emit_contains_all_fields_and_values() { let capture = LogCapture::new(); - let _guard = tracing::subscriber::set_default(capture.subscriber()); - + let _guard = tracing::subscriber::set_default(subscriber(&capture)); let telemetry = CacheTelemetry::with_logging(); - telemetry.invalidate_error("my_test_cache", Duration::from_nanos(12345)); - // Verify field names match public constants + let request_id = next_request_id(); + futures::executor::block_on( + async { + telemetry.record_hit("my_test_cache", Duration::from_nanos(12345), false); + telemetry.complete_operation(request_id, "my_test_cache", "cache.get", Duration::from_nanos(12345), true); + telemetry.record_fallback(); + } + .with_request_id(request_id) + .instrument(telemetry.get_span("my_test_cache")), + ); + capture.assert_contains(attributes::FIELD_NAME); capture.assert_contains(attributes::FIELD_EVENT); capture.assert_contains(attributes::FIELD_DURATION_NS); - - // Verify values + capture.assert_contains(attributes::FIELD_COALESCED); + capture.assert_contains(attributes::FIELD_FALLBACK); capture.assert_contains("my_test_cache"); - capture.assert_contains(attributes::EVENT_INVALIDATE_ERROR); + capture.assert_contains(attributes::EVENT_HIT); capture.assert_contains("12345"); + capture.assert_contains("true"); } #[cfg_attr(miri, ignore)] @@ -237,22 +540,34 @@ mod tests { fn logs_emit_at_correct_severity_levels() { let telemetry = CacheTelemetry::with_logging(); - // Error level let capture = LogCapture::new(); - let _guard = tracing::subscriber::set_default(capture.subscriber()); - telemetry.get_error("cache", Duration::ZERO); + let _guard = tracing::subscriber::set_default(subscriber(&capture)); + let request_id = next_request_id(); + futures::executor::block_on( + async { telemetry.record_get_error("cache", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(telemetry.tier_span("cache")), + ); capture.assert_contains("ERROR"); - // Info level let capture = LogCapture::new(); - let _guard = tracing::subscriber::set_default(capture.subscriber()); - telemetry.cache_expired("cache", Duration::ZERO); + let _guard = tracing::subscriber::set_default(subscriber(&capture)); + let request_id = next_request_id(); + futures::executor::block_on( + async { telemetry.record_expired("cache", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(telemetry.tier_span("cache")), + ); capture.assert_contains("INFO"); - // Debug level let capture = LogCapture::new(); - let _guard = tracing::subscriber::set_default(capture.subscriber()); - telemetry.cache_hit("cache", Duration::ZERO); + let _guard = tracing::subscriber::set_default(subscriber(&capture)); + let request_id = next_request_id(); + futures::executor::block_on( + async { telemetry.record_hit("cache", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(telemetry.tier_span("cache")), + ); capture.assert_contains("DEBUG"); } @@ -260,41 +575,183 @@ mod tests { #[test] fn telemetry_disabled_emits_nothing() { let telemetry = CacheTelemetry::new(); - let capture = LogCapture::new(); - let _guard = tracing::subscriber::set_default(capture.subscriber()); + let _guard = tracing::subscriber::set_default(subscriber(&capture)); - telemetry.cache_hit("cache", Duration::from_secs(1)); + let request_id = next_request_id(); + futures::executor::block_on( + async { telemetry.record_hit("cache", Duration::from_secs(1), false) } + .with_request_id(request_id) + .instrument(telemetry.get_span("cache")), + ); assert!(capture.output().is_empty()); } - /// Asserts that a telemetry helper emits the expected event string. #[cfg_attr(miri, ignore)] - fn assert_emits(f: impl FnOnce(&CacheTelemetry), expected: &str) { + fn assert_emits(expected: &str, f: impl FnOnce(&CacheTelemetry, RequestId)) { let capture = LogCapture::new(); - let _guard = tracing::subscriber::set_default(capture.subscriber()); + let _guard = tracing::subscriber::set_default(subscriber(&capture)); let telemetry = CacheTelemetry::with_logging(); - f(&telemetry); + let request_id = next_request_id(); + f(&telemetry, request_id); capture.assert_contains(expected); } #[cfg_attr(miri, ignore)] #[test] fn every_helper_emits_its_event() { - assert_emits(|t| t.cache_hit("c", Duration::ZERO), attributes::EVENT_HIT); - assert_emits(|t| t.cache_miss("c", Duration::ZERO), attributes::EVENT_MISS); - assert_emits(|t| t.cache_expired("c", Duration::ZERO), attributes::EVENT_EXPIRED); - assert_emits(|t| t.get_error("c", Duration::ZERO), attributes::EVENT_GET_ERROR); - assert_emits(|t| t.cache_fallback("c", Duration::ZERO), attributes::EVENT_FALLBACK); - assert_emits(|t| t.refresh_hit("c", Duration::ZERO), attributes::EVENT_REFRESH_HIT); - assert_emits(|t| t.refresh_miss("c", Duration::ZERO), attributes::EVENT_REFRESH_MISS); - assert_emits(|t| t.cache_inserted("c", Duration::ZERO), attributes::EVENT_INSERTED); - assert_emits(|t| t.insert_rejected("c", Duration::ZERO), attributes::EVENT_INSERT_REJECTED); - assert_emits(|t| t.insert_error("c", Duration::ZERO), attributes::EVENT_INSERT_ERROR); - assert_emits(|t| t.cache_invalidated("c", Duration::ZERO), attributes::EVENT_INVALIDATED); - assert_emits(|t| t.invalidate_error("c", Duration::ZERO), attributes::EVENT_INVALIDATE_ERROR); - assert_emits(|t| t.cache_cleared("c", Duration::ZERO), attributes::EVENT_CLEARED); - assert_emits(|t| t.clear_error("c", Duration::ZERO), attributes::EVENT_CLEAR_ERROR); + assert_emits(attributes::EVENT_HIT, |t, request_id| { + futures::executor::block_on( + async { t.record_hit("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.tier_span("c")), + ); + }); + assert_emits(attributes::EVENT_MISS, |t, request_id| { + futures::executor::block_on( + async { t.record_miss("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.tier_span("c")), + ); + }); + assert_emits(attributes::EVENT_EXPIRED, |t, request_id| { + futures::executor::block_on( + async { t.record_expired("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.tier_span("c")), + ); + }); + assert_emits(attributes::EVENT_GET_ERROR, |t, request_id| { + futures::executor::block_on( + async { t.record_get_error("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.tier_span("c")), + ); + }); + assert_emits(attributes::EVENT_REFRESH_HIT, |t, request_id| { + futures::executor::block_on( + async { t.record_refresh_hit(Duration::ZERO) } + .with_request_id(request_id) + .instrument(t.get_span("c")), + ); + }); + assert_emits(attributes::EVENT_REFRESH_MISS, |t, request_id| { + futures::executor::block_on( + async { t.record_refresh_miss(Duration::ZERO) } + .with_request_id(request_id) + .instrument(t.get_span("c")), + ); + }); + assert_emits(attributes::EVENT_INSERTED, |t, request_id| { + futures::executor::block_on( + async { t.record_inserted("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.insert_span("c")), + ); + }); + assert_emits(attributes::EVENT_INSERT_REJECTED, |t, request_id| { + futures::executor::block_on( + async { t.record_insert_rejected("c", false) } + .with_request_id(request_id) + .instrument(t.insert_span("c")), + ); + }); + assert_emits(attributes::EVENT_INSERT_ERROR, |t, request_id| { + futures::executor::block_on( + async { t.record_insert_error("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.insert_span("c")), + ); + }); + assert_emits(attributes::EVENT_INVALIDATED, |t, request_id| { + futures::executor::block_on( + async { t.record_invalidated("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.invalidate_span("c")), + ); + }); + assert_emits(attributes::EVENT_INVALIDATE_ERROR, |t, request_id| { + futures::executor::block_on( + async { t.record_invalidate_error("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.invalidate_span("c")), + ); + }); + assert_emits(attributes::EVENT_CLEARED, |t, request_id| { + futures::executor::block_on( + async { t.record_cleared("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.clear_span("c")), + ); + }); + assert_emits(attributes::EVENT_CLEAR_ERROR, |t, request_id| { + futures::executor::block_on( + async { t.record_clear_error("c", Duration::ZERO, false) } + .with_request_id(request_id) + .instrument(t.clear_span("c")), + ); + }); + } + + #[test] + fn handler_receives_tier_and_operation_events_without_logging() { + type EventRecord = Vec<(RequestId, String, String, u128, bool)>; + + #[derive(Clone)] + struct RecordingHandler { + tier_events: Arc>, + operation_events: Arc>, + } + + impl CacheEventHandler for RecordingHandler { + fn on_tier_event(&self, event: &CacheTierEvent<'_>) { + self.tier_events.lock().expect("test handler mutex should not be poisoned").push(( + event.request_id, + event.tier_name.to_string(), + event.outcome.to_string(), + event.duration.as_nanos(), + event.fallback, + )); + } + + fn on_operation_complete(&self, event: &CacheOperationEvent<'_>) { + self.operation_events + .lock() + .expect("test handler mutex should not be poisoned") + .push(( + event.request_id, + event.cache_name.to_string(), + event.operation.to_string(), + event.duration.as_nanos(), + event.coalesced, + )); + } + } + + let tier_events = Arc::new(Mutex::new(Vec::new())); + let operation_events = Arc::new(Mutex::new(Vec::new())); + let telemetry = CacheTelemetry::new().with_handler(Arc::new(RecordingHandler { + tier_events: Arc::clone(&tier_events), + operation_events: Arc::clone(&operation_events), + })); + + let request_id = next_request_id(); + futures::executor::block_on( + async { + telemetry.record_hit("l2", Duration::from_nanos(7), true); + telemetry.complete_operation(request_id, "cache", "cache.get", Duration::from_nanos(11), true); + } + .with_request_id(request_id), + ); + + assert_eq!( + *tier_events.lock().expect("test handler mutex should not be poisoned"), + vec![(request_id, "l2".to_string(), attributes::EVENT_HIT.to_string(), 7, true)] + ); + assert_eq!( + *operation_events.lock().expect("test handler mutex should not be poisoned"), + vec![(request_id, "cache".to_string(), "cache.get".to_string(), 11, true)] + ); } } diff --git a/crates/cachet/src/telemetry/ext.rs b/crates/cachet/src/telemetry/ext.rs deleted file mode 100644 index 1f1c18bc6..000000000 --- a/crates/cachet/src/telemetry/ext.rs +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! Extension traits for telemetry recording. - -use std::pin::Pin; -use std::task::{Context, Poll}; -use std::time::Duration; - -use pin_project_lite::pin_project; -use tick::{Clock, Stopwatch}; - -/// Result of a timed async operation. -#[derive(Debug, Clone, Copy)] -pub struct TimedResult { - /// The result of the operation. - pub result: R, - /// The duration of the operation. - pub duration: Duration, -} - -pin_project! { - /// A future that times the inner future's execution. - #[must_use = "futures do nothing unless polled"] - pub struct Timed { - #[pin] - inner: F, - watch: Stopwatch, - } -} - -impl Future for Timed { - type Output = TimedResult; - - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let this = self.project(); - match this.inner.poll(cx) { - Poll::Ready(result) => Poll::Ready(TimedResult { - result, - duration: this.watch.elapsed(), - }), - Poll::Pending => Poll::Pending, - } - } -} - -/// Extension trait for timing async operations. -pub trait ClockExt { - /// Times an async operation and returns both the result and elapsed duration. - fn timed_async(&self, f: F) -> Timed - where - F: Future; -} - -impl ClockExt for Clock { - fn timed_async(&self, f: F) -> Timed - where - F: Future, - { - Timed { - inner: f, - watch: self.stopwatch(), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn block_on(f: F) -> F::Output { - futures::executor::block_on(f) - } - - #[test] - fn clock_ext_timed_async_measures_duration() { - block_on(async { - let control = tick::ClockControl::new(); - let clock = control.to_clock(); - - let timed = clock - .timed_async(async { - control.advance(Duration::from_millis(100)); - 42 - }) - .await; - - assert_eq!(timed.result, 42); - assert_eq!(timed.duration, Duration::from_millis(100)); - }); - } - - #[test] - fn clock_ext_timed_async_handles_pending() { - use std::pin::Pin; - use std::sync::Arc; - use std::sync::atomic::{AtomicBool, Ordering}; - use std::task::{Context, Poll}; - - /// A future that returns Pending on the first poll, then Ready on the second. - struct YieldOnce { - yielded: Arc, - } - - impl std::future::Future for YieldOnce { - type Output = i32; - fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - if self.yielded.swap(true, Ordering::SeqCst) { - Poll::Ready(99) - } else { - cx.waker().wake_by_ref(); - Poll::Pending - } - } - } - - block_on(async { - let control = tick::ClockControl::new(); - let clock = control.to_clock(); - - let timed = clock - .timed_async(YieldOnce { - yielded: Arc::new(AtomicBool::new(false)), - }) - .await; - - assert_eq!(timed.result, 99); - }); - } -} diff --git a/crates/cachet/src/telemetry/handler.rs b/crates/cachet/src/telemetry/handler.rs new file mode 100644 index 000000000..a336164cd --- /dev/null +++ b/crates/cachet/src/telemetry/handler.rs @@ -0,0 +1,69 @@ +use std::time::Duration; + +/// Unique identifier for a cache operation, used to correlate tier events +/// with their parent operation. Generated from a process-wide atomic counter. +pub type RequestId = u64; + +/// Data from a per-tier cache operation. +#[non_exhaustive] +#[derive(Debug, Clone)] +pub struct CacheTierEvent<'a> { + /// Identifies which top-level operation this tier event belongs to. + pub request_id: RequestId, + /// Name of the cache tier (for example, "L1" or "L2"). + pub tier_name: &'a str, + /// Outcome event name (e.g., `attributes::EVENT_HIT`). + pub outcome: &'a str, + /// How long the tier operation took. + pub duration: Duration, + /// Whether this tier was consulted as a fallback. + pub fallback: bool, +} + +/// Data from a completed top-level cache operation. +#[non_exhaustive] +#[derive(Debug, Clone)] +pub struct CacheOperationEvent<'a> { + /// Identifies this operation. Matches `request_id` on associated tier events. + pub request_id: RequestId, + /// Name of the cache. + pub cache_name: &'a str, + /// The span/operation name (e.g., "cache.get", "cache.insert"). + pub operation: &'a str, + /// Total duration of the operation. + pub duration: Duration, + /// Whether the request was coalesced via stampede protection. + pub coalesced: bool, +} + +/// Trait for consuming cachet telemetry events. +/// +/// Implement this trait to receive structured callbacks for cache operations. +/// Register via [`CacheBuilder::event_handler`](crate::CacheBuilder::event_handler). +/// +/// # Example +/// +/// ```ignore +/// use cachet::telemetry::handler::{CacheEventHandler, CacheOperationEvent, CacheTierEvent}; +/// +/// struct MyHandler; +/// +/// impl CacheEventHandler for MyHandler { +/// fn on_tier_event(&self, event: &CacheTierEvent<'_>) { +/// println!("tier {} = {} ({}ns)", event.tier_name, event.outcome, event.duration.as_nanos()); +/// } +/// +/// fn on_operation_complete(&self, event: &CacheOperationEvent<'_>) { +/// println!("op {} took {}ns", event.operation, event.duration.as_nanos()); +/// } +/// } +/// ``` +pub trait CacheEventHandler: Send + Sync { + /// Called for each per-tier sub-operation. + /// + /// May be called multiple times per top-level operation (once per tier). + fn on_tier_event(&self, event: &CacheTierEvent<'_>); + + /// Called once when the top-level cache operation completes. + fn on_operation_complete(&self, event: &CacheOperationEvent<'_>); +} diff --git a/crates/cachet/src/telemetry/mod.rs b/crates/cachet/src/telemetry/mod.rs index 6848ee95f..d2048e577 100644 --- a/crates/cachet/src/telemetry/mod.rs +++ b/crates/cachet/src/telemetry/mod.rs @@ -4,16 +4,18 @@ //! Cache telemetry integration. //! //! This module provides telemetry recording for cache operations via the -//! `tracing` crate. Enable structured logging through the cache builder's -//! `enable_logs()` method. +//! `tracing` crate and the [`handler`] callback API. Enable structured logging +//! through the cache builder's `enable_logs()` method. //! //! Consumers can subscribe to cache events using a custom -//! `tracing_subscriber::Layer` and the public constants in [`attributes`]. +//! `tracing_subscriber::Layer` and the public constants in [`attributes`], or +//! register a [`handler::CacheEventHandler`] with the cache builder. //! See the `telemetry_subscriber` example for a complete demonstration. pub mod attributes; pub(crate) mod cache; -pub(crate) mod ext; +/// Callback-based telemetry handlers. +pub mod handler; #[doc(inline)] pub use cache::CacheTelemetry; diff --git a/crates/cachet/src/wrapper.rs b/crates/cachet/src/wrapper.rs index 679411570..3da025626 100644 --- a/crates/cachet/src/wrapper.rs +++ b/crates/cachet/src/wrapper.rs @@ -12,10 +12,10 @@ use std::time::Duration; use cachet_tier::{CacheTier, SizeError}; use tick::Clock; +use tracing::Instrument; use crate::cache::CacheName; use crate::telemetry::CacheTelemetry; -use crate::telemetry::ext::ClockExt; use crate::{CacheEntry, Error, InsertPolicy}; /// Wraps a cache tier with telemetry and TTL expiration. @@ -51,6 +51,7 @@ pub struct CacheWrapper { pub(crate) ttl: Option, pub(crate) telemetry: CacheTelemetry, pub(crate) policy: InsertPolicy, + pub(crate) fallback: bool, _phantom: PhantomData<(K, V)>, } @@ -62,6 +63,7 @@ impl CacheWrapper { ttl: Option, telemetry: CacheTelemetry, policy: InsertPolicy, + fallback: bool, ) -> Self { Self { name, @@ -70,6 +72,7 @@ impl CacheWrapper { ttl, telemetry, policy, + fallback, _phantom: PhantomData, } } @@ -107,14 +110,14 @@ where fn handle_get_result(&self, value: Option>, duration: Duration) -> Option> { if let Some(entry) = value { if self.is_expired(&entry) { - self.telemetry.cache_expired(self.name, duration); + self.telemetry.record_expired(self.name, duration, self.fallback); None } else { - self.telemetry.cache_hit(self.name, duration); + self.telemetry.record_hit(self.name, duration, self.fallback); Some(entry) } } else { - self.telemetry.cache_miss(self.name, duration); + self.telemetry.record_miss(self.name, duration, self.fallback); None } } @@ -127,59 +130,70 @@ where CT: CacheTier + Send + Sync, { async fn get(&self, key: &K) -> Result>, Error> { - let timed = self.clock.timed_async(self.inner.get(key)).await; - match timed.result { - Ok(value) => Ok(self.handle_get_result(value, timed.duration)), - Err(e) => { - self.telemetry.get_error(self.name, timed.duration); - Err(e) + let span = self.telemetry.tier_span(self.name); + async { + let watch = self.clock.stopwatch(); + match self.inner.get(key).await { + Ok(value) => Ok(self.handle_get_result(value, watch.elapsed())), + Err(e) => { + self.telemetry.record_get_error(self.name, watch.elapsed(), self.fallback); + Err(e) + } } } + .instrument(span) + .await } async fn insert(&self, key: K, mut entry: CacheEntry) -> Result<(), Error> { - entry.ensure_cached_at(self.clock.system_time()); - if !self.policy.should_insert(&entry) { - self.telemetry.insert_rejected(self.name, Duration::default()); - return Ok(()); - } - - let timed = self.clock.timed_async(self.inner.insert(key, entry)).await; - match &timed.result { - Ok(()) => { - self.telemetry.cache_inserted(self.name, timed.duration); + let span = self.telemetry.tier_span(self.name); + async { + entry.ensure_cached_at(self.clock.system_time()); + if !self.policy.should_insert(&entry) { + self.telemetry.record_insert_rejected(self.name, self.fallback); + return Ok(()); } - Err(_) => { - self.telemetry.insert_error(self.name, timed.duration); + + let watch = self.clock.stopwatch(); + let result = self.inner.insert(key, entry).await; + match &result { + Ok(()) => self.telemetry.record_inserted(self.name, watch.elapsed(), self.fallback), + Err(_) => self.telemetry.record_insert_error(self.name, watch.elapsed(), self.fallback), } + result } - timed.result + .instrument(span) + .await } async fn invalidate(&self, key: &K) -> Result<(), Error> { - let timed = self.clock.timed_async(self.inner.invalidate(key)).await; - match &timed.result { - Ok(()) => { - self.telemetry.cache_invalidated(self.name, timed.duration); - } - Err(_) => { - self.telemetry.invalidate_error(self.name, timed.duration); + let span = self.telemetry.tier_span(self.name); + async { + let watch = self.clock.stopwatch(); + let result = self.inner.invalidate(key).await; + match &result { + Ok(()) => self.telemetry.record_invalidated(self.name, watch.elapsed(), self.fallback), + Err(_) => self.telemetry.record_invalidate_error(self.name, watch.elapsed(), self.fallback), } + result } - timed.result + .instrument(span) + .await } async fn clear(&self) -> Result<(), Error> { - let timed = self.clock.timed_async(self.inner.clear()).await; - match &timed.result { - Ok(()) => { - self.telemetry.cache_cleared(self.name, timed.duration); - } - Err(_) => { - self.telemetry.clear_error(self.name, timed.duration); + let span = self.telemetry.tier_span(self.name); + async { + let watch = self.clock.stopwatch(); + let result = self.inner.clear().await; + match &result { + Ok(()) => self.telemetry.record_cleared(self.name, watch.elapsed(), self.fallback), + Err(_) => self.telemetry.record_clear_error(self.name, watch.elapsed(), self.fallback), } + result } - timed.result + .instrument(span) + .await } async fn len(&self) -> Result { @@ -198,7 +212,8 @@ mod tests { let clock = Clock::new_frozen(); let inner = MockCache::::new(); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); // Entry without TTL should not be expired let entry = CacheEntry::new(42); @@ -217,6 +232,7 @@ mod tests { Some(Duration::from_secs(60)), telemetry, InsertPolicy::default(), + false, ); // Entry without cached_at should be expired if TTL is configured (treat as expired to be safe) @@ -233,8 +249,15 @@ mod tests { let telemetry = CacheTelemetry::new(); let tier_ttl = Duration::from_secs(60); let entry_ttl = Duration::from_secs(30); - let wrapper: CacheWrapper = - CacheWrapper::new("test", inner, clock.clone(), Some(tier_ttl), telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = CacheWrapper::new( + "test", + inner, + clock.clone(), + Some(tier_ttl), + telemetry, + InsertPolicy::default(), + false, + ); let entry = CacheEntry::expires_at(42, entry_ttl, clock.system_time()); @@ -251,7 +274,8 @@ mod tests { let inner = MockCache::::new(); let inner_check = inner.clone(); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); let entry = CacheEntry::new(42); wrapper.insert("key".to_string(), entry).await.unwrap(); @@ -269,7 +293,7 @@ mod tests { let telemetry = CacheTelemetry::new(); let tier_ttl = Duration::from_secs(60); let wrapper: CacheWrapper = - CacheWrapper::new("test", inner, clock, Some(tier_ttl), telemetry, InsertPolicy::default()); + CacheWrapper::new("test", inner, clock, Some(tier_ttl), telemetry, InsertPolicy::default(), false); let entry = CacheEntry::new(42); wrapper.insert("key".to_string(), entry).await.unwrap(); @@ -290,6 +314,7 @@ mod tests { Some(Duration::from_secs(60)), telemetry, InsertPolicy::default(), + false, ); // Entry with cached_at in the future simulates clock going backward @@ -304,7 +329,7 @@ mod tests { let telemetry = CacheTelemetry::new(); let ttl = Duration::from_secs(60); let wrapper: CacheWrapper = - CacheWrapper::new("test", inner, clock.clone(), Some(ttl), telemetry, InsertPolicy::default()); + CacheWrapper::new("test", inner, clock.clone(), Some(ttl), telemetry, InsertPolicy::default(), false); // Entry cached exactly TTL ago → elapsed == ttl → should NOT be expired (uses >) let entry = CacheEntry::expires_at(42, ttl, clock.system_time() - ttl); @@ -316,7 +341,8 @@ mod tests { let clock = Clock::new_frozen(); let inner = MockCache::::new(); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("mock_test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("mock_test", inner, clock, None, telemetry, InsertPolicy::default(), false); assert_eq!(wrapper.name(), "mock_test"); } @@ -325,7 +351,8 @@ mod tests { let clock = Clock::new_frozen(); let inner = MockCache::::new(); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); let result = wrapper.handle_get_result(None, Duration::from_secs(0)); assert!(result.is_none()); } @@ -342,6 +369,7 @@ mod tests { Some(Duration::from_secs(60)), telemetry, InsertPolicy::default(), + false, ); // Entry without cached_at → considered expired let entry = CacheEntry::new(42); @@ -354,7 +382,8 @@ mod tests { let clock = Clock::new_frozen(); let inner = MockCache::::new(); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); let entry = CacheEntry::new(42); let result = wrapper.handle_get_result(Some(entry), Duration::from_secs(0)); assert!(result.is_some()); @@ -366,7 +395,8 @@ mod tests { let clock = Clock::new_frozen(); let inner = MockCache::::new(); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); // get miss assert!(wrapper.get(&"key".to_string()).await.unwrap().is_none()); @@ -392,7 +422,8 @@ mod tests { let clock = Clock::new_frozen(); let inner = MockCache::::new(); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); assert_eq!(wrapper.len().await.expect("len should return Ok"), 0); wrapper.insert("key".to_string(), CacheEntry::new(1)).await.unwrap(); assert_eq!(wrapper.len().await.expect("len should return Ok"), 1); @@ -405,7 +436,8 @@ mod tests { let inner = MockCache::::new(); inner.fail_when(|op| matches!(op, cachet_tier::CacheOp::Get(_))); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); let result = wrapper.get(&"key".to_string()).await; result.unwrap_err(); } @@ -417,7 +449,8 @@ mod tests { let inner = MockCache::::new(); inner.fail_when(|op| matches!(op, cachet_tier::CacheOp::Insert { .. })); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); let result = wrapper.insert("key".to_string(), CacheEntry::new(1)).await; result.unwrap_err(); } @@ -429,7 +462,8 @@ mod tests { let inner = MockCache::::new(); inner.fail_when(|op| matches!(op, cachet_tier::CacheOp::Invalidate(_))); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); let result = wrapper.invalidate(&"key".to_string()).await; result.unwrap_err(); } @@ -441,7 +475,8 @@ mod tests { let inner = MockCache::::new(); inner.fail_when(|op| matches!(op, cachet_tier::CacheOp::Clear)); let telemetry = CacheTelemetry::new(); - let wrapper: CacheWrapper = CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default()); + let wrapper: CacheWrapper = + CacheWrapper::new("test", inner, clock, None, telemetry, InsertPolicy::default(), false); let result = wrapper.clear().await; result.unwrap_err(); } From cd35c722d634833d5279c258571a9ac96be77602 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Thu, 28 May 2026 16:00:33 -0400 Subject: [PATCH 04/26] CI fixes --- crates/cachet/src/telemetry/cache.rs | 3 ++- crates/cachet/src/telemetry/handler.rs | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 9a187f717..85e37b9b4 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -66,7 +66,7 @@ impl WithRequestIdExt for F { } /// Converts a `Duration` to nanoseconds as `u64`, saturating at `u64::MAX`. -/// A `u64` of nanoseconds covers ~584 years — overflow is not a practical concern. +/// A `u64` of nanoseconds covers around 584 years - overflow is not a practical concern. fn saturating_nanos(duration: Duration) -> u64 { u64::try_from(duration.as_nanos()).unwrap_or(u64::MAX) } @@ -318,6 +318,7 @@ impl CacheTelemetry { #[cfg(any(feature = "logs", test))] impl CacheTelemetry { + #[cfg(test)] #[must_use] pub(crate) fn with_logging() -> Self { Self::new().enable_logging() diff --git a/crates/cachet/src/telemetry/handler.rs b/crates/cachet/src/telemetry/handler.rs index a336164cd..a1ac89ca1 100644 --- a/crates/cachet/src/telemetry/handler.rs +++ b/crates/cachet/src/telemetry/handler.rs @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + use std::time::Duration; /// Unique identifier for a cache operation, used to correlate tier events From cd8d893e409813e10cec72235c73f8bc6c0dd1b7 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 29 May 2026 13:41:07 -0400 Subject: [PATCH 05/26] CI fixes --- crates/cachet/benches/operations.rs | 1 + crates/cachet/examples/telemetry_accumulator.rs | 9 ++++++--- crates/cachet/src/telemetry/cache.rs | 10 ++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/crates/cachet/benches/operations.rs b/crates/cachet/benches/operations.rs index bcacdbe56..1f63127b1 100644 --- a/crates/cachet/benches/operations.rs +++ b/crates/cachet/benches/operations.rs @@ -88,6 +88,7 @@ fn bench_cache_operations(c: &mut Criterion) { // Wrapper Overhead (direct vs wrapped vs features) // ============================================================================= +#[expect(clippy::too_many_lines, reason = "benchmark function with multiple related groups")] fn bench_wrapper_overhead(c: &mut Criterion) { let rt = rt(); let mut group = c.benchmark_group("wrapper_overhead"); diff --git a/crates/cachet/examples/telemetry_accumulator.rs b/crates/cachet/examples/telemetry_accumulator.rs index 057ee07fe..1ac251e1a 100644 --- a/crates/cachet/examples/telemetry_accumulator.rs +++ b/crates/cachet/examples/telemetry_accumulator.rs @@ -52,7 +52,7 @@ impl CacheEventHandler for AccumulatingHandler { self.pending.entry(event.request_id).or_default().push(TierRecord { tier_name: event.tier_name.to_owned(), outcome: event.outcome.to_owned(), - duration_us: event.duration.as_micros() as u64, + duration_us: u64::try_from(event.duration.as_micros()).unwrap_or(u64::MAX), fallback: event.fallback, }); } @@ -118,7 +118,10 @@ async fn main() { .event_handler(AccumulatingHandler::new()) .build(); - cache.insert("key".to_string(), CacheEntry::new("value".to_string())).await.unwrap(); + cache + .insert("key".to_string(), CacheEntry::new("value".to_string())) + .await + .expect("insert should succeed"); let _ = cache.get(&"key".to_string()).await; let _ = cache.get(&"missing".to_string()).await; @@ -136,7 +139,7 @@ async fn main() { cache2 .insert("user:1".to_string(), CacheEntry::new("Alice".to_string())) .await - .unwrap(); + .expect("insert should succeed"); let _ = cache2.get(&"user:1".to_string()).await; let _ = cache2.get(&"nobody".to_string()).await; } diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 85e37b9b4..29e73bb7f 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -452,6 +452,7 @@ impl CacheTelemetry { } #[cfg(not(any(feature = "logs", test)))] +#[cfg_attr(test, mutants::skip)] // Equivalent mutants: cfg-gated off during testing. #[expect(clippy::unused_self, reason = "Span factories are no-ops when logs are disabled")] impl CacheTelemetry { pub(crate) fn get_span(&self, _: CacheName) -> Span { @@ -755,4 +756,13 @@ mod tests { vec![(request_id, "cache".to_string(), "cache.get".to_string(), 11, true)] ); } + + #[test] + fn next_request_id_returns_unique_incrementing_values() { + let a = next_request_id(); + let b = next_request_id(); + let c = next_request_id(); + assert!(b > a, "request IDs must increment: got {a} then {b}"); + assert!(c > b, "request IDs must increment: got {b} then {c}"); + } } From 1b89e36d5ee5db2099f6d534d82dc0b026496a75 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 29 May 2026 16:54:07 -0400 Subject: [PATCH 06/26] Fix broken cachet_memory tests --- crates/cachet_memory/Cargo.toml | 2 +- crates/cachet_memory/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/cachet_memory/Cargo.toml b/crates/cachet_memory/Cargo.toml index 83e4e206f..d047205fd 100644 --- a/crates/cachet_memory/Cargo.toml +++ b/crates/cachet_memory/Cargo.toml @@ -39,7 +39,7 @@ futures = { workspace = true, features = ["executor"] } mutants = { workspace = true } ohno = { workspace = true, features = ["test-util"] } tick = { workspace = true, features = ["test-util"] } -tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } +tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread"] } [[bench]] name = "overhead" diff --git a/crates/cachet_memory/README.md b/crates/cachet_memory/README.md index 0cac02680..cefaf75e0 100644 --- a/crates/cachet_memory/README.md +++ b/crates/cachet_memory/README.md @@ -91,7 +91,7 @@ TTL/TTI unset or set them to a sufficiently high ceiling. This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGmYW0CYXZlMC43LjJhdIQbLiTyV0MU86EbZU15e0PmecoboQ9jo59bnAEbyDXw04U13GlhYvRhcoQbN0kpRlU_G9QbWC713oa4KjsbRG6BIsW3BU8bzI21NivEBVphZIKCbWNhY2hldF9tZW1vcnllMC4yLjGCa2NhY2hldF90aWVyZTAuMS4w + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEGzdJKUZVPxvUG1gu9d6GuCo7G0RugSLFtwVPG8yNtTYrxAVaYWSCgm1jYWNoZXRfbWVtb3J5ZTAuMi4xgmtjYWNoZXRfdGllcmUwLjEuMA [__link0]: https://docs.rs/cachet_memory/0.2.1/cachet_memory/?search=InMemoryCache [__link1]: https://docs.rs/cachet_memory/0.2.1/cachet_memory/?search=InMemoryCacheBuilder [__link10]: https://docs.rs/cachet_tier/0.1.0/cachet_tier/?search=CacheEntry::expires_after From b02b890ab51b18156b796980845f45ee0a94930c Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 29 May 2026 17:13:24 -0400 Subject: [PATCH 07/26] CI fixes --- .spelling | 1 + crates/cachet/src/telemetry/cache.rs | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.spelling b/.spelling index ea386895d..9921fa89a 100644 --- a/.spelling +++ b/.spelling @@ -360,6 +360,7 @@ mockable mockall modularity moka +moka's monomorphization monomorphize monomorphized diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 51e0030c3..ac3c1de26 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -318,7 +318,7 @@ impl CacheTelemetry { /// Like [`record_eviction`](Self::record_eviction), the request ID is /// read from the thread-local (non-zero when triggered synchronously /// during a cache operation). - #[cfg(any(feature = "memory", test))] + #[cfg(feature = "memory")] pub(crate) fn record_background_expired(&self, cache_name: CacheName) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { @@ -821,9 +821,6 @@ mod tests { #[test] fn eviction_handler_receives_request_id_from_calling_thread() { - let tier_events = Arc::new(Mutex::new(Vec::<(RequestId, String, String)>::new())); - let events = Arc::clone(&tier_events); - struct EvictionRecorder { events: Arc>>, } @@ -838,7 +835,10 @@ mod tests { fn on_operation_complete(&self, _: &CacheOperationEvent<'_>) {} } - let telemetry = CacheTelemetry::new().with_handler(Arc::new(EvictionRecorder { events })); + let tier_events = Arc::new(Mutex::new(Vec::<(RequestId, String, String)>::new())); + let telemetry = CacheTelemetry::new().with_handler(Arc::new(EvictionRecorder { + events: Arc::clone(&tier_events), + })); // Simulate an insert that triggers eviction on the same thread let request_id = next_request_id(); @@ -857,9 +857,6 @@ mod tests { #[test] fn eviction_without_request_context_has_zero_id() { - let tier_events = Arc::new(Mutex::new(Vec::::new())); - let events = Arc::clone(&tier_events); - struct IdRecorder { events: Arc>>, } @@ -873,7 +870,10 @@ mod tests { fn on_operation_complete(&self, _: &CacheOperationEvent<'_>) {} } - let telemetry = CacheTelemetry::new().with_handler(Arc::new(IdRecorder { events })); + let tier_events = Arc::new(Mutex::new(Vec::::new())); + let telemetry = CacheTelemetry::new().with_handler(Arc::new(IdRecorder { + events: Arc::clone(&tier_events), + })); // No WithRequestId wrapper — simulates background maintenance thread telemetry.record_eviction("bg_cache"); From aee1c5697d24d73747db9782d84ce4aa03625529 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Tue, 2 Jun 2026 14:41:14 -0400 Subject: [PATCH 08/26] Increase code coverage --- crates/cachet/src/eviction.rs | 19 ++++++ crates/cachet/src/telemetry/cache.rs | 95 ++++++++++++++++++++++------ 2 files changed, 95 insertions(+), 19 deletions(-) diff --git a/crates/cachet/src/eviction.rs b/crates/cachet/src/eviction.rs index a5165e616..9f468a516 100644 --- a/crates/cachet/src/eviction.rs +++ b/crates/cachet/src/eviction.rs @@ -99,4 +99,23 @@ mod tests { hook.handle(RemovalCause::Expired); capture.assert_contains(attributes::EVENT_EXPIRED); } + + #[cfg_attr(miri, ignore)] + #[test] + fn handle_without_logging_still_emits_handler_events() { + let capture = LogCapture::new(); + let _guard = tracing::subscriber::set_default(capture.subscriber()); + + // Logging disabled — covers the false branch of the logging_enabled check + let hook = Arc::new(EvictionHook::new()); + hook.init(CacheTelemetry::new(), "no_logs"); + + hook.handle(RemovalCause::Size); + hook.handle(RemovalCause::Expired); + + assert!( + capture.output().is_empty(), + "no log events should fire without logging enabled" + ); + } } diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index ac3c1de26..38f9fc834 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -619,6 +619,30 @@ mod tests { capture.assert_contains("DEBUG"); } + #[cfg_attr(miri, ignore)] + #[test] + fn all_span_factories_produce_enabled_spans() { + let capture = LogCapture::new(); + let _guard = tracing::subscriber::set_default(subscriber(&capture)); + let t = CacheTelemetry::with_logging(); + + // Exercise every span factory so coverage includes them all. + let spans = [ + t.get_span("c"), + t.insert_span("c"), + t.invalidate_span("c"), + t.clear_span("c"), + t.get_or_insert_span("c"), + t.try_get_or_insert_span("c"), + t.optionally_get_or_insert_span("c"), + t.tier_span("c"), + ]; + + for span in &spans { + assert!(!span.is_disabled(), "span factory should produce an enabled span"); + } + } + #[cfg_attr(miri, ignore)] #[test] fn telemetry_disabled_emits_nothing() { @@ -821,65 +845,98 @@ mod tests { #[test] fn eviction_handler_receives_request_id_from_calling_thread() { + type TierRecord = (RequestId, String, String); + type OpRecord = (RequestId, String, String); + struct EvictionRecorder { - events: Arc>>, + tier_events: Arc>>, + operation_events: Arc>>, } impl CacheEventHandler for EvictionRecorder { fn on_tier_event(&self, event: &CacheTierEvent<'_>) { - self.events.lock().expect("test mutex should not be poisoned").push(( + self.tier_events.lock().expect("test mutex should not be poisoned").push(( event.request_id, event.tier_name.to_string(), event.outcome.to_string(), )); } - fn on_operation_complete(&self, _: &CacheOperationEvent<'_>) {} + fn on_operation_complete(&self, event: &CacheOperationEvent<'_>) { + self.operation_events.lock().expect("test mutex should not be poisoned").push(( + event.request_id, + event.cache_name.to_string(), + event.operation.to_string(), + )); + } } - let tier_events = Arc::new(Mutex::new(Vec::<(RequestId, String, String)>::new())); + let tier_events = Arc::new(Mutex::new(Vec::new())); + let operation_events = Arc::new(Mutex::new(Vec::new())); let telemetry = CacheTelemetry::new().with_handler(Arc::new(EvictionRecorder { - events: Arc::clone(&tier_events), + tier_events: Arc::clone(&tier_events), + operation_events: Arc::clone(&operation_events), })); - // Simulate an insert that triggers eviction on the same thread let request_id = next_request_id(); futures::executor::block_on( async { telemetry.record_eviction("my_cache"); + telemetry.complete_operation(request_id, "my_cache", "cache.insert", Duration::ZERO, false); } .with_request_id(request_id), ); - let recorded = tier_events.lock().expect("test mutex should not be poisoned"); - assert_eq!(recorded.len(), 1, "expected exactly one eviction event"); - assert_eq!(recorded[0].0, request_id, "eviction should carry the inserting thread's request_id"); - assert_eq!(recorded[0].2, attributes::EVENT_EVICTION); + let tiers = tier_events.lock().expect("test mutex should not be poisoned"); + assert_eq!(tiers.len(), 1, "expected exactly one eviction tier event"); + assert_eq!(tiers[0].0, request_id, "eviction should carry the inserting thread's request_id"); + assert_eq!(tiers[0].2, attributes::EVENT_EVICTION); + + let ops = operation_events.lock().expect("test mutex should not be poisoned"); + assert_eq!(ops.len(), 1, "expected one operation complete event"); + assert_eq!(ops[0].0, request_id); + assert_eq!(ops[0].2, "cache.insert"); } #[test] fn eviction_without_request_context_has_zero_id() { + type TierRecord = (RequestId, String); + type OpRecord = (RequestId, String); + struct IdRecorder { - events: Arc>>, + tier_events: Arc>>, + operation_events: Arc>>, } impl CacheEventHandler for IdRecorder { fn on_tier_event(&self, event: &CacheTierEvent<'_>) { - self.events + self.tier_events + .lock() + .expect("test mutex should not be poisoned") + .push((event.request_id, event.outcome.to_string())); + } + fn on_operation_complete(&self, event: &CacheOperationEvent<'_>) { + self.operation_events .lock() .expect("test mutex should not be poisoned") - .push(event.request_id); + .push((event.request_id, event.operation.to_string())); } - fn on_operation_complete(&self, _: &CacheOperationEvent<'_>) {} } - let tier_events = Arc::new(Mutex::new(Vec::::new())); + let tier_events = Arc::new(Mutex::new(Vec::new())); + let operation_events = Arc::new(Mutex::new(Vec::new())); let telemetry = CacheTelemetry::new().with_handler(Arc::new(IdRecorder { - events: Arc::clone(&tier_events), + tier_events: Arc::clone(&tier_events), + operation_events: Arc::clone(&operation_events), })); // No WithRequestId wrapper — simulates background maintenance thread telemetry.record_eviction("bg_cache"); + telemetry.complete_operation(0, "bg_cache", "background", Duration::ZERO, false); + + let tiers = tier_events.lock().expect("test mutex should not be poisoned"); + assert_eq!(tiers.len(), 1); + assert_eq!(tiers[0].0, 0, "background eviction should have request_id 0"); - let recorded = tier_events.lock().expect("test mutex should not be poisoned"); - assert_eq!(recorded.len(), 1); - assert_eq!(recorded[0], 0, "background eviction should have request_id 0"); + let ops = operation_events.lock().expect("test mutex should not be poisoned"); + assert_eq!(ops.len(), 1); + assert_eq!(ops[0].0, 0); } } From 31ded781218cfc4b9dbf5c936e153dee1385436b Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Tue, 2 Jun 2026 16:29:16 -0400 Subject: [PATCH 09/26] Review fixes --- crates/cachet/src/eviction.rs | 7 ++--- crates/cachet/src/telemetry/cache.rs | 21 +++++++-------- crates/cachet/tests/cache.rs | 39 ++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/crates/cachet/src/eviction.rs b/crates/cachet/src/eviction.rs index 9f468a516..bc3cbfc8b 100644 --- a/crates/cachet/src/eviction.rs +++ b/crates/cachet/src/eviction.rs @@ -102,7 +102,7 @@ mod tests { #[cfg_attr(miri, ignore)] #[test] - fn handle_without_logging_still_emits_handler_events() { + fn handle_without_logging_emits_no_tracing_events() { let capture = LogCapture::new(); let _guard = tracing::subscriber::set_default(capture.subscriber()); @@ -113,9 +113,6 @@ mod tests { hook.handle(RemovalCause::Size); hook.handle(RemovalCause::Expired); - assert!( - capture.output().is_empty(), - "no log events should fire without logging enabled" - ); + assert!(capture.output().is_empty(), "no log events should fire without logging enabled"); } } diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 38f9fc834..aabdd7a11 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -47,8 +47,14 @@ impl Future for WithRequestId { fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.project(); - CURRENT_REQUEST_ID.with(|cell| cell.set(*this.request_id)); - this.inner.poll(cx) + let prev = CURRENT_REQUEST_ID.with(|cell| { + let prev = cell.get(); + cell.set(*this.request_id); + prev + }); + let out = this.inner.poll(cx); + CURRENT_REQUEST_ID.with(|cell| cell.set(prev)); + out } } @@ -166,15 +172,6 @@ impl CacheTelemetry { } } - #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] - fn record_info_event(&self, event: &'static str) { - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_EVENT, event); - tracing::info!(cache.event = event); - } - } - pub(crate) fn record_hit(&self, tier_name: CacheName, duration: Duration, fallback: bool) { self.record_debug_with_duration(attributes::EVENT_HIT, duration); self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_HIT, duration, fallback); @@ -270,7 +267,7 @@ impl CacheTelemetry { } pub(crate) fn record_insert_rejected(&self, tier_name: CacheName, fallback: bool) { - self.record_info_event(attributes::EVENT_INSERT_REJECTED); + self.record_info_with_duration(attributes::EVENT_INSERT_REJECTED, Duration::ZERO); self.emit_tier_event( Self::current_request_id(), tier_name, diff --git a/crates/cachet/tests/cache.rs b/crates/cachet/tests/cache.rs index 22adda07b..2667513d7 100644 --- a/crates/cachet/tests/cache.rs +++ b/crates/cachet/tests/cache.rs @@ -911,3 +911,42 @@ mod service_tests { assert!(cache.get(&"key".to_string()).await.unwrap().is_none()); } } + +#[cfg(feature = "logs")] +#[tokio::test] +async fn get_or_insert_with_logging_emits_span() { + let capture = testing_aids::LogCapture::new(); + let _guard = tracing::subscriber::set_default(capture.subscriber()); + + let clock = Clock::new_frozen(); + let cache = Cache::builder::(clock).memory().enable_logs().build(); + + let _ = cache.get_or_insert(&"k".to_string(), || async { 1 }).await; + capture.assert_contains("cache.miss"); +} + +#[cfg(feature = "logs")] +#[tokio::test] +async fn try_get_or_insert_with_logging_emits_span() { + let capture = testing_aids::LogCapture::new(); + let _guard = tracing::subscriber::set_default(capture.subscriber()); + + let clock = Clock::new_frozen(); + let cache = Cache::builder::(clock).memory().enable_logs().build(); + + let _ = cache.try_get_or_insert(&"k".to_string(), || async { Ok::<_, Error>(1) }).await; + capture.assert_contains("cache.miss"); +} + +#[cfg(feature = "logs")] +#[tokio::test] +async fn optionally_get_or_insert_with_logging_emits_span() { + let capture = testing_aids::LogCapture::new(); + let _guard = tracing::subscriber::set_default(capture.subscriber()); + + let clock = Clock::new_frozen(); + let cache = Cache::builder::(clock).memory().enable_logs().build(); + + let _ = cache.optionally_get_or_insert(&"k".to_string(), || async { Some(1) }).await; + capture.assert_contains("cache.miss"); +} From 012ddbf628869a63daf64be59738492400d54184 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 3 Jun 2026 12:40:13 -0400 Subject: [PATCH 10/26] Fix miri issue --- crates/cachet/tests/cache.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/cachet/tests/cache.rs b/crates/cachet/tests/cache.rs index 2667513d7..416fc35cb 100644 --- a/crates/cachet/tests/cache.rs +++ b/crates/cachet/tests/cache.rs @@ -913,6 +913,7 @@ mod service_tests { } #[cfg(feature = "logs")] +#[cfg_attr(miri, ignore)] #[tokio::test] async fn get_or_insert_with_logging_emits_span() { let capture = testing_aids::LogCapture::new(); @@ -926,6 +927,7 @@ async fn get_or_insert_with_logging_emits_span() { } #[cfg(feature = "logs")] +#[cfg_attr(miri, ignore)] #[tokio::test] async fn try_get_or_insert_with_logging_emits_span() { let capture = testing_aids::LogCapture::new(); @@ -939,6 +941,7 @@ async fn try_get_or_insert_with_logging_emits_span() { } #[cfg(feature = "logs")] +#[cfg_attr(miri, ignore)] #[tokio::test] async fn optionally_get_or_insert_with_logging_emits_span() { let capture = testing_aids::LogCapture::new(); From 5794cb74b49d6b94f54ac02c0437b8dc5a5f5368 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 3 Jun 2026 13:04:41 -0400 Subject: [PATCH 11/26] Comment fixes --- crates/cachet/src/telemetry/cache.rs | 26 +++++++++++++++++--------- crates/cachet/tests/cache.rs | 9 ++++++--- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index aabdd7a11..5e100ee98 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -47,14 +47,18 @@ impl Future for WithRequestId { fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.project(); - let prev = CURRENT_REQUEST_ID.with(|cell| { - let prev = cell.get(); - cell.set(*this.request_id); - prev - }); - let out = this.inner.poll(cx); - CURRENT_REQUEST_ID.with(|cell| cell.set(prev)); - out + + // RAII guard resets the thread-local to 0 even if the inner poll panics. + struct ResetGuard; + impl Drop for ResetGuard { + fn drop(&mut self) { + CURRENT_REQUEST_ID.with(|cell| cell.set(0)); + } + } + + CURRENT_REQUEST_ID.with(|cell| cell.set(*this.request_id)); + let _guard = ResetGuard; + this.inner.poll(cx) } } @@ -267,7 +271,11 @@ impl CacheTelemetry { } pub(crate) fn record_insert_rejected(&self, tier_name: CacheName, fallback: bool) { - self.record_info_with_duration(attributes::EVENT_INSERT_REJECTED, Duration::ZERO); + let span = Span::current(); + if !span.is_disabled() { + span.record(attributes::FIELD_EVENT, attributes::EVENT_INSERT_REJECTED); + tracing::info!(cache.event = attributes::EVENT_INSERT_REJECTED); + } self.emit_tier_event( Self::current_request_id(), tier_name, diff --git a/crates/cachet/tests/cache.rs b/crates/cachet/tests/cache.rs index 416fc35cb..5fd67f84c 100644 --- a/crates/cachet/tests/cache.rs +++ b/crates/cachet/tests/cache.rs @@ -915,7 +915,7 @@ mod service_tests { #[cfg(feature = "logs")] #[cfg_attr(miri, ignore)] #[tokio::test] -async fn get_or_insert_with_logging_emits_span() { +async fn get_or_insert_with_logging_emits_span_and_events() { let capture = testing_aids::LogCapture::new(); let _guard = tracing::subscriber::set_default(capture.subscriber()); @@ -923,13 +923,14 @@ async fn get_or_insert_with_logging_emits_span() { let cache = Cache::builder::(clock).memory().enable_logs().build(); let _ = cache.get_or_insert(&"k".to_string(), || async { 1 }).await; + capture.assert_contains("cache.get_or_insert"); capture.assert_contains("cache.miss"); } #[cfg(feature = "logs")] #[cfg_attr(miri, ignore)] #[tokio::test] -async fn try_get_or_insert_with_logging_emits_span() { +async fn try_get_or_insert_with_logging_emits_span_and_events() { let capture = testing_aids::LogCapture::new(); let _guard = tracing::subscriber::set_default(capture.subscriber()); @@ -937,13 +938,14 @@ async fn try_get_or_insert_with_logging_emits_span() { let cache = Cache::builder::(clock).memory().enable_logs().build(); let _ = cache.try_get_or_insert(&"k".to_string(), || async { Ok::<_, Error>(1) }).await; + capture.assert_contains("cache.try_get_or_insert"); capture.assert_contains("cache.miss"); } #[cfg(feature = "logs")] #[cfg_attr(miri, ignore)] #[tokio::test] -async fn optionally_get_or_insert_with_logging_emits_span() { +async fn optionally_get_or_insert_with_logging_emits_span_and_events() { let capture = testing_aids::LogCapture::new(); let _guard = tracing::subscriber::set_default(capture.subscriber()); @@ -951,5 +953,6 @@ async fn optionally_get_or_insert_with_logging_emits_span() { let cache = Cache::builder::(clock).memory().enable_logs().build(); let _ = cache.optionally_get_or_insert(&"k".to_string(), || async { Some(1) }).await; + capture.assert_contains("cache.optionally_get_or_insert"); capture.assert_contains("cache.miss"); } From b9aa17595cab7a9d07536c0a0ef7133081dd726d Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 3 Jun 2026 14:43:51 -0400 Subject: [PATCH 12/26] More comment fixes --- crates/cachet/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/cachet/Cargo.toml b/crates/cachet/Cargo.toml index 8db86ec2a..4a1c8e2ba 100644 --- a/crates/cachet/Cargo.toml +++ b/crates/cachet/Cargo.toml @@ -81,7 +81,7 @@ testing_aids = { path = "../testing_aids" } tick = { path = "../tick", features = ["test-util", "tokio"] } tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } tracing = { workspace = true, features = ["std"] } -tracing-subscriber = { workspace = true, features = ["registry"] } +tracing-subscriber = { workspace = true, features = ["fmt", "registry"] } tracing-test = { workspace = true, features = ["no-env-filter"] } [[bench]] From 01111485072f5debc783a07b28133f93245c35fb Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 3 Jun 2026 15:16:48 -0400 Subject: [PATCH 13/26] PR comment fixes --- crates/cachet/src/telemetry/cache.rs | 132 ++++++++++++++++++--------- 1 file changed, 88 insertions(+), 44 deletions(-) diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 5e100ee98..5fa3614f7 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -42,22 +42,23 @@ pin_project! { } } +/// RAII guard that resets the thread-local request ID to 0 on drop, +/// ensuring cleanup even if the inner future panics during poll. +struct ResetRequestId; + +impl Drop for ResetRequestId { + fn drop(&mut self) { + CURRENT_REQUEST_ID.with(|cell| cell.set(0)); + } +} + impl Future for WithRequestId { type Output = F::Output; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.project(); - - // RAII guard resets the thread-local to 0 even if the inner poll panics. - struct ResetGuard; - impl Drop for ResetGuard { - fn drop(&mut self) { - CURRENT_REQUEST_ID.with(|cell| cell.set(0)); - } - } - CURRENT_REQUEST_ID.with(|cell| cell.set(*this.request_id)); - let _guard = ResetGuard; + let _guard = ResetRequestId; this.inner.poll(cx) } } @@ -77,6 +78,7 @@ impl WithRequestIdExt for F { /// Converts a `Duration` to nanoseconds as `u64`, saturating at `u64::MAX`. /// A `u64` of nanoseconds covers around 584 years - overflow is not a practical concern. +#[cfg(any(feature = "logs", test))] fn saturating_nanos(duration: Duration) -> u64 { u64::try_from(duration.as_nanos()).unwrap_or(u64::MAX) } @@ -143,36 +145,66 @@ impl CacheTelemetry { } } - #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + #[cfg_attr( + not(feature = "logs"), + expect(clippy::unused_self, reason = "self.logging_enabled is used when logs is enabled") + )] fn record_debug_with_duration(&self, event: &'static str, duration: Duration) { - let span = Span::current(); - if !span.is_disabled() { - let duration_ns = saturating_nanos(duration); - span.record(attributes::FIELD_EVENT, event); - span.record(attributes::FIELD_DURATION_NS, duration_ns); - tracing::debug!(cache.event = event, cache.duration_ns = duration_ns); + #[cfg(any(feature = "logs", test))] + if self.logging_enabled { + let span = Span::current(); + if !span.is_disabled() { + let duration_ns = saturating_nanos(duration); + span.record(attributes::FIELD_EVENT, event); + span.record(attributes::FIELD_DURATION_NS, duration_ns); + tracing::debug!(cache.event = event, cache.duration_ns = duration_ns); + } + } + #[cfg(not(any(feature = "logs", test)))] + { + let _ = (event, duration); } } - #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + #[cfg_attr( + not(feature = "logs"), + expect(clippy::unused_self, reason = "self.logging_enabled is used when logs is enabled") + )] fn record_info_with_duration(&self, event: &'static str, duration: Duration) { - let span = Span::current(); - if !span.is_disabled() { - let duration_ns = saturating_nanos(duration); - span.record(attributes::FIELD_EVENT, event); - span.record(attributes::FIELD_DURATION_NS, duration_ns); - tracing::info!(cache.event = event, cache.duration_ns = duration_ns); + #[cfg(any(feature = "logs", test))] + if self.logging_enabled { + let span = Span::current(); + if !span.is_disabled() { + let duration_ns = saturating_nanos(duration); + span.record(attributes::FIELD_EVENT, event); + span.record(attributes::FIELD_DURATION_NS, duration_ns); + tracing::info!(cache.event = event, cache.duration_ns = duration_ns); + } + } + #[cfg(not(any(feature = "logs", test)))] + { + let _ = (event, duration); } } - #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + #[cfg_attr( + not(feature = "logs"), + expect(clippy::unused_self, reason = "self.logging_enabled is used when logs is enabled") + )] fn record_error_with_duration(&self, event: &'static str, duration: Duration) { - let span = Span::current(); - if !span.is_disabled() { - let duration_ns = saturating_nanos(duration); - span.record(attributes::FIELD_EVENT, event); - span.record(attributes::FIELD_DURATION_NS, duration_ns); - tracing::error!(cache.event = event, cache.duration_ns = duration_ns); + #[cfg(any(feature = "logs", test))] + if self.logging_enabled { + let span = Span::current(); + if !span.is_disabled() { + let duration_ns = saturating_nanos(duration); + span.record(attributes::FIELD_EVENT, event); + span.record(attributes::FIELD_DURATION_NS, duration_ns); + tracing::error!(cache.event = event, cache.duration_ns = duration_ns); + } + } + #[cfg(not(any(feature = "logs", test)))] + { + let _ = (event, duration); } } @@ -271,10 +303,13 @@ impl CacheTelemetry { } pub(crate) fn record_insert_rejected(&self, tier_name: CacheName, fallback: bool) { - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_EVENT, attributes::EVENT_INSERT_REJECTED); - tracing::info!(cache.event = attributes::EVENT_INSERT_REJECTED); + #[cfg(any(feature = "logs", test))] + if self.logging_enabled { + let span = Span::current(); + if !span.is_disabled() { + span.record(attributes::FIELD_EVENT, attributes::EVENT_INSERT_REJECTED); + tracing::info!(cache.event = attributes::EVENT_INSERT_REJECTED); + } } self.emit_tier_event( Self::current_request_id(), @@ -285,11 +320,17 @@ impl CacheTelemetry { ); } - #[expect(clippy::unused_self, reason = "Consistent API — may use self in future (e.g., emit migration)")] + #[cfg_attr( + not(feature = "logs"), + expect(clippy::unused_self, reason = "self.logging_enabled is used when logs is enabled") + )] pub(crate) fn record_fallback(&self) { - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_FALLBACK, true); + #[cfg(any(feature = "logs", test))] + if self.logging_enabled { + let span = Span::current(); + if !span.is_disabled() { + span.record(attributes::FIELD_FALLBACK, true); + } } } @@ -347,11 +388,14 @@ impl CacheTelemetry { duration: Duration, coalesced: bool, ) { - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_DURATION_NS, saturating_nanos(duration)); - if coalesced { - span.record(attributes::FIELD_COALESCED, true); + #[cfg(any(feature = "logs", test))] + if self.logging_enabled { + let span = Span::current(); + if !span.is_disabled() { + span.record(attributes::FIELD_DURATION_NS, saturating_nanos(duration)); + if coalesced { + span.record(attributes::FIELD_COALESCED, true); + } } } From 25a2e3ac8c9a88218d9f0b01383eb8c489ed0f0c Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 3 Jun 2026 17:16:59 -0400 Subject: [PATCH 14/26] Mutant coverage --- crates/cachet/src/telemetry/cache.rs | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 5fa3614f7..1f9158a03 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -709,6 +709,25 @@ mod tests { assert!(capture.output().is_empty()); } + #[test] + fn logging_enabled_without_subscriber_is_noop() { + // logging_enabled=true but no tracing subscriber → spans are disabled. + // This exercises the `logging_enabled && span.is_disabled()` branches. + let telemetry = CacheTelemetry::with_logging(); + let request_id = next_request_id(); + futures::executor::block_on( + async { + telemetry.record_hit("c", Duration::ZERO, false); + telemetry.record_get_error("c", Duration::ZERO, false); + telemetry.record_insert_rejected("c", false); + telemetry.record_fallback(); + telemetry.complete_operation(request_id, "c", "cache.get", Duration::ZERO, true); + } + .with_request_id(request_id), + ); + // No panic = all paths handled gracefully with disabled spans. + } + #[cfg_attr(miri, ignore)] fn assert_emits(expected: &str, f: impl FnOnce(&CacheTelemetry, RequestId)) { let capture = LogCapture::new(); @@ -892,6 +911,26 @@ mod tests { assert!(c > b, "request IDs must increment: got {b} then {c}"); } + #[test] + fn with_request_id_resets_thread_local_after_completion() { + let request_id = next_request_id(); + futures::executor::block_on( + async { + assert_eq!( + CacheTelemetry::current_request_id(), + request_id, + "request_id should be set during poll" + ); + } + .with_request_id(request_id), + ); + assert_eq!( + CacheTelemetry::current_request_id(), + 0, + "request_id should be reset to 0 after WithRequestId completes" + ); + } + #[test] fn eviction_handler_receives_request_id_from_calling_thread() { type TierRecord = (RequestId, String, String); From 7b05348f1f50416aea8310b1c846e0bde15d32ed Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 3 Jun 2026 18:04:44 -0400 Subject: [PATCH 15/26] Comment fixeus --- crates/cachet/src/telemetry/cache.rs | 77 +++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 13 deletions(-) diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 1f9158a03..e273a34ec 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -35,6 +35,8 @@ pin_project! { /// even if the task migrates to a different thread between polls. /// /// Same pattern as `tracing::Instrument` which re-enters the span per poll. + /// Supports nesting (e.g., a `get_or_insert` closure calling another cache + /// operation) by saving and restoring the previous request ID. pub(crate) struct WithRequestId { #[pin] inner: F, @@ -42,13 +44,13 @@ pin_project! { } } -/// RAII guard that resets the thread-local request ID to 0 on drop, +/// RAII guard that restores the previous thread-local request ID on drop, /// ensuring cleanup even if the inner future panics during poll. -struct ResetRequestId; +struct RestoreRequestId(RequestId); -impl Drop for ResetRequestId { +impl Drop for RestoreRequestId { fn drop(&mut self) { - CURRENT_REQUEST_ID.with(|cell| cell.set(0)); + CURRENT_REQUEST_ID.with(|cell| cell.set(self.0)); } } @@ -57,8 +59,8 @@ impl Future for WithRequestId { fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let this = self.project(); - CURRENT_REQUEST_ID.with(|cell| cell.set(*this.request_id)); - let _guard = ResetRequestId; + let prev = CURRENT_REQUEST_ID.with(|cell| cell.replace(*this.request_id)); + let _guard = RestoreRequestId(prev); this.inner.poll(cx) } } @@ -152,13 +154,13 @@ impl CacheTelemetry { fn record_debug_with_duration(&self, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { + let duration_ns = saturating_nanos(duration); let span = Span::current(); if !span.is_disabled() { - let duration_ns = saturating_nanos(duration); span.record(attributes::FIELD_EVENT, event); span.record(attributes::FIELD_DURATION_NS, duration_ns); - tracing::debug!(cache.event = event, cache.duration_ns = duration_ns); } + tracing::debug!(cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] { @@ -173,13 +175,13 @@ impl CacheTelemetry { fn record_info_with_duration(&self, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { + let duration_ns = saturating_nanos(duration); let span = Span::current(); if !span.is_disabled() { - let duration_ns = saturating_nanos(duration); span.record(attributes::FIELD_EVENT, event); span.record(attributes::FIELD_DURATION_NS, duration_ns); - tracing::info!(cache.event = event, cache.duration_ns = duration_ns); } + tracing::info!(cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] { @@ -194,13 +196,13 @@ impl CacheTelemetry { fn record_error_with_duration(&self, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { + let duration_ns = saturating_nanos(duration); let span = Span::current(); if !span.is_disabled() { - let duration_ns = saturating_nanos(duration); span.record(attributes::FIELD_EVENT, event); span.record(attributes::FIELD_DURATION_NS, duration_ns); - tracing::error!(cache.event = event, cache.duration_ns = duration_ns); } + tracing::error!(cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] { @@ -308,8 +310,8 @@ impl CacheTelemetry { let span = Span::current(); if !span.is_disabled() { span.record(attributes::FIELD_EVENT, attributes::EVENT_INSERT_REJECTED); - tracing::info!(cache.event = attributes::EVENT_INSERT_REJECTED); } + tracing::info!(cache.event = attributes::EVENT_INSERT_REJECTED); } self.emit_tier_event( Self::current_request_id(), @@ -931,6 +933,55 @@ mod tests { ); } + #[test] + fn nested_with_request_id_restores_outer_id() { + use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker}; + + fn noop_waker() -> Waker { + fn no_op(_: *const ()) {} + fn clone(p: *const ()) -> RawWaker { + RawWaker::new(p, &VTABLE) + } + static VTABLE: RawWakerVTable = RawWakerVTable::new(clone, no_op, no_op, no_op); + unsafe { Waker::from_raw(RawWaker::new(std::ptr::null(), &VTABLE)) } + } + + let outer_id = next_request_id(); + let inner_id = next_request_id(); + + let waker = noop_waker(); + + // Poll outer WithRequestId, which sets outer_id + let mut outer = std::pin::pin!( + async { + assert_eq!(CacheTelemetry::current_request_id(), outer_id); + + // Poll inner WithRequestId — sets inner_id, should restore outer_id on completion + let mut inner = std::pin::pin!( + async { + assert_eq!(CacheTelemetry::current_request_id(), inner_id); + } + .with_request_id(inner_id) + ); + let mut inner_cx = Context::from_waker(&waker); + assert!(matches!(inner.as_mut().poll(&mut inner_cx), Poll::Ready(()))); + + // After inner completes, outer_id should be restored + assert_eq!( + CacheTelemetry::current_request_id(), + outer_id, + "outer request_id should be restored after nested WithRequestId" + ); + } + .with_request_id(outer_id) + ); + let mut outer_cx = Context::from_waker(&waker); + assert!(matches!(outer.as_mut().poll(&mut outer_cx), Poll::Ready(()))); + + // After outer completes, should be reset to 0 + assert_eq!(CacheTelemetry::current_request_id(), 0); + } + #[test] fn eviction_handler_receives_request_id_from_calling_thread() { type TierRecord = (RequestId, String, String); From 2cb02aee22fb50ff5f2265e2de0efa50223cfab8 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Wed, 3 Jun 2026 18:18:57 -0400 Subject: [PATCH 16/26] Fix mutants --- crates/cachet/src/telemetry/cache.rs | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index e273a34ec..977f81e37 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -935,21 +935,12 @@ mod tests { #[test] fn nested_with_request_id_restores_outer_id() { - use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker}; - - fn noop_waker() -> Waker { - fn no_op(_: *const ()) {} - fn clone(p: *const ()) -> RawWaker { - RawWaker::new(p, &VTABLE) - } - static VTABLE: RawWakerVTable = RawWakerVTable::new(clone, no_op, no_op, no_op); - unsafe { Waker::from_raw(RawWaker::new(std::ptr::null(), &VTABLE)) } - } + use std::task::{Context, Poll, Waker}; let outer_id = next_request_id(); let inner_id = next_request_id(); - let waker = noop_waker(); + let waker = Waker::noop(); // Poll outer WithRequestId, which sets outer_id let mut outer = std::pin::pin!( From bf3586e3573c9004d0208f516091bfac317de5f7 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Thu, 4 Jun 2026 12:58:58 -0400 Subject: [PATCH 17/26] Remove spans, they are not really necessary, especially with request ids on events --- crates/cachet/README.md | 8 +- .../cachet/examples/telemetry_subscriber.rs | 13 +- crates/cachet/src/builder/cache.rs | 2 +- crates/cachet/src/cache.rs | 15 - crates/cachet/src/fallback.rs | 2 - crates/cachet/src/lib.rs | 6 +- crates/cachet/src/policy.rs | 2 +- crates/cachet/src/refresh.rs | 3 - crates/cachet/src/telemetry/attributes.rs | 10 +- crates/cachet/src/telemetry/cache.rs | 371 ++++-------------- crates/cachet/src/wrapper.rs | 9 - crates/cachet/tests/cache.rs | 6 +- 12 files changed, 98 insertions(+), 349 deletions(-) diff --git a/crates/cachet/README.md b/crates/cachet/README.md index 8a67b6c7a..d44dfcadc 100644 --- a/crates/cachet/README.md +++ b/crates/cachet/README.md @@ -230,9 +230,9 @@ cache.insert("key".to_string(), "value".to_string()).await?; Enable with the `logs` feature and `.enable_logs()` on the cache builder. -Each cache operation creates a structured [`tracing`][__link20] span and emits events inside -that span. Span fields include `cache.name`, `cache.event`, and `cache.duration_ns`, -with `cache.coalesced` and `cache.fallback` recorded when applicable. +Each cache operation emits structured [`tracing`][__link20] events. Event fields include +`cache.name`, `cache.event`, `cache.duration_ns`, and `cache.operation`, with +`cache.coalesced` and `cache.fallback` recorded when applicable. ### Subscribing to events @@ -266,7 +266,7 @@ See the `telemetry_subscriber` example for a complete demonstration. This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG2xL7qJyuCgIG_83YqgWgkfeG5xypaCx1UNDG_r6sBXY4Zc3YWSIgmhieXRlc2J1ZmUwLjUuMoJmY2FjaGV0ZTAuNi4ygm1jYWNoZXRfbWVtb3J5ZTAuMy4xgm5jYWNoZXRfc2VydmljZWUwLjIuMoJrY2FjaGV0X3RpZXJlMC4yLjGCZHRpY2tlMC4zLjKCZ3RyYWNpbmdmMC4xLjQ0gml1bmlmbGlnaHRlMC4yLjI + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG1SlUsq8JhozG8GxRSYYFzJGGytyZY2NjhMFGxOIGd3DC-SbYWSIgmhieXRlc2J1ZmUwLjUuMoJmY2FjaGV0ZTAuNi4ygm1jYWNoZXRfbWVtb3J5ZTAuMy4xgm5jYWNoZXRfc2VydmljZWUwLjIuMoJrY2FjaGV0X3RpZXJlMC4yLjGCZHRpY2tlMC4zLjKCZ3RyYWNpbmdmMC4xLjQ0gml1bmlmbGlnaHRlMC4yLjI [__link0]: https://docs.rs/cachet/0.6.2/cachet/?search=TimeToRefresh [__link1]: https://crates.io/crates/uniflight/0.2.2 [__link10]: https://docs.rs/cachet_tier/0.2.1/cachet_tier/?search=CacheTier diff --git a/crates/cachet/examples/telemetry_subscriber.rs b/crates/cachet/examples/telemetry_subscriber.rs index 30dcbae9d..27476f86c 100644 --- a/crates/cachet/examples/telemetry_subscriber.rs +++ b/crates/cachet/examples/telemetry_subscriber.rs @@ -1,7 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. -//! Demonstrates cachet telemetry: spans for traces, events for logs. +//! Demonstrates cachet telemetry as structured tracing events. //! //! Run with: `cargo run --example telemetry_subscriber --features "memory,logs"` @@ -9,19 +9,12 @@ use std::time::Duration; use cachet::{Cache, CacheEntry}; use tick::Clock; -use tracing_subscriber::fmt::format::FmtSpan; use tracing_subscriber::layer::SubscriberExt; #[tokio::main] async fn main() { - // Set up a subscriber that shows both events and span open/close. - // This demonstrates what cachet telemetry looks like out of the box. - let subscriber = tracing_subscriber::registry().with( - tracing_subscriber::fmt::layer() - .with_ansi(true) - .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) - .with_target(false), - ); + // Set up a subscriber that shows cachet's structured events. + let subscriber = tracing_subscriber::registry().with(tracing_subscriber::fmt::layer().with_ansi(true).with_target(false)); tracing::subscriber::set_global_default(subscriber).expect("subscriber already set"); let clock = Clock::new_tokio(); diff --git a/crates/cachet/src/builder/cache.rs b/crates/cachet/src/builder/cache.rs index 84eaf0f06..9dc3d3f4f 100644 --- a/crates/cachet/src/builder/cache.rs +++ b/crates/cachet/src/builder/cache.rs @@ -291,7 +291,7 @@ impl CacheBuilder { /// [`Cache::get_or_insert`](crate::Cache::get_or_insert), and promotion from a fallback tier. /// /// If the policy rejects an insert, the operation is skipped and a - /// `cache.rejected` telemetry event is recorded with `cache.operation = cache.insert`. + /// `cache.insert_rejected` telemetry event is recorded. /// /// # Examples /// diff --git a/crates/cachet/src/cache.rs b/crates/cachet/src/cache.rs index a7342bdfa..ad724cd81 100644 --- a/crates/cachet/src/cache.rs +++ b/crates/cachet/src/cache.rs @@ -9,7 +9,6 @@ use std::hash::Hash; use cachet_tier::{CacheEntry, CacheTier, DynamicCache, SizeError}; use tick::Clock; -use tracing::Instrument; use uniflight::Merger; use crate::Error; @@ -232,7 +231,6 @@ where Q: Hash + Eq + ToOwned + ?Sized + Send + Sync, { let request_id = next_request_id(); - let span = self.telemetry.get_span(self.name); let watch = self.clock.stopwatch(); async { let (result, coalesced) = if let Some(mergers) = &self.mergers { @@ -253,7 +251,6 @@ where result } .with_request_id(request_id) - .instrument(span) .await } @@ -284,7 +281,6 @@ where /// ``` pub async fn insert(&self, key: K, entry: impl Into>) -> Result<(), Error> { let request_id = next_request_id(); - let span = self.telemetry.insert_span(self.name); let watch = self.clock.stopwatch(); async { let result = self.storage.insert(key, entry.into()).await; @@ -293,7 +289,6 @@ where result } .with_request_id(request_id) - .instrument(span) .await } @@ -317,7 +312,6 @@ where Q: Hash + Eq + ToOwned + ?Sized + Send + Sync, { let request_id = next_request_id(); - let span = self.telemetry.invalidate_span(self.name); let watch = self.clock.stopwatch(); async { let (result, coalesced) = if let Some(mergers) = &self.mergers { @@ -338,7 +332,6 @@ where result } .with_request_id(request_id) - .instrument(span) .await } @@ -362,7 +355,6 @@ where /// Returns an error if the underlying cache tier operation fails. pub async fn clear(&self) -> Result<(), Error> { let request_id = next_request_id(); - let span = self.telemetry.clear_span(self.name); let watch = self.clock.stopwatch(); async { let result = self.storage.clear().await; @@ -371,7 +363,6 @@ where result } .with_request_id(request_id) - .instrument(span) .await } @@ -470,7 +461,6 @@ where Fut: Future + Send, { let request_id = next_request_id(); - let span = self.telemetry.get_or_insert_span(self.name); let watch = self.clock.stopwatch(); async { let owned = key.to_owned(); @@ -489,7 +479,6 @@ where result } .with_request_id(request_id) - .instrument(span) .await } @@ -720,7 +709,6 @@ where Fut: Future> + Send, { let request_id = next_request_id(); - let span = self.telemetry.try_get_or_insert_span(self.name); let watch = self.clock.stopwatch(); async { let owned = key.to_owned(); @@ -739,7 +727,6 @@ where result } .with_request_id(request_id) - .instrument(span) .await } @@ -810,7 +797,6 @@ where Fut: Future> + Send, { let request_id = next_request_id(); - let span = self.telemetry.optionally_get_or_insert_span(self.name); let watch = self.clock.stopwatch(); async { let owned = key.to_owned(); @@ -829,7 +815,6 @@ where result } .with_request_id(request_id) - .instrument(span) .await } diff --git a/crates/cachet/src/fallback.rs b/crates/cachet/src/fallback.rs index b3cc8d5c3..d9ea0fdce 100644 --- a/crates/cachet/src/fallback.rs +++ b/crates/cachet/src/fallback.rs @@ -13,7 +13,6 @@ use std::sync::Arc; use cachet_tier::{CacheEntry, CacheTier, SizeError}; use futures::join; use tick::Clock; -use tracing::Instrument; use crate::Error; use crate::cache::CacheName; @@ -115,7 +114,6 @@ where Ok(fallback_value) } - .instrument(tracing::Span::current()) .await } } diff --git a/crates/cachet/src/lib.rs b/crates/cachet/src/lib.rs index 1d6be7282..b4e583759 100644 --- a/crates/cachet/src/lib.rs +++ b/crates/cachet/src/lib.rs @@ -227,9 +227,9 @@ //! //! Enable with the `logs` feature and `.enable_logs()` on the cache builder. //! -//! Each cache operation creates a structured [`tracing`] span and emits events inside -//! that span. Span fields include `cache.name`, `cache.event`, and `cache.duration_ns`, -//! with `cache.coalesced` and `cache.fallback` recorded when applicable. +//! Each cache operation emits structured [`tracing`] events. Event fields include +//! `cache.name`, `cache.event`, `cache.duration_ns`, and `cache.operation`, with +//! `cache.coalesced` and `cache.fallback` recorded when applicable. //! //! ## Subscribing to events //! diff --git a/crates/cachet/src/policy.rs b/crates/cachet/src/policy.rs index 1a827159f..2f83da17b 100644 --- a/crates/cachet/src/policy.rs +++ b/crates/cachet/src/policy.rs @@ -13,7 +13,7 @@ type InsertPredicate = Arc) -> bool + Send + Sync>; /// The insert policy applies to all inserts into the tier, including direct /// [`Cache::insert`](crate::Cache::insert) calls, [`Cache::get_or_insert`](crate::Cache::get_or_insert), and promotion from a /// fallback tier. If the policy rejects an insert, the operation is skipped -/// and a `cache.rejected` telemetry event is recorded, with the operation recorded as `cache.insert`. +/// and a `cache.insert_rejected` telemetry event is recorded. /// /// # Examples /// diff --git a/crates/cachet/src/refresh.rs b/crates/cachet/src/refresh.rs index a7048a2de..81a420b91 100644 --- a/crates/cachet/src/refresh.rs +++ b/crates/cachet/src/refresh.rs @@ -16,7 +16,6 @@ use std::time::{Duration, SystemTime}; use anyspawn::Spawner; use cachet_tier::{CacheEntry, CacheTier}; use parking_lot::Mutex; -use tracing::Instrument; use crate::fallback::{FallbackCache, FallbackCacheInner}; @@ -145,7 +144,6 @@ where F: CacheTier + Send + Sync + 'static, { pub(crate) async fn fetch_and_promote(&self, key: K) { - let span = self.telemetry.get_span(self.name); async { let watch = self.clock.stopwatch(); match self.fallback.get(&key).await { @@ -153,7 +151,6 @@ where Ok(None) | Err(_) => self.handle_fallback_miss(watch.elapsed()), } } - .instrument(span) .await; } diff --git a/crates/cachet/src/telemetry/attributes.rs b/crates/cachet/src/telemetry/attributes.rs index 200af0eb1..76aaaa358 100644 --- a/crates/cachet/src/telemetry/attributes.rs +++ b/crates/cachet/src/telemetry/attributes.rs @@ -3,9 +3,9 @@ //! Public constants for cachet telemetry field names and event values. //! -//! Use these constants to filter or match cachet spans and events in a custom -//! `tracing_subscriber::Layer`. Cache operation spans record `FIELD_NAME`, -//! `FIELD_EVENT`, and `FIELD_DURATION_NS`, with additional flag fields when applicable. +//! Use these constants to filter or match cachet telemetry events in a custom +//! `tracing_subscriber::Layer`. Cache operations emit structured events with +//! `FIELD_NAME`, `FIELD_EVENT`, `FIELD_DURATION_NS`, and additional fields when applicable. //! //! # Example //! @@ -41,6 +41,9 @@ pub const FIELD_EVENT: &str = "cache.event"; /// Field name for the operation duration in nanoseconds. pub const FIELD_DURATION_NS: &str = "cache.duration_ns"; +/// Field name for the cache operation name. +pub const FIELD_OPERATION: &str = "cache.operation"; + /// Field name recording whether request coalescing was used. pub const FIELD_COALESCED: &str = "cache.coalesced"; @@ -102,6 +105,7 @@ mod tests { assert_eq!(FIELD_NAME, "cache.name"); assert_eq!(FIELD_EVENT, "cache.event"); assert_eq!(FIELD_DURATION_NS, "cache.duration_ns"); + assert_eq!(FIELD_OPERATION, "cache.operation"); assert_eq!(FIELD_COALESCED, "cache.coalesced"); assert_eq!(FIELD_FALLBACK, "cache.fallback"); } diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 977f81e37..a7d210b7f 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -11,7 +11,6 @@ use std::task::{Context, Poll}; use std::time::Duration; use pin_project_lite::pin_project; -use tracing::Span; use crate::cache::CacheName; use crate::telemetry::attributes; @@ -34,7 +33,6 @@ pin_project! { /// on every poll. This ensures the correct request ID is available /// even if the task migrates to a different thread between polls. /// - /// Same pattern as `tracing::Instrument` which re-enters the span per poll. /// Supports nesting (e.g., a `get_or_insert` closure calling another cache /// operation) by saving and restoring the previous request ID. pub(crate) struct WithRequestId { @@ -88,7 +86,7 @@ fn saturating_nanos(duration: Duration) -> u64 { /// Cache telemetry provider. /// /// This type is created internally by the cache builder and handles -/// creating cache operation spans and recording structured tracing events. +/// emitting structured tracing events and forwarding handler callbacks. #[derive(Clone, Default)] pub struct CacheTelemetry { #[cfg(any(feature = "logs", test))] @@ -155,11 +153,6 @@ impl CacheTelemetry { #[cfg(any(feature = "logs", test))] if self.logging_enabled { let duration_ns = saturating_nanos(duration); - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_EVENT, event); - span.record(attributes::FIELD_DURATION_NS, duration_ns); - } tracing::debug!(cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] @@ -176,11 +169,6 @@ impl CacheTelemetry { #[cfg(any(feature = "logs", test))] if self.logging_enabled { let duration_ns = saturating_nanos(duration); - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_EVENT, event); - span.record(attributes::FIELD_DURATION_NS, duration_ns); - } tracing::info!(cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] @@ -197,11 +185,6 @@ impl CacheTelemetry { #[cfg(any(feature = "logs", test))] if self.logging_enabled { let duration_ns = saturating_nanos(duration); - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_EVENT, event); - span.record(attributes::FIELD_DURATION_NS, duration_ns); - } tracing::error!(cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] @@ -307,10 +290,6 @@ impl CacheTelemetry { pub(crate) fn record_insert_rejected(&self, tier_name: CacheName, fallback: bool) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_EVENT, attributes::EVENT_INSERT_REJECTED); - } tracing::info!(cache.event = attributes::EVENT_INSERT_REJECTED); } self.emit_tier_event( @@ -329,10 +308,7 @@ impl CacheTelemetry { pub(crate) fn record_fallback(&self) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_FALLBACK, true); - } + tracing::info!(cache.event = "cache.fallback", cache.fallback = true); } } @@ -362,7 +338,7 @@ impl CacheTelemetry { /// Records that an entry expired in the background (moka eviction listener). /// /// Unlike [`record_expired`](Self::record_expired), this fires from a - /// background thread with no parent span, so it emits a standalone event. + /// background thread with no parent operation context, so it emits a standalone event. /// Like [`record_eviction`](Self::record_eviction), the request ID is /// read from the thread-local (non-zero when triggered synchronously /// during a cache operation). @@ -392,13 +368,13 @@ impl CacheTelemetry { ) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - let span = Span::current(); - if !span.is_disabled() { - span.record(attributes::FIELD_DURATION_NS, saturating_nanos(duration)); - if coalesced { - span.record(attributes::FIELD_COALESCED, true); - } - } + let duration_ns = saturating_nanos(duration); + tracing::debug!( + cache.name = cache_name, + cache.operation = operation, + cache.duration_ns = duration_ns, + cache.coalesced = coalesced + ); } if let Some(handler) = &self.handler { @@ -426,163 +402,6 @@ impl CacheTelemetry { self.logging_enabled = true; self } - - pub(crate) fn get_span(&self, name: CacheName) -> Span { - if self.logging_enabled { - tracing::span!( - tracing::Level::DEBUG, - "cache.get", - cache.name = name, - cache.event = tracing::field::Empty, - cache.duration_ns = tracing::field::Empty, - cache.coalesced = tracing::field::Empty, - cache.fallback = tracing::field::Empty - ) - } else { - Span::none() - } - } - - pub(crate) fn insert_span(&self, name: CacheName) -> Span { - if self.logging_enabled { - tracing::span!( - tracing::Level::DEBUG, - "cache.insert", - cache.name = name, - cache.event = tracing::field::Empty, - cache.duration_ns = tracing::field::Empty - ) - } else { - Span::none() - } - } - - pub(crate) fn invalidate_span(&self, name: CacheName) -> Span { - if self.logging_enabled { - tracing::span!( - tracing::Level::DEBUG, - "cache.invalidate", - cache.name = name, - cache.event = tracing::field::Empty, - cache.duration_ns = tracing::field::Empty - ) - } else { - Span::none() - } - } - - pub(crate) fn clear_span(&self, name: CacheName) -> Span { - if self.logging_enabled { - tracing::span!( - tracing::Level::DEBUG, - "cache.clear", - cache.name = name, - cache.event = tracing::field::Empty, - cache.duration_ns = tracing::field::Empty - ) - } else { - Span::none() - } - } - - pub(crate) fn get_or_insert_span(&self, name: CacheName) -> Span { - if self.logging_enabled { - tracing::span!( - tracing::Level::DEBUG, - "cache.get_or_insert", - cache.name = name, - cache.event = tracing::field::Empty, - cache.duration_ns = tracing::field::Empty, - cache.coalesced = tracing::field::Empty, - cache.fallback = tracing::field::Empty - ) - } else { - Span::none() - } - } - - pub(crate) fn try_get_or_insert_span(&self, name: CacheName) -> Span { - if self.logging_enabled { - tracing::span!( - tracing::Level::DEBUG, - "cache.try_get_or_insert", - cache.name = name, - cache.event = tracing::field::Empty, - cache.duration_ns = tracing::field::Empty, - cache.coalesced = tracing::field::Empty, - cache.fallback = tracing::field::Empty - ) - } else { - Span::none() - } - } - - pub(crate) fn optionally_get_or_insert_span(&self, name: CacheName) -> Span { - if self.logging_enabled { - tracing::span!( - tracing::Level::DEBUG, - "cache.optionally_get_or_insert", - cache.name = name, - cache.event = tracing::field::Empty, - cache.duration_ns = tracing::field::Empty, - cache.coalesced = tracing::field::Empty, - cache.fallback = tracing::field::Empty - ) - } else { - Span::none() - } - } - - pub(crate) fn tier_span(&self, name: CacheName) -> Span { - if self.logging_enabled { - tracing::span!( - tracing::Level::DEBUG, - "cache.tier", - cache.name = name, - cache.event = tracing::field::Empty, - cache.duration_ns = tracing::field::Empty - ) - } else { - Span::none() - } - } -} - -#[cfg(not(any(feature = "logs", test)))] -#[cfg_attr(test, mutants::skip)] // Equivalent mutants: cfg-gated off during testing. -#[expect(clippy::unused_self, reason = "Span factories are no-ops when logs are disabled")] -impl CacheTelemetry { - pub(crate) fn get_span(&self, _: CacheName) -> Span { - Span::none() - } - - pub(crate) fn insert_span(&self, _: CacheName) -> Span { - Span::none() - } - - pub(crate) fn invalidate_span(&self, _: CacheName) -> Span { - Span::none() - } - - pub(crate) fn clear_span(&self, _: CacheName) -> Span { - Span::none() - } - - pub(crate) fn get_or_insert_span(&self, _: CacheName) -> Span { - Span::none() - } - - pub(crate) fn try_get_or_insert_span(&self, _: CacheName) -> Span { - Span::none() - } - - pub(crate) fn optionally_get_or_insert_span(&self, _: CacheName) -> Span { - Span::none() - } - - pub(crate) fn tier_span(&self, _: CacheName) -> Span { - Span::none() - } } #[cfg(test)] @@ -590,19 +409,12 @@ mod tests { use std::sync::Mutex; use testing_aids::LogCapture; - use tracing::Instrument; - use tracing_subscriber::fmt::format::FmtSpan; use tracing_subscriber::layer::SubscriberExt; use super::*; fn subscriber(capture: &LogCapture) -> impl tracing::Subscriber { - tracing_subscriber::registry().with( - tracing_subscriber::fmt::layer() - .with_writer(capture.clone()) - .with_ansi(false) - .with_span_events(FmtSpan::CLOSE), - ) + tracing_subscriber::registry().with(tracing_subscriber::fmt::layer().with_writer(capture.clone()).with_ansi(false)) } #[cfg_attr(miri, ignore)] @@ -613,24 +425,27 @@ mod tests { let telemetry = CacheTelemetry::with_logging(); let request_id = next_request_id(); - futures::executor::block_on( + futures::executor::block_on(async { async { telemetry.record_hit("my_test_cache", Duration::from_nanos(12345), false); telemetry.complete_operation(request_id, "my_test_cache", "cache.get", Duration::from_nanos(12345), true); telemetry.record_fallback(); } .with_request_id(request_id) - .instrument(telemetry.get_span("my_test_cache")), - ); + .await; + }); capture.assert_contains(attributes::FIELD_NAME); capture.assert_contains(attributes::FIELD_EVENT); capture.assert_contains(attributes::FIELD_DURATION_NS); + capture.assert_contains(attributes::FIELD_OPERATION); capture.assert_contains(attributes::FIELD_COALESCED); capture.assert_contains(attributes::FIELD_FALLBACK); capture.assert_contains("my_test_cache"); capture.assert_contains(attributes::EVENT_HIT); + capture.assert_contains("cache.get"); capture.assert_contains("12345"); + capture.assert_contains("cache.fallback"); capture.assert_contains("true"); } @@ -642,58 +457,34 @@ mod tests { let capture = LogCapture::new(); let _guard = tracing::subscriber::set_default(subscriber(&capture)); let request_id = next_request_id(); - futures::executor::block_on( + futures::executor::block_on(async { async { telemetry.record_get_error("cache", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(telemetry.tier_span("cache")), - ); + .await; + }); capture.assert_contains("ERROR"); let capture = LogCapture::new(); let _guard = tracing::subscriber::set_default(subscriber(&capture)); let request_id = next_request_id(); - futures::executor::block_on( + futures::executor::block_on(async { async { telemetry.record_expired("cache", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(telemetry.tier_span("cache")), - ); + .await; + }); capture.assert_contains("INFO"); let capture = LogCapture::new(); let _guard = tracing::subscriber::set_default(subscriber(&capture)); let request_id = next_request_id(); - futures::executor::block_on( + futures::executor::block_on(async { async { telemetry.record_hit("cache", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(telemetry.tier_span("cache")), - ); + .await; + }); capture.assert_contains("DEBUG"); } - #[cfg_attr(miri, ignore)] - #[test] - fn all_span_factories_produce_enabled_spans() { - let capture = LogCapture::new(); - let _guard = tracing::subscriber::set_default(subscriber(&capture)); - let t = CacheTelemetry::with_logging(); - - // Exercise every span factory so coverage includes them all. - let spans = [ - t.get_span("c"), - t.insert_span("c"), - t.invalidate_span("c"), - t.clear_span("c"), - t.get_or_insert_span("c"), - t.try_get_or_insert_span("c"), - t.optionally_get_or_insert_span("c"), - t.tier_span("c"), - ]; - - for span in &spans { - assert!(!span.is_disabled(), "span factory should produce an enabled span"); - } - } - #[cfg_attr(miri, ignore)] #[test] fn telemetry_disabled_emits_nothing() { @@ -702,19 +493,19 @@ mod tests { let _guard = tracing::subscriber::set_default(subscriber(&capture)); let request_id = next_request_id(); - futures::executor::block_on( + futures::executor::block_on(async { async { telemetry.record_hit("cache", Duration::from_secs(1), false) } .with_request_id(request_id) - .instrument(telemetry.get_span("cache")), - ); + .await; + }); assert!(capture.output().is_empty()); } #[test] fn logging_enabled_without_subscriber_is_noop() { - // logging_enabled=true but no tracing subscriber → spans are disabled. - // This exercises the `logging_enabled && span.is_disabled()` branches. + // logging_enabled=true but no tracing subscriber. + // No panic means tracing events degrade to a no-op cleanly. let telemetry = CacheTelemetry::with_logging(); let request_id = next_request_id(); futures::executor::block_on( @@ -727,7 +518,7 @@ mod tests { } .with_request_id(request_id), ); - // No panic = all paths handled gracefully with disabled spans. + // No panic = all paths handled gracefully without a subscriber. } #[cfg_attr(miri, ignore)] @@ -744,102 +535,92 @@ mod tests { #[test] fn every_helper_emits_its_event() { assert_emits(attributes::EVENT_HIT, |t, request_id| { - futures::executor::block_on( - async { t.record_hit("c", Duration::ZERO, false) } - .with_request_id(request_id) - .instrument(t.tier_span("c")), - ); + futures::executor::block_on(async { + async { t.record_hit("c", Duration::ZERO, false) }.with_request_id(request_id).await; + }); }); assert_emits(attributes::EVENT_MISS, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_miss("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.tier_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_EXPIRED, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_expired("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.tier_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_GET_ERROR, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_get_error("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.tier_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_REFRESH_HIT, |t, request_id| { - futures::executor::block_on( - async { t.record_refresh_hit(Duration::ZERO) } - .with_request_id(request_id) - .instrument(t.get_span("c")), - ); + futures::executor::block_on(async { + async { t.record_refresh_hit(Duration::ZERO) }.with_request_id(request_id).await; + }); }); assert_emits(attributes::EVENT_REFRESH_MISS, |t, request_id| { - futures::executor::block_on( - async { t.record_refresh_miss(Duration::ZERO) } - .with_request_id(request_id) - .instrument(t.get_span("c")), - ); + futures::executor::block_on(async { + async { t.record_refresh_miss(Duration::ZERO) }.with_request_id(request_id).await; + }); }); assert_emits(attributes::EVENT_INSERTED, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_inserted("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.insert_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_INSERT_REJECTED, |t, request_id| { - futures::executor::block_on( - async { t.record_insert_rejected("c", false) } - .with_request_id(request_id) - .instrument(t.insert_span("c")), - ); + futures::executor::block_on(async { + async { t.record_insert_rejected("c", false) }.with_request_id(request_id).await; + }); }); assert_emits(attributes::EVENT_INSERT_ERROR, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_insert_error("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.insert_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_INVALIDATED, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_invalidated("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.invalidate_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_INVALIDATE_ERROR, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_invalidate_error("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.invalidate_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_CLEARED, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_cleared("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.clear_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_CLEAR_ERROR, |t, request_id| { - futures::executor::block_on( + futures::executor::block_on(async { async { t.record_clear_error("c", Duration::ZERO, false) } .with_request_id(request_id) - .instrument(t.clear_span("c")), - ); + .await; + }); }); assert_emits(attributes::EVENT_EVICTION, |t, request_id| { - futures::executor::block_on( - async { t.record_eviction("c") } - .with_request_id(request_id) - .instrument(t.get_span("c")), - ); + futures::executor::block_on(async { + async { t.record_eviction("c") }.with_request_id(request_id).await; + }); }); } @@ -954,7 +735,7 @@ mod tests { } .with_request_id(inner_id) ); - let mut inner_cx = Context::from_waker(&waker); + let mut inner_cx = Context::from_waker(waker); assert!(matches!(inner.as_mut().poll(&mut inner_cx), Poll::Ready(()))); // After inner completes, outer_id should be restored @@ -966,7 +747,7 @@ mod tests { } .with_request_id(outer_id) ); - let mut outer_cx = Context::from_waker(&waker); + let mut outer_cx = Context::from_waker(waker); assert!(matches!(outer.as_mut().poll(&mut outer_cx), Poll::Ready(()))); // After outer completes, should be reset to 0 diff --git a/crates/cachet/src/wrapper.rs b/crates/cachet/src/wrapper.rs index f27384bee..a98b6392a 100644 --- a/crates/cachet/src/wrapper.rs +++ b/crates/cachet/src/wrapper.rs @@ -12,7 +12,6 @@ use std::time::Duration; use cachet_tier::{CacheTier, SizeError}; use tick::Clock; -use tracing::Instrument; use crate::cache::CacheName; use crate::telemetry::CacheTelemetry; @@ -130,7 +129,6 @@ where CT: CacheTier + Send + Sync, { async fn get(&self, key: &K) -> Result>, Error> { - let span = self.telemetry.tier_span(self.name); async { let watch = self.clock.stopwatch(); match self.inner.get(key).await { @@ -141,12 +139,10 @@ where } } } - .instrument(span) .await } async fn insert(&self, key: K, mut entry: CacheEntry) -> Result<(), Error> { - let span = self.telemetry.tier_span(self.name); async { entry.ensure_cached_at(self.clock.system_time()); if !self.policy.should_insert(&entry) { @@ -162,12 +158,10 @@ where } result } - .instrument(span) .await } async fn invalidate(&self, key: &K) -> Result<(), Error> { - let span = self.telemetry.tier_span(self.name); async { let watch = self.clock.stopwatch(); let result = self.inner.invalidate(key).await; @@ -177,12 +171,10 @@ where } result } - .instrument(span) .await } async fn clear(&self) -> Result<(), Error> { - let span = self.telemetry.tier_span(self.name); async { let watch = self.clock.stopwatch(); let result = self.inner.clear().await; @@ -192,7 +184,6 @@ where } result } - .instrument(span) .await } diff --git a/crates/cachet/tests/cache.rs b/crates/cachet/tests/cache.rs index 5fd67f84c..9606ad39c 100644 --- a/crates/cachet/tests/cache.rs +++ b/crates/cachet/tests/cache.rs @@ -915,7 +915,7 @@ mod service_tests { #[cfg(feature = "logs")] #[cfg_attr(miri, ignore)] #[tokio::test] -async fn get_or_insert_with_logging_emits_span_and_events() { +async fn get_or_insert_with_logging_emits_operation_and_events() { let capture = testing_aids::LogCapture::new(); let _guard = tracing::subscriber::set_default(capture.subscriber()); @@ -930,7 +930,7 @@ async fn get_or_insert_with_logging_emits_span_and_events() { #[cfg(feature = "logs")] #[cfg_attr(miri, ignore)] #[tokio::test] -async fn try_get_or_insert_with_logging_emits_span_and_events() { +async fn try_get_or_insert_with_logging_emits_operation_and_events() { let capture = testing_aids::LogCapture::new(); let _guard = tracing::subscriber::set_default(capture.subscriber()); @@ -945,7 +945,7 @@ async fn try_get_or_insert_with_logging_emits_span_and_events() { #[cfg(feature = "logs")] #[cfg_attr(miri, ignore)] #[tokio::test] -async fn optionally_get_or_insert_with_logging_emits_span_and_events() { +async fn optionally_get_or_insert_with_logging_emits_operation_and_events() { let capture = testing_aids::LogCapture::new(); let _guard = tracing::subscriber::set_default(capture.subscriber()); From 356017fc4e37ddab441f0d36fcdc9a71e9d2339d Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 5 Jun 2026 12:27:44 -0400 Subject: [PATCH 18/26] Comment fixes --- crates/cachet/src/refresh.rs | 4 +- crates/cachet/src/telemetry/attributes.rs | 15 ++++-- crates/cachet/src/telemetry/cache.rs | 58 ++++++++++++----------- 3 files changed, 45 insertions(+), 32 deletions(-) diff --git a/crates/cachet/src/refresh.rs b/crates/cachet/src/refresh.rs index 81a420b91..2d550738b 100644 --- a/crates/cachet/src/refresh.rs +++ b/crates/cachet/src/refresh.rs @@ -155,7 +155,7 @@ where } async fn handle_fallback_hit(&self, key: K, value: CacheEntry, fetch_duration: Duration) { - self.telemetry.record_refresh_hit(fetch_duration); + self.telemetry.record_refresh_hit(self.name, fetch_duration); self.promote_to_primary(key, value).await; } @@ -168,7 +168,7 @@ where } fn handle_fallback_miss(&self, duration: Duration) { - self.telemetry.record_refresh_miss(duration); + self.telemetry.record_refresh_miss(self.name, duration); } } diff --git a/crates/cachet/src/telemetry/attributes.rs b/crates/cachet/src/telemetry/attributes.rs index 76aaaa358..4d9fb7f48 100644 --- a/crates/cachet/src/telemetry/attributes.rs +++ b/crates/cachet/src/telemetry/attributes.rs @@ -4,8 +4,13 @@ //! Public constants for cachet telemetry field names and event values. //! //! Use these constants to filter or match cachet telemetry events in a custom -//! `tracing_subscriber::Layer`. Cache operations emit structured events with -//! `FIELD_NAME`, `FIELD_EVENT`, `FIELD_DURATION_NS`, and additional fields when applicable. +//! `tracing_subscriber::Layer`. +//! +//! **Tier events** (hit, miss, expired, etc.) carry `FIELD_NAME`, `FIELD_EVENT`, +//! and `FIELD_DURATION_NS`. +//! +//! **Operation-complete events** carry `FIELD_NAME`, `FIELD_OPERATION`, +//! `FIELD_DURATION_NS`, and `FIELD_COALESCED`. //! //! # Example //! @@ -95,13 +100,16 @@ pub const EVENT_REFRESH_MISS: &str = "cache.refresh_miss"; /// Only emitted when eviction telemetry is enabled. pub const EVENT_EVICTION: &str = "cache.eviction"; +/// The operation used a fallback tier after the primary tier missed. +pub const EVENT_FALLBACK: &str = "cache.fallback"; + #[cfg(test)] mod tests { use super::*; #[test] fn field_constants_match_tracing_field_names() { - // These constants must match the field names used in tracing macros in cache.rs. + // These constants must match the field names used in tracing macros in telemetry/cache.rs. assert_eq!(FIELD_NAME, "cache.name"); assert_eq!(FIELD_EVENT, "cache.event"); assert_eq!(FIELD_DURATION_NS, "cache.duration_ns"); @@ -127,6 +135,7 @@ mod tests { EVENT_REFRESH_HIT, EVENT_REFRESH_MISS, EVENT_EVICTION, + EVENT_FALLBACK, ]; for (i, a) in events.iter().enumerate() { diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index a7d210b7f..2ae47aed9 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -149,15 +149,15 @@ impl CacheTelemetry { not(feature = "logs"), expect(clippy::unused_self, reason = "self.logging_enabled is used when logs is enabled") )] - fn record_debug_with_duration(&self, event: &'static str, duration: Duration) { + fn record_debug_with_duration(&self, cache_name: CacheName, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { let duration_ns = saturating_nanos(duration); - tracing::debug!(cache.event = event, cache.duration_ns = duration_ns); + tracing::debug!(cache.name = cache_name, cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] { - let _ = (event, duration); + let _ = (cache_name, event, duration); } } @@ -165,15 +165,15 @@ impl CacheTelemetry { not(feature = "logs"), expect(clippy::unused_self, reason = "self.logging_enabled is used when logs is enabled") )] - fn record_info_with_duration(&self, event: &'static str, duration: Duration) { + fn record_info_with_duration(&self, cache_name: CacheName, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { let duration_ns = saturating_nanos(duration); - tracing::info!(cache.event = event, cache.duration_ns = duration_ns); + tracing::info!(cache.name = cache_name, cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] { - let _ = (event, duration); + let _ = (cache_name, event, duration); } } @@ -181,35 +181,35 @@ impl CacheTelemetry { not(feature = "logs"), expect(clippy::unused_self, reason = "self.logging_enabled is used when logs is enabled") )] - fn record_error_with_duration(&self, event: &'static str, duration: Duration) { + fn record_error_with_duration(&self, cache_name: CacheName, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { let duration_ns = saturating_nanos(duration); - tracing::error!(cache.event = event, cache.duration_ns = duration_ns); + tracing::error!(cache.name = cache_name, cache.event = event, cache.duration_ns = duration_ns); } #[cfg(not(any(feature = "logs", test)))] { - let _ = (event, duration); + let _ = (cache_name, event, duration); } } pub(crate) fn record_hit(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_debug_with_duration(attributes::EVENT_HIT, duration); + self.record_debug_with_duration(tier_name, attributes::EVENT_HIT, duration); self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_HIT, duration, fallback); } pub(crate) fn record_miss(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_debug_with_duration(attributes::EVENT_MISS, duration); + self.record_debug_with_duration(tier_name, attributes::EVENT_MISS, duration); self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_MISS, duration, fallback); } pub(crate) fn record_expired(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_info_with_duration(attributes::EVENT_EXPIRED, duration); + self.record_info_with_duration(tier_name, attributes::EVENT_EXPIRED, duration); self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_EXPIRED, duration, fallback); } pub(crate) fn record_get_error(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_error_with_duration(attributes::EVENT_GET_ERROR, duration); + self.record_error_with_duration(tier_name, attributes::EVENT_GET_ERROR, duration); self.emit_tier_event( Self::current_request_id(), tier_name, @@ -220,7 +220,7 @@ impl CacheTelemetry { } pub(crate) fn record_inserted(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_info_with_duration(attributes::EVENT_INSERTED, duration); + self.record_info_with_duration(tier_name, attributes::EVENT_INSERTED, duration); self.emit_tier_event( Self::current_request_id(), tier_name, @@ -231,7 +231,7 @@ impl CacheTelemetry { } pub(crate) fn record_insert_error(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_error_with_duration(attributes::EVENT_INSERT_ERROR, duration); + self.record_error_with_duration(tier_name, attributes::EVENT_INSERT_ERROR, duration); self.emit_tier_event( Self::current_request_id(), tier_name, @@ -242,7 +242,7 @@ impl CacheTelemetry { } pub(crate) fn record_invalidated(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_info_with_duration(attributes::EVENT_INVALIDATED, duration); + self.record_info_with_duration(tier_name, attributes::EVENT_INVALIDATED, duration); self.emit_tier_event( Self::current_request_id(), tier_name, @@ -253,7 +253,7 @@ impl CacheTelemetry { } pub(crate) fn record_invalidate_error(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_error_with_duration(attributes::EVENT_INVALIDATE_ERROR, duration); + self.record_error_with_duration(tier_name, attributes::EVENT_INVALIDATE_ERROR, duration); self.emit_tier_event( Self::current_request_id(), tier_name, @@ -264,12 +264,12 @@ impl CacheTelemetry { } pub(crate) fn record_cleared(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_debug_with_duration(attributes::EVENT_CLEARED, duration); + self.record_debug_with_duration(tier_name, attributes::EVENT_CLEARED, duration); self.emit_tier_event(Self::current_request_id(), tier_name, attributes::EVENT_CLEARED, duration, fallback); } pub(crate) fn record_clear_error(&self, tier_name: CacheName, duration: Duration, fallback: bool) { - self.record_error_with_duration(attributes::EVENT_CLEAR_ERROR, duration); + self.record_error_with_duration(tier_name, attributes::EVENT_CLEAR_ERROR, duration); self.emit_tier_event( Self::current_request_id(), tier_name, @@ -279,18 +279,18 @@ impl CacheTelemetry { ); } - pub(crate) fn record_refresh_hit(&self, duration: Duration) { - self.record_debug_with_duration(attributes::EVENT_REFRESH_HIT, duration); + pub(crate) fn record_refresh_hit(&self, cache_name: CacheName, duration: Duration) { + self.record_debug_with_duration(cache_name, attributes::EVENT_REFRESH_HIT, duration); } - pub(crate) fn record_refresh_miss(&self, duration: Duration) { - self.record_info_with_duration(attributes::EVENT_REFRESH_MISS, duration); + pub(crate) fn record_refresh_miss(&self, cache_name: CacheName, duration: Duration) { + self.record_info_with_duration(cache_name, attributes::EVENT_REFRESH_MISS, duration); } pub(crate) fn record_insert_rejected(&self, tier_name: CacheName, fallback: bool) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - tracing::info!(cache.event = attributes::EVENT_INSERT_REJECTED); + tracing::info!(cache.name = tier_name, cache.event = attributes::EVENT_INSERT_REJECTED); } self.emit_tier_event( Self::current_request_id(), @@ -308,7 +308,7 @@ impl CacheTelemetry { pub(crate) fn record_fallback(&self) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - tracing::info!(cache.event = "cache.fallback", cache.fallback = true); + tracing::info!(cache.event = attributes::EVENT_FALLBACK, cache.fallback = true); } } @@ -562,12 +562,16 @@ mod tests { }); assert_emits(attributes::EVENT_REFRESH_HIT, |t, request_id| { futures::executor::block_on(async { - async { t.record_refresh_hit(Duration::ZERO) }.with_request_id(request_id).await; + async { t.record_refresh_hit("c", Duration::ZERO) } + .with_request_id(request_id) + .await; }); }); assert_emits(attributes::EVENT_REFRESH_MISS, |t, request_id| { futures::executor::block_on(async { - async { t.record_refresh_miss(Duration::ZERO) }.with_request_id(request_id).await; + async { t.record_refresh_miss("c", Duration::ZERO) } + .with_request_id(request_id) + .await; }); }); assert_emits(attributes::EVENT_INSERTED, |t, request_id| { From 9100bfc938a91dc1ed1d2a37474bb144fde47b9d Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 5 Jun 2026 13:01:37 -0400 Subject: [PATCH 19/26] Comment fixes --- crates/cachet/README.md | 35 +++++++++++++++++------ crates/cachet/src/builder/cache.rs | 2 +- crates/cachet/src/cache.rs | 2 +- crates/cachet/src/fallback.rs | 20 ++++++------- crates/cachet/src/lib.rs | 30 +++++++++++++------ crates/cachet/src/telemetry/attributes.rs | 13 ++------- crates/cachet/src/telemetry/cache.rs | 17 +---------- crates/cachet/src/telemetry/handler.rs | 4 +-- 8 files changed, 64 insertions(+), 59 deletions(-) diff --git a/crates/cachet/README.md b/crates/cachet/README.md index 225ba238a..3c443065d 100644 --- a/crates/cachet/README.md +++ b/crates/cachet/README.md @@ -14,7 +14,7 @@ A composable, multi-tier caching library with stampede protection, background -refresh, and built-in OpenTelemetry telemetry. +refresh, and structured telemetry. ## Why Multi-Tier Caching? @@ -89,7 +89,7 @@ builds on top of them and adds: |Stampede protection|❌|✅| |Background refresh|❌|✅| |Service middleware integration|❌|✅| -|Structured telemetry (tracing)|❌|✅| +|Structured telemetry|❌|✅| |Pluggable storage backends|❌|✅| |Clock injection for testing|❌|✅| @@ -228,13 +228,16 @@ cache.insert("key".to_string(), "value".to_string()).await?; ## Telemetry -Enable with the `logs` feature and `.enable_logs()` on the cache builder. +Cachet provides two complementary telemetry channels: + +### Tracing events -Each cache operation emits structured [`tracing`][__link20] events. Event fields include -`cache.name`, `cache.event`, `cache.duration_ns`, and `cache.operation`, with -`cache.coalesced` and `cache.fallback` recorded when applicable. +Enable with the `logs` feature and `.enable_logs()` on the cache builder. +Each tier outcome and operation completion emits a structured [`tracing`][__link20] event. -### Subscribing to events +**Tier events** carry `cache.name`, `cache.event`, and `cache.duration_ns`. +**Operation-complete events** carry `cache.name`, `cache.operation`, +`cache.duration_ns`, and `cache.coalesced`. Use [`telemetry::attributes`][__link21] constants to filter and match events in a custom `tracing_subscriber::Layer`: @@ -252,7 +255,7 @@ if event_value == attributes::EVENT_HIT { /* cache hit */ } See the `telemetry_subscriber` example for a complete demonstration. -### Event types +#### Event types |Level|Events| |-----|------| @@ -260,13 +263,24 @@ See the `telemetry_subscriber` example for a complete demonstration. |INFO|`cache.expired`, `cache.refresh_miss`, `cache.inserted`, `cache.insert_rejected`, `cache.invalidated`, `cache.eviction`| |DEBUG|`cache.hit`, `cache.miss`, `cache.refresh_hit`, `cache.cleared`| +### Event handler callback API + +Register a [`CacheEventHandler`][__link22] via +`.event_handler(handler)` on the cache builder to receive typed +[`CacheTierEvent`][__link23] and +[`CacheOperationEvent`][__link24] callbacks. +Events carry a `request_id` for correlating tier outcomes with their parent +operation. Works independently of the `logs` feature. + +See the `telemetry_accumulator` example for a DashMap-based accumulation pattern. +
This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG1SlUsq8JhozG8GxRSYYFzJGGytyZY2NjhMFGxOIGd3DC-SbYWSIgmhieXRlc2J1ZmUwLjUuM4JmY2FjaGV0ZTAuNi40gm1jYWNoZXRfbWVtb3J5ZTAuMy4zgm5jYWNoZXRfc2VydmljZWUwLjIuM4JrY2FjaGV0X3RpZXJlMC4yLjKCZHRpY2tlMC4zLjOCZ3RyYWNpbmdmMC4xLjQ0gml1bmlmbGlnaHRlMC4yLjM + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG7MdCGSTYW-OG0jYqVjxOH6KG2qAGaERLckRGyAy82bM0nbaYWSIgmhieXRlc2J1ZmUwLjUuM4JmY2FjaGV0ZTAuNi40gm1jYWNoZXRfbWVtb3J5ZTAuMy4zgm5jYWNoZXRfc2VydmljZWUwLjIuM4JrY2FjaGV0X3RpZXJlMC4yLjKCZHRpY2tlMC4zLjOCZ3RyYWNpbmdmMC4xLjQ0gml1bmlmbGlnaHRlMC4yLjM [__link0]: https://docs.rs/cachet/0.6.4/cachet/?search=TimeToRefresh [__link1]: https://crates.io/crates/uniflight/0.2.3 [__link10]: https://docs.rs/cachet_tier/0.2.2/cachet_tier/?search=CacheTier @@ -282,6 +296,9 @@ This crate was developed as part of The Oxidizer Project. Br [__link2]: https://docs.rs/cachet/0.6.4/cachet/?search=CacheBuilder::stampede_protection [__link20]: https://crates.io/crates/tracing/0.1.44 [__link21]: https://docs.rs/cachet/0.6.4/cachet/?search=telemetry::attributes + [__link22]: https://docs.rs/cachet/0.6.4/cachet/?search=telemetry::handler::CacheEventHandler + [__link23]: https://docs.rs/cachet/0.6.4/cachet/?search=telemetry::handler::CacheTierEvent + [__link24]: https://docs.rs/cachet/0.6.4/cachet/?search=telemetry::handler::CacheOperationEvent [__link3]: https://docs.rs/cachet_tier/0.2.2/cachet_tier/?search=CacheTier [__link4]: https://docs.rs/cachet_tier/0.2.2/cachet_tier/?search=DynamicCache [__link5]: https://docs.rs/cachet/0.6.4/cachet/?search=InsertPolicy diff --git a/crates/cachet/src/builder/cache.rs b/crates/cachet/src/builder/cache.rs index 9dc3d3f4f..9eea7946d 100644 --- a/crates/cachet/src/builder/cache.rs +++ b/crates/cachet/src/builder/cache.rs @@ -206,7 +206,7 @@ impl CacheBuilder { /// If not set, a name is derived from the storage type. /// /// Requires `&'static str` because the name is embedded in every telemetry - /// event (metric labels, log fields). A static reference avoids cloning the + /// event (tracing fields, handler callbacks). A static reference avoids cloning the /// name into a new allocation on each cache operation, which matters at high /// throughput. In practice, cache names are always string literals. #[must_use] diff --git a/crates/cachet/src/cache.rs b/crates/cachet/src/cache.rs index ad724cd81..6c15856a3 100644 --- a/crates/cachet/src/cache.rs +++ b/crates/cachet/src/cache.rs @@ -19,7 +19,7 @@ use crate::telemetry::cache::{WithRequestIdExt, next_request_id}; /// Type alias for cache names used in telemetry. /// /// A static reference is used so that names can be embedded in telemetry -/// attributes (metric labels, log fields) without allocating on every +/// fields (tracing events, handler callbacks) without allocating on every /// cache operation. pub type CacheName = &'static str; diff --git a/crates/cachet/src/fallback.rs b/crates/cachet/src/fallback.rs index d9ea0fdce..734c78ab4 100644 --- a/crates/cachet/src/fallback.rs +++ b/crates/cachet/src/fallback.rs @@ -101,20 +101,16 @@ where /// /// Separated from [`get`](Self::get) to keep the hot path (primary hits) small. async fn get_from_fallback(&self, key: &K) -> Result>, Error> { - async { - self.inner.telemetry.record_fallback(); - let fallback_value = self.inner.fallback.get(key).await?; - - if let Some(ref v) = fallback_value { - // Insert errors are intentionally swallowed - a failed promotion should not - // fail the overall get. The CacheWrapper around the primary tier already - // records telemetry for the insert (Inserted, Rejected, or Error). - let _ = self.inner.primary.insert(key.clone(), v.clone()).await; - } + let fallback_value = self.inner.fallback.get(key).await?; - Ok(fallback_value) + if let Some(ref v) = fallback_value { + // Insert errors are intentionally swallowed - a failed promotion should not + // fail the overall get. The CacheWrapper around the primary tier already + // records telemetry for the insert (Inserted, Rejected, or Error). + let _ = self.inner.primary.insert(key.clone(), v.clone()).await; } - .await + + Ok(fallback_value) } } diff --git a/crates/cachet/src/lib.rs b/crates/cachet/src/lib.rs index b4e583759..4e492c972 100644 --- a/crates/cachet/src/lib.rs +++ b/crates/cachet/src/lib.rs @@ -4,7 +4,7 @@ #![cfg_attr(docsrs, feature(doc_cfg))] //! A composable, multi-tier caching library with stampede protection, background -//! refresh, and built-in OpenTelemetry telemetry. +//! refresh, and structured telemetry. //! //! # Why Multi-Tier Caching? //! @@ -79,7 +79,7 @@ //! | Stampede protection | ❌ | ✅ | //! | Background refresh | ❌ | ✅ | //! | Service middleware integration | ❌ | ✅ | -//! | Structured telemetry (tracing) | ❌ | ✅ | +//! | Structured telemetry | ❌ | ✅ | //! | Pluggable storage backends | ❌ | ✅ | //! | Clock injection for testing | ❌ | ✅ | //! @@ -225,13 +225,16 @@ //! //! # Telemetry //! -//! Enable with the `logs` feature and `.enable_logs()` on the cache builder. +//! Cachet provides two complementary telemetry channels: +//! +//! ## Tracing events //! -//! Each cache operation emits structured [`tracing`] events. Event fields include -//! `cache.name`, `cache.event`, `cache.duration_ns`, and `cache.operation`, with -//! `cache.coalesced` and `cache.fallback` recorded when applicable. +//! Enable with the `logs` feature and `.enable_logs()` on the cache builder. +//! Each tier outcome and operation completion emits a structured [`tracing`] event. //! -//! ## Subscribing to events +//! **Tier events** carry `cache.name`, `cache.event`, and `cache.duration_ns`. +//! **Operation-complete events** carry `cache.name`, `cache.operation`, +//! `cache.duration_ns`, and `cache.coalesced`. //! //! Use [`telemetry::attributes`] constants to filter and match events in a //! custom `tracing_subscriber::Layer`: @@ -249,13 +252,24 @@ //! //! See the `telemetry_subscriber` example for a complete demonstration. //! -//! ## Event types +//! ### Event types //! //! | Level | Events | //! |-------|--------| //! | ERROR | `cache.get_error`, `cache.insert_error`, `cache.invalidate_error`, `cache.clear_error` | //! | INFO | `cache.expired`, `cache.refresh_miss`, `cache.inserted`, `cache.insert_rejected`, `cache.invalidated`, `cache.eviction` | //! | DEBUG | `cache.hit`, `cache.miss`, `cache.refresh_hit`, `cache.cleared` | +//! +//! ## Event handler callback API +//! +//! Register a [`CacheEventHandler`](telemetry::handler::CacheEventHandler) via +//! `.event_handler(handler)` on the cache builder to receive typed +//! [`CacheTierEvent`](telemetry::handler::CacheTierEvent) and +//! [`CacheOperationEvent`](telemetry::handler::CacheOperationEvent) callbacks. +//! Events carry a `request_id` for correlating tier outcomes with their parent +//! operation. Works independently of the `logs` feature. +//! +//! See the `telemetry_accumulator` example for a DashMap-based accumulation pattern. mod builder; mod cache; diff --git a/crates/cachet/src/telemetry/attributes.rs b/crates/cachet/src/telemetry/attributes.rs index 4d9fb7f48..4353dc5bb 100644 --- a/crates/cachet/src/telemetry/attributes.rs +++ b/crates/cachet/src/telemetry/attributes.rs @@ -7,7 +7,8 @@ //! `tracing_subscriber::Layer`. //! //! **Tier events** (hit, miss, expired, etc.) carry `FIELD_NAME`, `FIELD_EVENT`, -//! and `FIELD_DURATION_NS`. +//! and `FIELD_DURATION_NS`. The exception is `EVENT_INSERT_REJECTED`, which +//! omits `FIELD_DURATION_NS` (rejection is instantaneous, not timed). //! //! **Operation-complete events** carry `FIELD_NAME`, `FIELD_OPERATION`, //! `FIELD_DURATION_NS`, and `FIELD_COALESCED`. @@ -49,12 +50,9 @@ pub const FIELD_DURATION_NS: &str = "cache.duration_ns"; /// Field name for the cache operation name. pub const FIELD_OPERATION: &str = "cache.operation"; -/// Field name recording whether request coalescing was used. +/// Field name recording whether stampede protection was enabled for the operation. pub const FIELD_COALESCED: &str = "cache.coalesced"; -/// Field name recording whether a fallback tier was consulted. -pub const FIELD_FALLBACK: &str = "cache.fallback"; - // -- Event values (emitted in the `cache.event` field) -- /// Cache entry was found and valid. @@ -100,9 +98,6 @@ pub const EVENT_REFRESH_MISS: &str = "cache.refresh_miss"; /// Only emitted when eviction telemetry is enabled. pub const EVENT_EVICTION: &str = "cache.eviction"; -/// The operation used a fallback tier after the primary tier missed. -pub const EVENT_FALLBACK: &str = "cache.fallback"; - #[cfg(test)] mod tests { use super::*; @@ -115,7 +110,6 @@ mod tests { assert_eq!(FIELD_DURATION_NS, "cache.duration_ns"); assert_eq!(FIELD_OPERATION, "cache.operation"); assert_eq!(FIELD_COALESCED, "cache.coalesced"); - assert_eq!(FIELD_FALLBACK, "cache.fallback"); } #[test] @@ -135,7 +129,6 @@ mod tests { EVENT_REFRESH_HIT, EVENT_REFRESH_MISS, EVENT_EVICTION, - EVENT_FALLBACK, ]; for (i, a) in events.iter().enumerate() { diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 2ae47aed9..320185c0d 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -301,17 +301,6 @@ impl CacheTelemetry { ); } - #[cfg_attr( - not(feature = "logs"), - expect(clippy::unused_self, reason = "self.logging_enabled is used when logs is enabled") - )] - pub(crate) fn record_fallback(&self) { - #[cfg(any(feature = "logs", test))] - if self.logging_enabled { - tracing::info!(cache.event = attributes::EVENT_FALLBACK, cache.fallback = true); - } - } - /// Records that an entry was evicted from the cache due to capacity limits. /// /// When moka evicts during an `insert()`, the eviction listener runs @@ -346,7 +335,7 @@ impl CacheTelemetry { pub(crate) fn record_background_expired(&self, cache_name: CacheName) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - tracing::debug!(cache.name = cache_name, cache.event = attributes::EVENT_EXPIRED); + tracing::info!(cache.name = cache_name, cache.event = attributes::EVENT_EXPIRED); } self.emit_tier_event( @@ -429,7 +418,6 @@ mod tests { async { telemetry.record_hit("my_test_cache", Duration::from_nanos(12345), false); telemetry.complete_operation(request_id, "my_test_cache", "cache.get", Duration::from_nanos(12345), true); - telemetry.record_fallback(); } .with_request_id(request_id) .await; @@ -440,12 +428,10 @@ mod tests { capture.assert_contains(attributes::FIELD_DURATION_NS); capture.assert_contains(attributes::FIELD_OPERATION); capture.assert_contains(attributes::FIELD_COALESCED); - capture.assert_contains(attributes::FIELD_FALLBACK); capture.assert_contains("my_test_cache"); capture.assert_contains(attributes::EVENT_HIT); capture.assert_contains("cache.get"); capture.assert_contains("12345"); - capture.assert_contains("cache.fallback"); capture.assert_contains("true"); } @@ -513,7 +499,6 @@ mod tests { telemetry.record_hit("c", Duration::ZERO, false); telemetry.record_get_error("c", Duration::ZERO, false); telemetry.record_insert_rejected("c", false); - telemetry.record_fallback(); telemetry.complete_operation(request_id, "c", "cache.get", Duration::ZERO, true); } .with_request_id(request_id), diff --git a/crates/cachet/src/telemetry/handler.rs b/crates/cachet/src/telemetry/handler.rs index a1ac89ca1..6cedb1774 100644 --- a/crates/cachet/src/telemetry/handler.rs +++ b/crates/cachet/src/telemetry/handler.rs @@ -31,11 +31,11 @@ pub struct CacheOperationEvent<'a> { pub request_id: RequestId, /// Name of the cache. pub cache_name: &'a str, - /// The span/operation name (e.g., "cache.get", "cache.insert"). + /// The operation name (e.g., "cache.get", "cache.insert"). pub operation: &'a str, /// Total duration of the operation. pub duration: Duration, - /// Whether the request was coalesced via stampede protection. + /// Whether this operation ran with stampede protection enabled. pub coalesced: bool, } From 18c80c81bcd615a6428e4a102a3c99bb7655c8aa Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 5 Jun 2026 13:41:07 -0400 Subject: [PATCH 20/26] Fix broken docs --- crates/cachet/README.md | 2 +- crates/cachet/src/lib.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/cachet/README.md b/crates/cachet/README.md index 3c443065d..e81dfc1d2 100644 --- a/crates/cachet/README.md +++ b/crates/cachet/README.md @@ -280,7 +280,7 @@ See the `telemetry_accumulator` example for a DashMap-based accumulation pattern This crate was developed as part of The Oxidizer Project. Browse this crate's source code. - [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG7MdCGSTYW-OG0jYqVjxOH6KG2qAGaERLckRGyAy82bM0nbaYWSIgmhieXRlc2J1ZmUwLjUuM4JmY2FjaGV0ZTAuNi40gm1jYWNoZXRfbWVtb3J5ZTAuMy4zgm5jYWNoZXRfc2VydmljZWUwLjIuM4JrY2FjaGV0X3RpZXJlMC4yLjKCZHRpY2tlMC4zLjOCZ3RyYWNpbmdmMC4xLjQ0gml1bmlmbGlnaHRlMC4yLjM + [__cargo_doc2readme_dependencies_info]: ggGkYW0CYXSEGy4k8ldDFPOhG2VNeXtD5nnKG6EPY6OfW5wBG8g18NOFNdxpYXKEG_8ZSA792uloG6CGM3YZObWMG5vDWjb2V8K3G4SF7NHmnsnBYWSIgmhieXRlc2J1ZmUwLjUuM4JmY2FjaGV0ZTAuNi40gm1jYWNoZXRfbWVtb3J5ZTAuMy4zgm5jYWNoZXRfc2VydmljZWUwLjIuM4JrY2FjaGV0X3RpZXJlMC4yLjKCZHRpY2tlMC4zLjOCZ3RyYWNpbmdmMC4xLjQ0gml1bmlmbGlnaHRlMC4yLjM [__link0]: https://docs.rs/cachet/0.6.4/cachet/?search=TimeToRefresh [__link1]: https://crates.io/crates/uniflight/0.2.3 [__link10]: https://docs.rs/cachet_tier/0.2.2/cachet_tier/?search=CacheTier diff --git a/crates/cachet/src/lib.rs b/crates/cachet/src/lib.rs index 4e492c972..88f672025 100644 --- a/crates/cachet/src/lib.rs +++ b/crates/cachet/src/lib.rs @@ -262,10 +262,10 @@ //! //! ## Event handler callback API //! -//! Register a [`CacheEventHandler`](telemetry::handler::CacheEventHandler) via +//! Register a [`CacheEventHandler`] via //! `.event_handler(handler)` on the cache builder to receive typed -//! [`CacheTierEvent`](telemetry::handler::CacheTierEvent) and -//! [`CacheOperationEvent`](telemetry::handler::CacheOperationEvent) callbacks. +//! [`CacheTierEvent`] and +//! [`CacheOperationEvent`] callbacks. //! Events carry a `request_id` for correlating tier outcomes with their parent //! operation. Works independently of the `logs` feature. //! From d0410d95e6594b981fe5144e0d7139529113a8ab Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 5 Jun 2026 14:33:53 -0400 Subject: [PATCH 21/26] Fix PR comments --- crates/cachet/src/refresh.rs | 11 ++-- crates/cachet/src/telemetry/attributes.rs | 5 +- crates/cachet/src/wrapper.rs | 70 ++++++++++------------- 3 files changed, 36 insertions(+), 50 deletions(-) diff --git a/crates/cachet/src/refresh.rs b/crates/cachet/src/refresh.rs index 2d550738b..d448e804b 100644 --- a/crates/cachet/src/refresh.rs +++ b/crates/cachet/src/refresh.rs @@ -144,14 +144,11 @@ where F: CacheTier + Send + Sync + 'static, { pub(crate) async fn fetch_and_promote(&self, key: K) { - async { - let watch = self.clock.stopwatch(); - match self.fallback.get(&key).await { - Ok(Some(value)) => self.handle_fallback_hit(key, value, watch.elapsed()).await, - Ok(None) | Err(_) => self.handle_fallback_miss(watch.elapsed()), - } + let watch = self.clock.stopwatch(); + match self.fallback.get(&key).await { + Ok(Some(value)) => self.handle_fallback_hit(key, value, watch.elapsed()).await, + Ok(None) | Err(_) => self.handle_fallback_miss(watch.elapsed()), } - .await; } async fn handle_fallback_hit(&self, key: K, value: CacheEntry, fetch_duration: Duration) { diff --git a/crates/cachet/src/telemetry/attributes.rs b/crates/cachet/src/telemetry/attributes.rs index 4353dc5bb..b867cf094 100644 --- a/crates/cachet/src/telemetry/attributes.rs +++ b/crates/cachet/src/telemetry/attributes.rs @@ -7,8 +7,9 @@ //! `tracing_subscriber::Layer`. //! //! **Tier events** (hit, miss, expired, etc.) carry `FIELD_NAME`, `FIELD_EVENT`, -//! and `FIELD_DURATION_NS`. The exception is `EVENT_INSERT_REJECTED`, which -//! omits `FIELD_DURATION_NS` (rejection is instantaneous, not timed). +//! and `FIELD_DURATION_NS`. Some events intentionally omit `FIELD_DURATION_NS` +//! to indicate "not timed": `EVENT_INSERT_REJECTED`, `EVENT_EVICTION`, and +//! background `EVENT_EXPIRED` events emitted from eviction listeners. //! //! **Operation-complete events** carry `FIELD_NAME`, `FIELD_OPERATION`, //! `FIELD_DURATION_NS`, and `FIELD_COALESCED`. diff --git a/crates/cachet/src/wrapper.rs b/crates/cachet/src/wrapper.rs index a98b6392a..6317fcef3 100644 --- a/crates/cachet/src/wrapper.rs +++ b/crates/cachet/src/wrapper.rs @@ -129,62 +129,50 @@ where CT: CacheTier + Send + Sync, { async fn get(&self, key: &K) -> Result>, Error> { - async { - let watch = self.clock.stopwatch(); - match self.inner.get(key).await { - Ok(value) => Ok(self.handle_get_result(value, watch.elapsed())), - Err(e) => { - self.telemetry.record_get_error(self.name, watch.elapsed(), self.fallback); - Err(e) - } + let watch = self.clock.stopwatch(); + match self.inner.get(key).await { + Ok(value) => Ok(self.handle_get_result(value, watch.elapsed())), + Err(e) => { + self.telemetry.record_get_error(self.name, watch.elapsed(), self.fallback); + Err(e) } } - .await } async fn insert(&self, key: K, mut entry: CacheEntry) -> Result<(), Error> { - async { - entry.ensure_cached_at(self.clock.system_time()); - if !self.policy.should_insert(&entry) { - self.telemetry.record_insert_rejected(self.name, self.fallback); - return Ok(()); - } + entry.ensure_cached_at(self.clock.system_time()); + if !self.policy.should_insert(&entry) { + self.telemetry.record_insert_rejected(self.name, self.fallback); + return Ok(()); + } - let watch = self.clock.stopwatch(); - let result = self.inner.insert(key, entry).await; - match &result { - Ok(()) => self.telemetry.record_inserted(self.name, watch.elapsed(), self.fallback), - Err(_) => self.telemetry.record_insert_error(self.name, watch.elapsed(), self.fallback), - } - result + let watch = self.clock.stopwatch(); + let result = self.inner.insert(key, entry).await; + match &result { + Ok(()) => self.telemetry.record_inserted(self.name, watch.elapsed(), self.fallback), + Err(_) => self.telemetry.record_insert_error(self.name, watch.elapsed(), self.fallback), } - .await + result } async fn invalidate(&self, key: &K) -> Result<(), Error> { - async { - let watch = self.clock.stopwatch(); - let result = self.inner.invalidate(key).await; - match &result { - Ok(()) => self.telemetry.record_invalidated(self.name, watch.elapsed(), self.fallback), - Err(_) => self.telemetry.record_invalidate_error(self.name, watch.elapsed(), self.fallback), - } - result + let watch = self.clock.stopwatch(); + let result = self.inner.invalidate(key).await; + match &result { + Ok(()) => self.telemetry.record_invalidated(self.name, watch.elapsed(), self.fallback), + Err(_) => self.telemetry.record_invalidate_error(self.name, watch.elapsed(), self.fallback), } - .await + result } async fn clear(&self) -> Result<(), Error> { - async { - let watch = self.clock.stopwatch(); - let result = self.inner.clear().await; - match &result { - Ok(()) => self.telemetry.record_cleared(self.name, watch.elapsed(), self.fallback), - Err(_) => self.telemetry.record_clear_error(self.name, watch.elapsed(), self.fallback), - } - result + let watch = self.clock.stopwatch(); + let result = self.inner.clear().await; + match &result { + Ok(()) => self.telemetry.record_cleared(self.name, watch.elapsed(), self.fallback), + Err(_) => self.telemetry.record_clear_error(self.name, watch.elapsed(), self.fallback), } - .await + result } async fn len(&self) -> Result { From 7f2d29e7be07694cac7b358b186a793b57c9dbdc Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 5 Jun 2026 14:36:57 -0400 Subject: [PATCH 22/26] Make tracing optional on logs --- crates/cachet/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/cachet/Cargo.toml b/crates/cachet/Cargo.toml index 6faf12c13..da626a34b 100644 --- a/crates/cachet/Cargo.toml +++ b/crates/cachet/Cargo.toml @@ -36,7 +36,7 @@ allowed_external_types = [ [features] default = ["memory"] -logs = [] +logs = ["dep:tracing"] test-util = ["cachet_tier/test-util", "tick/test-util"] memory = ["dep:cachet_memory"] service = ["dep:cachet_service", "dep:layered"] @@ -57,7 +57,7 @@ pin-project-lite = { workspace = true } postcard = { workspace = true, optional = true } serde = { workspace = true, optional = true, features = ["derive"] } tick = { workspace = true, features = [] } -tracing = { workspace = true } +tracing = { workspace = true, optional = true } uniflight = { workspace = true } [dev-dependencies] From b9c44e8d5dc26e225aac1e29df2e7f6d949944ea Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Fri, 5 Jun 2026 17:04:46 -0400 Subject: [PATCH 23/26] PR comment fixes --- crates/cachet/benches/operations.rs | 2 +- crates/cachet/examples/telemetry_accumulator.rs | 2 +- crates/cachet/src/refresh.rs | 4 +++- crates/cachet/src/telemetry/cache.rs | 16 ++++++++++++++++ 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/crates/cachet/benches/operations.rs b/crates/cachet/benches/operations.rs index 1f63127b1..a4db39446 100644 --- a/crates/cachet/benches/operations.rs +++ b/crates/cachet/benches/operations.rs @@ -150,7 +150,7 @@ fn bench_wrapper_overhead(c: &mut Criterion) { }); }); - // With telemetry + active subscriber (measures span processing overhead) + // With telemetry + active subscriber (measures event formatting/dispatch overhead) group.bench_function("with_telemetry_subscriber", |b| { let subscriber = tracing_subscriber::registry().with(tracing_subscriber::fmt::layer().with_writer(std::io::sink).with_ansi(false)); let _guard = tracing::subscriber::set_default(subscriber); diff --git a/crates/cachet/examples/telemetry_accumulator.rs b/crates/cachet/examples/telemetry_accumulator.rs index c8879a28a..45b4e81e4 100644 --- a/crates/cachet/examples/telemetry_accumulator.rs +++ b/crates/cachet/examples/telemetry_accumulator.rs @@ -7,7 +7,7 @@ //! This pattern mirrors how a TVS-style consumer would collect tier //! outcomes, latencies, and flags into one log row per request. //! -//! Uses `DashMap` for lock-free concurrent accumulation — safe across +//! Uses `DashMap` for concurrent, low-contention accumulation — safe across //! all async runtimes, including work-stealing (tokio) and thread-per-core //! (oxidizer), even if a task migrates between cores mid-operation. //! diff --git a/crates/cachet/src/refresh.rs b/crates/cachet/src/refresh.rs index d448e804b..d5345c03e 100644 --- a/crates/cachet/src/refresh.rs +++ b/crates/cachet/src/refresh.rs @@ -18,6 +18,7 @@ use cachet_tier::{CacheEntry, CacheTier}; use parking_lot::Mutex; use crate::fallback::{FallbackCache, FallbackCacheInner}; +use crate::telemetry::cache::{WithRequestIdExt, next_request_id}; /// Configuration for background cache refresh. /// @@ -130,7 +131,8 @@ where } } }); - inner.fetch_and_promote(key).await; + let request_id = next_request_id(); + inner.fetch_and_promote(key).with_request_id(request_id).await; })); } } diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 320185c0d..0542b326a 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -279,12 +279,28 @@ impl CacheTelemetry { ); } + /// Records a successful background refresh from the fallback tier. pub(crate) fn record_refresh_hit(&self, cache_name: CacheName, duration: Duration) { self.record_debug_with_duration(cache_name, attributes::EVENT_REFRESH_HIT, duration); + self.emit_tier_event( + Self::current_request_id(), + cache_name, + attributes::EVENT_REFRESH_HIT, + duration, + true, + ); } + /// Records a background refresh that found no data in the fallback tier. pub(crate) fn record_refresh_miss(&self, cache_name: CacheName, duration: Duration) { self.record_info_with_duration(cache_name, attributes::EVENT_REFRESH_MISS, duration); + self.emit_tier_event( + Self::current_request_id(), + cache_name, + attributes::EVENT_REFRESH_MISS, + duration, + true, + ); } pub(crate) fn record_insert_rejected(&self, tier_name: CacheName, fallback: bool) { From 7056cdf7b0d4471ec223ee0ef6fec6cd7c508a1a Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Mon, 8 Jun 2026 12:27:37 -0400 Subject: [PATCH 24/26] Fix PR comments --- crates/cachet/examples/telemetry_accumulator.rs | 10 +++++++--- crates/cachet/src/refresh.rs | 15 ++++++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/crates/cachet/examples/telemetry_accumulator.rs b/crates/cachet/examples/telemetry_accumulator.rs index 45b4e81e4..2387e5bb6 100644 --- a/crates/cachet/examples/telemetry_accumulator.rs +++ b/crates/cachet/examples/telemetry_accumulator.rs @@ -49,9 +49,13 @@ impl AccumulatingHandler { impl CacheEventHandler for AccumulatingHandler { fn on_tier_event(&self, event: &CacheTierEvent<'_>) { - // Eviction events have request_id > 0 when triggered synchronously - // during an insert (capacity overflow). Background maintenance - // evictions have request_id == 0. + if event.request_id == 0 { + // Background maintenance events (moka housekeeping) have no + // associated operation. Log them immediately instead of + // accumulating, to avoid unbounded growth under key 0. + println!("[background] {} -> {}", event.tier_name, event.outcome); + return; + } self.pending.entry(event.request_id).or_default().push(TierRecord { tier_name: event.tier_name.to_owned(), outcome: event.outcome.to_owned(), diff --git a/crates/cachet/src/refresh.rs b/crates/cachet/src/refresh.rs index d5345c03e..e34dae901 100644 --- a/crates/cachet/src/refresh.rs +++ b/crates/cachet/src/refresh.rs @@ -18,6 +18,7 @@ use cachet_tier::{CacheEntry, CacheTier}; use parking_lot::Mutex; use crate::fallback::{FallbackCache, FallbackCacheInner}; +use crate::telemetry::CacheTelemetry; use crate::telemetry::cache::{WithRequestIdExt, next_request_id}; /// Configuration for background cache refresh. @@ -147,10 +148,18 @@ where { pub(crate) async fn fetch_and_promote(&self, key: K) { let watch = self.clock.stopwatch(); - match self.fallback.get(&key).await { - Ok(Some(value)) => self.handle_fallback_hit(key, value, watch.elapsed()).await, - Ok(None) | Err(_) => self.handle_fallback_miss(watch.elapsed()), + if let Ok(Some(value)) = self.fallback.get(&key).await { + self.handle_fallback_hit(key, value, watch.elapsed()).await; + } else { + self.handle_fallback_miss(watch.elapsed()); } + self.telemetry.complete_operation( + CacheTelemetry::current_request_id(), + self.name, + "cache.refresh", + watch.elapsed(), + false, + ); } async fn handle_fallback_hit(&self, key: K, value: CacheEntry, fetch_duration: Duration) { From e743ca78c7fbd32503ed14cb2241423b1b04e308 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Mon, 8 Jun 2026 13:48:56 -0400 Subject: [PATCH 25/26] Fix CI failure --- crates/cachet/src/refresh.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/cachet/src/refresh.rs b/crates/cachet/src/refresh.rs index e34dae901..9d7082770 100644 --- a/crates/cachet/src/refresh.rs +++ b/crates/cachet/src/refresh.rs @@ -330,7 +330,7 @@ mod fetch_and_promote_tests { use super::*; use crate::InsertPolicy; - use crate::telemetry::{CacheTelemetry, attributes}; + use crate::telemetry::attributes; use crate::wrapper::CacheWrapper; fn block_on(f: F) -> F::Output { From e4aa8f7c2b555f0519c78d6eed9ddb88c91f3eb6 Mon Sep 17 00:00:00 2001 From: Schuyler Goodman Date: Mon, 8 Jun 2026 15:26:05 -0400 Subject: [PATCH 26/26] Comment fixes --- crates/cachet/src/cache.rs | 10 ++++----- crates/cachet/src/telemetry/cache.rs | 31 ++++++++++++++-------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/crates/cachet/src/cache.rs b/crates/cachet/src/cache.rs index 6c15856a3..86a9b71c0 100644 --- a/crates/cachet/src/cache.rs +++ b/crates/cachet/src/cache.rs @@ -492,7 +492,7 @@ where let value = f().await; let mut entry = CacheEntry::new(value); entry.ensure_cached_at(self.clock.system_time()); - self.insert(key.clone(), entry.clone()).await?; + self.storage.insert(key.clone(), entry.clone()).await?; Ok(entry) } @@ -575,7 +575,7 @@ where } let mut entry = f().await; entry.ensure_cached_at(self.clock.system_time()); - self.insert(key.clone(), entry.clone()).await?; + self.storage.insert(key.clone(), entry.clone()).await?; Ok(entry) } @@ -656,7 +656,7 @@ where } let mut entry = f().await.map_err(Error::from_source)?; entry.ensure_cached_at(self.clock.system_time()); - self.insert(key.clone(), entry.clone()).await?; + self.storage.insert(key.clone(), entry.clone()).await?; Ok(entry) } @@ -741,7 +741,7 @@ where let value = f().await.map_err(Error::from_source)?; let mut entry = CacheEntry::new(value); entry.ensure_cached_at(self.clock.system_time()); - self.insert(key.clone(), entry.clone()).await?; + self.storage.insert(key.clone(), entry.clone()).await?; Ok(entry) } @@ -829,7 +829,7 @@ where Some(value) => { let mut entry = CacheEntry::new(value); entry.ensure_cached_at(self.clock.system_time()); - self.insert(key.clone(), entry.clone()).await?; + self.storage.insert(key.clone(), entry.clone()).await?; Ok(Some(entry)) } None => Ok(None), diff --git a/crates/cachet/src/telemetry/cache.rs b/crates/cachet/src/telemetry/cache.rs index 0542b326a..8c162dd60 100644 --- a/crates/cachet/src/telemetry/cache.rs +++ b/crates/cachet/src/telemetry/cache.rs @@ -76,13 +76,6 @@ impl WithRequestIdExt for F { } } -/// Converts a `Duration` to nanoseconds as `u64`, saturating at `u64::MAX`. -/// A `u64` of nanoseconds covers around 584 years - overflow is not a practical concern. -#[cfg(any(feature = "logs", test))] -fn saturating_nanos(duration: Duration) -> u64 { - u64::try_from(duration.as_nanos()).unwrap_or(u64::MAX) -} - /// Cache telemetry provider. /// /// This type is created internally by the cache builder and handles @@ -152,8 +145,11 @@ impl CacheTelemetry { fn record_debug_with_duration(&self, cache_name: CacheName, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - let duration_ns = saturating_nanos(duration); - tracing::debug!(cache.name = cache_name, cache.event = event, cache.duration_ns = duration_ns); + tracing::debug!( + cache.name = cache_name, + cache.event = event, + cache.duration_ns = duration.as_nanos() + ); } #[cfg(not(any(feature = "logs", test)))] { @@ -168,8 +164,11 @@ impl CacheTelemetry { fn record_info_with_duration(&self, cache_name: CacheName, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - let duration_ns = saturating_nanos(duration); - tracing::info!(cache.name = cache_name, cache.event = event, cache.duration_ns = duration_ns); + tracing::info!( + cache.name = cache_name, + cache.event = event, + cache.duration_ns = duration.as_nanos() + ); } #[cfg(not(any(feature = "logs", test)))] { @@ -184,8 +183,11 @@ impl CacheTelemetry { fn record_error_with_duration(&self, cache_name: CacheName, event: &'static str, duration: Duration) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - let duration_ns = saturating_nanos(duration); - tracing::error!(cache.name = cache_name, cache.event = event, cache.duration_ns = duration_ns); + tracing::error!( + cache.name = cache_name, + cache.event = event, + cache.duration_ns = duration.as_nanos() + ); } #[cfg(not(any(feature = "logs", test)))] { @@ -373,11 +375,10 @@ impl CacheTelemetry { ) { #[cfg(any(feature = "logs", test))] if self.logging_enabled { - let duration_ns = saturating_nanos(duration); tracing::debug!( cache.name = cache_name, cache.operation = operation, - cache.duration_ns = duration_ns, + cache.duration_ns = duration.as_nanos(), cache.coalesced = coalesced ); }