metrics: Improve flexibility of H2Histogram Configuration

Based on feedback tokio-rs#6960, customers want more flexibility in histogram configuration. The main change here is allowing precision for an H2 Histgoram to go all the way down to zero—there is not fundamental reason this wasn't possible before, but typically H2 histograms stop at P=2. I've also added another optimization that truncates the number of buckets based on the requested max_value. This can save space by truncating in the middle of a bucket group. Along the way and in the process of adding more tests, I found a couple of bugs which are fixed here: 1. The `max_value` in LogHistogramBuilder wasn't precisely respected 2. The `max_value` computed by `LogHistogram` was actually incorrect for some n/p combinations. The new version precisely considers bucket layout instead.
rcoh · Nov 8, 2024 · 215a94b · 215a94b
1 parent bb7ca75
commit 215a94b
Show file tree

Hide file tree

Showing 3 changed files with 125 additions and 13 deletions.
diff --git a/tokio/src/runtime/builder.rs b/tokio/src/runtime/builder.rs
@@ -1245,8 +1245,30 @@ impl Builder {
         ///     .unwrap();
         /// ```
         ///
+        /// Configure a `LogHistogram` with similar behavior to [`HistogramScale::Log`]
+        /// ```rust
+        ///
+        /// use std::time::Duration;
+        /// use tokio::runtime::{HistogramConfiguration, LogHistogram};
+        /// let rt = tokio::runtime::Builder::new_current_thread()
+        ///     .enable_all()
+        ///     .enable_metrics_poll_time_histogram()
+        ///     .metrics_poll_time_histogram_configuration(HistogramConfiguration::log(
+        ///         LogHistogram::builder()
+        ///             .max_value(Duration::from_millis(5))
+        ///             .min_value(Duration::from_micros(20))
+        ///             // Set `precision_exact` to `0` to match `HistogramScale::Log`
+        ///             .precision_exact(0)
+        ///             .max_buckets(10)
+        ///             .unwrap(),
+        ///     ))
+        ///     .build()
+        ///     .unwrap();
+        /// ```
+        ///
         /// [`LogHistogram`]: crate::runtime::LogHistogram
         /// [default configuration]: crate::runtime::LogHistogramBuilder
+        /// [`HistogramScale::Log`]: crate::runtime::HistogramScale::Log
         pub fn metrics_poll_time_histogram_configuration(&mut self, configuration: HistogramConfiguration) -> &mut Self {
             self.metrics_poll_count_histogram.histogram_type = configuration.inner;
             self

diff --git a/tokio/src/runtime/metrics/histogram/h2_histogram.rs b/tokio/src/runtime/metrics/histogram/h2_histogram.rs
@@ -9,6 +9,7 @@ const DEFAULT_MAX_VALUE: Duration = Duration::from_secs(60);
 
 /// Default precision is 2^-2 = 25% max error
 const DEFAULT_PRECISION: u32 = 2;
+const MAX_PRECISION: u32 = 10;
 
 /// Log Histogram
 ///
@@ -57,15 +58,23 @@ impl LogHistogram {
         }
     }
 
+    fn truncate_to_max_value(&self, max_value: u64) -> LogHistogram {
+        let mut hist = self.clone();
+        while hist.max_value() >= max_value {
+            hist.num_buckets -= 1;
+        }
+        hist.num_buckets += 1;
+        hist
+    }
+
     /// Creates a builder for [`LogHistogram`]
     pub fn builder() -> LogHistogramBuilder {
         LogHistogramBuilder::default()
     }
 
     /// The maximum value that can be stored before truncation in this histogram
     pub fn max_value(&self) -> u64 {
-        let n = (self.num_buckets / (1 << self.p)) - 1 + self.p as usize;
-        (1_u64 << n) - 1
+        self.bucket_range(self.num_buckets - 2).end
     }
 
     pub(crate) fn value_to_bucket(&self, value: u64) -> usize {
@@ -155,6 +164,10 @@ impl LogHistogramBuilder {
     /// such that `2^-p` is less than `precision`. To set `p` directly, use
     /// [`LogHistogramBuilder::precision_exact`].
     ///
+    /// To match the behavior of the previous log histogram implementation, use
+    /// `builder.precision_exact(0)`. This creates a histogram where each bucket is twice the size
+    /// of the previous value.
+    ///
     /// The default value is 25% (2^-2)
     ///
     /// The highest supported precision is `0.0977%` `(2^-10)`. Provided values
@@ -171,7 +184,7 @@ impl LogHistogramBuilder {
             panic!("precision must be > 1");
         };
         let mut p = 2;
-        while 2_f64.powf(-1.0 * p as f64) > max_error && p <= 10 {
+        while 2_f64.powf(-1.0 * p as f64) > max_error && p <= MAX_PRECISION {
             p += 1;
         }
         self.precision = Some(p);
@@ -180,14 +193,16 @@ impl LogHistogramBuilder {
 
     /// Sets the precision of this histogram directly.
     ///
+    /// The precision (meaning: the ratio `n/bucket_range(n)` for some given `n`) will be `2^-p`.
+    ///
+    /// To match the behavior of the previous log histogram implementation, use
+    /// `builder.precision_exact(0)`. This creates a histogram where each bucket is twice the size
+    /// of the previous value.
+    ///
     /// # Panics
-    /// - `p` < 2
     /// - `p` > 10
     pub fn precision_exact(mut self, p: u32) -> Self {
-        if p < 2 {
-            panic!("precision must be >= 2");
-        };
-        if p > 10 {
+        if p > MAX_PRECISION {
             panic!("precision must be <= 10");
         };
         self.precision = Some(p);
@@ -234,16 +249,17 @@ impl LogHistogramBuilder {
 
     /// Builds the log histogram
     pub fn build(&self) -> LogHistogram {
-        let max_value = duration_as_u64(self.max_value.unwrap_or(DEFAULT_MAX_VALUE));
-        let max_value = max_value.next_power_of_two();
+        let requested_max_value = duration_as_u64(self.max_value.unwrap_or(DEFAULT_MAX_VALUE));
+        let max_value = requested_max_value.next_power_of_two();
         let min_value = duration_as_u64(self.min_value.unwrap_or(DEFAULT_MIN_VALUE));
         let p = self.precision.unwrap_or(DEFAULT_PRECISION);
         // determine the bucket offset by finding the bucket for the minimum value. We need to lower
         // this by one to ensure we are at least as granular as requested.
         let bucket_offset = cmp::max(bucket_index(min_value, p), 1) - 1;
         // n must be at least as large as p
-        let n = max_value.ilog2().max(p);
+        let n = max_value.ilog2().max(p) + 1;
         LogHistogram::from_n_p(n, p, bucket_offset as usize)
+            .truncate_to_max_value(requested_max_value)
     }
 }
 
@@ -291,21 +307,51 @@ mod test {
     use super::InvalidHistogramConfiguration;
     use crate::runtime::metrics::histogram::h2_histogram::LogHistogram;
     use crate::runtime::metrics::histogram::HistogramType;
+    use std::time::Duration;
 
     #[cfg(not(target_family = "wasm"))]
     mod proptests {
         use super::*;
+        use crate::runtime::metrics::batch::duration_as_u64;
+        use crate::runtime::metrics::histogram::h2_histogram::MAX_PRECISION;
         use proptest::prelude::*;
+        use std::time::Duration;
+
         fn valid_log_histogram_strategy() -> impl Strategy<Value = LogHistogram> {
-            (2..=50u32, 2..=16u32, 0..100usize).prop_map(|(n, p, bucket_offset)| {
+            (1..=50u32, 0..=MAX_PRECISION, 0..100usize).prop_map(|(n, p, bucket_offset)| {
                 let p = p.min(n);
                 let base = LogHistogram::from_n_p(n, p, 0);
                 LogHistogram::from_n_p(n, p, bucket_offset.min(base.num_buckets - 1))
             })
         }
 
+        fn log_histogram_settings() -> impl Strategy<Value = (u64, u64, u32)> {
+            (
+                duration_as_u64(Duration::from_nanos(1))..duration_as_u64(Duration::from_secs(20)),
+                duration_as_u64(Duration::from_secs(1))..duration_as_u64(Duration::from_secs(1000)),
+                0..MAX_PRECISION,
+            )
+        }
+
         // test against a wide assortment of different histogram configurations to ensure invariants hold
         proptest! {
+            #[test]
+            fn log_histogram_settings_maintain_invariants((min_value, max_value, p) in log_histogram_settings()) {
+                if max_value < min_value {
+                    return Ok(())
+                }
+                let (min_value, max_value) = (Duration::from_nanos(min_value), Duration::from_nanos(max_value));
+                let histogram = LogHistogram::builder().min_value(min_value).max_value(max_value).precision_exact(p).build();
+                let first_bucket_end = Duration::from_nanos(histogram.bucket_range(0).end);
+                let last_bucket_start = Duration::from_nanos(histogram.bucket_range(histogram.num_buckets - 1).start);
+                let second_last_bucket_start = Duration::from_nanos(histogram.bucket_range(histogram.num_buckets - 2).start);
+                prop_assert!(first_bucket_end  <= min_value, "first bucket {first_bucket_end:?} must be less than {min_value:?}");
+                prop_assert!(last_bucket_start > max_value, "last bucket start ({last_bucket_start:?} must be at least as big as `max_value` ({max_value:?})");
+
+                // We should have the exact right number of buckets. The second to last bucket should be strictly less than max value.
+                prop_assert!(second_last_bucket_start < max_value, "second last bucket end ({second_last_bucket_start:?} must be at least as big as `max_value` ({max_value:?})");
+            }
+
             #[test]
             fn proptest_log_histogram_invariants(histogram in valid_log_histogram_strategy()) {
                 // 1. Assert that the first bucket always starts at 0
@@ -400,6 +446,27 @@ mod test {
         }
     }
 
+    #[test]
+    fn configuration_edge_cases() {
+        let (min_value, max_value, p) =
+            (Duration::from_nanos(1), Duration::from_nanos(1000000000), 1);
+        let histogram = LogHistogram::builder()
+            .min_value(min_value)
+            .max_value(max_value)
+            .precision_exact(p)
+            .build();
+        // print bucket ranges as durations
+        for i in 0..histogram.num_buckets {
+            let range = histogram.bucket_range(i);
+            println!(
+                "bucket {} range: {:?} - {:?}",
+                i,
+                Duration::from_nanos(range.start),
+                Duration::from_nanos(range.end)
+            );
+        }
+    }
+
     // test buckets against known values
     #[test]
     fn bucket_computation_spot_check() {
@@ -469,7 +536,7 @@ mod test {
                 required_bucket_count,
             } => required_bucket_count,
         };
-        assert_eq!(num_buckets, 27549);
+        assert_eq!(num_buckets, 27291);
     }
 
     #[test]

diff --git a/tokio/tests/rt_unstable_metrics.rs b/tokio/tests/rt_unstable_metrics.rs
@@ -424,6 +424,29 @@ fn log_histogram() {
     assert_eq!(N, n);
 }
 
+#[test]
+fn minimal_log_histogram() {
+    let rt = tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .enable_metrics_poll_time_histogram()
+        .metrics_poll_time_histogram_configuration(HistogramConfiguration::log(
+            LogHistogram::builder()
+                .max_value(Duration::from_millis(4))
+                .min_value(Duration::from_micros(20))
+                .precision_exact(0),
+        ))
+        .build()
+        .unwrap();
+    let metrics = rt.metrics();
+    let num_buckets = rt.metrics().poll_time_histogram_num_buckets();
+    for b in 0..num_buckets {
+        let range = metrics.poll_time_histogram_bucket_range(b);
+        let size = range.end - range.start;
+        println!("bucket {b}: {range:?} (size: {size:?})");
+    }
+    assert_eq!(num_buckets, 10);
+}
+
 #[test]
 #[allow(deprecated)]
 fn legacy_log_histogram() {