diff --git a/changelog.d/20921_add_stratified_sampling_to_sample_transform.enhancement.md b/changelog.d/20921_add_stratified_sampling_to_sample_transform.enhancement.md new file mode 100644 index 0000000000000..5fad60cda3d94 --- /dev/null +++ b/changelog.d/20921_add_stratified_sampling_to_sample_transform.enhancement.md @@ -0,0 +1,3 @@ +The `sample` transform now accepts a `group_by` configuration option: a template whose rendered value assigns each event to a group, with each group sampled independently. This can reduce the complexity of the topology, since users no longer need to create separate `sample` transforms with similar configuration to handle different log streams. + +authors: hillmandj diff --git a/src/transforms/sample/config.rs b/src/transforms/sample/config.rs index d05338493ef80..532bc2b5d0bbd 100644 --- a/src/transforms/sample/config.rs +++ b/src/transforms/sample/config.rs @@ -10,6 +10,7 @@ use crate::{ TransformOutput, }, schema, + template::Template, transforms::Transform, }; @@ -44,6 +45,16 @@ pub struct SampleConfig { #[configurable(metadata(docs::examples = "message"))] pub key_field: Option, + /// The value to group events into separate buckets to be sampled independently. + /// + /// If left unspecified, or if the event doesn't have `group_by`, then the event is not + /// sampled separately. + #[configurable(metadata( + docs::examples = "{{ service }}", + docs::examples = "{{ hostname }}-{{ service }}" + ))] + pub group_by: Option