From a48c0ace56362256444a4360284b65959c923034 Mon Sep 17 00:00:00 2001 From: William Dumont Date: Thu, 28 Mar 2024 17:38:57 +0100 Subject: [PATCH] filter instance label from the target labels used to compute the hash in clustering --- CHANGELOG.md | 2 ++ internal/component/discovery/discovery.go | 11 ++++++++--- internal/component/discovery/discovery_test.go | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 internal/component/discovery/discovery_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index b544365fef3a..db0e5a67ce44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,8 @@ Main (unreleased) whenever that argument is explicitly configured. This issue only affected a small subset of arguments across 15 components. (@erikbaranowski, @rfratto) +- Fix an issue where targets exposed by exporters were not distributed correctly between agents in clustering mode. (@wildum) + ### Other changes - Clustering for Grafana Agent in Flow mode has graduated from beta to stable. diff --git a/internal/component/discovery/discovery.go b/internal/component/discovery/discovery.go index 2d4014cbb776..c3ed55498f28 100644 --- a/internal/component/discovery/discovery.go +++ b/internal/component/discovery/discovery.go @@ -52,7 +52,7 @@ func (t *DistributedTargets) Get() []Target { res := make([]Target, 0, resCap) for _, tgt := range t.targets { - peers, err := t.cluster.Lookup(shard.StringKey(tgt.NonMetaLabels().String()), 1, shard.OpReadWrite) + peers, err := t.cluster.Lookup(shard.StringKey(tgt.FilteredLabels().String()), 1, shard.OpReadWrite) if err != nil { // This can only fail in case we ask for more owners than the // available peers. This will never happen, but in any case we fall @@ -77,10 +77,15 @@ func (t Target) Labels() labels.Labels { return lset } -func (t Target) NonMetaLabels() labels.Labels { +// FilteredLabels drops the label "instance" and the labels starting with MetaLabelPrefix. 
+// The "instance" label is set by default to the host of the collector in exporters. If the +// collectors are not running on the same host, they will have different default values. +// If the targets don't have the same set of labels between the collectors in the cluster, +// they won't be able to correctly distribute the workload because they won't compute the same hashes. +func (t Target) FilteredLabels() labels.Labels { var lset labels.Labels for k, v := range t { - if !strings.HasPrefix(k, model.MetaLabelPrefix) { + if k != "instance" && !strings.HasPrefix(k, model.MetaLabelPrefix) { lset = append(lset, labels.Label{Name: k, Value: v}) } } diff --git a/internal/component/discovery/discovery_test.go b/internal/component/discovery/discovery_test.go new file mode 100644 index 000000000000..ccae5c3717ae --- /dev/null +++ b/internal/component/discovery/discovery_test.go @@ -0,0 +1,18 @@ +package discovery + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFilteredLabels(t *testing.T) { + target := Target{ + "instance": "instanceTest", + "__meta_test": "metaTest", + "job": "jobTest", + } + labels := target.FilteredLabels() + require.Equal(t, labels.Len(), 1) + require.True(t, labels.Has("job")) +}