Skip to content

Commit

Permalink
heuristic: make wildcard character configurable (#1418)
Browse files Browse the repository at this point in the history
  • Loading branch information
marevers authored Dec 12, 2024
1 parent 064fb93 commit e05d16c
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 41 deletions.
12 changes: 9 additions & 3 deletions docs/sources/configure/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -828,8 +828,14 @@ Possible values for the `unmatched` property are:
- 🚨 Caution: this option could lead to cardinality explosion at the ingester side.
- `wildcard` will set the `http.route` field property to a generic asterisk based `/**` value.
- `heuristic` will automatically derive the `http.route` field property from the path value, based on the following rules:
- Any path components which have numbers or characters outside of the ASCII alphabet (or `-` and `_`), will be replaced by an asterisk `*`.
- Any alphabetical components which don't look like words, will be replaced by an asterisk `*`.
- Any path components which have numbers or characters outside of the ASCII alphabet (or `-` and `_`), are replaced by `wildcard_char`.
- Any alphabetical components which don't look like words, are replaced by `wildcard_char`.

| YAML | Environment variable | Type | Default |
| ----------- | ------- | ------ | ---------- |
| `wildcard_char` | -- | string | `'*'` |

Can be used together with `unmatched: heuristic` to choose what character the path components identified by the heuristic mode are replaced by. By default, an asterisk (`'*'`) is used. The value should be quoted and must be a single character.

### Special considerations when using the `heuristic` route decorator mode

Expand All @@ -848,7 +854,7 @@ document/d/CfMkAGbE_aivhFydEpaRafPuGWbmHfG/edit (no numbers in the ID)
document/d/C2fMkAGb3E_aivhFyd5EpaRafP123uGWbmHfG/edit
```

will be converted to a low cardinality route:
are converted to a low cardinality route (using the default `wildcard_char`):

```
document/d/*/edit
Expand Down
9 changes: 8 additions & 1 deletion pkg/beyla/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ var DefaultConfig = Config{
FetchTimeout: 500 * time.Millisecond,
},
},
Routes: &transform.RoutesConfig{Unmatch: transform.UnmatchDefault},
Routes: &transform.RoutesConfig{
Unmatch: transform.UnmatchDefault,
WildcardChar: "*",
},
NetworkFlows: defaultNetworkConfig,
Processes: process.CollectConfig{
RunMode: process.RunModePrivileged,
Expand Down Expand Up @@ -275,6 +278,10 @@ func (c *Config) Validate() error {
" grafana, otel_metrics_export, otel_traces_export or prometheus_export")
}

if len(c.Routes.WildcardChar) > 1 {
return ConfigError("wildcard_char can only be a single character, multiple characters are not allowed")
}

return nil
}

Expand Down
39 changes: 38 additions & 1 deletion pkg/beyla/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ network:
},
},
Routes: &transform.RoutesConfig{
Unmatch: transform.UnmatchHeuristic,
Unmatch: transform.UnmatchHeuristic,
WildcardChar: "*",
},
NameResolver: &transform.NameResolverConfig{
Sources: []string{"k8s", "dns"},
Expand Down Expand Up @@ -365,6 +366,42 @@ func TestConfigValidate_TracePrinterFallback(t *testing.T) {
assert.Equal(t, cfg.TracePrinter, debug.TracePrinterText)
}

func TestConfigValidateRoutes(t *testing.T) {
userConfig := bytes.NewBufferString(`executable_name: foo
trace_printer: text
routes:
unmatched: heuristic
wildcard_char: "*"
`)
cfg, err := LoadConfig(userConfig)
require.NoError(t, err)
require.NoError(t, cfg.Validate())
}

func TestConfigValidateRoutes_Errors(t *testing.T) {
for _, tc := range []string{
`executable_name: foo
trace_printer: text
routes:
unmatched: heuristic
wildcard_char: "##"
`, `executable_name: foo
trace_printer: text
routes:
unmatched: heuristic
wildcard_char: "random"
`,
} {
testCaseName := regexp.MustCompile("wildcard_char: (.+)\n").FindStringSubmatch(tc)[1]
t.Run(testCaseName, func(t *testing.T) {
userConfig := bytes.NewBufferString(tc)
cfg, err := LoadConfig(userConfig)
require.NoError(t, err)
require.Error(t, cfg.Validate())
})
}
}

func TestConfig_OtelGoAutoEnv(t *testing.T) {
// OTEL_GO_AUTO_TARGET_EXE is an alias to BEYLA_EXECUTABLE_NAME
// (Compatibility with OpenTelemetry)
Expand Down
10 changes: 5 additions & 5 deletions pkg/internal/transform/route/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func InitAutoClassifier() error {
}

//nolint:cyclop
func ClusterPath(path string) string {
func ClusterPath(path string, replacement byte) string {
if path == "" {
return path
}
Expand All @@ -61,11 +61,11 @@ func ClusterPath(path string) string {
if c == '/' {
nSegments++
if skip {
p[sPos] = '*'
p[sPos] = replacement
sPos++
} else if sFwd > sPos {
if !okWord(string(p[sPos:sFwd])) {
p[sPos] = '*'
p[sPos] = replacement
sPos++
} else {
sPos = sFwd
Expand Down Expand Up @@ -95,11 +95,11 @@ func ClusterPath(path string) string {
}

if skip {
p[sPos] = '*'
p[sPos] = replacement
sPos++
} else if sFwd > sPos {
if !okWord(string(p[sPos:sFwd])) {
p[sPos] = '*'
p[sPos] = replacement
sPos++
} else {
sPos = sFwd
Expand Down
52 changes: 30 additions & 22 deletions pkg/internal/transform/route/cluster_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package route

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
Expand All @@ -9,26 +10,33 @@ import (
func TestURLClustering(t *testing.T) {
err := InitAutoClassifier()
assert.NoError(t, err)
assert.Equal(t, "", ClusterPath(""))
assert.Equal(t, "/", ClusterPath("/"))
assert.Equal(t, "/users/*/j4elk/*/job/*", ClusterPath("/users/fdklsd/j4elk/23993/job/2"))
assert.Equal(t, "*", ClusterPath("123"))
assert.Equal(t, "/*", ClusterPath("/123"))
assert.Equal(t, "*/", ClusterPath("123/"))
assert.Equal(t, "*/*", ClusterPath("123/ljgdflgjf"))
assert.Equal(t, "/*", ClusterPath("/**"))
assert.Equal(t, "/u/*", ClusterPath("/u/2"))
assert.Equal(t, "/v1/products/*", ClusterPath("/v1/products/2"))
assert.Equal(t, "/v1/products/*", ClusterPath("/v1/products/22"))
assert.Equal(t, "/v1/products/*", ClusterPath("/v1/products/22j"))
assert.Equal(t, "/products/*/org/*", ClusterPath("/products/1/org/3"))
assert.Equal(t, "/products//org/*", ClusterPath("/products//org/3"))
assert.Equal(t, "/v1/k6-test-runs/*", ClusterPath("/v1/k6-test-runs/1"))
assert.Equal(t, "/attach", ClusterPath("/attach"))
assert.Equal(t, "/usuarios/*/j4elk/*/trabajo/*", ClusterPath("/usuarios/fdklsd/j4elk/23993/trabajo/2"))
assert.Equal(t, "/Benutzer/*/j4elk/*/Arbeit/*", ClusterPath("/Benutzer/fdklsd/j4elk/23993/Arbeit/2"))
assert.Equal(t, "/utilisateurs/*/j4elk/*/tache/*", ClusterPath("/utilisateurs/fdklsd/j4elk/23993/tache/2"))
assert.Equal(t, "/products/", ClusterPath("/products/"))
assert.Equal(t, "/user-space/", ClusterPath("/user-space/"))
assert.Equal(t, "/user_space/", ClusterPath("/user_space/"))

for _, tc := range []byte{'*', '#', '_', '-'} {
tcString := string(tc)

t.Run(tcString, func(t *testing.T) {
assert.Equal(t, "", ClusterPath("", tc))
assert.Equal(t, "/", ClusterPath("/", tc))
assert.Equal(t, fmt.Sprintf("/users/%[1]s/j4elk/%[1]s/job/%[1]s", tcString), ClusterPath("/users/fdklsd/j4elk/23993/job/2", tc))
assert.Equal(t, tcString, ClusterPath("123", tc))
assert.Equal(t, fmt.Sprintf("/%s", tcString), ClusterPath("/123", tc))
assert.Equal(t, fmt.Sprintf("%s/", tcString), ClusterPath("123/", tc))
assert.Equal(t, fmt.Sprintf("%[1]s/%[1]s", tcString), ClusterPath("123/ljgdflgjf", tc))
assert.Equal(t, fmt.Sprintf("/%s", tcString), ClusterPath("/**", tc))
assert.Equal(t, fmt.Sprintf("/u/%s", tcString), ClusterPath("/u/2", tc))
assert.Equal(t, fmt.Sprintf("/v1/products/%s", tcString), ClusterPath("/v1/products/2", tc))
assert.Equal(t, fmt.Sprintf("/v1/products/%s", tcString), ClusterPath("/v1/products/22", tc))
assert.Equal(t, fmt.Sprintf("/v1/products/%s", tcString), ClusterPath("/v1/products/22j", tc))
assert.Equal(t, fmt.Sprintf("/products/%[1]s/org/%[1]s", tcString), ClusterPath("/products/1/org/3", tc))
assert.Equal(t, fmt.Sprintf("/products//org/%s", tcString), ClusterPath("/products//org/3", tc))
assert.Equal(t, fmt.Sprintf("/v1/k6-test-runs/%s", tcString), ClusterPath("/v1/k6-test-runs/1", tc))
assert.Equal(t, "/attach", ClusterPath("/attach", tc))
assert.Equal(t, fmt.Sprintf("/usuarios/%[1]s/j4elk/%[1]s/trabajo/%[1]s", tcString), ClusterPath("/usuarios/fdklsd/j4elk/23993/trabajo/2", tc))
assert.Equal(t, fmt.Sprintf("/Benutzer/%[1]s/j4elk/%[1]s/Arbeit/%[1]s", tcString), ClusterPath("/Benutzer/fdklsd/j4elk/23993/Arbeit/2", tc))
assert.Equal(t, fmt.Sprintf("/utilisateurs/%[1]s/j4elk/%[1]s/tache/%[1]s", tcString), ClusterPath("/utilisateurs/fdklsd/j4elk/23993/tache/2", tc))
assert.Equal(t, "/products/", ClusterPath("/products/", tc))
assert.Equal(t, "/user-space/", ClusterPath("/user-space/", tc))
assert.Equal(t, "/user_space/", ClusterPath("/user_space/", tc))
})
}
}
18 changes: 10 additions & 8 deletions pkg/transform/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ type RoutesConfig struct {
Patterns []string `yaml:"patterns"`
IgnorePatterns []string `yaml:"ignored_patterns"`
IgnoredEvents IgnoreMode `yaml:"ignore_mode"`
// Character that will be used to replace route segments
WildcardChar string `yaml:"wildcard_char,omitempty"`
}

func RoutesProvider(rc *RoutesConfig) pipe.MiddleProvider[[]request.Span, []request.Span] {
Expand Down Expand Up @@ -98,15 +100,15 @@ func (rn *routerNode) provideRoutes() (pipe.MiddleFunc[[]request.Span, []request
if routesEnabled {
s.Route = matcher.Find(s.Path)
}
unmatchAction(s)
unmatchAction(rc, s)
}
out <- spans
}
}, nil
}

func chooseUnmatchPolicy(rc *RoutesConfig) (func(span *request.Span), error) {
var unmatchAction func(span *request.Span)
func chooseUnmatchPolicy(rc *RoutesConfig) (func(rc *RoutesConfig, span *request.Span), error) {
var unmatchAction func(rc *RoutesConfig, span *request.Span)

switch rc.Unmatch {
case UnmatchWildcard, "":
Expand Down Expand Up @@ -142,23 +144,23 @@ func chooseUnmatchPolicy(rc *RoutesConfig) (func(span *request.Span), error) {
return unmatchAction, nil
}

func leaveUnmatchEmpty(_ *request.Span) {}
func leaveUnmatchEmpty(_ *RoutesConfig, _ *request.Span) {}

func setUnmatchToWildcard(str *request.Span) {
func setUnmatchToWildcard(_ *RoutesConfig, str *request.Span) {
if str.Route == "" {
str.Route = wildCard
}
}

func setUnmatchToPath(str *request.Span) {
func setUnmatchToPath(_ *RoutesConfig, str *request.Span) {
if str.Route == "" {
str.Route = str.Path
}
}

func classifyFromPath(s *request.Span) {
func classifyFromPath(rc *RoutesConfig, s *request.Span) {
if s.Route == "" && (s.Type == request.EventTypeHTTP || s.Type == request.EventTypeHTTPClient) {
s.Route = route.ClusterPath(s.Path)
s.Route = route.ClusterPath(s.Path, rc.WildcardChar[0])
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/transform/routes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func TestUnmatchedEmpty(t *testing.T) {
func TestUnmatchedAuto(t *testing.T) {
for _, tc := range []UnmatchType{UnmatchHeuristic} {
t.Run(string(tc), func(t *testing.T) {
router, err := RoutesProvider(&RoutesConfig{Unmatch: tc, Patterns: []string{"/user/:id"}})()
router, err := RoutesProvider(&RoutesConfig{Unmatch: tc, Patterns: []string{"/user/:id"}, WildcardChar: "*"})()
require.NoError(t, err)
in, out := make(chan []request.Span, 10), make(chan []request.Span, 10)
defer close(in)
Expand Down

0 comments on commit e05d16c

Please sign in to comment.