Skip to content

Commit 2524a91

Browse files
authored
Fix FastRegexMatcher matching multibyte runes with . (#14059)
When `zeroOrOneCharacterStringMatcher` was checking the input string, it assumed that if there is more than one byte, then there is more than one rune, but that's not necessarily true. Signed-off-by: Oleg Zaytsev <[email protected]>
1 parent 4b7a44c commit 2524a91

File tree

2 files changed

+24
-3
lines changed

2 files changed

+24
-3
lines changed

model/labels/regexp.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package labels
1616
import (
1717
"slices"
1818
"strings"
19+
"unicode/utf8"
1920

2021
"github.com/grafana/regexp"
2122
"github.com/grafana/regexp/syntax"
@@ -827,8 +828,7 @@ type zeroOrOneCharacterStringMatcher struct {
827828
}
828829

829830
func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
830-
// Zero or one.
831-
if len(s) > 1 {
831+
if moreThanOneRune(s) {
832832
return false
833833
}
834834

@@ -840,6 +840,27 @@ func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool {
840840
return s[0] != '\n'
841841
}
842842

843+
// moreThanOneRune returns true if there is more than one rune in the string.
// It doesn't check whether the string is valid UTF-8: as in
// utf8.RuneCountInString, each invalid byte counts as one rune of width 1.
// The return value is always equal to utf8.RuneCountInString(s) > 1,
// but only the first rune is inspected instead of scanning the whole string.
func moreThanOneRune(s string) bool {
	// With zero or one bytes there can't be more than one rune.
	// Exit through this path quickly.
	if len(s) <= 1 {
		return false
	}

	// There are at least two bytes here: if the first one is ASCII it is a
	// complete rune on its own, so whatever follows is at least one more rune.
	if s[0] < utf8.RuneSelf {
		return true
	}

	// Less common case: the first rune is multibyte (or an invalid byte).
	// There is more than one rune exactly when bytes remain after it.
	_, size := utf8.DecodeRuneInString(s)
	return size < len(s)
}
863+
843864
// trueMatcher is a stringMatcher which matches any string (always returns true).
844865
type trueMatcher struct{}
845866

model/labels/regexp_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ var (
8484
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
8585
"FOO", "Foo", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
8686
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
87-
"foofoo0", "foofoo",
87+
"foofoo0", "foofoo", "😀foo0",
8888

8989
// Values matching / not matching the test regexps on long alternations.
9090
"zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX",

0 commit comments

Comments
 (0)