model/labels: improve performance of regex matchers like .*-.*-.* (#17707)

#14173 introduced an optimisation to better handle regex patterns like .*-.*-.*. It identifies strings the pattern cannot possibly match (because they do not contain all of the literal values) and returns false from MatchString early.

However, if the string does contain all literal values, then the Go regex engine is used to confirm that the string does match the pattern. This is not necessary when the pattern both starts and ends with .* and everything in between is either a literal or .*: if the string contains all of the literals in order, then it matches the pattern, so invoking Go's regex engine to confirm this is redundant and quite slow.

* Add some more test cases
* Add benchmark, since existing benchmark doesn't show much impact given most of the random test strings will not match the patterns.

Signed-off-by: Charles Korn <charles.korn@grafana.com>
This commit is contained in:
Charles Korn 2026-01-08 21:20:23 +11:00 committed by GitHub
parent 4f337c2a41
commit a919e6d5ef
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 102 additions and 10 deletions

View file

@ -77,7 +77,18 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
if matches, caseSensitive := findSetMatches(parsed); caseSensitive {
m.setMatches = matches
}
m.stringMatcher = stringMatcherFromRegexp(parsed)
// Check if we have a pattern like .*-.*-.*.
// If so, then we can rely on the containsInOrder check in compileMatchStringFunction,
// so no further inspection of the string is required.
// We can't do this in stringMatcherFromRegexpInternal as we only want to apply this
// if the top-level pattern satisfies this requirement.
if isSimpleConcatenationPattern(parsed) {
m.stringMatcher = trueMatcher{}
} else {
m.stringMatcher = stringMatcherFromRegexp(parsed)
}
m.matchString = m.compileMatchStringFunction()
}
@ -566,6 +577,40 @@ func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
return nil
}
// isSimpleConcatenationPattern reports whether re is a concatenation that both
// begins and ends with a wildcard matcher and whose inner elements are all
// either wildcard matchers or case-sensitive literals (e.g. .*-.*-.*).
func isSimpleConcatenationPattern(re *syntax.Regexp) bool {
	subs := re.Sub
	if re.Op != syntax.OpConcat || len(subs) < 2 {
		return false
	}

	// Both ends of the concatenation must be wildcard matchers.
	if !(isMatchAny(subs[0]) && isMatchAny(subs[len(subs)-1])) {
		return false
	}

	// Everything between the two ends must be a wildcard or a case-sensitive literal.
	for _, part := range subs[1 : len(subs)-1] {
		if isMatchAny(part) || isCaseSensitiveLiteral(part) {
			continue
		}
		return false
	}
	return true
}
// isMatchAny reports whether re is a zero-or-more repetition (OpStar) whose
// operand is the any-character operator (OpAnyChar), which is how .* parses
// when . is allowed to match newlines.
func isMatchAny(re *syntax.Regexp) bool {
	if re.Op != syntax.OpStar {
		return false
	}
	return re.Sub[0].Op == syntax.OpAnyChar
}
// isCaseSensitiveLiteral reports whether re is a literal node (OpLiteral)
// that isCaseSensitive also accepts.
func isCaseSensitiveLiteral(re *syntax.Regexp) bool {
	if re.Op != syntax.OpLiteral {
		return false
	}
	return isCaseSensitive(re)
}
// containsStringMatcher matches a string if it contains any of the substrings.
// If left and right are not nil, it's a contains operation where left and right must match.
// If left is nil, it's a hasPrefix operation and right must match.

View file

@ -87,6 +87,9 @@ var (
"ſſs",
// Concat of literals and wildcards.
".*-.*-.*-.*-.*",
".+-.*-.*-.*-.+",
"-.*-.*-.*-.*",
".*-.*-.*-.*-",
"(.+)-(.+)-(.+)-(.+)-(.+)",
"((.*))(?i:f)((.*))o((.*))o((.*))",
"((.*))f((.*))(?i:o)((.*))o((.*))",
@ -96,6 +99,11 @@ var (
"FOO", "Foo", "fOo", "foO", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
"foofoo0", "foofoo", "😀foo0", "ſſs", "ſſS", "AAAAAAAAAAAAAAAAAAAAAAAA", "BBBBBBBBBBBBBBBBBBBBBBBB", "cccccccccccccccccccccccC", "ſſſſſſſſſſſſſſſſſſſſſſſſS", "SSSSSSSSSSSSSSSSSSSSSSSSſ",
"a-b-c-d-e",
"aaaaaa-bbbbbb-cccccc-dddddd-eeeeee",
"aaaaaa----eeeeee",
"----",
"-a-a-a-",
// Values matching / not matching the test regexps on long alternations.
"zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX",
@ -162,6 +170,7 @@ func TestOptimizeConcatRegex(t *testing.T) {
{regex: "^5..$", prefix: "5", suffix: "", contains: nil},
{regex: "^release.*", prefix: "release", suffix: "", contains: nil},
{regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: []string{"laio"}},
{regex: ".*-.*-.*-.*-.*", prefix: "", suffix: "", contains: []string{"-", "-", "-", "-"}},
}
for _, c := range cases {
@ -341,7 +350,7 @@ func BenchmarkToNormalizedLower(b *testing.B) {
}
}
func TestStringMatcherFromRegexp(t *testing.T) {
func TestNewFastRegexMatcher(t *testing.T) {
for _, c := range []struct {
pattern string
exp StringMatcher
@ -364,12 +373,12 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
{"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
{"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
{".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
{"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
{"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
{".*foo.*", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
{"(.*)foo.*", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
{"(.*)foo(.*)", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
{"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: trueMatcher{}}},
{"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
{"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
{"^(.*)(foo)(.*)$", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
{"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: trueMatcher{}}},
{"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
{"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
@ -388,7 +397,7 @@ func TestStringMatcherFromRegexp(t *testing.T) {
{"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
{"[a-z][a-z]", nil},
{"[1^3]", nil},
{".*foo.*bar.*", nil},
{".*foo.*bar.*", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
{`\d*`, nil},
{".", nil},
{"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: trueMatcher{}}}}},
@ -415,10 +424,9 @@ func TestStringMatcherFromRegexp(t *testing.T) {
} {
t.Run(c.pattern, func(t *testing.T) {
t.Parallel()
parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL)
matcher, err := NewFastRegexMatcher(c.pattern)
require.NoError(t, err)
matches := stringMatcherFromRegexp(parsed)
require.Equal(t, c.exp, matches)
require.Equal(t, c.exp, matcher.stringMatcher)
})
}
}
@ -1389,3 +1397,42 @@ func TestToNormalisedLower(t *testing.T) {
require.Equal(t, expectedOutput, toNormalisedLower(input, nil))
}
}
// TestIsSimpleConcatenationPattern parses each pattern with the Perl and DotNL
// flags and checks the result of isSimpleConcatenationPattern against the
// expected value.
func TestIsSimpleConcatenationPattern(t *testing.T) {
	const parseFlags = syntax.Perl | syntax.DotNL

	// Pattern -> whether it should be reported as a simple concatenation.
	expectations := map[string]bool{
		".*-.*-.*-.*-.*": true,
		".+-.*-.*-.*-.+": false,
		"-.*-.*-.*-.*":   false,
		".*-.*-.*-.*-":   false,
		"-":              false,
		".*":             false,
	}

	for pattern, want := range expectations {
		t.Run(pattern, func(t *testing.T) {
			parsed, err := syntax.Parse(pattern, parseFlags)
			require.NoError(t, err)

			got := isSimpleConcatenationPattern(parsed)
			require.Equal(t, want, got)
		})
	}
}
// BenchmarkFastRegexMatcher_ConcatenatedPattern measures MatchString on the
// matcher built for .*-.*-.*-.*-.* over a fixed set of inputs. All inputs
// except "abcd" contain four '-' characters.
func BenchmarkFastRegexMatcher_ConcatenatedPattern(b *testing.B) {
	inputs := []string{
		"a-b-c-d-e",
		"aaaaaa-bbbbbb-cccccc-dddddd-eeeeee",
		"aaaaaa----eeeeee",
		"----",
		"-a-a-a-",
		"abcd",
	}

	matcher, err := NewFastRegexMatcher(".*-.*-.*-.*-.*")
	require.NoError(b, err)

	for b.Loop() {
		for i := range inputs {
			matcher.MatchString(inputs[i])
		}
	}
}