From a919e6d5ef9f058cb7a2b1ed83f13f85c8cb147f Mon Sep 17 00:00:00 2001 From: Charles Korn Date: Thu, 8 Jan 2026 21:20:23 +1100 Subject: [PATCH] model/labels: improve performance of regex matchers like `.*-.*-.*` (#17707) #14173 introduced an optimisation to better handle regex patterns like .*-.*-.*. It identifies strings the pattern cannot possibly match (because they do not contain all of the literal values) and returns false from MatchString early. However, if the string does contain all literal values, then the Go regex engine is used to confirm that the string does match the pattern. But this is not necessary in the case where the start and end of the pattern are .* and everything in between is either a literal or .*: if the string contains all of the literals in order, then it matches the pattern, and invoking Go's regex engine to confirm this is unnecessary and quite slow. * Add some more test cases * Add benchmark, since existing benchmark doesn't show much impact given most of the random test strings will not match the patterns. Signed-off-by: Charles Korn --- model/labels/regexp.go | 47 ++++++++++++++++++++++++++- model/labels/regexp_test.go | 65 ++++++++++++++++++++++++++++++++----- 2 files changed, 102 insertions(+), 10 deletions(-) diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 5123bbc7dd..a4bdf885ee 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -77,7 +77,18 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { if matches, caseSensitive := findSetMatches(parsed); caseSensitive { m.setMatches = matches } - m.stringMatcher = stringMatcherFromRegexp(parsed) + + // Check if we have a pattern like .*-.*-.*. + // If so, then we can rely on the containsInOrder check in compileMatchStringFunction, + // so no further inspection of the string is required. + // We can't do this in stringMatcherFromRegexpInternal as we only want to apply this + // if the top-level pattern satisfies this requirement. 
+ if isSimpleConcatenationPattern(parsed) { + m.stringMatcher = trueMatcher{} + } else { + m.stringMatcher = stringMatcherFromRegexp(parsed) + } + m.matchString = m.compileMatchStringFunction() } @@ -566,6 +577,40 @@ func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher { return nil } +// isSimpleConcatenationPattern returns true if re contains only literals or wildcard matchers, +// and starts and ends with a wildcard matcher (eg. .*-.*-.*). +func isSimpleConcatenationPattern(re *syntax.Regexp) bool { + if re.Op != syntax.OpConcat { + return false + } + + if len(re.Sub) < 2 { + return false + } + + first := re.Sub[0] + last := re.Sub[len(re.Sub)-1] + if !isMatchAny(first) || !isMatchAny(last) { + return false + } + + for _, re := range re.Sub[1 : len(re.Sub)-1] { + if !isMatchAny(re) && !isCaseSensitiveLiteral(re) { + return false + } + } + + return true +} + +func isMatchAny(re *syntax.Regexp) bool { + return re.Op == syntax.OpStar && re.Sub[0].Op == syntax.OpAnyChar +} + +func isCaseSensitiveLiteral(re *syntax.Regexp) bool { + return re.Op == syntax.OpLiteral && isCaseSensitive(re) +} + // containsStringMatcher matches a string if it contains any of the substrings. // If left and right are not nil, it's a contains operation where left and right must match. // If left is nil, it's a hasPrefix operation and right must match. diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 2fb5e806f0..85cbe02a1f 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -87,6 +87,9 @@ var ( "ſſs", // Concat of literals and wildcards. 
".*-.*-.*-.*-.*", + ".+-.*-.*-.*-.+", + "-.*-.*-.*-.*", + ".*-.*-.*-.*-", "(.+)-(.+)-(.+)-(.+)-(.+)", "((.*))(?i:f)((.*))o((.*))o((.*))", "((.*))f((.*))(?i:o)((.*))o((.*))", @@ -96,6 +99,11 @@ var ( "FOO", "Foo", "fOo", "foO", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo", "10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40", "foofoo0", "foofoo", "😀foo0", "ſſs", "ſſS", "AAAAAAAAAAAAAAAAAAAAAAAA", "BBBBBBBBBBBBBBBBBBBBBBBB", "cccccccccccccccccccccccC", "ſſſſſſſſſſſſſſſſſſſſſſſſS", "SSSSSSSSSSSSSSSSSSSSSSSSſ", + "a-b-c-d-e", + "aaaaaa-bbbbbb-cccccc-dddddd-eeeeee", + "aaaaaa----eeeeee", + "----", + "-a-a-a-", // Values matching / not matching the test regexps on long alternations. "zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX", @@ -162,6 +170,7 @@ func TestOptimizeConcatRegex(t *testing.T) { {regex: "^5..$", prefix: "5", suffix: "", contains: nil}, {regex: "^release.*", prefix: "release", suffix: "", contains: nil}, {regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: []string{"laio"}}, + {regex: ".*-.*-.*-.*-.*", prefix: "", suffix: "", contains: []string{"-", "-", "-", "-"}}, } for _, c := range cases { @@ -341,7 +350,7 @@ func BenchmarkToNormalizedLower(b *testing.B) { } } -func TestStringMatcherFromRegexp(t *testing.T) { +func TestNewFastRegexMatcher(t *testing.T) { for _, c := range []struct { pattern string exp StringMatcher @@ -364,12 +373,12 @@ func TestStringMatcherFromRegexp(t *testing.T) { {`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, {"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", 
caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})}, {"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})}, - {".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, - {"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, - {"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {".*foo.*", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient. + {"(.*)foo.*", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient. + {"(.*)foo(.*)", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient. {"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: trueMatcher{}}}, {"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, - {"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}}, + {"^(.*)(foo)(.*)$", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient. 
{"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: trueMatcher{}}}, {"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, {"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}}, @@ -388,7 +397,7 @@ func TestStringMatcherFromRegexp(t *testing.T) { {"(api|rpc)_(v1|prom)_((?i)push|query)", nil}, {"[a-z][a-z]", nil}, {"[1^3]", nil}, - {".*foo.*bar.*", nil}, + {".*foo.*bar.*", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient. {`\d*`, nil}, {".", nil}, {"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: trueMatcher{}}}}}, @@ -415,10 +424,9 @@ func TestStringMatcherFromRegexp(t *testing.T) { } { t.Run(c.pattern, func(t *testing.T) { t.Parallel() - parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL) + matcher, err := NewFastRegexMatcher(c.pattern) require.NoError(t, err) - matches := stringMatcherFromRegexp(parsed) - require.Equal(t, c.exp, matches) + require.Equal(t, c.exp, matcher.stringMatcher) }) } } @@ -1389,3 +1397,42 @@ func TestToNormalisedLower(t *testing.T) { require.Equal(t, expectedOutput, toNormalisedLower(input, nil)) } } + +func TestIsSimpleConcatenationPattern(t *testing.T) { + testCases := map[string]bool{ + ".*-.*-.*-.*-.*": true, + ".+-.*-.*-.*-.+": false, + "-.*-.*-.*-.*": false, + ".*-.*-.*-.*-": false, + "-": false, + ".*": false, + } + + for testCase, expected := range testCases { + t.Run(testCase, func(t *testing.T) { + re, err := syntax.Parse(testCase, syntax.Perl|syntax.DotNL) + require.NoError(t, err) + require.Equal(t, expected, isSimpleConcatenationPattern(re)) + }) + } +} + 
+func BenchmarkFastRegexMatcher_ConcatenatedPattern(b *testing.B) { + pattern, err := NewFastRegexMatcher(".*-.*-.*-.*-.*") + require.NoError(b, err) + + testCases := []string{ + "a-b-c-d-e", + "aaaaaa-bbbbbb-cccccc-dddddd-eeeeee", + "aaaaaa----eeeeee", + "----", + "-a-a-a-", + "abcd", + } + + for b.Loop() { + for _, s := range testCases { + pattern.MatchString(s) + } + } +}