From 91fa0fd5b77d20c458fd0107b2e3ba5992ad71b3 Mon Sep 17 00:00:00 2001 From: chencs Date: Tue, 2 Jun 2026 02:52:24 -0700 Subject: [PATCH] model/labels: Add case-insensitive prefix matching optimization (#18540) This change adds support for case-insensitive prefix matching, with the goal of improving performance especially when evaluating long case-insensitive regexes, without particularly degrading performance in other cases. Signed-off-by: Casie Chen --- model/labels/matcher_test.go | 18 ++++++++++++++++-- model/labels/regexp.go | 23 ++++++++++++++++++----- model/labels/regexp_test.go | 34 ++++++++++++++++++++++------------ 3 files changed, 56 insertions(+), 19 deletions(-) diff --git a/model/labels/matcher_test.go b/model/labels/matcher_test.go index 11ed6dd29c..48e505ef01 100644 --- a/model/labels/matcher_test.go +++ b/model/labels/matcher_test.go @@ -27,6 +27,13 @@ func mustNewMatcher(t *testing.T, mType MatchType, value string) *Matcher { return m } +func (m *Matcher) hasCaseInsensitivePrefix() bool { + if m.re == nil { + return false + } + return m.re.caseInsensitivePrefix +} + func TestMatcher(t *testing.T) { tests := []struct { matcher *Matcher @@ -137,8 +144,9 @@ func TestInverse(t *testing.T) { func TestPrefix(t *testing.T) { for i, tc := range []struct { - matcher *Matcher - prefix string + matcher *Matcher + prefix string + caseInsensitivePrefix bool }{ { matcher: mustNewMatcher(t, MatchEqual, "abc"), @@ -180,9 +188,15 @@ func TestPrefix(t *testing.T) { matcher: mustNewMatcher(t, MatchRegexp, ".+def"), prefix: "", }, + { + matcher: mustNewMatcher(t, MatchNotRegexp, "(?i)abc.+"), + prefix: "ABC", + caseInsensitivePrefix: true, + }, } { t.Run(fmt.Sprintf("%d: %s", i, tc.matcher), func(t *testing.T) { require.Equal(t, tc.prefix, tc.matcher.Prefix()) + require.Equal(t, tc.caseInsensitivePrefix, tc.matcher.hasCaseInsensitivePrefix()) }) } } diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 603f57d3c2..9374400f71 100644 --- 
a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -46,6 +46,9 @@ type FastRegexMatcher struct { suffix string contains []string + // caseInsensitivePrefix is true if prefix exists and should be matched case-insensitively + caseInsensitivePrefix bool + // matchString is the "compiled" function to run by MatchString(). matchString func(string) bool } @@ -79,7 +82,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { clearCapture(parsed) if parsed.Op == syntax.OpConcat { - m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed) + m.caseInsensitivePrefix, m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed) } if matches, caseSensitive := findSetMatches(parsed); caseSensitive { m.setMatches = matches @@ -109,6 +112,15 @@ func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool { return m.stringMatcher.Matches } + if m.caseInsensitivePrefix && m.prefix != "" { + return func(s string) bool { + if !hasPrefixCaseInsensitive(s, m.prefix) { + return false + } + return m.re.MatchString(s) + } + } + return func(s string) bool { if len(m.setMatches) != 0 { return slices.Contains(m.setMatches, s) @@ -411,7 +423,7 @@ func optimizeAlternatingSimpleContains(r *syntax.Regexp) *syntax.Regexp { // optimizeConcatRegex returns literal prefix/suffix text that can be safely // checked against the label value before running the regexp matcher. -func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []string) { +func optimizeConcatRegex(r *syntax.Regexp) (caseInsensitivePrefix bool, prefix, suffix string, contains []string) { sub := r.Sub clearCapture(sub...) 
@@ -425,14 +437,15 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []st } if len(sub) == 0 { - return prefix, suffix, contains + return caseInsensitivePrefix, prefix, suffix, contains } // Given Prometheus regex matchers are always anchored to the begin/end // of the text, if the first/last operations are literals, we can safely // treat them as prefix/suffix. - if sub[0].Op == syntax.OpLiteral && (sub[0].Flags&syntax.FoldCase) == 0 { + if sub[0].Op == syntax.OpLiteral { prefix = string(sub[0].Rune) + caseInsensitivePrefix = (sub[0].Flags & syntax.FoldCase) != 0 } if last := len(sub) - 1; sub[last].Op == syntax.OpLiteral && (sub[last].Flags&syntax.FoldCase) == 0 { suffix = string(sub[last].Rune) @@ -446,7 +459,7 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []st } } - return prefix, suffix, contains + return caseInsensitivePrefix, prefix, suffix, contains } // StringMatcher is a matcher that matches a string in place of a regular expression. diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index be3417a8c0..79d2253d96 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -47,6 +47,7 @@ var ( ".*foo.*", ".+foo.+", ".*foo.*|", + "(?i).*foo.*", ".*foo.*|bar.*", "foo.*|.*bar.*", ".*foo.*|.*bar.*", @@ -67,6 +68,8 @@ var ( "10\\.0\\.(1|2)\\.+", "10\\.0\\.(1|2).+", "((fo(bar))|.+foo)", + "(?i)report.scheduled.job_runscheduledreports", + "report.scheduled.job_runscheduledreports", // A long case sensitive alternation. 
"zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb", // An extremely long case sensitive alternation. This is a special @@ -108,6 +111,7 @@ var ( "foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "", "FOO", "Foo", "fOo", "foO", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo", "10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40", + "report.scheduled.job_runscheduledreports", "Report.Scheduled.JobRunScheduledReports", "Report.Scheduled.Job_RunScheduledReports", "foofoo0", "foofoo", "😀foo0", "ſſs", "ſſS", "AAAAAAAAAAAAAAAAAAAAAAAA", "BBBBBBBBBBBBBBBBBBBBBBBB", "cccccccccccccccccccccccC", "ſſſſſſſſſſſſſſſſſſſſſſſſS", "SSSSSSSSSSSSSSSSSSSSSSSSſ", "a-b-c-d-e", "aaaaaa-bbbbbb-cccccc-dddddd-eeeeee", @@ -154,10 +158,11 @@ func readable(s string) string { func TestOptimizeConcatRegex(t *testing.T) { cases := []struct { - regex string - prefix string - suffix string - contains []string + regex string + prefix string + isCaseInsensitivePrefix bool + suffix string + contains []string }{ {regex: "foo(hello|bar)", 
prefix: "foo", suffix: "", contains: nil}, {regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: nil}, @@ -171,12 +176,12 @@ func TestOptimizeConcatRegex(t *testing.T) { {regex: ".*[abc].*", prefix: "", suffix: "", contains: nil}, {regex: ".*((?i)abc).*", prefix: "", suffix: "", contains: nil}, {regex: ".*(?i:abc).*", prefix: "", suffix: "", contains: nil}, - {regex: "(?i:abc).*", prefix: "", suffix: "", contains: nil}, + {regex: "(?i:abc).*", prefix: "ABC", isCaseInsensitivePrefix: true, suffix: "", contains: nil}, {regex: ".*(?i:abc)", prefix: "", suffix: "", contains: nil}, {regex: ".*(?i:abc)def.*", prefix: "", suffix: "", contains: []string{"def"}}, {regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: []string{"abc"}}, {regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: []string{"abc"}}, - {regex: "[aA]bc.*", prefix: "", suffix: "", contains: []string{"bc"}}, + {regex: "[aA]bc.*", prefix: "A", isCaseInsensitivePrefix: true, suffix: "", contains: []string{"bc"}}, {regex: "^5..$", prefix: "5", suffix: "", contains: nil}, {regex: "^release.*", prefix: "release", suffix: "", contains: nil}, {regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: []string{"laio"}}, @@ -184,13 +189,16 @@ func TestOptimizeConcatRegex(t *testing.T) { } for _, c := range cases { - parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL) - require.NoError(t, err) + t.Run(c.regex, func(t *testing.T) { + parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL) + require.NoError(t, err) - prefix, suffix, contains := optimizeConcatRegex(parsed) - require.Equal(t, c.prefix, prefix) - require.Equal(t, c.suffix, suffix) - require.Equal(t, c.contains, contains) + caseInsensitivePrefix, prefix, suffix, contains := optimizeConcatRegex(parsed) + require.Equal(t, c.prefix, prefix) + require.Equal(t, c.isCaseInsensitivePrefix, caseInsensitivePrefix) + require.Equal(t, c.suffix, suffix) + require.Equal(t, c.contains, contains) + 
}) } } @@ -432,6 +440,8 @@ func TestNewFastRegexMatcher(t *testing.T) { {"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}}, {"f.?o", nil}, {".*foo.*|.*bar.*|.*baz.*", &containsStringMatcher{left: trueMatcher{}, substrings: []string{"foo", "bar", "baz"}, right: trueMatcher{}}}, + {"(?i)report.scheduled.job_runscheduledreports", nil}, + {"report.scheduled.job_runscheduledreports", nil}, } { t.Run(c.pattern, func(t *testing.T) { t.Parallel()