model/labels: Add case-insensitive prefix matching optimization (#18540)

This change adds support for case-insensitive prefix matching. The goal is to improve performance when evaluating long case-insensitive regexes in particular, without particularly degrading performance in other cases.

Signed-off-by: Casie Chen <casie.chen@grafana.com>
This commit is contained in:
chencs 2026-06-02 02:52:24 -07:00 committed by GitHub
parent 87866e0c3f
commit 91fa0fd5b7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 56 additions and 19 deletions

View file

@ -27,6 +27,13 @@ func mustNewMatcher(t *testing.T, mType MatchType, value string) *Matcher {
return m
}
// hasCaseInsensitivePrefix reports whether the matcher is backed by a regexp
// matcher whose caseInsensitivePrefix flag is set. Matchers without a regexp
// matcher (m.re == nil) always report false.
func (m *Matcher) hasCaseInsensitivePrefix() bool {
	return m.re != nil && m.re.caseInsensitivePrefix
}
func TestMatcher(t *testing.T) {
tests := []struct {
matcher *Matcher
@ -137,8 +144,9 @@ func TestInverse(t *testing.T) {
func TestPrefix(t *testing.T) {
for i, tc := range []struct {
matcher *Matcher
prefix string
matcher *Matcher
prefix string
caseInsensitivePrefix bool
}{
{
matcher: mustNewMatcher(t, MatchEqual, "abc"),
@ -180,9 +188,15 @@ func TestPrefix(t *testing.T) {
matcher: mustNewMatcher(t, MatchRegexp, ".+def"),
prefix: "",
},
{
matcher: mustNewMatcher(t, MatchNotRegexp, "(?i)abc.+"),
prefix: "ABC",
caseInsensitivePrefix: true,
},
} {
t.Run(fmt.Sprintf("%d: %s", i, tc.matcher), func(t *testing.T) {
require.Equal(t, tc.prefix, tc.matcher.Prefix())
require.Equal(t, tc.caseInsensitivePrefix, tc.matcher.hasCaseInsensitivePrefix())
})
}
}

View file

@ -46,6 +46,9 @@ type FastRegexMatcher struct {
suffix string
contains []string
// caseInsensitivePrefix is true if prefix exists and should be matched case-insensitively
caseInsensitivePrefix bool
// matchString is the "compiled" function to run by MatchString().
matchString func(string) bool
}
@ -79,7 +82,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
clearCapture(parsed)
if parsed.Op == syntax.OpConcat {
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
m.caseInsensitivePrefix, m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
}
if matches, caseSensitive := findSetMatches(parsed); caseSensitive {
m.setMatches = matches
@ -109,6 +112,15 @@ func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool {
return m.stringMatcher.Matches
}
if m.caseInsensitivePrefix && m.prefix != "" {
return func(s string) bool {
if !hasPrefixCaseInsensitive(s, m.prefix) {
return false
}
return m.re.MatchString(s)
}
}
return func(s string) bool {
if len(m.setMatches) != 0 {
return slices.Contains(m.setMatches, s)
@ -411,7 +423,7 @@ func optimizeAlternatingSimpleContains(r *syntax.Regexp) *syntax.Regexp {
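// optimizeConcatRegex returns literal prefix/suffix text that can be safely
// checked against the label value before running the regexp matcher.
// caseInsensitivePrefix reports whether the returned prefix comes from a
// case-folded literal and therefore must be compared case-insensitively.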
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []string) {
func optimizeConcatRegex(r *syntax.Regexp) (caseInsensitivePrefix bool, prefix, suffix string, contains []string) {
sub := r.Sub
clearCapture(sub...)
@ -425,14 +437,15 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []st
}
if len(sub) == 0 {
return prefix, suffix, contains
return caseInsensitivePrefix, prefix, suffix, contains
}
// Given Prometheus regex matchers are always anchored to the begin/end
// of the text, if the first/last operations are literals, we can safely
// treat them as prefix/suffix.
if sub[0].Op == syntax.OpLiteral && (sub[0].Flags&syntax.FoldCase) == 0 {
if sub[0].Op == syntax.OpLiteral {
prefix = string(sub[0].Rune)
caseInsensitivePrefix = (sub[0].Flags & syntax.FoldCase) != 0
}
if last := len(sub) - 1; sub[last].Op == syntax.OpLiteral && (sub[last].Flags&syntax.FoldCase) == 0 {
suffix = string(sub[last].Rune)
@ -446,7 +459,7 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []st
}
}
return prefix, suffix, contains
return caseInsensitivePrefix, prefix, suffix, contains
}
// StringMatcher is a matcher that matches a string in place of a regular expression.

View file

@ -47,6 +47,7 @@ var (
".*foo.*",
".+foo.+",
".*foo.*|",
"(?i).*foo.*",
".*foo.*|bar.*",
"foo.*|.*bar.*",
".*foo.*|.*bar.*",
@ -67,6 +68,8 @@ var (
"10\\.0\\.(1|2)\\.+",
"10\\.0\\.(1|2).+",
"((fo(bar))|.+foo)",
"(?i)report.scheduled.job_runscheduledreports",
"report.scheduled.job_runscheduledreports",
// A long case sensitive alternation.
"zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb",
// An extremely long case sensitive alternation. This is a special
@ -108,6 +111,7 @@ var (
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
"FOO", "Foo", "fOo", "foO", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
"report.scheduled.job_runscheduledreports", "Report.Scheduled.JobRunScheduledReports", "Report.Scheduled.Job_RunScheduledReports",
"foofoo0", "foofoo", "😀foo0", "ſſs", "ſſS", "AAAAAAAAAAAAAAAAAAAAAAAA", "BBBBBBBBBBBBBBBBBBBBBBBB", "cccccccccccccccccccccccC", "ſſſſſſſſſſſſſſſſſſſſſſſſS", "SSSSSSSSSSSSSSSSSSSSSSSSſ",
"a-b-c-d-e",
"aaaaaa-bbbbbb-cccccc-dddddd-eeeeee",
@ -154,10 +158,11 @@ func readable(s string) string {
func TestOptimizeConcatRegex(t *testing.T) {
cases := []struct {
regex string
prefix string
suffix string
contains []string
regex string
prefix string
isCaseInsensitivePrefix bool
suffix string
contains []string
}{
{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: nil},
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: nil},
@ -171,12 +176,12 @@ func TestOptimizeConcatRegex(t *testing.T) {
{regex: ".*[abc].*", prefix: "", suffix: "", contains: nil},
{regex: ".*((?i)abc).*", prefix: "", suffix: "", contains: nil},
{regex: ".*(?i:abc).*", prefix: "", suffix: "", contains: nil},
{regex: "(?i:abc).*", prefix: "", suffix: "", contains: nil},
{regex: "(?i:abc).*", prefix: "ABC", isCaseInsensitivePrefix: true, suffix: "", contains: nil},
{regex: ".*(?i:abc)", prefix: "", suffix: "", contains: nil},
{regex: ".*(?i:abc)def.*", prefix: "", suffix: "", contains: []string{"def"}},
{regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: []string{"abc"}},
{regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: []string{"abc"}},
{regex: "[aA]bc.*", prefix: "", suffix: "", contains: []string{"bc"}},
{regex: "[aA]bc.*", prefix: "A", isCaseInsensitivePrefix: true, suffix: "", contains: []string{"bc"}},
{regex: "^5..$", prefix: "5", suffix: "", contains: nil},
{regex: "^release.*", prefix: "release", suffix: "", contains: nil},
{regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: []string{"laio"}},
@ -184,13 +189,16 @@ func TestOptimizeConcatRegex(t *testing.T) {
}
for _, c := range cases {
parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL)
require.NoError(t, err)
t.Run(c.regex, func(t *testing.T) {
parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL)
require.NoError(t, err)
prefix, suffix, contains := optimizeConcatRegex(parsed)
require.Equal(t, c.prefix, prefix)
require.Equal(t, c.suffix, suffix)
require.Equal(t, c.contains, contains)
caseInsensitivePrefix, prefix, suffix, contains := optimizeConcatRegex(parsed)
require.Equal(t, c.prefix, prefix)
require.Equal(t, c.isCaseInsensitivePrefix, caseInsensitivePrefix)
require.Equal(t, c.suffix, suffix)
require.Equal(t, c.contains, contains)
})
}
}
@ -432,6 +440,8 @@ func TestNewFastRegexMatcher(t *testing.T) {
{"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}},
{"f.?o", nil},
{".*foo.*|.*bar.*|.*baz.*", &containsStringMatcher{left: trueMatcher{}, substrings: []string{"foo", "bar", "baz"}, right: trueMatcher{}}},
{"(?i)report.scheduled.job_runscheduledreports", nil},
{"report.scheduled.job_runscheduledreports", nil},
} {
t.Run(c.pattern, func(t *testing.T) {
t.Parallel()