mirror of
https://github.com/prometheus/prometheus.git
synced 2026-06-05 06:32:15 -04:00
model/labels: Add case-insensitive prefix matching optimization (#18540)
This change adds support for case-insensitive prefix matching. The goal is to improve performance, especially when evaluating long case-insensitive regexes, without particularly degrading performance in other cases. Signed-off-by: Casie Chen <casie.chen@grafana.com>
This commit is contained in:
parent
87866e0c3f
commit
91fa0fd5b7
3 changed files with 56 additions and 19 deletions
|
|
@ -27,6 +27,13 @@ func mustNewMatcher(t *testing.T, mType MatchType, value string) *Matcher {
|
|||
return m
|
||||
}
|
||||
|
||||
// hasCaseInsensitivePrefix reports whether the matcher's underlying regexp matcher flags its prefix as case-insensitive. It returns false when m.re is nil.
func (m *Matcher) hasCaseInsensitivePrefix() bool {
|
||||
if m.re == nil {
|
||||
return false
|
||||
}
|
||||
return m.re.caseInsensitivePrefix
|
||||
}
|
||||
|
||||
func TestMatcher(t *testing.T) {
|
||||
tests := []struct {
|
||||
matcher *Matcher
|
||||
|
|
@ -137,8 +144,9 @@ func TestInverse(t *testing.T) {
|
|||
|
||||
func TestPrefix(t *testing.T) {
|
||||
for i, tc := range []struct {
|
||||
matcher *Matcher
|
||||
prefix string
|
||||
matcher *Matcher
|
||||
prefix string
|
||||
caseInsensitivePrefix bool
|
||||
}{
|
||||
{
|
||||
matcher: mustNewMatcher(t, MatchEqual, "abc"),
|
||||
|
|
@ -180,9 +188,15 @@ func TestPrefix(t *testing.T) {
|
|||
matcher: mustNewMatcher(t, MatchRegexp, ".+def"),
|
||||
prefix: "",
|
||||
},
|
||||
{
|
||||
matcher: mustNewMatcher(t, MatchNotRegexp, "(?i)abc.+"),
|
||||
prefix: "ABC",
|
||||
caseInsensitivePrefix: true,
|
||||
},
|
||||
} {
|
||||
t.Run(fmt.Sprintf("%d: %s", i, tc.matcher), func(t *testing.T) {
|
||||
require.Equal(t, tc.prefix, tc.matcher.Prefix())
|
||||
require.Equal(t, tc.caseInsensitivePrefix, tc.matcher.hasCaseInsensitivePrefix())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,9 @@ type FastRegexMatcher struct {
|
|||
suffix string
|
||||
contains []string
|
||||
|
||||
// caseInsensitivePrefix is true if prefix is non-empty and must be matched case-insensitively.
|
||||
caseInsensitivePrefix bool
|
||||
|
||||
// matchString is the "compiled" function to run by MatchString().
|
||||
matchString func(string) bool
|
||||
}
|
||||
|
|
@ -79,7 +82,7 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
|
|||
clearCapture(parsed)
|
||||
|
||||
if parsed.Op == syntax.OpConcat {
|
||||
m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
|
||||
m.caseInsensitivePrefix, m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed)
|
||||
}
|
||||
if matches, caseSensitive := findSetMatches(parsed); caseSensitive {
|
||||
m.setMatches = matches
|
||||
|
|
@ -109,6 +112,15 @@ func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool {
|
|||
return m.stringMatcher.Matches
|
||||
}
|
||||
|
||||
if m.caseInsensitivePrefix && m.prefix != "" {
|
||||
return func(s string) bool {
|
||||
if !hasPrefixCaseInsensitive(s, m.prefix) {
|
||||
return false
|
||||
}
|
||||
return m.re.MatchString(s)
|
||||
}
|
||||
}
|
||||
|
||||
return func(s string) bool {
|
||||
if len(m.setMatches) != 0 {
|
||||
return slices.Contains(m.setMatches, s)
|
||||
|
|
@ -411,7 +423,7 @@ func optimizeAlternatingSimpleContains(r *syntax.Regexp) *syntax.Regexp {
|
|||
|
||||
// optimizeConcatRegex returns literal prefix/suffix text that can be safely
|
||||
// checked against the label value before running the regexp matcher, and reports whether the prefix must be matched case-insensitively.
|
||||
func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []string) {
|
||||
func optimizeConcatRegex(r *syntax.Regexp) (caseInsensitivePrefix bool, prefix, suffix string, contains []string) {
|
||||
sub := r.Sub
|
||||
clearCapture(sub...)
|
||||
|
||||
|
|
@ -425,14 +437,15 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []st
|
|||
}
|
||||
|
||||
if len(sub) == 0 {
|
||||
return prefix, suffix, contains
|
||||
return caseInsensitivePrefix, prefix, suffix, contains
|
||||
}
|
||||
|
||||
// Given Prometheus regex matchers are always anchored to the begin/end
|
||||
// of the text, if the first/last operations are literals, we can safely
|
||||
// treat them as prefix/suffix.
|
||||
if sub[0].Op == syntax.OpLiteral && (sub[0].Flags&syntax.FoldCase) == 0 {
|
||||
if sub[0].Op == syntax.OpLiteral {
|
||||
prefix = string(sub[0].Rune)
|
||||
caseInsensitivePrefix = (sub[0].Flags & syntax.FoldCase) != 0
|
||||
}
|
||||
if last := len(sub) - 1; sub[last].Op == syntax.OpLiteral && (sub[last].Flags&syntax.FoldCase) == 0 {
|
||||
suffix = string(sub[last].Rune)
|
||||
|
|
@ -446,7 +459,7 @@ func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []st
|
|||
}
|
||||
}
|
||||
|
||||
return prefix, suffix, contains
|
||||
return caseInsensitivePrefix, prefix, suffix, contains
|
||||
}
|
||||
|
||||
// StringMatcher is a matcher that matches a string in place of a regular expression.
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ var (
|
|||
".*foo.*",
|
||||
".+foo.+",
|
||||
".*foo.*|",
|
||||
"(?i).*foo.*",
|
||||
".*foo.*|bar.*",
|
||||
"foo.*|.*bar.*",
|
||||
".*foo.*|.*bar.*",
|
||||
|
|
@ -67,6 +68,8 @@ var (
|
|||
"10\\.0\\.(1|2)\\.+",
|
||||
"10\\.0\\.(1|2).+",
|
||||
"((fo(bar))|.+foo)",
|
||||
"(?i)report.scheduled.job_runscheduledreports",
|
||||
"report.scheduled.job_runscheduledreports",
|
||||
// A long case sensitive alternation.
|
||||
"zQPbMkNO|NNSPdvMi|iWuuSoAl|qbvKMimS|IecrXtPa|seTckYqt|NxnyHkgB|fIDlOgKb|UhlWIygH|OtNoJxHG|cUTkFVIV|mTgFIHjr|jQkoIDtE|PPMKxRXl|AwMfwVkQ|CQyMrTQJ|BzrqxVSi|nTpcWuhF|PertdywG|ZZDgCtXN|WWdDPyyE|uVtNQsKk|BdeCHvPZ|wshRnFlH|aOUIitIp|RxZeCdXT|CFZMslCj|AVBZRDxl|IzIGCnhw|ythYuWiz|oztXVXhl|VbLkwqQx|qvaUgyVC|VawUjPWC|ecloYJuj|boCLTdSU|uPrKeAZx|hrMWLWBq|JOnUNHRM|rYnujkPq|dDEdZhIj|DRrfvugG|yEGfDxVV|YMYdJWuP|PHUQZNWM|AmKNrLis|zTxndVfn|FPsHoJnc|EIulZTua|KlAPhdzg|ScHJJCLt|NtTfMzME|eMCwuFdo|SEpJVJbR|cdhXZeCx|sAVtBwRh|kVFEVcMI|jzJrxraA|tGLHTell|NNWoeSaw|DcOKSetX|UXZAJyka|THpMphDP|rizheevl|kDCBRidd|pCZZRqyu|pSygkitl|SwZGkAaW|wILOrfNX|QkwVOerj|kHOMxPDr|EwOVycJv|AJvtzQFS|yEOjKYYB|LizIINLL|JBRSsfcG|YPiUqqNl|IsdEbvee|MjEpGcBm|OxXZVgEQ|xClXGuxa|UzRCGFEb|buJbvfvA|IPZQxRet|oFYShsMc|oBHffuHO|bzzKrcBR|KAjzrGCl|IPUsAVls|OGMUMbIU|gyDccHuR|bjlalnDd|ZLWjeMna|fdsuIlxQ|dVXtiomV|XxedTjNg|XWMHlNoA|nnyqArQX|opfkWGhb|wYtnhdYb",
|
||||
// An extremely long case sensitive alternation. This is a special
|
||||
|
|
@ -108,6 +111,7 @@ var (
|
|||
"foo", " foo bar", "bar", "buzz\nbar", "bar foo", "bfoo", "\n", "\nfoo", "foo\n", "hello foo world", "hello foo\n world", "",
|
||||
"FOO", "Foo", "fOo", "foO", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
|
||||
"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
|
||||
"report.scheduled.job_runscheduledreports", "Report.Scheduled.JobRunScheduledReports", "Report.Scheduled.Job_RunScheduledReports",
|
||||
"foofoo0", "foofoo", "😀foo0", "ſſs", "ſſS", "AAAAAAAAAAAAAAAAAAAAAAAA", "BBBBBBBBBBBBBBBBBBBBBBBB", "cccccccccccccccccccccccC", "ſſſſſſſſſſſſſſſſſſſſſſſſS", "SSSSSSSSSSSSSSSSSSSSSSSSſ",
|
||||
"a-b-c-d-e",
|
||||
"aaaaaa-bbbbbb-cccccc-dddddd-eeeeee",
|
||||
|
|
@ -154,10 +158,11 @@ func readable(s string) string {
|
|||
|
||||
func TestOptimizeConcatRegex(t *testing.T) {
|
||||
cases := []struct {
|
||||
regex string
|
||||
prefix string
|
||||
suffix string
|
||||
contains []string
|
||||
regex string
|
||||
prefix string
|
||||
isCaseInsensitivePrefix bool
|
||||
suffix string
|
||||
contains []string
|
||||
}{
|
||||
{regex: "foo(hello|bar)", prefix: "foo", suffix: "", contains: nil},
|
||||
{regex: "foo(hello|bar)world", prefix: "foo", suffix: "world", contains: nil},
|
||||
|
|
@ -171,12 +176,12 @@ func TestOptimizeConcatRegex(t *testing.T) {
|
|||
{regex: ".*[abc].*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: ".*((?i)abc).*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: ".*(?i:abc).*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: "(?i:abc).*", prefix: "", suffix: "", contains: nil},
|
||||
{regex: "(?i:abc).*", prefix: "ABC", isCaseInsensitivePrefix: true, suffix: "", contains: nil},
|
||||
{regex: ".*(?i:abc)", prefix: "", suffix: "", contains: nil},
|
||||
{regex: ".*(?i:abc)def.*", prefix: "", suffix: "", contains: []string{"def"}},
|
||||
{regex: "(?i).*(?-i:abc)def", prefix: "", suffix: "", contains: []string{"abc"}},
|
||||
{regex: ".*(?msU:abc).*", prefix: "", suffix: "", contains: []string{"abc"}},
|
||||
{regex: "[aA]bc.*", prefix: "", suffix: "", contains: []string{"bc"}},
|
||||
{regex: "[aA]bc.*", prefix: "A", isCaseInsensitivePrefix: true, suffix: "", contains: []string{"bc"}},
|
||||
{regex: "^5..$", prefix: "5", suffix: "", contains: nil},
|
||||
{regex: "^release.*", prefix: "release", suffix: "", contains: nil},
|
||||
{regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: []string{"laio"}},
|
||||
|
|
@ -184,13 +189,16 @@ func TestOptimizeConcatRegex(t *testing.T) {
|
|||
}
|
||||
|
||||
for _, c := range cases {
|
||||
parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL)
|
||||
require.NoError(t, err)
|
||||
t.Run(c.regex, func(t *testing.T) {
|
||||
parsed, err := syntax.Parse(c.regex, syntax.Perl|syntax.DotNL)
|
||||
require.NoError(t, err)
|
||||
|
||||
prefix, suffix, contains := optimizeConcatRegex(parsed)
|
||||
require.Equal(t, c.prefix, prefix)
|
||||
require.Equal(t, c.suffix, suffix)
|
||||
require.Equal(t, c.contains, contains)
|
||||
caseInsensitivePrefix, prefix, suffix, contains := optimizeConcatRegex(parsed)
|
||||
require.Equal(t, c.prefix, prefix)
|
||||
require.Equal(t, c.isCaseInsensitivePrefix, caseInsensitivePrefix)
|
||||
require.Equal(t, c.suffix, suffix)
|
||||
require.Equal(t, c.contains, contains)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -432,6 +440,8 @@ func TestNewFastRegexMatcher(t *testing.T) {
|
|||
{"foo.?", &literalPrefixSensitiveStringMatcher{prefix: "foo", right: &zeroOrOneCharacterStringMatcher{matchNL: true}}},
|
||||
{"f.?o", nil},
|
||||
{".*foo.*|.*bar.*|.*baz.*", &containsStringMatcher{left: trueMatcher{}, substrings: []string{"foo", "bar", "baz"}, right: trueMatcher{}}},
|
||||
{"(?i)report.scheduled.job_runscheduledreports", nil},
|
||||
{"report.scheduled.job_runscheduledreports", nil},
|
||||
} {
|
||||
t.Run(c.pattern, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
|
|
|||
Loading…
Reference in a new issue