diff --git a/model/labels/regexp.go b/model/labels/regexp.go
index 5123bbc7dd..a4bdf885ee 100644
--- a/model/labels/regexp.go
+++ b/model/labels/regexp.go
@@ -77,7 +77,18 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
 		if matches, caseSensitive := findSetMatches(parsed); caseSensitive {
 			m.setMatches = matches
 		}
-		m.stringMatcher = stringMatcherFromRegexp(parsed)
+
+		// Check if we have a pattern like .*-.*-.*.
+		// If so, then we can rely on the containsInOrder check in compileMatchStringFunction,
+		// so no further inspection of the string is required.
+		// We can't do this in stringMatcherFromRegexpInternal as we only want to apply this
+		// if the top-level pattern satisfies this requirement.
+		if isSimpleConcatenationPattern(parsed) {
+			m.stringMatcher = trueMatcher{}
+		} else {
+			m.stringMatcher = stringMatcherFromRegexp(parsed)
+		}
+
 		m.matchString = m.compileMatchStringFunction()
 	}
 
@@ -566,6 +577,46 @@ func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher {
 	return nil
 }
 
+// isSimpleConcatenationPattern returns true if re is a concatenation that contains only
+// case-sensitive literals or wildcard matchers (.*), and starts and ends with a wildcard
+// matcher (eg. .*-.*-.*). Any other pattern, including one that starts or ends with a
+// literal or uses .+ instead of .*, returns false.
+func isSimpleConcatenationPattern(re *syntax.Regexp) bool {
+	if re.Op != syntax.OpConcat {
+		return false
+	}
+
+	if len(re.Sub) < 2 {
+		return false
+	}
+
+	first := re.Sub[0]
+	last := re.Sub[len(re.Sub)-1]
+	if !isMatchAny(first) || !isMatchAny(last) {
+		return false
+	}
+
+	// The first and last elements were checked above, so only the interior remains.
+	for _, sub := range re.Sub[1 : len(re.Sub)-1] {
+		if !isMatchAny(sub) && !isCaseSensitiveLiteral(sub) {
+			return false
+		}
+	}
+
+	return true
+}
+
+// isMatchAny returns true if re is a star applied to the any-character operator,
+// i.e. a .* that may also match newlines.
+func isMatchAny(re *syntax.Regexp) bool {
+	return re.Op == syntax.OpStar && len(re.Sub) == 1 && re.Sub[0].Op == syntax.OpAnyChar
+}
+
+// isCaseSensitiveLiteral returns true if re is a literal that isCaseSensitive accepts.
+func isCaseSensitiveLiteral(re *syntax.Regexp) bool {
+	return re.Op == syntax.OpLiteral && isCaseSensitive(re)
+}
+
 // containsStringMatcher matches a string if it contains any of the substrings.
 // If left and right are not nil, it's a contains operation where left and right must match.
 // If left is nil, it's a hasPrefix operation and right must match.
diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go
index 2fb5e806f0..85cbe02a1f 100644
--- a/model/labels/regexp_test.go
+++ b/model/labels/regexp_test.go
@@ -87,6 +87,9 @@ var (
 		"ſſs",
 		// Concat of literals and wildcards.
 		".*-.*-.*-.*-.*",
+		".+-.*-.*-.*-.+",
+		"-.*-.*-.*-.*",
+		".*-.*-.*-.*-",
 		"(.+)-(.+)-(.+)-(.+)-(.+)",
 		"((.*))(?i:f)((.*))o((.*))o((.*))",
 		"((.*))f((.*))(?i:o)((.*))o((.*))",
@@ -96,6 +99,11 @@ var (
 		"FOO", "Foo", "fOo", "foO", "OO", "Oo", "\nfoo\n", strings.Repeat("f", 20), "prometheus", "prometheus_api_v1", "prometheus_api_v1_foo",
 		"10.0.1.20", "10.0.2.10", "10.0.3.30", "10.0.4.40",
 		"foofoo0", "foofoo", "😀foo0", "ſſs", "ſſS", "AAAAAAAAAAAAAAAAAAAAAAAA", "BBBBBBBBBBBBBBBBBBBBBBBB", "cccccccccccccccccccccccC", "ſſſſſſſſſſſſſſſſſſſſſſſſS", "SSSSSSSSSSSSSSSSSSSSSSSSſ",
+		"a-b-c-d-e",
+		"aaaaaa-bbbbbb-cccccc-dddddd-eeeeee",
+		"aaaaaa----eeeeee",
+		"----",
+		"-a-a-a-",
 
 		// Values matching / not matching the test regexps on long alternations.
 		"zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX",
@@ -162,6 +170,7 @@ func TestOptimizeConcatRegex(t *testing.T) {
 		{regex: "^5..$", prefix: "5", suffix: "", contains: nil},
 		{regex: "^release.*", prefix: "release", suffix: "", contains: nil},
 		{regex: "^env-[0-9]+laio[1]?[^0-9].*", prefix: "env-", suffix: "", contains: []string{"laio"}},
+		{regex: ".*-.*-.*-.*-.*", prefix: "", suffix: "", contains: []string{"-", "-", "-", "-"}},
 	}
 
 	for _, c := range cases {
@@ -341,7 +350,7 @@ func BenchmarkToNormalizedLower(b *testing.B) {
 	}
 }
 
-func TestStringMatcherFromRegexp(t *testing.T) {
+func TestNewFastRegexMatcher(t *testing.T) {
 	for _, c := range []struct {
 		pattern string
 		exp     StringMatcher
@@ -364,12 +373,12 @@ func TestStringMatcherFromRegexp(t *testing.T) {
 		{`(?i:((foo1|foo2|bar)))`, orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
 		{"^((?i:foo|oo)|(bar))$", orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO", caseSensitive: false}, &equalStringMatcher{s: "OO", caseSensitive: false}, &equalStringMatcher{s: "bar", caseSensitive: true}})},
 		{"(?i:(foo1|foo2|bar))", orStringMatcher([]StringMatcher{orStringMatcher([]StringMatcher{&equalStringMatcher{s: "FOO1", caseSensitive: false}, &equalStringMatcher{s: "FOO2", caseSensitive: false}}), &equalStringMatcher{s: "BAR", caseSensitive: false}})},
-		{".*foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
-		{"(.*)foo.*", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
-		{"(.*)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
+		{".*foo.*", trueMatcher{}},     // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
+		{"(.*)foo.*", trueMatcher{}},   // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
+		{"(.*)foo(.*)", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
 		{"(.+)foo(.*)", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: trueMatcher{}}},
 		{"^.+foo.+", &containsStringMatcher{substrings: []string{"foo"}, left: &anyNonEmptyStringMatcher{matchNL: true}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
-		{"^(.*)(foo)(.*)$", &containsStringMatcher{substrings: []string{"foo"}, left: trueMatcher{}, right: trueMatcher{}}},
+		{"^(.*)(foo)(.*)$", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
 		{"^(.*)(foo|foobar)(.*)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: trueMatcher{}}},
 		{"^(.*)(foo|foobar)(.+)$", &containsStringMatcher{substrings: []string{"foo", "foobar"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
 		{"^(.*)(bar|b|buzz)(.+)$", &containsStringMatcher{substrings: []string{"bar", "b", "buzz"}, left: trueMatcher{}, right: &anyNonEmptyStringMatcher{matchNL: true}}},
@@ -388,7 +397,7 @@ func TestStringMatcherFromRegexp(t *testing.T) {
 		{"(api|rpc)_(v1|prom)_((?i)push|query)", nil},
 		{"[a-z][a-z]", nil},
 		{"[1^3]", nil},
-		{".*foo.*bar.*", nil},
+		{".*foo.*bar.*", trueMatcher{}}, // The containsInOrder check done in the function returned by compileMatchStringFunction is sufficient.
 		{`\d*`, nil},
 		{".", nil},
 		{"/|/bar.*", &literalPrefixSensitiveStringMatcher{prefix: "/", right: orStringMatcher{emptyStringMatcher{}, &literalPrefixSensitiveStringMatcher{prefix: "bar", right: trueMatcher{}}}}},
@@ -415,10 +424,9 @@ func TestStringMatcherFromRegexp(t *testing.T) {
 	} {
 		t.Run(c.pattern, func(t *testing.T) {
 			t.Parallel()
-			parsed, err := syntax.Parse(c.pattern, syntax.Perl|syntax.DotNL)
+			matcher, err := NewFastRegexMatcher(c.pattern)
 			require.NoError(t, err)
-			matches := stringMatcherFromRegexp(parsed)
-			require.Equal(t, c.exp, matches)
+			require.Equal(t, c.exp, matcher.stringMatcher)
 		})
 	}
 }
@@ -1389,3 +1397,48 @@ func TestToNormalisedLower(t *testing.T) {
 		require.Equal(t, expectedOutput, toNormalisedLower(input, nil))
 	}
 }
+
+// TestIsSimpleConcatenationPattern verifies that only concatenations of case-sensitive
+// literals and .* matchers that both start and end with .* are reported as simple.
+func TestIsSimpleConcatenationPattern(t *testing.T) {
+	testCases := map[string]bool{
+		".*-.*-.*-.*-.*": true,
+		".+-.*-.*-.*-.+": false,
+		"-.*-.*-.*-.*":   false,
+		".*-.*-.*-.*-":   false,
+		"-":              false,
+		".*":             false,
+		".*foo.*bar.*":   true,
+		".*(?i:f).*":     false,
+	}
+
+	for testCase, expected := range testCases {
+		t.Run(testCase, func(t *testing.T) {
+			re, err := syntax.Parse(testCase, syntax.Perl|syntax.DotNL)
+			require.NoError(t, err)
+			require.Equal(t, expected, isSimpleConcatenationPattern(re))
+		})
+	}
+}
+
+// BenchmarkFastRegexMatcher_ConcatenatedPattern measures MatchString for the pattern
+// .*-.*-.*-.*-.* against inputs with and without the four dashes it requires.
+func BenchmarkFastRegexMatcher_ConcatenatedPattern(b *testing.B) {
+	pattern, err := NewFastRegexMatcher(".*-.*-.*-.*-.*")
+	require.NoError(b, err)
+
+	testCases := []string{
+		"a-b-c-d-e",
+		"aaaaaa-bbbbbb-cccccc-dddddd-eeeeee",
+		"aaaaaa----eeeeee",
+		"----",
+		"-a-a-a-",
+		"abcd",
+	}
+
+	for b.Loop() {
+		for _, s := range testCases {
+			pattern.MatchString(s)
+		}
+	}
+}