From 26088d01b9518b3315805186dcee534b986bf79f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 7 May 2025 11:13:40 +0200 Subject: [PATCH] fix(parser): parenthesis around aggregate expression parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The position range of nested aggregate expression was wrong, for the expression "(sum(foo))" the position of "sum(foo)" should be 1-9, but the parser could not decide the end of the expression on pos 9, instead it read ahead to pos 10 and then emitted the aggregate. But we only kept the last closing position (10) and wrote that into the aggregate. The reason for this is that the parser cannot know from "(sum(foo)" alone if the aggregate is finished. It could be finished as in "(sum(foo))" but equally it could continue with group modifier as "(sum(foo) by (bar))". The fix is to track ")" tokens in a stack in addition to the lastClosing position, which is overloaded with other things like offset number tracking. Signed-off-by: György Krajcsovits --- promql/parser/generated_parser.y | 26 ++++++++++++++--- promql/parser/generated_parser.y.go | 14 +++++++++- promql/parser/parse.go | 21 ++++++++++++-- promql/parser/parse_test.go | 43 +++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 7 deletions(-) diff --git a/promql/parser/generated_parser.y b/promql/parser/generated_parser.y index cdb4532d3b..ec8e4082dc 100644 --- a/promql/parser/generated_parser.y +++ b/promql/parser/generated_parser.y @@ -242,9 +242,23 @@ expr : */ aggregate_expr : aggregate_op aggregate_modifier function_call_body - { $$ = yylex.(*parser).newAggregateExpr($1, $2, $3) } + { + // Need to consume the position of the first RIGHT_PAREN. It might not exist on garbage input + // like 'sum (some_metric) by test' + if len(yylex.(*parser).closingParens) > 1 { + yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:] + } + $$ = yylex.(*parser).newAggregateExpr($1, $2, $3) + } | aggregate_op function_call_body aggregate_modifier - { $$ = yylex.(*parser).newAggregateExpr($1, $3, $2) } + { + // Need to consume the position of the first RIGHT_PAREN. It might not exist on garbage input + // like 'sum by test (some_metric)' + if len(yylex.(*parser).closingParens) > 1 { + yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:] + } + $$ = yylex.(*parser).newAggregateExpr($1, $3, $2) + } | aggregate_op function_call_body { $$ = yylex.(*parser).newAggregateExpr($1, &AggregateExpr{}, $2) } | aggregate_op error @@ -398,9 +412,10 @@ function_call : IDENTIFIER function_call_body Args: $2.(Expressions), PosRange: posrange.PositionRange{ Start: $1.Pos, - End: yylex.(*parser).lastClosing, + End: yylex.(*parser).closingParens[0], }, } + yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:] } ; @@ -426,7 +441,10 @@ function_call_args: function_call_args COMMA expr */ paren_expr : LEFT_PAREN expr RIGHT_PAREN - { $$ = &ParenExpr{Expr: $2.(Expr), PosRange: mergeRanges(&$1, &$3)} } + { + $$ = &ParenExpr{Expr: $2.(Expr), PosRange: mergeRanges(&$1, &$3)} + yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:] + } ; /* diff --git a/promql/parser/generated_parser.y.go b/promql/parser/generated_parser.y.go index 78d5e15245..fdbf02b065 100644 --- a/promql/parser/generated_parser.y.go +++ b/promql/parser/generated_parser.y.go @@ -1063,11 +1063,21 @@ yydefault: case 20: yyDollar = yyS[yypt-3 : yypt+1] { + // Need to consume the position of the first RIGHT_PAREN. It might not exist on garbage input + // like 'sum (some_metric) by test' + if len(yylex.(*parser).closingParens) > 1 { + yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:] + } yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[2].node, yyDollar[3].node) } case 21: yyDollar = yyS[yypt-3 : yypt+1] { + // Need to consume the position of the first RIGHT_PAREN. It might not exist on garbage input + // like 'sum by test (some_metric)' + if len(yylex.(*parser).closingParens) > 1 { + yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:] + } yyVAL.node = yylex.(*parser).newAggregateExpr(yyDollar[1].item, yyDollar[3].node, yyDollar[2].node) } case 22: @@ -1295,9 +1305,10 @@ yydefault: Args: yyDollar[2].node.(Expressions), PosRange: posrange.PositionRange{ Start: yyDollar[1].item.Pos, - End: yylex.(*parser).lastClosing, + End: yylex.(*parser).closingParens[0], }, } + yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:] } case 62: yyDollar = yyS[yypt-3 : yypt+1] @@ -1329,6 +1340,7 @@ yydefault: yyDollar = yyS[yypt-3 : yypt+1] { yyVAL.node = &ParenExpr{Expr: yyDollar[2].node.(Expr), PosRange: mergeRanges(&yyDollar[1].item, &yyDollar[3].item)} + yylex.(*parser).closingParens = yylex.(*parser).closingParens[1:] } case 68: yyDollar = yyS[yypt-3 : yypt+1] diff --git a/promql/parser/parse.go b/promql/parser/parse.go index 9bf27264a8..aa271b37a9 100644 --- a/promql/parser/parse.go +++ b/promql/parser/parse.go @@ -56,6 +56,13 @@ type parser struct { // Everytime an Item is lexed that could be the end // of certain expressions its end position is stored here. lastClosing posrange.Pos + // Keep track of closing parentheses in addition, because sometimes the + // parser needs to read past a closing parenthesis to find the end of an + // expression, e.g. reading ony '(sum(foo)' cannot tell the end of the + // aggregation expression, since it could continue with either + // '(sum(foo))' or '(sum(foo) by (bar))' by which time we set lastClosing + // to the last paren. + closingParens []posrange.Pos yyParser yyParserImpl @@ -79,6 +86,7 @@ func NewParser(input string, opts ...Opt) *parser { //nolint:revive // unexporte p.injecting = false p.parseErrors = nil p.generatedParserResult = nil + p.closingParens = make([]posrange.Pos, 0) // Clear lexer struct before reusing. p.lex = Lexer{ @@ -168,6 +176,11 @@ func EnrichParseError(err error, enrich func(parseErr *ParseErr)) { func ParseExpr(input string) (expr Expr, err error) { p := NewParser(input) defer p.Close() + + if len(p.closingParens) > 0 { + return nil, fmt.Errorf("internal parser error, not all closing parens consumed: %v", p.closingParens) + } + return p.ParseExpr() } @@ -371,7 +384,10 @@ func (p *parser) Lex(lval *yySymType) int { case EOF: lval.item.Typ = EOF p.InjectItem(0) - case RIGHT_BRACE, RIGHT_PAREN, RIGHT_BRACKET, DURATION, NUMBER: + case RIGHT_PAREN: + p.closingParens = append(p.closingParens, lval.item.Pos+posrange.Pos(len(lval.item.Val))) + fallthrough + case RIGHT_BRACE, RIGHT_BRACKET, DURATION, NUMBER: p.lastClosing = lval.item.Pos + posrange.Pos(len(lval.item.Val)) } @@ -434,8 +450,9 @@ func (p *parser) newAggregateExpr(op Item, modifier, args Node) (ret *AggregateE ret.PosRange = posrange.PositionRange{ Start: op.Pos, - End: p.lastClosing, + End: p.closingParens[0], } + p.closingParens = p.closingParens[1:] ret.Op = op.Typ diff --git a/promql/parser/parse_test.go b/promql/parser/parse_test.go index 14af0b16ae..1dda72699e 100644 --- a/promql/parser/parse_test.go +++ b/promql/parser/parse_test.go @@ -3947,6 +3947,7 @@ var testExpr = []struct { }, }, }, + // Test that nested parentheses result in the correct position range. { input: "(sum(foo))", expected: &ParenExpr{ @@ -3967,6 +3968,48 @@ var testExpr = []struct { PosRange: posrange.PositionRange{Start: 0, End: 10}, }, }, + { + input: "(sum(foo) by (bar))", + expected: &ParenExpr{ + Expr: &AggregateExpr{ + Op: SUM, + Grouping: []string{"bar"}, + Expr: &VectorSelector{ + Name: "foo", + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"), + }, + PosRange: posrange.PositionRange{ + Start: 5, + End: 8, + }, + }, + PosRange: posrange.PositionRange{Start: 1, End: 18}, + }, + PosRange: posrange.PositionRange{Start: 0, End: 19}, + }, + }, + { + input: "(sum by (bar) (foo))", + expected: &ParenExpr{ + Expr: &AggregateExpr{ + Op: SUM, + Grouping: []string{"bar"}, + Expr: &VectorSelector{ + Name: "foo", + LabelMatchers: []*labels.Matcher{ + MustLabelMatcher(labels.MatchEqual, model.MetricNameLabel, "foo"), + }, + PosRange: posrange.PositionRange{ + Start: 15, + End: 18, + }, + }, + PosRange: posrange.PositionRange{Start: 1, End: 19}, + }, + PosRange: posrange.PositionRange{Start: 0, End: 20}, + }, + }, } func makeInt64Pointer(val int64) *int64 {