From f7d2d134bb3490f20e4e9f3689ee75dba0f71926 Mon Sep 17 00:00:00 2001 From: Ember Date: Tue, 21 Apr 2026 17:01:42 -0700 Subject: [PATCH] nginx: parse comments between a block header and its opening brace The parser previously absorbed a leading `#` in the pre-bracket position as a header token (losing the comment) or failed when the comment contained a `{`. Recognize such comments explicitly and fold them into the start of the block body so they survive dumps(). Fixes #10264. --- .../_internal/plugins/nginx/nginxparser.py | 48 +++++++++- .../tests/plugins/nginx/nginxparser_test.py | 93 +++++++++++-------- newsfragments/10264.fixed | 1 + 3 files changed, 100 insertions(+), 42 deletions(-) create mode 100644 newsfragments/10264.fixed diff --git a/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py b/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py index be46368e0..65f11ed64 100644 --- a/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py +++ b/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py @@ -55,14 +55,60 @@ class RawNginxParser: comment = space + Literal('#') + restOfLine + # Strict tokens exclude `#`; used only for block_begin when it is followed + # by pre-bracket comments so we can recognize the comment as a comment + # rather than absorbing its `#` into the header's tokens. See #10264. + head_tokenchars_strict = Regex(r"(\$\{)|[^{};\s'\"#]") + tail_tokenchars_strict = Regex(r"(\$\{)|[^{;\s#]") + tokenchars_strict = Combine(head_tokenchars_strict + ZeroOrMore(tail_tokenchars_strict)) + paren_quote_extend_strict = Combine( + quoted + Literal(')') + ZeroOrMore(tail_tokenchars_strict) + ) + token_strict = paren_quote_extend_strict | tokenchars_strict | quoted + whitespace_token_group_strict = ( + space + token_strict + ZeroOrMore(required_space + token_strict) + space + ) + block = Forward() # order matters! see issue 518, and also http { # server { \n} contents = Group(comment) | Group(block) | Group(assignment) block_begin = Group(whitespace_token_group) + block_begin_strict = Group(whitespace_token_group_strict) block_innards = Group(ZeroOrMore(contents) + space).leave_whitespace() - block << block_begin + left_bracket + block_innards + right_bracket + + # Pre-bracket comments: comments that appear between the block header and + # its opening `{`. Nginx ignores such comments, but the original grammar + # would absorb their leading `#` into the block header as a token, which + # then left a stray `{` on the following line with nowhere to bind. We + # capture them here and fold them into the start of the block body so + # they round-trip through dumps(). See #10264. + pre_bracket_comments = ZeroOrMore(Group(comment) + space.suppress()) + + @staticmethod + def _merge_pre_bracket_comments(tokens: ParseResults) -> Any: + if len(tokens) <= 2: + return None + parts = tokens.asList() + begin = parts[0] + innards = parts[-1] + # Wrap each moved comment in leading and trailing newlines so it dumps + # on its own line inside the block body; otherwise it would be + # concatenated with the following directive and, on reparse, the + # comment would swallow that directive. + prepared_comments = [['\n'] + c + ['\n'] for c in parts[1:-1]] + return [begin, prepared_comments + innards] + + block_with_pre_bracket_comments = ( + block_begin_strict + pre_bracket_comments + + left_bracket + block_innards + right_bracket + ).set_parse_action(_merge_pre_bracket_comments) + block_legacy = block_begin + left_bracket + block_innards + right_bracket + # Try strict form first so pre-bracket comments are recognized; fall back + # to the legacy form for configs where `#` appears mid-directive (e.g. + # multi-line `server_name foo\n# comment\n bar;`). + block << (block_with_pre_bracket_comments | block_legacy) script = ZeroOrMore(contents) + space + stringEnd script.parse_with_tabs().leave_whitespace() diff --git a/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py b/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py index 58c7f4faa..f604b3e7f 100644 --- a/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py +++ b/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py @@ -375,47 +375,58 @@ class TestRawNginxParser(unittest.TestCase): loads(test) def test_location_comment_issue(self): - # See discussion at https://github.com/certbot/certbot/issues/10264 - already_good = ''' - location = /resume - # x - { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } - ''' - loads(already_good) - already_good = ''' - location = /resume - { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } - # { - ''' - loads(already_good) - needs_fixing = ''' - location = /resume - # { - { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } - ''' - with pytest.raises(ParseException): - loads(needs_fixing) # fails - needs_fixing = ''' - location = /resume - # x{ - { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } - ''' - with pytest.raises(ParseException): - loads(needs_fixing) # fails - needs_fixing = ''' - location = /resume - #{ - { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } - ''' - with pytest.raises(ParseException): - loads(needs_fixing) # fails - needs_fixing = ''' - location = /resume - # {x - { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } - ''' - with pytest.raises(ParseException): - loads(needs_fixing) # fails + # See https://github.com/certbot/certbot/issues/10264. Each input puts + # a comment between a block header and its `{`; this used to raise a + # ParseException (or, for `# x`, silently absorb `#` and `x` as block + # header tokens and lose the comment). Now all parse and round-trip. + fixtures = [ + ''' + location = /resume + # x + { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } + ''', + ''' + location = /resume + { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } + # { + ''', + ''' + location = /resume + # { + { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } + ''', + ''' + location = /resume + # x{ + { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } + ''', + ''' + location = /resume + #{ + { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } + ''', + ''' + location = /resume + # {x + { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } + ''', + ''' + location = /resume + # { rewrite .* /resume.php redirect; } + { rewrite .* /Files/Adam_Lein_resume.pdf redirect; } + ''', + ] + for src in fixtures: + parsed = loads(src) + # The comment text must survive the round trip; it may move from + # between the header and `{` to the start of the block body, + # which is semantically equivalent nginx. + assert '#' in dumps(parsed), src + # Round trip stabilizes after one cycle: dumps(parsed) may inject + # separators, but dumps(loads(dumps(parsed))) equals dumps(parsed). + once = dumps(loads(dumps(parsed))) + twice = dumps(loads(once)) + assert once == twice, src class TestUnspacedList(unittest.TestCase): diff --git a/newsfragments/10264.fixed b/newsfragments/10264.fixed new file mode 100644 index 000000000..0ac3f4aff --- /dev/null +++ b/newsfragments/10264.fixed @@ -0,0 +1 @@ +The nginx plugin now parses configs that place a comment between a block header and its opening `{` (e.g. `location = /resume\n# note\n{ ... }`) without raising a parse error or silently dropping the comment.