nginx: parse comments between a block header and its opening brace

The parser previously absorbed a leading `#` in the pre-bracket position as a header token (losing the comment) or failed when the comment contained a `{`. Recognize such comments explicitly and fold them into the start of the block body so they survive dumps(). Fixes #10264.
2026-05-20 17:30:43 -04:00 · 2026-04-21 17:01:42 -07:00 · 2026-04-21 17:01:42 -07:00 · f7d2d134bb
commit f7d2d134bb
parent 3a5c92c6be
3 changed files with 100 additions and 42 deletions
--- a/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py
+++ b/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py
@ -55,14 +55,60 @@ class RawNginxParser:

    comment = space + Literal('#') + restOfLine

+    # Strict tokens exclude `#`; used only for block_begin when it is followed
+    # by pre-bracket comments so we can recognize the comment as a comment
+    # rather than absorbing its `#` into the header's tokens. See #10264.
+    head_tokenchars_strict = Regex(r"(\$\{)|[^{};\s'\"#]")
+    tail_tokenchars_strict = Regex(r"(\$\{)|[^{;\s#]")
+    tokenchars_strict = Combine(head_tokenchars_strict + ZeroOrMore(tail_tokenchars_strict))
+    paren_quote_extend_strict = Combine(
+        quoted + Literal(')') + ZeroOrMore(tail_tokenchars_strict)
+    )
+    token_strict = paren_quote_extend_strict | tokenchars_strict | quoted
+    whitespace_token_group_strict = (
+        space + token_strict + ZeroOrMore(required_space + token_strict) + space
+    )
+
    block = Forward()

    # order matters! see issue 518, and also http { # server { \n}
    contents = Group(comment) | Group(block) | Group(assignment)

    block_begin = Group(whitespace_token_group)
+    block_begin_strict = Group(whitespace_token_group_strict)
    block_innards = Group(ZeroOrMore(contents) + space).leave_whitespace()
-    block << block_begin + left_bracket + block_innards + right_bracket
+
+    # Pre-bracket comments: comments that appear between the block header and
+    # its opening `{`. Nginx ignores such comments, but the original grammar
+    # would absorb their leading `#` into the block header as a token, which
+    # then left a stray `{` on the following line with nowhere to bind. We
+    # capture them here and fold them into the start of the block body so
+    # they round-trip through dumps(). See #10264.
+    pre_bracket_comments = ZeroOrMore(Group(comment) + space.suppress())
+
+    @staticmethod
+    def _merge_pre_bracket_comments(tokens: ParseResults) -> Any:
+        if len(tokens) <= 2:
+            return None
+        parts = tokens.asList()
+        begin = parts[0]
+        innards = parts[-1]
+        # Wrap each moved comment in leading and trailing newlines so it dumps
+        # on its own line inside the block body; otherwise it would be
+        # concatenated with the following directive and, on reparse, the
+        # comment would swallow that directive.
+        prepared_comments = [['\n'] + c + ['\n'] for c in parts[1:-1]]
+        return [begin, prepared_comments + innards]
+
+    block_with_pre_bracket_comments = (
+        block_begin_strict + pre_bracket_comments
+        + left_bracket + block_innards + right_bracket
+    ).set_parse_action(_merge_pre_bracket_comments)
+    block_legacy = block_begin + left_bracket + block_innards + right_bracket
+    # Try strict form first so pre-bracket comments are recognized; fall back
+    # to the legacy form for configs where `#` appears mid-directive (e.g.
+    # multi-line `server_name foo\n# comment\n bar;`).
+    block << (block_with_pre_bracket_comments | block_legacy)

    script = ZeroOrMore(contents) + space + stringEnd
    script.parse_with_tabs().leave_whitespace()
--- a/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py
+++ b/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py
@ -375,47 +375,58 @@ class TestRawNginxParser(unittest.TestCase):
        loads(test)

    def test_location_comment_issue(self):
-        # See discussion at https://github.com/certbot/certbot/issues/10264
-        already_good = '''
-        location = /resume
-        # x
-        { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
-        '''
-        loads(already_good)
-        already_good = '''
-        location = /resume
-        { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
-        # {
-        '''
-        loads(already_good)
-        needs_fixing = '''
-        location = /resume
-        # {
-        { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
-        '''
-        with pytest.raises(ParseException):
-            loads(needs_fixing) # fails
-        needs_fixing = '''
-        location = /resume
-        # x{
-        { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
-        '''
-        with pytest.raises(ParseException):
-            loads(needs_fixing) # fails
-        needs_fixing = '''
-        location = /resume
-        #{
-        { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
-        '''
-        with pytest.raises(ParseException):
-            loads(needs_fixing) # fails
-        needs_fixing = '''
-        location = /resume
-        # {x
-        { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
-        '''
-        with pytest.raises(ParseException):
-            loads(needs_fixing) # fails
+        # See https://github.com/certbot/certbot/issues/10264. Each input puts
+        # a comment between a block header and its `{`; this used to raise a
+        # ParseException (or, for `# x`, silently absorb `#` and `x` as block
+        # header tokens and lose the comment). Now all parse and round-trip.
+        fixtures = [
+            '''
+            location = /resume
+            # x
+            { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
+            ''',
+            '''
+            location = /resume
+            { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
+            # {
+            ''',
+            '''
+            location = /resume
+            # {
+            { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
+            ''',
+            '''
+            location = /resume
+            # x{
+            { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
+            ''',
+            '''
+            location = /resume
+            #{
+            { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
+            ''',
+            '''
+            location = /resume
+            # {x
+            { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
+            ''',
+            '''
+            location = /resume
+            # { rewrite .* /resume.php redirect; }
+            { rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
+            ''',
+        ]
+        for src in fixtures:
+            parsed = loads(src)
+            # The comment text must survive the round trip; it may move from
+            # between the header and `{` to the start of the block body,
+            # which is semantically equivalent nginx.
+            assert '#' in dumps(parsed), src
+            # Round trip stabilizes after one cycle: dumps(parsed) may inject
+            # separators, but dumps(loads(dumps(parsed))) equals dumps(parsed).
+            once = dumps(loads(dumps(parsed)))
+            twice = dumps(loads(once))
+            assert once == twice, src


 class TestUnspacedList(unittest.TestCase):
--- a/newsfragments/10264.fixed
+++ b/newsfragments/10264.fixed
@ -0,0 +1 @@
+The nginx plugin now parses configs that place a comment between a block header and its opening `{` (e.g. `location = /resume\n# note\n{ ... }`) without raising a parse error or silently dropping the comment.
				`@ -0,0 +1 @@`
				The nginx plugin now parses configs that place a comment between a block header and its opening `{` (e.g. `location = /resume\n# note\n{ ... }`) without raising a parse error or silently dropping the comment.