nginx: parse comments between a block header and its opening brace

The parser previously absorbed a leading `#` in the pre-bracket
position as a header token (losing the comment) or failed when the
comment contained a `{`. Recognize such comments explicitly and fold
them into the start of the block body so they survive dumps().

Fixes #10264.
This commit is contained in:
Ember 2026-04-21 17:01:42 -07:00
parent 3a5c92c6be
commit f7d2d134bb
3 changed files with 100 additions and 42 deletions

View file

@ -55,14 +55,60 @@ class RawNginxParser:
comment = space + Literal('#') + restOfLine
# Strict tokens exclude `#`; used only for block_begin when it is followed
# by pre-bracket comments so we can recognize the comment as a comment
# rather than absorbing its `#` into the header's tokens. See #10264.
head_tokenchars_strict = Regex(r"(\$\{)|[^{};\s'\"#]")
tail_tokenchars_strict = Regex(r"(\$\{)|[^{;\s#]")
tokenchars_strict = Combine(head_tokenchars_strict + ZeroOrMore(tail_tokenchars_strict))
paren_quote_extend_strict = Combine(
quoted + Literal(')') + ZeroOrMore(tail_tokenchars_strict)
)
token_strict = paren_quote_extend_strict | tokenchars_strict | quoted
whitespace_token_group_strict = (
space + token_strict + ZeroOrMore(required_space + token_strict) + space
)
block = Forward()
# order matters! see issue 518, and also http { # server { \n}
contents = Group(comment) | Group(block) | Group(assignment)
block_begin = Group(whitespace_token_group)
block_begin_strict = Group(whitespace_token_group_strict)
block_innards = Group(ZeroOrMore(contents) + space).leave_whitespace()
block << block_begin + left_bracket + block_innards + right_bracket
# Pre-bracket comments: comments that appear between the block header and
# its opening `{`. Nginx ignores such comments, but the original grammar
# would absorb their leading `#` into the block header as a token, which
# then left a stray `{` on the following line with nowhere to bind. We
# capture them here and fold them into the start of the block body so
# they round-trip through dumps(). See #10264.
pre_bracket_comments = ZeroOrMore(Group(comment) + space.suppress())
@staticmethod
def _merge_pre_bracket_comments(tokens: ParseResults) -> Any:
if len(tokens) <= 2:
return None
parts = tokens.asList()
begin = parts[0]
innards = parts[-1]
# Wrap each moved comment in leading and trailing newlines so it dumps
# on its own line inside the block body; otherwise it would be
# concatenated with the following directive and, on reparse, the
# comment would swallow that directive.
prepared_comments = [['\n'] + c + ['\n'] for c in parts[1:-1]]
return [begin, prepared_comments + innards]
block_with_pre_bracket_comments = (
block_begin_strict + pre_bracket_comments
+ left_bracket + block_innards + right_bracket
).set_parse_action(_merge_pre_bracket_comments)
block_legacy = block_begin + left_bracket + block_innards + right_bracket
# Try strict form first so pre-bracket comments are recognized; fall back
# to the legacy form for configs where `#` appears mid-directive (e.g.
# multi-line `server_name foo\n# comment\n bar;`).
block << (block_with_pre_bracket_comments | block_legacy)
script = ZeroOrMore(contents) + space + stringEnd
script.parse_with_tabs().leave_whitespace()

View file

@ -375,47 +375,58 @@ class TestRawNginxParser(unittest.TestCase):
loads(test)
def test_location_comment_issue(self):
# See discussion at https://github.com/certbot/certbot/issues/10264
already_good = '''
location = /resume
# x
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
'''
loads(already_good)
already_good = '''
location = /resume
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
# {
'''
loads(already_good)
needs_fixing = '''
location = /resume
# {
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
'''
with pytest.raises(ParseException):
loads(needs_fixing) # fails
needs_fixing = '''
location = /resume
# x{
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
'''
with pytest.raises(ParseException):
loads(needs_fixing) # fails
needs_fixing = '''
location = /resume
#{
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
'''
with pytest.raises(ParseException):
loads(needs_fixing) # fails
needs_fixing = '''
location = /resume
# {x
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
'''
with pytest.raises(ParseException):
loads(needs_fixing) # fails
# See https://github.com/certbot/certbot/issues/10264. Each input puts
# a comment between a block header and its `{`; this used to raise a
# ParseException (or, for `# x`, silently absorb `#` and `x` as block
# header tokens and lose the comment). Now all parse and round-trip.
fixtures = [
'''
location = /resume
# x
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
''',
'''
location = /resume
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
# {
''',
'''
location = /resume
# {
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
''',
'''
location = /resume
# x{
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
''',
'''
location = /resume
#{
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
''',
'''
location = /resume
# {x
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
''',
'''
location = /resume
# { rewrite .* /resume.php redirect; }
{ rewrite .* /Files/Adam_Lein_resume.pdf redirect; }
''',
]
for src in fixtures:
parsed = loads(src)
# The comment text must survive the round trip; it may move from
# between the header and `{` to the start of the block body,
# which is semantically equivalent nginx.
assert '#' in dumps(parsed), src
# Round trip stabilizes after one cycle: dumps(parsed) may inject
# separators, but dumps(loads(dumps(parsed))) equals dumps(parsed).
once = dumps(loads(dumps(parsed)))
twice = dumps(loads(once))
assert once == twice, src
class TestUnspacedList(unittest.TestCase):

View file

@ -0,0 +1 @@
The nginx plugin now parses configs that place a comment between a block header and its opening `{` (e.g. `location = /resume\n# note\n{ ... }`) without raising a parse error or silently dropping the comment.