diff --git a/AUTHORS.md b/AUTHORS.md index 02f90d956..17fa33134 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -84,6 +84,7 @@ Authors * [Eric Engestrom](https://github.com/1ace) * [Eric Rescorla](https://github.com/ekr) * [Eric Wustrow](https://github.com/ewust) +* [Erik Morén](https://github.com/morre95) * [Erik Rose](https://github.com/erikrose) * [Eugene Kazakov](https://github.com/xgin) * [Fabian](https://github.com/faerbit) @@ -306,3 +307,4 @@ Authors * [Zach Shepherd](https://github.com/zjs) * [陈三](https://github.com/chenxsan) * [Shahar Naveh](https://github.com/ShaharNaveh) + diff --git a/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py b/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py index be46368e0..2d6c721f6 100644 --- a/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py +++ b/certbot/src/certbot/_internal/plugins/nginx/nginxparser.py @@ -13,6 +13,7 @@ from typing import SupportsIndex from typing import Union from pyparsing import Combine +from pyparsing import FollowedBy from pyparsing import Forward from pyparsing import Group from pyparsing import Literal @@ -50,15 +51,29 @@ class RawNginxParser: token = paren_quote_extend | tokenchars | quoted - whitespace_token_group = space + token + ZeroOrMore(required_space + token) + space - assignment = whitespace_token_group + semicolon + # An nginx comment runs from '#' to the end of the line. `restOfLine` does + # not consume the trailing newline; the following `required_space` does. + comment = Literal('#') + restOfLine + # A separator between two tokens is whitespace, optionally interleaved with + # one or more comments. The `FollowedBy(token)` lookahead means a comment is + # only treated as inline when a real token follows it — otherwise we leave + # the `#` alone so existing fallback paths (e.g. comment-as-token in a block + # header) still parse. This lets a multi-line directive contain comment + # lines between its tokens (valid nginx), which the old grammar mis-parsed: + # an unbalanced `"` inside such a comment would be sucked up by the + # `multiline=True` quoted strings and run away across the rest of the file + # (issue #10598). + token_separator = required_space + ZeroOrMore( + comment + required_space + FollowedBy(token) + ) - comment = space + Literal('#') + restOfLine + whitespace_token_group = space + token + ZeroOrMore(token_separator + token) + space + assignment = whitespace_token_group + semicolon block = Forward() # order matters! see issue 518, and also http { # server { \n} - contents = Group(comment) | Group(block) | Group(assignment) + contents = Group(space + comment) | Group(block) | Group(assignment) block_begin = Group(whitespace_token_group) block_innards = Group(ZeroOrMore(contents) + space).leave_whitespace() diff --git a/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py b/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py index 58c7f4faa..bf02e35c2 100644 --- a/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py +++ b/certbot/src/certbot/_internal/tests/plugins/nginx/nginxparser_test.py @@ -201,6 +201,20 @@ class TestRawNginxParser(unittest.TestCase): [['#', ' server{']]] ] + def test_comment_inside_multiline_directive(self): + # See https://github.com/certbot/certbot/issues/10598 + # An unbalanced double-quote inside a comment that sits between tokens + # of a multi-line directive must not be sucked into a multiline quoted + # string and run away across the rest of the file. + source = ("http {\n" + " log_format json_test escape=json '{'\n" + " # '\"req\": \"$request\"' # GET \"/test?...\n" + " '}';\n" + "}\n") + parsed = loads(source) + # The config parses and round-trips byte-for-byte. + assert dumps(parsed) == source + def test_access_log(self): # see issue #3798 parsed = loads('access_log syslog:server=unix:/dev/log,facility=auth,' diff --git a/newsfragments/10598.fixed b/newsfragments/10598.fixed new file mode 100644 index 000000000..f0a68b533 --- /dev/null +++ b/newsfragments/10598.fixed @@ -0,0 +1 @@ +Fixed nginx configuration parsing when comments appear between tokens of a multi-line directive.