From b7cf9288524cd9ec717f6c88d2b26226d8d04bf0 Mon Sep 17 00:00:00 2001 From: Peter Eckersley Date: Wed, 13 Jul 2016 17:17:45 -0700 Subject: [PATCH 1/4] Parse charset_map correctly (though we still don't emit it correctly...) --- .../chive/chive-nginx-master/win-utf | 1 + certbot-nginx/certbot_nginx/nginxparser.py | 27 ++++++++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/certbot-compatibility-test/nginx/nginx-roundtrip-testdata/chive/chive-nginx-master/win-utf b/certbot-compatibility-test/nginx/nginx-roundtrip-testdata/chive/chive-nginx-master/win-utf index ed8bc007a..e4b1e8360 100644 --- a/certbot-compatibility-test/nginx/nginx-roundtrip-testdata/chive/chive-nginx-master/win-utf +++ b/certbot-compatibility-test/nginx/nginx-roundtrip-testdata/chive/chive-nginx-master/win-utf @@ -2,6 +2,7 @@ # This map is not a full windows-1251 <> utf8 map: it does not # contain Serbian and Macedonian letters. If you need a full map, # use contrib/unicode2nginx/win-utf map instead. +# charset_map windows-1251 utf-8 { diff --git a/certbot-nginx/certbot_nginx/nginxparser.py b/certbot-nginx/certbot_nginx/nginxparser.py index 1859777d8..10a26ecb2 100644 --- a/certbot-nginx/certbot_nginx/nginxparser.py +++ b/certbot-nginx/certbot_nginx/nginxparser.py @@ -40,6 +40,7 @@ class RawNginxParser(object): assignment = space + key + Optional(space + value, default=None) + semicolon location_statement = space + Optional(modifier) + Optional(space + location + space) if_statement = space + Literal("if") + space + condition + space + charset_map_statement = space + Literal("charset_map") + space + value + space + value map_statement = space + Literal("map") + space + nonspace + space + dollar_var + space # This is NOT an accurate way to parse nginx map entries; it's almost @@ -52,28 +53,36 @@ class RawNginxParser(object): map_pattern = Regex(r'".*"') | Regex(r"'.*'") | nonspace map_entry = space + map_pattern + space + value + space + semicolon map_block = Group( - # key could for instance be "server" or "http", or "location" (in which case - # location_statement needs to have a non-empty location) Group(map_statement).leaveWhitespace() + left_bracket + Group(ZeroOrMore(Group(comment | map_entry)) + space).leaveWhitespace() + right_bracket) block = Forward() - block << Group( - # key could for instance be "server" or "http", or "location" (in which case - # location_statement needs to have a non-empty location) - (Group(space + key + location_statement) ^ Group(if_statement)).leaveWhitespace() + - left_bracket + - Group(ZeroOrMore(Group(comment | assignment) | block | map_block) + space).leaveWhitespace() - + right_bracket) + + # key could for instance be "server" or "http", or "location" (in which case + # location_statement needs to have a non-empty location) + + block_begin = (Group(space + key + location_statement) ^ + Group(if_statement) ^ + Group(charset_map_statement)).leaveWhitespace() + + block_innards = Group(ZeroOrMore(Group(comment | assignment) | block | map_block) + + space).leaveWhitespace() + + block << Group(block_begin + left_bracket + block_innards + right_bracket) script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd script.parseWithTabs() + testLine = OneOrMore(Group(space + key + location_statement)).leaveWhitespace() + testTwo = OneOrMore(block) def __init__(self, source): self.source = source + def test(self): + return self.testLine.parseString(self.source) + def parse(self): """Returns the parsed tree.""" return self.script.parseString(self.source) From 7183896ed76ac5b44d895303c8a7f6c08726b5f7 Mon Sep 17 00:00:00 2001 From: Peter Eckersley Date: Wed, 13 Jul 2016 17:24:05 -0700 Subject: [PATCH 2/4] Preserve spaces before comments at the beginning of a file --- .../chive/chive-nginx-master/win-utf | 1 - certbot-nginx/certbot_nginx/nginxparser.py | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/certbot-compatibility-test/nginx/nginx-roundtrip-testdata/chive/chive-nginx-master/win-utf b/certbot-compatibility-test/nginx/nginx-roundtrip-testdata/chive/chive-nginx-master/win-utf index e4b1e8360..ed8bc007a 100644 --- a/certbot-compatibility-test/nginx/nginx-roundtrip-testdata/chive/chive-nginx-master/win-utf +++ b/certbot-compatibility-test/nginx/nginx-roundtrip-testdata/chive/chive-nginx-master/win-utf @@ -2,7 +2,6 @@ # This map is not a full windows-1251 <> utf8 map: it does not # contain Serbian and Macedonian letters. If you need a full map, # use contrib/unicode2nginx/win-utf map instead. -# charset_map windows-1251 utf-8 { diff --git a/certbot-nginx/certbot_nginx/nginxparser.py b/certbot-nginx/certbot_nginx/nginxparser.py index 10a26ecb2..d02687d2f 100644 --- a/certbot-nginx/certbot_nginx/nginxparser.py +++ b/certbot-nginx/certbot_nginx/nginxparser.py @@ -73,16 +73,11 @@ class RawNginxParser(object): block << Group(block_begin + left_bracket + block_innards + right_bracket) script = OneOrMore(Group(comment | assignment) ^ block ^ map_block) + space + stringEnd - script.parseWithTabs() - testLine = OneOrMore(Group(space + key + location_statement)).leaveWhitespace() - testTwo = OneOrMore(block) + script.parseWithTabs().leaveWhitespace() def __init__(self, source): self.source = source - def test(self): - return self.testLine.parseString(self.source) - def parse(self): """Returns the parsed tree.""" return self.script.parseString(self.source) From b6966fc05e568bcc1381524bcfff676ab8e89bf2 Mon Sep 17 00:00:00 2001 From: Peter Eckersley Date: Wed, 13 Jul 2016 18:47:05 -0700 Subject: [PATCH 3/4] lint --- certbot-nginx/certbot_nginx/nginxparser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/certbot-nginx/certbot_nginx/nginxparser.py b/certbot-nginx/certbot_nginx/nginxparser.py index d02687d2f..372ecef6a 100644 --- a/certbot-nginx/certbot_nginx/nginxparser.py +++ b/certbot-nginx/certbot_nginx/nginxparser.py @@ -63,11 +63,11 @@ class RawNginxParser(object): # key could for instance be "server" or "http", or "location" (in which case # location_statement needs to have a non-empty location) - block_begin = (Group(space + key + location_statement) ^ + block_begin = (Group(space + key + location_statement) ^ Group(if_statement) ^ Group(charset_map_statement)).leaveWhitespace() - block_innards = Group(ZeroOrMore(Group(comment | assignment) | block | map_block) + block_innards = Group(ZeroOrMore(Group(comment | assignment) | block | map_block) + space).leaveWhitespace() block << Group(block_begin + left_bracket + block_innards + right_bracket) From e3ab49a93bfda1c7dbe7c46567671e63411eeeba Mon Sep 17 00:00:00 2001 From: Peter Eckersley Date: Fri, 15 Jul 2016 16:57:10 -0700 Subject: [PATCH 4/4] Rework "value" parser: - Now handles any${VAR_SUBSTITUTION}inthemiddle/of/values - Don't use a single giant janky Regex; use small ones and have PyParsing combine them --- certbot-nginx/certbot_nginx/nginxparser.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/certbot-nginx/certbot_nginx/nginxparser.py b/certbot-nginx/certbot_nginx/nginxparser.py index 372ecef6a..c7a9f0c75 100644 --- a/certbot-nginx/certbot_nginx/nginxparser.py +++ b/certbot-nginx/certbot_nginx/nginxparser.py @@ -25,11 +25,19 @@ class RawNginxParser(object): key = Word(alphanums + "_/+-.") dollar_var = Combine(Literal('$') + Regex(r"[^\{\};,\s]+")) condition = Regex(r"\(.+\)") - # Matches anything that is not a special character AND any chars in single - # or double quotes + # Matches anything that is not a special character, and ${SHELL_VARS}, AND + # any chars in single or double quotes # All of these COULD be upgraded to something like # https://stackoverflow.com/a/16130746 - value = Regex(r"((\".*\")?(\'.*\')?[^\{\};,]?)+") + dquoted = Regex(r'(\".*\")') + squoted = Regex(r"(\'.*\')") + nonspecial = Regex(r"[^\{\};,]") + varsub = Regex(r"(\$\{\w+\})") + # nonspecial nibbles one character at a time, but the other objects take + # precedence. We use ZeroOrMore to allow entries like "break ;" to be + # parsed as assignments + value = Combine(ZeroOrMore(dquoted | squoted | varsub | nonspecial)) + location = CharsNotIn("{};," + string.whitespace) # modifier for location uri [ = | ~ | ~* | ^~ ] modifier = Literal("=") | Literal("~*") | Literal("~") | Literal("^~")