From e74505968ba24a364a5be70ef86bbe20620d17db Mon Sep 17 00:00:00 2001 From: cumul Date: Tue, 19 Dec 2017 05:34:31 +0900 Subject: [PATCH 1/3] Use UTF-8 encoding for nginx plugin --- certbot-nginx/certbot_nginx/parser.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/certbot-nginx/certbot_nginx/parser.py b/certbot-nginx/certbot_nginx/parser.py index 9f13bc59f..fcbf69105 100644 --- a/certbot-nginx/certbot_nginx/parser.py +++ b/certbot-nginx/certbot_nginx/parser.py @@ -1,5 +1,6 @@ """NginxParser is a member object of the NginxConfigurator class.""" import copy +import codecs import functools import glob import logging @@ -202,7 +203,7 @@ class NginxParser(object): if item in self.parsed and not override: continue try: - with open(item) as _file: + with codecs.open(item, "r", "utf-8") as _file: parsed = nginxparser.load(_file) self.parsed[item] = parsed trees.append(parsed) @@ -378,7 +379,7 @@ class NginxParser(object): def _parse_ssl_options(ssl_options): if ssl_options is not None: try: - with open(ssl_options) as _file: + with codecs.open(ssl_options, "r", "utf-8") as _file: return nginxparser.load(_file) except IOError: logger.warn("Missing NGINX TLS options file: %s", ssl_options) From 71bc3e071fcafe59970d9d3f8557bf73027f5239 Mon Sep 17 00:00:00 2001 From: cumul Date: Wed, 31 Oct 2018 15:45:30 +0900 Subject: [PATCH 2/3] Use `io` module instead of `codecs` See https://mail.python.org/pipermail/python-list/2015-March/687124.html --- certbot-nginx/certbot_nginx/parser.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/certbot-nginx/certbot_nginx/parser.py b/certbot-nginx/certbot_nginx/parser.py index fcbf69105..b97a4a332 100644 --- a/certbot-nginx/certbot_nginx/parser.py +++ b/certbot-nginx/certbot_nginx/parser.py @@ -1,8 +1,8 @@ """NginxParser is a member object of the NginxConfigurator class.""" import copy -import codecs import functools import glob +import io import logging import os import pyparsing @@ -203,12 +203,14 @@ class NginxParser(object): if item in self.parsed and not override: continue try: - with codecs.open(item, "r", "utf-8") as _file: + with io.open(item, "r", encoding="utf-8") as _file: parsed = nginxparser.load(_file) self.parsed[item] = parsed trees.append(parsed) except IOError: logger.warning("Could not open file: %s", item) + except UnicodeDecodeError: + logger.warning("Could not read file: %s due to invalid unicode character. Only UTF-8 encoding is supported.", item) except pyparsing.ParseException as err: logger.debug("Could not parse file: %s due to %s", item, err) return trees @@ -379,10 +381,12 @@ class NginxParser(object): def _parse_ssl_options(ssl_options): if ssl_options is not None: try: - with codecs.open(ssl_options, "r", "utf-8") as _file: + with io.open(ssl_options, "r", encoding="utf-8") as _file: return nginxparser.load(_file) except IOError: logger.warn("Missing NGINX TLS options file: %s", ssl_options) + except UnicodeDecodeError: + logger.warn("Could not read file: %s due to invalid unicode character. Only UTF-8 encoding is supported.", ssl_options) except pyparsing.ParseBaseException as err: logger.debug("Could not parse file: %s due to %s", ssl_options, err) return [] From b50cddc772269c768b856b867457fb3bb8e0a011 Mon Sep 17 00:00:00 2001 From: cumul Date: Wed, 31 Oct 2018 16:48:01 +0900 Subject: [PATCH 3/3] Added test for valid/invalid unicode characters --- certbot-nginx/certbot_nginx/tests/parser_test.py | 12 ++++++++++++ .../unicode_support/invalid_unicode_comments.conf | 7 +++++++ .../unicode_support/valid_unicode_comments.conf | 9 +++++++++ 3 files changed, 28 insertions(+) create mode 100644 certbot-nginx/certbot_nginx/tests/testdata/etc_nginx/unicode_support/invalid_unicode_comments.conf create mode 100644 certbot-nginx/certbot_nginx/tests/testdata/etc_nginx/unicode_support/valid_unicode_comments.conf diff --git a/certbot-nginx/certbot_nginx/tests/parser_test.py b/certbot-nginx/certbot_nginx/tests/parser_test.py index e21acb8ea..7dc7f9c57 100644 --- a/certbot-nginx/certbot_nginx/tests/parser_test.py +++ b/certbot-nginx/certbot_nginx/tests/parser_test.py @@ -429,6 +429,18 @@ class NginxParserTest(util.NginxTest): #pylint: disable=too-many-public-methods self.assertEqual(len(default.raw), len(new_vhost_parsed.raw)) self.assertTrue(next(iter(default.addrs)).super_eq(next(iter(new_vhost_parsed.addrs)))) + def test_valid_unicode_characters(self): + nparser = parser.NginxParser(self.config_path) + # pylint: disable=protected-access + parsed = nparser._parse_files(nparser.abs_path('unicode_support/valid_unicode_comments.conf')) + self.assertEqual(['server'], parsed[0][2][0]) + self.assertEqual(['listen', '80'], parsed[0][2][1][3]) + + def test_invalid_unicode_characters(self): + nparser = parser.NginxParser(self.config_path) + # pylint: disable=protected-access + parsed = nparser._parse_files(nparser.abs_path('unicode_support/invalid_unicode_comments.conf')) + self.assertEqual([], parsed) if __name__ == "__main__": unittest.main() # pragma: no cover diff --git a/certbot-nginx/certbot_nginx/tests/testdata/etc_nginx/unicode_support/invalid_unicode_comments.conf b/certbot-nginx/certbot_nginx/tests/testdata/etc_nginx/unicode_support/invalid_unicode_comments.conf new file mode 100644 index 000000000..596044cc9 --- /dev/null +++ b/certbot-nginx/certbot_nginx/tests/testdata/etc_nginx/unicode_support/invalid_unicode_comments.conf @@ -0,0 +1,7 @@ +# This configuration file is saved with EUC-KR (a.k.a. cp949) encoding, +# including some Korean alphabets. + +server { + # ȳϼ. 80 Ʈ û ٸ. + listen 80; +} diff --git a/certbot-nginx/certbot_nginx/tests/testdata/etc_nginx/unicode_support/valid_unicode_comments.conf b/certbot-nginx/certbot_nginx/tests/testdata/etc_nginx/unicode_support/valid_unicode_comments.conf new file mode 100644 index 000000000..89c978b2e --- /dev/null +++ b/certbot-nginx/certbot_nginx/tests/testdata/etc_nginx/unicode_support/valid_unicode_comments.conf @@ -0,0 +1,9 @@ +# This configuration file is saved with valid UTF-8 encoding, +# including some CJK alphabets. + +server { + # 안녕하세요. 80번 포트에서 요청을 기다린다. + # こんにちは。80番ポートからリクエストを待つ。 + # 你好。等待端口80上的请求。 + listen 80; +}