From 1f128b0e0a3f7fb4370ebcbda57588951dd9742d Mon Sep 17 00:00:00 2001 From: Brad Warren Date: Wed, 30 Jul 2025 16:31:59 -0700 Subject: [PATCH] improve ARI error handling and logging (#10374) fixes https://github.com/certbot/certbot/issues/10336 and fixes https://github.com/certbot/certbot/issues/10357 using the plan at https://github.com/certbot/certbot/issues/10336#issuecomment-3109192677 while this PR makes the renewal_time function slightly less nice, i think us catching and handling the exceptions in certbot makes the most sense so we can do exactly what we want around terminal and file logging with this change, a output from a failed `sudo certbot renew` run looks like ``` $ sudo certbot renew Saving debug log to /var/log/letsencrypt/letsencrypt.log - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Processing /etc/letsencrypt/renewal/example.org.conf - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - An error occurred requesting ACME Renewal Information (ARI). If this problem persists and you think it's a bug in Certbot, please open an issue at https://github.com/certbot/certbot/issues/new/choose. Certificate not yet due for renewal - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - The following certificates are not due for renewal yet: /etc/letsencrypt/live/example.org/fullchain.pem expires on 2025-10-23 (skipped) No renewals were attempted. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ``` `sudo certbot renew -q` produces no output and the relevant messages in the log file look like: ``` 2025-07-30 19:51:13,267:WARNING:certbot._internal.renewal:An error occurred requesting ACME Renewal Information (ARI). If this problem persists and you think it's a bug in Certbot, please open an issue at https://github.com/certbot/certbot/issues/new/choose. 2025-07-30 19:51:13,267:DEBUG:certbot._internal.renewal:Error while requesting ARI was: Traceback (most recent call last): File "/home/brad/certbot/acme/src/acme/client.py", line 366, in renewal_time raise ValueError('im some error') ValueError: im some error The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/home/brad/certbot/certbot/src/certbot/_internal/renewal.py", line 351, in _ari_renewal_time return acme.renewal_time(cert_pem)[0] File "/home/brad/certbot/acme/src/acme/client.py", line 370, in renewal_time raise errors.ARIError(error_msg, now + default_retry_after) from e acme.errors.ARIError: ('failed to fetch renewal_info URL https://acme-staging-v02.api.letsencrypt.org/acme/renewal-info/oXQaBm1Qt4YtSizBfrSNiElszRY.LMjTHFS4HPbSRMOzLrA9OZId', datetime.datetime(2025, 7, 31, 1, 51, 13, 267088)) ``` --- acme/src/acme/_internal/tests/client_test.py | 27 +++++--- acme/src/acme/client.py | 13 ++-- acme/src/acme/errors.py | 8 +++ certbot/src/certbot/_internal/renewal.py | 66 +++++++++++-------- .../certbot/_internal/tests/renewal_test.py | 32 ++++++++- 5 files changed, 106 insertions(+), 40 deletions(-) diff --git a/acme/src/acme/_internal/tests/client_test.py b/acme/src/acme/_internal/tests/client_test.py index 885c6ac5b..7d8f44f9c 100644 --- a/acme/src/acme/_internal/tests/client_test.py +++ b/acme/src/acme/_internal/tests/client_test.py @@ -548,27 +548,38 @@ class ClientV2Test(unittest.TestCase): @mock.patch('acme.client.datetime') def test_renewal_time_renewal_info_errors(self, dt_mock): - utc_now = datetime.datetime(2025, 3, 15, tzinfo=datetime.timezone.utc) - dt_mock.datetime.now.return_value = utc_now + def now(tzinfo=None): + return datetime.datetime(2025, 3, 15, tzinfo=tzinfo) + dt_mock.datetime.now.side_effect = now + dt_mock.timedelta = datetime.timedelta + dt_mock.timezone = datetime.timezone + self.client.directory = messages.Directory({ 'renewalInfo': 'https://www.letsencrypt-demo.org/acme/renewal-info', }) - # Failure to fetch the 'renewalInfo' URL should return None - self.net.get.side_effect = requests.exceptions.RequestException + # Failure to fetch the 'renewalInfo' URL should raise an ARIError with the exception raised + # by self.net.get as the __cause__ and a Retry-After 6 hours in the future + expected_cause = requests.exceptions.RequestException + expected_retry_after = now() + datetime.timedelta(seconds=6 * 60 * 60) + self.net.get.side_effect = expected_cause cert_pem = make_cert_for_renewal( not_before=datetime.datetime(2025, 3, 12, 00, 00, 00), not_after=datetime.datetime(2025, 3, 20, 00, 00, 00), ) - t, _ = self.client.renewal_time(cert_pem) - assert t == None + with pytest.raises(errors.ARIError) as exception_info: + self.client.renewal_time(cert_pem) + assert isinstance(exception_info.value.__cause__, expected_cause) + assert exception_info.value.retry_after == expected_retry_after cert_pem = make_cert_for_renewal( not_before=datetime.datetime(2025, 3, 12, 00, 00, 00), not_after=datetime.datetime(2025, 3, 30, 00, 00, 00), ) - t, _ = self.client.renewal_time(cert_pem) - assert t == None + with pytest.raises(errors.ARIError) as exception_info: + self.client.renewal_time(cert_pem) + assert isinstance(exception_info.value.__cause__, expected_cause) + assert exception_info.value.retry_after == expected_retry_after @mock.patch('acme.client.datetime') def test_renewal_time_returns_retry_after(self, dt_mock): diff --git a/acme/src/acme/client.py b/acme/src/acme/client.py index 238ebd27b..64c90e06e 100644 --- a/acme/src/acme/client.py +++ b/acme/src/acme/client.py @@ -335,6 +335,12 @@ class ClientV2: :param bytes cert_pem: cert as pem file :returns: Tuple of time to attempt renewal, next time to ask for renewal info + + :raises errors.ARIError: If an error occurs fetching ARI from the + server. Explicit exception chaining is used so the original error + can be accessed through the __cause__ attribute on the ARIError if + desired. + """ now = datetime.datetime.now() # https://www.ietf.org/archive/id/draft-ietf-acme-ari-08.html#section-4.3.3 @@ -358,10 +364,9 @@ class ClientV2: ari_url = renewal_info_base_url + '/' + _renewal_info_path_component(cert) try: resp = self.net.get(ari_url, content_type='application/json') - except (requests.exceptions.RequestException, messages.Error) as error: - logger.info("failed to fetch renewal_info URL (%s): %s", ari_url, error) - return None, now + default_retry_after - + except Exception as e: # pylint: disable=broad-except + error_msg = f'failed to fetch renewal_info URL {ari_url}' + raise errors.ARIError(error_msg, now + default_retry_after) from e renewal_info: messages.RenewalInfo = messages.RenewalInfo.from_json(resp.json()) start = renewal_info.suggested_window.start # pylint: disable=no-member diff --git a/acme/src/acme/errors.py b/acme/src/acme/errors.py index 5e0a5379d..9b51b6987 100644 --- a/acme/src/acme/errors.py +++ b/acme/src/acme/errors.py @@ -1,4 +1,5 @@ """ACME errors.""" +import datetime import typing from typing import Any from typing import List @@ -149,3 +150,10 @@ class ConflictError(ClientError): class WildcardUnsupportedError(Error): """Error for when a wildcard is requested but is unsupported by ACME CA.""" + + +class ARIError(ClientError): + """An error occurred during an ARI request and we want to suggest a Retry-After time.""" + def __init__(self, message: str, retry_after: datetime.datetime) -> None: + super().__init__(message, retry_after) + self.retry_after = retry_after diff --git a/certbot/src/certbot/_internal/renewal.py b/certbot/src/certbot/_internal/renewal.py index 64a7b6af8..414b93c4c 100644 --- a/certbot/src/certbot/_internal/renewal.py +++ b/certbot/src/certbot/_internal/renewal.py @@ -325,6 +325,45 @@ def should_renew(config: configuration.NamespaceConfig, return False +def _ari_renewal_time(config: configuration.NamespaceConfig, + lineage: storage.RenewableCert, + acme_clients: Dict[str, acme_client.ClientV2], + cert_pem: bytes, + )-> Optional[datetime.datetime]: + """Return the ARI suggested renewal time if it's available.""" + # For ARI requests, we want to use the ACME directory URL from which the + # cert was originally requested. Since `config.server` can be overridden on + # the command line, we're using the server stored in the cert's renewal + # conf, i.e. `lineage.server` + # + # Fixes https://github.com/certbot/certbot/issues/10339 + if lineage.server: + try: + # Creating a new ACME client makes a network request, so check if we have + # one cached for this cert's server already + if lineage.server not in acme_clients: + acme_clients[lineage.server] = \ + client.create_acme_client(config, server_override=lineage.server) + acme = acme_clients.get(lineage.server, None) + + # Attempt to get the ARI-defined renewal time + if acme: + return acme.renewal_time(cert_pem)[0] + except Exception: # pylint: disable=broad-except + # We want to stop errors around ARI preventing renewal so we catch all exceptions here + # with a warning asking users to tell us about any problems they are experiencing + logger.warning("An error occurred requesting ACME Renewal Information (ARI). If this " + "problem persists and you think it's a bug in Certbot, please open an " + "issue at https://github.com/certbot/certbot/issues/new/choose.") + logger.debug("Error while requesting ARI was:", exc_info=True) + else: + renewal_conf_file = storage.renewal_filename_for_lineagename(config, lineage.lineagename) + logger.warning("Skipping ARI check because %s has no 'server' field. This issue will not " + "prevent certificate renewal", renewal_conf_file) + + return None + + def _default_renewal_time(cert_pem: bytes) -> datetime.datetime: """Return an reasonable default time to attempt renewal of the certificate based on the certificate lifetime. @@ -374,32 +413,7 @@ def should_autorenew(config: configuration.NamespaceConfig, with open(cert, 'rb') as f: cert_pem = f.read() - renewal_time = None - # For ARI requests, we want to use the ACME directory URL from which the - # cert was originally requested. Since `config.server` can be overridden on - # the command line, we're using the server stored in the cert's renewal - # conf, i.e. `lineage.server` - # - # Fixes https://github.com/certbot/certbot/issues/10339 - if lineage.server: - # Creating a new ACME client makes a network request, so check if we have - # one cached for this cert's server already - if lineage.server not in acme_clients: - try: - acme_clients[lineage.server] = \ - client.create_acme_client(config, server_override=lineage.server) - except Exception as error: # pylint: disable=broad-except - logger.info("Unable to connect to %s to request ACME Renewal Information (ARI). " - "Error was: %s", lineage.server, error) - acme = acme_clients.get(lineage.server, None) - - # Attempt to get the ARI-defined renewal time - if acme: - renewal_time, _ = acme.renewal_time(cert_pem) - else: - renewal_conf_file = storage.renewal_filename_for_lineagename(config, lineage.lineagename) - logger.warning("Skipping ARI check because %s has no 'server' field. This issue will not " - "prevent certificate renewal", renewal_conf_file) + renewal_time = _ari_renewal_time(config, lineage, acme_clients, cert_pem) now = datetime.datetime.now(datetime.timezone.utc) diff --git a/certbot/src/certbot/_internal/tests/renewal_test.py b/certbot/src/certbot/_internal/tests/renewal_test.py index 561984e43..ba1f5a379 100644 --- a/certbot/src/certbot/_internal/tests/renewal_test.py +++ b/certbot/src/certbot/_internal/tests/renewal_test.py @@ -9,6 +9,7 @@ from unittest import mock import pytest from acme import challenges +from acme import errors as acme_errors from certbot import configuration from certbot import errors from certbot._internal import storage @@ -439,8 +440,35 @@ class RenewalTest(test_util.ConfigTestCase): mock_ocsp.return_value = True with mock.patch('certbot._internal.renewal.open', mock.mock_open(read_data=b'')): - assert renewal.should_autorenew(self.config, mock_rc, acme_clients) - assert mock_renewal_time.call_count == 0 + with mock.patch('certbot._internal.renewal.logger') as mock_logger: + assert renewal.should_autorenew(self.config, mock_rc, acme_clients) + assert mock_renewal_time.call_count == 0 + # Ensure we logged about skipping the ARI check and the underlying exception + assert any('ARI' in call.args[0] for call in mock_logger.warning.call_args_list) + assert any(call.kwargs.get('exc_info') for call in mock_logger.debug.call_args_list) + + + @mock.patch('certbot._internal.storage.RenewableCert.ocsp_revoked') + def test_resilient_ari_check(self, mock_ocsp): + from certbot._internal import renewal + + mock_acme = mock.MagicMock() + ari_error = acme_errors.ARIError('some error', datetime.datetime.now()) + ari_server = 'http://ari' + mock_acme.renewal_time.side_effect = ari_error + acme_clients = {} + acme_clients[ari_server] = mock_acme + mock_rc = mock.MagicMock() + mock_rc.server = ari_server + mock_rc.autorenewal_is_enabled.return_value = True + mock_ocsp.return_value = True + + with mock.patch('certbot._internal.renewal.open', mock.mock_open(read_data=b'')): + with mock.patch('certbot._internal.renewal.logger') as mock_logger: + assert renewal.should_autorenew(self.config, mock_rc, acme_clients) + # Ensure we logged about skipping the ARI check and the underlying exception + assert any('ARI' in call.args[0] for call in mock_logger.warning.call_args_list) + assert any(call.kwargs.get('exc_info') for call in mock_logger.debug.call_args_list) class RestoreRequiredConfigElementsTest(test_util.ConfigTestCase):