improve ARI error handling and logging (#10374)

fixes https://github.com/certbot/certbot/issues/10336 and fixes
https://github.com/certbot/certbot/issues/10357 using the plan at
https://github.com/certbot/certbot/issues/10336#issuecomment-3109192677

while this PR makes the renewal_time function slightly less nice, i
think us catching and handling the exceptions in certbot makes the most
sense so we can do exactly what we want around terminal and file logging

with this change, a output from a failed `sudo certbot renew` run looks
like
```
$ sudo certbot renew
Saving debug log to /var/log/letsencrypt/letsencrypt.log

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
Processing /etc/letsencrypt/renewal/example.org.conf
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
An error occurred requesting ACME Renewal Information (ARI). If this problem persists and you think it's a bug in Certbot, please open an issue at https://github.com/certbot/certbot/issues/new/choose.
Certificate not yet due for renewal

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
The following certificates are not due for renewal yet:
  /etc/letsencrypt/live/example.org/fullchain.pem expires on 2025-10-23 (skipped)
No renewals were attempted.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
```
`sudo certbot renew -q` produces no output and the relevant messages in
the log file look like:
```
2025-07-30 19:51:13,267:WARNING:certbot._internal.renewal:An error occurred requesting ACME Renewal Information (ARI). If this problem persists and you think it's a bug in Certbot, please open an issue at https://github.com/certbot/certbot/issues/new/choose.
2025-07-30 19:51:13,267:DEBUG:certbot._internal.renewal:Error while requesting ARI was:
Traceback (most recent call last):
  File "/home/brad/certbot/acme/src/acme/client.py", line 366, in renewal_time
    raise ValueError('im some error')
ValueError: im some error

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/brad/certbot/certbot/src/certbot/_internal/renewal.py", line 351, in _ari_renewal_time
    return acme.renewal_time(cert_pem)[0]
  File "/home/brad/certbot/acme/src/acme/client.py", line 370, in renewal_time
    raise errors.ARIError(error_msg, now + default_retry_after) from e
acme.errors.ARIError: ('failed to fetch renewal_info URL https://acme-staging-v02.api.letsencrypt.org/acme/renewal-info/oXQaBm1Qt4YtSizBfrSNiElszRY.LMjTHFS4HPbSRMOzLrA9OZId', datetime.datetime(2025, 7, 31, 1, 51, 13, 267088))

```
This commit is contained in:
Brad Warren 2025-07-30 16:31:59 -07:00 committed by GitHub
parent ae2a00bbad
commit 1f128b0e0a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 106 additions and 40 deletions

View file

@ -548,27 +548,38 @@ class ClientV2Test(unittest.TestCase):
@mock.patch('acme.client.datetime')
def test_renewal_time_renewal_info_errors(self, dt_mock):
utc_now = datetime.datetime(2025, 3, 15, tzinfo=datetime.timezone.utc)
dt_mock.datetime.now.return_value = utc_now
def now(tzinfo=None):
return datetime.datetime(2025, 3, 15, tzinfo=tzinfo)
dt_mock.datetime.now.side_effect = now
dt_mock.timedelta = datetime.timedelta
dt_mock.timezone = datetime.timezone
self.client.directory = messages.Directory({
'renewalInfo': 'https://www.letsencrypt-demo.org/acme/renewal-info',
})
# Failure to fetch the 'renewalInfo' URL should return None
self.net.get.side_effect = requests.exceptions.RequestException
# Failure to fetch the 'renewalInfo' URL should raise an ARIError with the exception raised
# by self.net.get as the __cause__ and a Retry-After 6 hours in the future
expected_cause = requests.exceptions.RequestException
expected_retry_after = now() + datetime.timedelta(seconds=6 * 60 * 60)
self.net.get.side_effect = expected_cause
cert_pem = make_cert_for_renewal(
not_before=datetime.datetime(2025, 3, 12, 00, 00, 00),
not_after=datetime.datetime(2025, 3, 20, 00, 00, 00),
)
t, _ = self.client.renewal_time(cert_pem)
assert t == None
with pytest.raises(errors.ARIError) as exception_info:
self.client.renewal_time(cert_pem)
assert isinstance(exception_info.value.__cause__, expected_cause)
assert exception_info.value.retry_after == expected_retry_after
cert_pem = make_cert_for_renewal(
not_before=datetime.datetime(2025, 3, 12, 00, 00, 00),
not_after=datetime.datetime(2025, 3, 30, 00, 00, 00),
)
t, _ = self.client.renewal_time(cert_pem)
assert t == None
with pytest.raises(errors.ARIError) as exception_info:
self.client.renewal_time(cert_pem)
assert isinstance(exception_info.value.__cause__, expected_cause)
assert exception_info.value.retry_after == expected_retry_after
@mock.patch('acme.client.datetime')
def test_renewal_time_returns_retry_after(self, dt_mock):

View file

@ -335,6 +335,12 @@ class ClientV2:
:param bytes cert_pem: cert as pem file
:returns: Tuple of time to attempt renewal, next time to ask for renewal info
:raises errors.ARIError: If an error occurs fetching ARI from the
server. Explicit exception chaining is used so the original error
can be accessed through the __cause__ attribute on the ARIError if
desired.
"""
now = datetime.datetime.now()
# https://www.ietf.org/archive/id/draft-ietf-acme-ari-08.html#section-4.3.3
@ -358,10 +364,9 @@ class ClientV2:
ari_url = renewal_info_base_url + '/' + _renewal_info_path_component(cert)
try:
resp = self.net.get(ari_url, content_type='application/json')
except (requests.exceptions.RequestException, messages.Error) as error:
logger.info("failed to fetch renewal_info URL (%s): %s", ari_url, error)
return None, now + default_retry_after
except Exception as e: # pylint: disable=broad-except
error_msg = f'failed to fetch renewal_info URL {ari_url}'
raise errors.ARIError(error_msg, now + default_retry_after) from e
renewal_info: messages.RenewalInfo = messages.RenewalInfo.from_json(resp.json())
start = renewal_info.suggested_window.start # pylint: disable=no-member

View file

@ -1,4 +1,5 @@
"""ACME errors."""
import datetime
import typing
from typing import Any
from typing import List
@ -149,3 +150,10 @@ class ConflictError(ClientError):
class WildcardUnsupportedError(Error):
"""Error for when a wildcard is requested but is unsupported by ACME CA."""
class ARIError(ClientError):
"""An error occurred during an ARI request and we want to suggest a Retry-After time."""
def __init__(self, message: str, retry_after: datetime.datetime) -> None:
super().__init__(message, retry_after)
self.retry_after = retry_after

View file

@ -325,6 +325,45 @@ def should_renew(config: configuration.NamespaceConfig,
return False
def _ari_renewal_time(config: configuration.NamespaceConfig,
lineage: storage.RenewableCert,
acme_clients: Dict[str, acme_client.ClientV2],
cert_pem: bytes,
)-> Optional[datetime.datetime]:
"""Return the ARI suggested renewal time if it's available."""
# For ARI requests, we want to use the ACME directory URL from which the
# cert was originally requested. Since `config.server` can be overridden on
# the command line, we're using the server stored in the cert's renewal
# conf, i.e. `lineage.server`
#
# Fixes https://github.com/certbot/certbot/issues/10339
if lineage.server:
try:
# Creating a new ACME client makes a network request, so check if we have
# one cached for this cert's server already
if lineage.server not in acme_clients:
acme_clients[lineage.server] = \
client.create_acme_client(config, server_override=lineage.server)
acme = acme_clients.get(lineage.server, None)
# Attempt to get the ARI-defined renewal time
if acme:
return acme.renewal_time(cert_pem)[0]
except Exception: # pylint: disable=broad-except
# We want to stop errors around ARI preventing renewal so we catch all exceptions here
# with a warning asking users to tell us about any problems they are experiencing
logger.warning("An error occurred requesting ACME Renewal Information (ARI). If this "
"problem persists and you think it's a bug in Certbot, please open an "
"issue at https://github.com/certbot/certbot/issues/new/choose.")
logger.debug("Error while requesting ARI was:", exc_info=True)
else:
renewal_conf_file = storage.renewal_filename_for_lineagename(config, lineage.lineagename)
logger.warning("Skipping ARI check because %s has no 'server' field. This issue will not "
"prevent certificate renewal", renewal_conf_file)
return None
def _default_renewal_time(cert_pem: bytes) -> datetime.datetime:
"""Return an reasonable default time to attempt renewal of the certificate
based on the certificate lifetime.
@ -374,32 +413,7 @@ def should_autorenew(config: configuration.NamespaceConfig,
with open(cert, 'rb') as f:
cert_pem = f.read()
renewal_time = None
# For ARI requests, we want to use the ACME directory URL from which the
# cert was originally requested. Since `config.server` can be overridden on
# the command line, we're using the server stored in the cert's renewal
# conf, i.e. `lineage.server`
#
# Fixes https://github.com/certbot/certbot/issues/10339
if lineage.server:
# Creating a new ACME client makes a network request, so check if we have
# one cached for this cert's server already
if lineage.server not in acme_clients:
try:
acme_clients[lineage.server] = \
client.create_acme_client(config, server_override=lineage.server)
except Exception as error: # pylint: disable=broad-except
logger.info("Unable to connect to %s to request ACME Renewal Information (ARI). "
"Error was: %s", lineage.server, error)
acme = acme_clients.get(lineage.server, None)
# Attempt to get the ARI-defined renewal time
if acme:
renewal_time, _ = acme.renewal_time(cert_pem)
else:
renewal_conf_file = storage.renewal_filename_for_lineagename(config, lineage.lineagename)
logger.warning("Skipping ARI check because %s has no 'server' field. This issue will not "
"prevent certificate renewal", renewal_conf_file)
renewal_time = _ari_renewal_time(config, lineage, acme_clients, cert_pem)
now = datetime.datetime.now(datetime.timezone.utc)

View file

@ -9,6 +9,7 @@ from unittest import mock
import pytest
from acme import challenges
from acme import errors as acme_errors
from certbot import configuration
from certbot import errors
from certbot._internal import storage
@ -439,8 +440,35 @@ class RenewalTest(test_util.ConfigTestCase):
mock_ocsp.return_value = True
with mock.patch('certbot._internal.renewal.open', mock.mock_open(read_data=b'')):
assert renewal.should_autorenew(self.config, mock_rc, acme_clients)
assert mock_renewal_time.call_count == 0
with mock.patch('certbot._internal.renewal.logger') as mock_logger:
assert renewal.should_autorenew(self.config, mock_rc, acme_clients)
assert mock_renewal_time.call_count == 0
# Ensure we logged about skipping the ARI check and the underlying exception
assert any('ARI' in call.args[0] for call in mock_logger.warning.call_args_list)
assert any(call.kwargs.get('exc_info') for call in mock_logger.debug.call_args_list)
@mock.patch('certbot._internal.storage.RenewableCert.ocsp_revoked')
def test_resilient_ari_check(self, mock_ocsp):
from certbot._internal import renewal
mock_acme = mock.MagicMock()
ari_error = acme_errors.ARIError('some error', datetime.datetime.now())
ari_server = 'http://ari'
mock_acme.renewal_time.side_effect = ari_error
acme_clients = {}
acme_clients[ari_server] = mock_acme
mock_rc = mock.MagicMock()
mock_rc.server = ari_server
mock_rc.autorenewal_is_enabled.return_value = True
mock_ocsp.return_value = True
with mock.patch('certbot._internal.renewal.open', mock.mock_open(read_data=b'')):
with mock.patch('certbot._internal.renewal.logger') as mock_logger:
assert renewal.should_autorenew(self.config, mock_rc, acme_clients)
# Ensure we logged about skipping the ARI check and the underlying exception
assert any('ARI' in call.args[0] for call in mock_logger.warning.call_args_list)
assert any(call.kwargs.get('exc_info') for call in mock_logger.debug.call_args_list)
class RestoreRequiredConfigElementsTest(test_util.ConfigTestCase):