mirror of
https://github.com/NLnetLabs/unbound.git
synced 2026-02-02 19:59:28 -05:00
Serve expired cache update fixes (#1174)
- Fixes a regression bug with serve-expired that appeared in 1.22.0 and would not allow the iterator to update the cache with not-yet-validated entries resulting in increased outgoing traffic. - Treat serve_expired_norec_ttl as a backoff timer for failed updates of expired records. - Try to use expired answers instead of SERVFAIL if serve-expired is enabled even without serve-expired-client-timeout. - Add suggestion to refresh the cached norec_ttl and expired_ttl when a response cannot update the usable expired entry.
This commit is contained in:
parent
e57e537c85
commit
fff9f62a1e
7 changed files with 170 additions and 46 deletions
|
|
@ -1845,10 +1845,10 @@ lookup_cache:
|
|||
* its qname must be that used for cache
|
||||
* lookup. */
|
||||
if((worker->env.cfg->prefetch &&
|
||||
*worker->env.now >= rep->prefetch_ttl) ||
|
||||
rep->prefetch_ttl <= *worker->env.now) ||
|
||||
(worker->env.cfg->serve_expired &&
|
||||
*worker->env.now > rep->ttl)) {
|
||||
|
||||
rep->ttl < *worker->env.now &&
|
||||
!(*worker->env.now < rep->serve_expired_norec_ttl))) {
|
||||
time_t leeway = rep->ttl - *worker->env.now;
|
||||
if(rep->ttl < *worker->env.now)
|
||||
leeway = 0;
|
||||
|
|
|
|||
41
services/cache/dns.c
vendored
41
services/cache/dns.c
vendored
|
|
@ -1056,7 +1056,7 @@ dns_cache_lookup(struct module_env* env,
|
|||
|
||||
int
|
||||
dns_cache_store(struct module_env* env, struct query_info* msgqinf,
|
||||
struct reply_info* msgrep, int is_referral, time_t leeway, int pside,
|
||||
struct reply_info* msgrep, int is_referral, time_t leeway, int pside,
|
||||
struct regional* region, uint32_t flags, time_t qstarttime,
|
||||
int is_valrec)
|
||||
{
|
||||
|
|
@ -1066,7 +1066,7 @@ dns_cache_store(struct module_env* env, struct query_info* msgqinf,
|
|||
* useful expired record exists. */
|
||||
struct msgreply_entry* e = msg_cache_lookup(env,
|
||||
msgqinf->qname, msgqinf->qname_len, msgqinf->qtype,
|
||||
msgqinf->qclass, flags, 0, 0);
|
||||
msgqinf->qclass, flags, 0, 1);
|
||||
if(e) {
|
||||
struct reply_info* cached = e->entry.data;
|
||||
if(cached->ttl < *env->now
|
||||
|
|
@ -1081,7 +1081,42 @@ dns_cache_store(struct module_env* env, struct query_info* msgqinf,
|
|||
&& cached->security != sec_status_bogus
|
||||
&& (env->need_to_validate &&
|
||||
msgrep->security == sec_status_unchecked)
|
||||
&& !is_valrec) {
|
||||
/* Exceptions to that rule are:
|
||||
* o recursions that don't need validation but
|
||||
* need to update the cache for coherence
|
||||
* (delegation information while iterating,
|
||||
* DNSKEY and DS lookups from validator)
|
||||
* o explicit RRSIG queries that are not
|
||||
* validated. */
|
||||
&& !is_valrec
|
||||
&& msgqinf->qtype != LDNS_RR_TYPE_RRSIG) {
|
||||
if((int)FLAGS_GET_RCODE(msgrep->flags) !=
|
||||
LDNS_RCODE_NOERROR &&
|
||||
(int)FLAGS_GET_RCODE(msgrep->flags) !=
|
||||
LDNS_RCODE_NXDOMAIN) {
|
||||
/* The current response has an
|
||||
* erroneous rcode. Adjust norec time
|
||||
* so that additional lookups are not
|
||||
* performed for some time. */
|
||||
verbose(VERB_ALGO, "set "
|
||||
"serve-expired-norec-ttl for "
|
||||
"response in cache");
|
||||
cached->serve_expired_norec_ttl =
|
||||
NORR_TTL + *env->now;
|
||||
if(env->cfg->serve_expired_ttl_reset &&
|
||||
cached->serve_expired_ttl
|
||||
< *env->now +
|
||||
env->cfg->serve_expired_ttl) {
|
||||
/* Reset serve-expired-ttl for
|
||||
* valid response in cache. */
|
||||
verbose(VERB_ALGO, "reset "
|
||||
"serve-expired-ttl "
|
||||
"for response in cache");
|
||||
cached->serve_expired_ttl =
|
||||
*env->now +
|
||||
env->cfg->serve_expired_ttl;
|
||||
}
|
||||
}
|
||||
verbose(VERB_ALGO, "a validated expired entry "
|
||||
"could be overwritten, skip caching "
|
||||
"the new message at this stage");
|
||||
|
|
|
|||
|
|
@ -1512,8 +1512,10 @@ void mesh_query_done(struct mesh_state* mstate)
|
|||
}
|
||||
if(mstate->s.return_rcode == LDNS_RCODE_SERVFAIL ||
|
||||
(rep && FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_SERVFAIL)) {
|
||||
/* we are SERVFAILing; check for expired answer here */
|
||||
mesh_serve_expired_callback(mstate);
|
||||
if(mstate->s.env->cfg->serve_expired) {
|
||||
/* we are SERVFAILing; check for expired answer here */
|
||||
mesh_respond_serve_expired(mstate);
|
||||
}
|
||||
if((mstate->reply_list || mstate->cb_list)
|
||||
&& mstate->s.env->cfg->log_servfail
|
||||
&& !mstate->s.env->cfg->val_log_squelch) {
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ server:
|
|||
ede: yes
|
||||
ede-serve-expired: yes
|
||||
|
||||
|
||||
stub-zone:
|
||||
name: "example.com"
|
||||
stub-addr: 1.2.3.4
|
||||
|
|
@ -25,8 +24,10 @@ SCENARIO_BEGIN Test serve-expired with client-timeout and a SERVFAIL upstream re
|
|||
; - check that we get the expired cached answer
|
||||
; - query again (the answer is available on the upstream server now)
|
||||
; - check that we get the immediate expired answer back instead
|
||||
; - (the upstream query does happen after the expired reply and updates the cache)
|
||||
; - query again (the upstream has no answer)
|
||||
; - query again (the answer is available on the upstream server now)
|
||||
; - check that we *still* get the immediate expired answer back instead, recursion is blocked for NORR_TTL(5)
|
||||
; - wait for NORR_TTL(5) to expire
|
||||
; - query again
|
||||
; - check that we get the freshly cached answer
|
||||
|
||||
; ns.example.com.
|
||||
|
|
@ -73,7 +74,7 @@ RANGE_BEGIN 30 40
|
|||
RANGE_END
|
||||
|
||||
; ns.example.com.
|
||||
RANGE_BEGIN 50 60
|
||||
RANGE_BEGIN 50 100
|
||||
ADDRESS 1.2.3.4
|
||||
; response to A query
|
||||
ENTRY_BEGIN
|
||||
|
|
@ -148,12 +149,8 @@ ENTRY_BEGIN
|
|||
example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; Allow for upstream query to resolve.
|
||||
STEP 51 TRAFFIC
|
||||
|
||||
; Check that we got an immediate stale answer because of the previous failure,
|
||||
; regardless if upstream has the answer already in this range. The query will
|
||||
; be resolved after the immediate cached answer and will cache the result.
|
||||
; regardless if upstream has the answer already in this range.
|
||||
STEP 60 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl ede=3
|
||||
|
|
@ -170,15 +167,42 @@ ENTRY_END
|
|||
|
||||
; Query again
|
||||
STEP 70 QUERY
|
||||
ENTRY_BEGIN
|
||||
REPLY RD DO
|
||||
SECTION QUESTION
|
||||
example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; Check that we still get the immediate stale answer because of the previous failure,
|
||||
; regardless if upstream has the answer already in this range. NORR_TTL(5) blocks us from
|
||||
; recursion.
|
||||
STEP 80 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl ede=3
|
||||
REPLY QR RD RA DO NOERROR
|
||||
SECTION QUESTION
|
||||
example.com. IN A
|
||||
SECTION ANSWER
|
||||
example.com. 123 IN A 5.6.7.8
|
||||
SECTION AUTHORITY
|
||||
example.com. 123 IN NS ns.example.com.
|
||||
SECTION ADDITIONAL
|
||||
ns.example.com. 123 IN A 1.2.3.4
|
||||
ENTRY_END
|
||||
|
||||
; Let NORR_TTL(5) expire
|
||||
STEP 81 TIME_PASSES ELAPSE 5
|
||||
|
||||
; Query again
|
||||
STEP 90 QUERY
|
||||
ENTRY_BEGIN
|
||||
REPLY RD
|
||||
SECTION QUESTION
|
||||
example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; Check that we got the cached updated answer from the previous step since
|
||||
; there is no upstream in this range.
|
||||
STEP 80 CHECK_ANSWER
|
||||
; Check fresh reply
|
||||
STEP 100 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl
|
||||
REPLY QR RD RA NOERROR
|
||||
|
|
|
|||
|
|
@ -30,13 +30,13 @@ SCENARIO_BEGIN Test serve-expired with client-timeout and bogus answer
|
|||
; - wait for the record to expire
|
||||
; - (upstream now has a bogus response)
|
||||
; - query again for www.example.com. IN A
|
||||
; - check that we get the expired valid response instead
|
||||
; - query once more
|
||||
; - check that we get the expired valid response instead; recursion is blocked for NORR_TTL(5) because of the failure
|
||||
; - (upstream has the valid response again)
|
||||
; - query once more
|
||||
; - check that we get the immediate expired valid response
|
||||
; - (the prefetch query updates the cache with the valid response)
|
||||
; - let NORR_TTL(5) expire
|
||||
; - query one last time
|
||||
; - check that we get the immediate valid cache response; upstream does not have an answer at this moment
|
||||
; - check that we get the immediate valid cache response
|
||||
|
||||
; The example.com NS and ns.example.com A record are commented out.
|
||||
; This to make the test succeed. It then keeps the dnssec valid lookup.
|
||||
|
|
@ -199,7 +199,7 @@ RANGE_END
|
|||
;;
|
||||
;; ns.example.com. with valid data again
|
||||
;;
|
||||
RANGE_BEGIN 40 60
|
||||
RANGE_BEGIN 40 70
|
||||
ADDRESS 1.2.3.4
|
||||
; response to query of interest
|
||||
ENTRY_BEGIN
|
||||
|
|
@ -279,7 +279,7 @@ SECTION QUESTION
|
|||
www.example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; immediate cached answer because upstream is valid again
|
||||
; immediate cached answer; although upstream is valid again
|
||||
STEP 50 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl ede=3
|
||||
|
|
@ -297,7 +297,9 @@ SECTION ADDITIONAL
|
|||
;ns.example.com. 123 IN RRSIG A 3 3 3600 20070926134150 20070829134150 2854 example.com. MC0CFQCQMyTjn7WWwpwAR1LlVeLpRgZGuQIUCcJDEkwAuzytTDRlYK7nIMwH1CM= ;{id = 2854}
|
||||
ENTRY_END
|
||||
|
||||
; upstream query is resolved before this query comes in
|
||||
STEP 51 TIME_PASSES ELAPSE 5
|
||||
|
||||
; query one last time
|
||||
STEP 60 QUERY
|
||||
ENTRY_BEGIN
|
||||
REPLY RD DO
|
||||
|
|
@ -305,7 +307,7 @@ SECTION QUESTION
|
|||
www.example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; prefetch query updated the cache, since there is no upstream response in this range
|
||||
; this is the fresh valid response
|
||||
STEP 70 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl
|
||||
|
|
|
|||
22
testdata/serve_expired_ttl_reset.rpl
vendored
22
testdata/serve_expired_ttl_reset.rpl
vendored
|
|
@ -16,11 +16,11 @@ SCENARIO_BEGIN Serve expired ttl with reset on forwarder with a timeout on upstr
|
|||
; - Wait for it to expire (+ serve-expired-ttl)
|
||||
; - Send query again
|
||||
; - Upstream timeouts
|
||||
; - Error response from iterator SERVFAIL, resets expired-ttl on cache
|
||||
; - Check we are getting the SERVFAIL response
|
||||
; - Error response from iterator SERVFAIL, resets expired-ttl on cache and sets norec_ttl blocking recursion
|
||||
; - Check we are getting the cached response because it was expired-ttl-reset
|
||||
; - Query again
|
||||
; - Check we are getting the expired answer
|
||||
; - Upstream still timeouts
|
||||
; - Check we are getting the expired answer; prefetching is blocked by norec_ttl
|
||||
; - If there was prefetching the test would fail with the pending upstream query
|
||||
|
||||
STEP 1 QUERY
|
||||
ENTRY_BEGIN
|
||||
|
|
@ -54,7 +54,7 @@ STEP 4 TIME_PASSES ELAPSE 12
|
|||
|
||||
STEP 5 QUERY
|
||||
ENTRY_BEGIN
|
||||
REPLY RD
|
||||
REPLY RD DO
|
||||
SECTION QUESTION
|
||||
www.example.com. IN A
|
||||
ENTRY_END
|
||||
|
|
@ -67,14 +67,16 @@ STEP 8 TIMEOUT
|
|||
STEP 9 TIMEOUT
|
||||
STEP 10 TIMEOUT
|
||||
|
||||
; Returns servfail
|
||||
; Returns
|
||||
; but error response from iterator resets the expired ttl
|
||||
STEP 11 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl
|
||||
REPLY QR RA RD SERVFAIL
|
||||
MATCH all ttl ede=3
|
||||
REPLY QR RA RD DO NOERROR
|
||||
SECTION QUESTION
|
||||
www.example.com. IN A
|
||||
SECTION ANSWER
|
||||
www.example.com. 123 IN A 0.0.0.0
|
||||
ENTRY_END
|
||||
|
||||
; Query again
|
||||
|
|
@ -96,8 +98,4 @@ SECTION ANSWER
|
|||
www.example.com. 123 IN A 0.0.0.0
|
||||
ENTRY_END
|
||||
|
||||
; But the pending query times out!
|
||||
; Only one because RTT reached the limit.
|
||||
STEP 16 TIMEOUT
|
||||
|
||||
SCENARIO_END
|
||||
|
|
|
|||
75
testdata/serve_expired_val_bogus.rpl
vendored
75
testdata/serve_expired_val_bogus.rpl
vendored
|
|
@ -31,12 +31,18 @@ SCENARIO_BEGIN Test serve-expired with client-timeout and bogus answer
|
|||
; - (upstream now has a bogus response)
|
||||
; - query again for www.example.com. IN A
|
||||
; - check that we get the immediate expired valid response
|
||||
; - (prefetch response is bogus and is not cached)
|
||||
; - (prefetch response is bogus and is not cached; recursion is blocked for NORR_TTL(5) because of the failure)
|
||||
; - (upstream has a valid response again)
|
||||
; - query once more
|
||||
; - check that we still get the immediate expired valid response and not the fresh bogus one
|
||||
; - (upstream has a valid response again; prefetch will update the cache)
|
||||
; - check that we still get the immediate expired valid response (prefetch will not trigger because of NORR_TTL(5))
|
||||
; - query and check that cache was not updated
|
||||
; - let NORR_TTL(5) expire
|
||||
; - query once more
|
||||
; - check that we still get the immediate expired valid response
|
||||
; - (prefetch should be allowed to refresh the record at this point)
|
||||
; - (upstream does not have the answer anymore)
|
||||
; - query one last time
|
||||
; - check that we get an immediate valid cache response
|
||||
; - check that we get the immediate valid cache response
|
||||
|
||||
; The example.com NS and ns.example.com A record are commented out.
|
||||
; This to make the test succeed. It then keeps the dnssec valid lookup.
|
||||
|
|
@ -273,6 +279,7 @@ SECTION ADDITIONAL
|
|||
;ns.example.com. 123 IN RRSIG A 3 3 3600 20070926134150 20070829134150 2854 example.com. MC0CFQCQMyTjn7WWwpwAR1LlVeLpRgZGuQIUCcJDEkwAuzytTDRlYK7nIMwH1CM= ;{id = 2854}
|
||||
ENTRY_END
|
||||
|
||||
; query with response available on the server
|
||||
STEP 40 QUERY
|
||||
ENTRY_BEGIN
|
||||
REPLY RD DO
|
||||
|
|
@ -280,7 +287,8 @@ SECTION QUESTION
|
|||
www.example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; this is still the immediate cache response because the previous upstream response was bogus
|
||||
; this is still the immediate expired cache response because the previous upstream response was bogus
|
||||
; upstream query did not go out because of the previous failure NORR_TTL(5).
|
||||
STEP 50 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl ede=3
|
||||
|
|
@ -298,6 +306,7 @@ SECTION ADDITIONAL
|
|||
;ns.example.com. 123 IN RRSIG A 3 3 3600 20070926134150 20070829134150 2854 example.com. MC0CFQCQMyTjn7WWwpwAR1LlVeLpRgZGuQIUCcJDEkwAuzytTDRlYK7nIMwH1CM= ;{id = 2854}
|
||||
ENTRY_END
|
||||
|
||||
; query with response available
|
||||
STEP 60 QUERY
|
||||
ENTRY_BEGIN
|
||||
REPLY RD DO
|
||||
|
|
@ -305,9 +314,63 @@ SECTION QUESTION
|
|||
www.example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; this is the immediate cache response because the previous upstream response was valid
|
||||
; this is still the immediate expired cache response because resolution is blocked for NORR_TTL(5)
|
||||
STEP 70 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl ede=3
|
||||
REPLY QR RD RA AD DO NOERROR
|
||||
SECTION QUESTION
|
||||
www.example.com. IN A
|
||||
SECTION ANSWER
|
||||
www.example.com. 123 IN A 10.20.30.40
|
||||
www.example.com. 123 IN RRSIG A 3 3 3600 20070926134150 20070829134150 2854 example.com. MC0CFC99iE9K5y2WNgI0gFvBWaTi9wm6AhUAoUqOpDtG5Zct+Qr9F3mSdnbc6V4= ;{id = 2854}
|
||||
SECTION AUTHORITY
|
||||
;example.com. 123 IN NS ns.example.com.
|
||||
;example.com. 123 IN RRSIG NS 3 2 3600 20070926134150 20070829134150 2854 example.com. MC0CFQCN+qHdJxoI/2tNKwsb08pra/G7aAIUAWA5sDdJTbrXA1/3OaesGBAO3sI= ;{id = 2854}
|
||||
SECTION ADDITIONAL
|
||||
;ns.example.com. 123 IN A 1.2.3.4
|
||||
;ns.example.com. 123 IN RRSIG A 3 3 3600 20070926134150 20070829134150 2854 example.com. MC0CFQCQMyTjn7WWwpwAR1LlVeLpRgZGuQIUCcJDEkwAuzytTDRlYK7nIMwH1CM= ;{id = 2854}
|
||||
ENTRY_END
|
||||
|
||||
; expire NORR_TTL(5)
|
||||
STEP 71 TIME_PASSES ELAPSE 5
|
||||
|
||||
; query again
|
||||
STEP 80 QUERY
|
||||
ENTRY_BEGIN
|
||||
REPLY RD DO
|
||||
SECTION QUESTION
|
||||
www.example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; this is still the immediate expired cache response but prefetching will be allowed to update the cache
|
||||
STEP 90 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl ede=3
|
||||
REPLY QR RD RA AD DO NOERROR
|
||||
SECTION QUESTION
|
||||
www.example.com. IN A
|
||||
SECTION ANSWER
|
||||
www.example.com. 123 IN A 10.20.30.40
|
||||
www.example.com. 123 IN RRSIG A 3 3 3600 20070926134150 20070829134150 2854 example.com. MC0CFC99iE9K5y2WNgI0gFvBWaTi9wm6AhUAoUqOpDtG5Zct+Qr9F3mSdnbc6V4= ;{id = 2854}
|
||||
SECTION AUTHORITY
|
||||
;example.com. 123 IN NS ns.example.com.
|
||||
;example.com. 123 IN RRSIG NS 3 2 3600 20070926134150 20070829134150 2854 example.com. MC0CFQCN+qHdJxoI/2tNKwsb08pra/G7aAIUAWA5sDdJTbrXA1/3OaesGBAO3sI= ;{id = 2854}
|
||||
SECTION ADDITIONAL
|
||||
;ns.example.com. 123 IN A 1.2.3.4
|
||||
;ns.example.com. 123 IN RRSIG A 3 3 3600 20070926134150 20070829134150 2854 example.com. MC0CFQCQMyTjn7WWwpwAR1LlVeLpRgZGuQIUCcJDEkwAuzytTDRlYK7nIMwH1CM= ;{id = 2854}
|
||||
ENTRY_END
|
||||
|
||||
STEP 100 QUERY
|
||||
ENTRY_BEGIN
|
||||
REPLY RD DO
|
||||
SECTION QUESTION
|
||||
www.example.com. IN A
|
||||
ENTRY_END
|
||||
|
||||
; this is the immediate cache response because the previous upstream response was valid
|
||||
STEP 110 CHECK_ANSWER
|
||||
ENTRY_BEGIN
|
||||
MATCH all ttl
|
||||
REPLY QR RD RA AD DO NOERROR
|
||||
SECTION QUESTION
|
||||
|
|
|
|||
Loading…
Reference in a new issue