diff --git a/daemon/worker.c b/daemon/worker.c index 809286720..89d81528e 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -1121,9 +1121,10 @@ worker_init(struct worker* worker, struct config_file *cfg, worker_probe_timer_cb, worker); if(!worker->env.probe_timer) { log_err("could not create 5011-probe timer"); + } else { + /* let timer fire, then it can reset itself */ + comm_timer_set(worker->env.probe_timer, &tv); } - /* let timer fire, then it can reset itself */ - comm_timer_set(worker->env.probe_timer, &tv); } if(!worker->env.mesh || !worker->env.scratch_buffer) { worker_delete(worker); diff --git a/doc/Changelog b/doc/Changelog index 8c4f49441..681f88ed4 100644 --- a/doc/Changelog +++ b/doc/Changelog @@ -2,6 +2,7 @@ - Thanks to Surfnet found bug in new dnssec-retry code that failed to combine well when combined with DLV and a particular failure. - Fixed unbound-control -h output about argument optionality. + - review comments. 5 November 2009: Wouter - lint fixes and portability tests. diff --git a/doc/TODO b/doc/TODO index 91ce69bdf..a88edd154 100644 --- a/doc/TODO +++ b/doc/TODO @@ -62,167 +62,8 @@ o infra and lame cache: easier size config (in Mb), show usage in graphs. - store time of dump in cachedumps, so that on a load the ttls can be compared to the absolute time, and now-expired items can be dealt with. -1.3.x: -- spoofed delegpt fixes - if DNSKEY prime fails - - set DNSKEY bogus and DNSKEY query msg bogus. - - make NS set bogus too - if not validated as secure. - - check where queries go - otherwise reduce TTL on NS. - - also make DS NSEC bogus. Also DS msg cache entry. -- mark bogus under stringent conditions - - if DS at parent and validly signed. Then DNSKEY must exist. - - Also for trust anchor points themselves. DNSKEY must exist. - - so if then DNSKEY keyprime fails - - then it is not simply a server that only answers qtype A. - - then parent is agreeing (somewhat) with the DS record - - but it could still be a lame domain, these exist - The objective is to keep tries for genuinely lame domains to a - minimum, while detecting forgeries quickly. exponential backoff. - - for unbound we can check if we got something to verify while - building that chain of trust. If so - not lame, agressive retry. - - but security-lame zones also exist and should not pose - too high a burden. Exponential backoff again. - (fe. badly signed or dnskey reply too large fails). - - the delegation NS for the domain is bogus. - The referral retried, with exponential backoff. - This exponential backoff should go towards values which are close - to the TTLs that are used now (on lame delegations for example). - so that the extra traffic is manageable. - - for unbound, reset the TTL on the NS rrset. Let it timeout. - Set NS rrset bogus - no more queries to the domain are done. - Also set DNSKEY and DS (rrset, NSEC, msg) bogus and ttl like that. - (to the same absolute value, so a clean retry is done). - TTL of NS is (rounddown) timeout in seconds. - Until the NS times out and referral is done again. - Make sure multiple validations for chains of trust do not result - in a flood of queries or backoff too quickly. -- bogus exponential backoff cache. hash(name,t,c), size(1M, 5%). - TTL of 24h. Backoff from 200msec to 24h. - x2 on bogus(18 tries), x8 backoff on lameness(6 tries), - when servfail for DNSKEY. - remove entry when validated as secure. - delegptspoofrecheck on lameness when harden-referral-path NS - query has servfail, then build chain of trust down (check DS, - then perform DNSKEY query) if that DNSKEY query fails servfail, - perform the x8 lameness retry fallback. - -* keep a list of guilty IP addresses in the qstate, which contains both - the child side guilty IPs and the parent guilty IPs. Valid signed DSes - are not made guilty in the global cache. The child IP is made guilty - in the global cache. -* Retry to higher trust anchors. - * option not to retry to higher from this ta. - * keep longest must-be-secure name. Do no accept insecure above this point. - * if failed ta, blame all lower tas for their DNSKEY (get IP from cached - rrset), if failure is insecure - nothing, if at bogus - blame that too. - lower tas have isdata=false, so the IP address for the dnskeyrrset in - the cache is set to avoid in qstate. Nothing in infracache, no childretry. - -Retry harder to get valid DNSSEC data. -Triggered by a trust anchor or by a signed DS record for a zone. -* If data is fetched and validation fails for it - or DNSKEY is fetched and validated into chain-of-trust fails for it - or DS is fetched and validated into chain-of-trust fails for it - Then - blame(signer zone, IP origin of the data/DNSKEY/DS, x2, isdata) -* If data was not fetched (SERVFAIL, lame, ...), and the data - is under a signed DS then: - blame(thatDSname, IP origin of the data/DNSKEY/DS, x8) - x8 because the zone may be lame. - This means a chain of trust is built also for unfetched data, to - determine if a signed DS is present. If insecure, nothing is done. -* If DNSKEY was not fetched for chain of trust (SERVFAIL, lame, ...), - Then - blame(DNSKEYname, IP origin of the data/DNSKEY/DS, x8) - x8 because the zone may be lame. -* blame(zonename, guiltyIP, multiplier, isdata): - * if isdata: - Set the guiltyIP,zonename as DNSSEC-bogus-data=true in lameness cache. - Thusly marked servers are avoided if possible, used as last resort. - The guilt TTL is the infra cache ttl (15 minutes). - The dnssec retry scheme works without this cache entry. - * If the key cache entry 'being-backed-off' is true and isdata then: - The parent is backedoff, it must be the childs fault. Retry to child. - if the child-dnskey is bogus, then retry is useless, stop. - Perform a child-retry - purge dataonly, childside, mark - data-IPaddress from child as to avoid-forquery. counterperquery, - max is 3, if reached, set this data element RRset&msg to the - current backoff TTL end-time or bogus-ttl(60 seconds) whichever is less - and done. - * if no retry entry exists for the zone key, create one with 24h TTL, 10 ms. - else the backoff *= multiplier. - * If the backoff is less than a second, remove entries from cache and - restart query. Else set the TTL for the entries to that value. - * Entries to set or remove: DNSKEY RRset&msg, DS RRset&msg, NS RRset&msg, - in-zone glue (A and AAAA) RRset&msg, and key-cache-entry TTL. - The the data element RRset&msg to the backoff TTL or bogusttl. - If TTL>1sec set key-cache-entry flag 'being-backed-off' to true. - when entry times out that flag is reset to false again. -* Storage extra is: - IP address per RRset and message. A lot of memory really, since that is - 132 bytes per RRset and per message. Store plain IP: 4/16 bytes, len byte. - port number 2bytes. +19bytes per RRset, per msg. - guilt flag in infra(lameness) cache. - being-backed-off flag for key cache, also backoff time value and its TTL. - child-retry-count and guilty-ip-list in qstate. -* Load on authorities: - For lame servers: 7 tries per day (one per three hours on average). - Others get up to 23 tries per day (one per hour on average). - +1 for original try makes 8/24 hours and 24/24 hours. - Unless the cache entry falls out of the cache due to memory. In that - case it can be tried more often, this is similar to the NS entry falling - out of the cache due to memory, in that case it also has to be retried. -* Performance analysis: - * domain is sold. Unbound sees invalid signature (expired) or the old - servers refuse the queries. Retry within the second, if parent has - new DS and NS available instantly works again (no downtime). - * domain is bogus signed. Parent gets 1 query per hour. - Domain itself gets couple tries per queryname, per minute. - * domain partly bogus. Parent gets 1 query per hour. - Domain itself gets couple tries per bogus queryname, per minute. - * spoof attempt. Unbound tries a couple times. If not spoofed again, - it works, if spoofed every time unbound backs off and stops trying. - But childretry is attempted more often, once per minute. - * parent has inconsistently signed DS records. Together with a subzone that - is badly managed. Unbound backs up to the root once per hour. - * parent has bad DS records, different sets on different servers, but they - are signed ok. Works as for every query a list of bad nameserver, parent - and child side is kept, walks through them. But as backoff increases - and becomes bigger than the TTL on the DS records, unbound will blackout. - The parent really has to be fixed... - The issue is that it is validly signed, but bad data. Unbound will very - conservatively retry it. - * domain is sold, but decommission is faster than the setup of new server. - Unbound does exponential backoff, if new setup is fast, it'll pickup the - new data fast. - * key rollover failed. The zone has bad keys. Like it was bogus signed. - * one nameserver has bad data. Unbound goes back to the parent but also - marks that server as guilty. Picks data from other server right after, - retry without blackout for the user. - When parent starts to get backed off, if the nameserver is childside, - queryretries for childservers are made when queries fail. - * domain was sold, but unbound has old entries in the cache. These somehow - need (re)validation (were queried with +cd, now -cd). The entries are - bogus. - Unbound performs childretry for these entries. Works once the keys - have been successfully reprimed with parentretry. - * unbound is configured to talk to upstream caches. These caches have - inconsistent bad data. If one is bad, it is marked bad for that zone. - If all are bad, there may not be any way for unbound to remove the - bad entries from the upstream caches. It simply fails. - Recommendation: make the upstream caches validate as well. - * Old data that was valid with a long TTL remains in the cache. - Valid data has a TTL and this is the protocol. - * listing bad servers and trying again may not be good enough, since - a combinatorial explosion for DSxDNSKEYxdata is possible for every - signature validation (using different nameservers for DS, DNSKEY and - data, assuming only the right combination has a chain of trust to data). - The parentretries perform DS and DNSKEY searching. - childretries perform data searching. - - later - selective verbosity; ubcontrol trace example.com -- option to log only bogus domainname encountered, for demos - cache fork-dump, pre-load - for fwds, send queries to N servers in fwd-list, use first reply. document high scalable, high available unbound setup onepager. diff --git a/iterator/iterator.c b/iterator/iterator.c index 1a754d08b..65b649335 100644 --- a/iterator/iterator.c +++ b/iterator/iterator.c @@ -946,8 +946,9 @@ processInitRequest(struct module_qstate* qstate, struct iter_qstate* iq, delnamelen = iq->qchase.qname_len; } if(iq->qchase.qtype == LDNS_RR_TYPE_DS || iq->refetch_glue) { - /* remove first label from delname, root goes to hints */ - if(dname_is_root(delname)) + /* remove first label from delname, root goes to hints, + * but only to fetch glue, not for qtype=DS. */ + if(dname_is_root(delname) && iq->refetch_glue) delname = NULL; /* go to root priming */ else dname_remove_label(&delname, &delnamelen); iq->refetch_glue = 0; /* if CNAME causes restart, no refetch */ diff --git a/testcode/checklocks.c b/testcode/checklocks.c index 3ab9ebaf5..520e6ddd1 100644 --- a/testcode/checklocks.c +++ b/testcode/checklocks.c @@ -87,7 +87,6 @@ static void lock_error(struct checked_lock* lock, (lock->type==check_lock_rwlock)?"rwlock": "badtype")), err); log_err("complete status display:"); total_debug_info(); - abort(); fatal_exit("bailing out"); }