diff --git a/CHANGES b/CHANGES index 0763e91614..fd2bea9351 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,7 @@ +2525. [experimental] New logging category "query-errors" to provide detailed + internal information about query failures, especially + about server failures. [RT #19027] + 2524. [port] sunos: dnssec-signzone needs strtoul(). [RT #19129] 2523. [bug] Random type rdata freed by dns_nsec_typepresent(). diff --git a/bin/check/check-tool.c b/bin/check/check-tool.c index 98bc721484..d3ba400d73 100644 --- a/bin/check/check-tool.c +++ b/bin/check/check-tool.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: check-tool.c,v 1.35 2008/10/24 00:28:00 marka Exp $ */ +/* $Id: check-tool.c,v 1.35.36.1 2009/01/07 01:50:14 jinmei Exp $ */ /*! \file */ @@ -115,6 +115,7 @@ static isc_logcategory_t categories[] = { { "queries", 0 }, { "unmatched", 0 }, { "update-security", 0 }, + { "query-errors", 0 }, { NULL, 0 } }; diff --git a/bin/named/include/named/log.h b/bin/named/include/named/log.h index e8a4f321ab..2035ea5686 100644 --- a/bin/named/include/named/log.h +++ b/bin/named/include/named/log.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: log.h,v 1.25 2007/06/19 23:46:59 tbox Exp $ */ +/* $Id: log.h,v 1.25.332.1 2009/01/07 01:50:15 jinmei Exp $ */ #ifndef NAMED_LOG_H #define NAMED_LOG_H 1 @@ -36,6 +36,7 @@ #define NS_LOGCATEGORY_QUERIES (&ns_g_categories[4]) #define NS_LOGCATEGORY_UNMATCHED (&ns_g_categories[5]) #define NS_LOGCATEGORY_UPDATE_SECURITY (&ns_g_categories[6]) +#define NS_LOGCATEGORY_QUERY_EERRORS (&ns_g_categories[7]) /* * Backwards compatibility. diff --git a/bin/named/log.c b/bin/named/log.c index 5cbe3077a8..359ab9f655 100644 --- a/bin/named/log.c +++ b/bin/named/log.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: log.c,v 1.46.334.2 2009/01/05 23:47:22 tbox Exp $ */ +/* $Id: log.c,v 1.46.334.3 2009/01/07 01:50:14 jinmei Exp $ */ /*! \file */ @@ -44,6 +44,7 @@ static isc_logcategory_t categories[] = { { "queries", 0 }, { "unmatched", 0 }, { "update-security", 0 }, + { "query-errors", 0 }, { NULL, 0 } }; diff --git a/bin/named/query.c b/bin/named/query.c index 307986a37b..9289821d8c 100644 --- a/bin/named/query.c +++ b/bin/named/query.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: query.c,v 1.313.20.1 2008/12/29 23:50:35 jinmei Exp $ */ +/* $Id: query.c,v 1.313.20.2 2009/01/07 01:50:14 jinmei Exp $ */ /*! \file */ @@ -120,7 +120,7 @@ typedef struct client_additionalctx { dns_rdataset_t *rdataset; } client_additionalctx_t; -static void +static isc_result_t query_find(ns_client_t *client, dns_fetchevent_t *event, dns_rdatatype_t qtype); static isc_boolean_t @@ -134,6 +134,9 @@ query_findclosestnsec3(dns_name_t *qname, dns_db_t *db, dns_name_t *fname, isc_boolean_t exact, dns_name_t *found); +static inline void +log_queryerror(ns_client_t *client, isc_result_t result, int line, int level); + /*% * Increment query statistics counters. */ @@ -178,9 +181,12 @@ query_send(ns_client_t *client) { } static void -query_error(ns_client_t *client, isc_result_t result) { +query_error(ns_client_t *client, isc_result_t result, int line) { + int loglevel = ISC_LOG_DEBUG(3); + switch (result) { case DNS_R_SERVFAIL: + loglevel = ISC_LOG_DEBUG(1); inc_stats(client, dns_nsstatscounter_servfail); break; case DNS_R_FORMERR: @@ -190,6 +196,9 @@ query_error(ns_client_t *client, isc_result_t result) { inc_stats(client, dns_nsstatscounter_failure); break; } + + log_queryerror(client, result, line, loglevel); + ns_client_error(client, result); } @@ -3086,8 +3095,12 @@ query_addnxrrsetnsec(ns_client_t *client, dns_db_t *db, static void query_resume(isc_task_t *task, isc_event_t *event) { dns_fetchevent_t *devent = (dns_fetchevent_t *)event; + dns_fetch_t *fetch; ns_client_t *client; isc_boolean_t fetch_cancelled, client_shuttingdown; + isc_result_t result; + isc_logcategory_t *logcategory = NS_LOGCATEGORY_QUERY_EERRORS; + int errorloglevel; /* * Resume a query after recursion. @@ -3124,7 +3137,8 @@ query_resume(isc_task_t *task, isc_event_t *event) { INSIST(client->query.fetch == NULL); client->query.attributes &= ~NS_QUERYATTR_RECURSING; - dns_resolver_destroyfetch(&devent->fetch); + fetch = devent->fetch; + devent->fetch = NULL; /* * If this client is shutting down, or this transaction @@ -3141,7 +3155,7 @@ query_resume(isc_task_t *task, isc_event_t *event) { query_putrdataset(client, &devent->sigrdataset); isc_event_free(&event); if (fetch_cancelled) - query_error(client, DNS_R_SERVFAIL); + query_error(client, DNS_R_SERVFAIL, __LINE__); else query_next(client, ISC_R_CANCELED); /* @@ -3149,8 +3163,22 @@ query_resume(isc_task_t *task, isc_event_t *event) { */ ns_client_detach(&client); } else { - query_find(client, devent, 0); + result = query_find(client, devent, 0); + if (result != ISC_R_SUCCESS) { + if (result == DNS_R_SERVFAIL) + errorloglevel = ISC_LOG_DEBUG(2); + else + errorloglevel = ISC_LOG_DEBUG(4); + if (isc_log_wouldlog(ns_g_lctx, errorloglevel)) { + dns_resolver_logfetch(fetch, ns_g_lctx, + logcategory, + NS_LOGMODULE_QUERY, + errorloglevel, ISC_FALSE); + } + } } + + dns_resolver_destroyfetch(&fetch); } static isc_result_t @@ -3277,6 +3305,7 @@ query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qdomain, do { \ eresult = r; \ want_restart = ISC_FALSE; \ + line = __LINE__; \ } while (0) /* @@ -3653,7 +3682,7 @@ query_findclosestnsec3(dns_name_t *qname, dns_db_t *db, * If 'event' is non-NULL, we are returning from recursion and 'qtype' * is ignored. Otherwise, 'qtype' is the query type. */ -static void +static isc_result_t query_find(ns_client_t *client, dns_fetchevent_t *event, dns_rdatatype_t qtype) { dns_db_t *db, *zdb; @@ -3682,6 +3711,7 @@ query_find(ns_client_t *client, dns_fetchevent_t *event, dns_rdatatype_t qtype) isc_boolean_t empty_wild; dns_rdataset_t *noqname; isc_boolean_t resuming; + int line = -1; CTRACE("query_find"); @@ -4839,7 +4869,8 @@ query_find(ns_client_t *client, dns_fetchevent_t *event, dns_rdatatype_t qtype) * or if the client requested recursion and thus wanted * the complete answer, send an error response. */ - query_error(client, eresult); + INSIST(line >= 0); + query_error(client, eresult, line); } ns_client_detach(&client); } else if (!RECURSING(client)) { @@ -4865,10 +4896,22 @@ query_find(ns_client_t *client, dns_fetchevent_t *event, dns_rdatatype_t qtype) client->view->auth_nxdomain == ISC_TRUE) client->message->flags |= DNS_MESSAGEFLAG_AA; + /* + * If the response is somehow unexpected for the client and this + * is a result of recursion, return an error to the caller + * to indicate it may need to be logged. + */ + if (resuming && + (ISC_LIST_EMPTY(client->message->sections[DNS_SECTION_ANSWER]) || + client->message->rcode != dns_rcode_noerror)) + eresult = ISC_R_FAILURE; + query_send(client); ns_client_detach(&client); } CTRACE("query_find: done"); + + return (eresult); } static inline void @@ -4897,6 +4940,48 @@ log_query(ns_client_t *client, unsigned int flags, unsigned int extflags) { ((flags & DNS_MESSAGEFLAG_CD) != 0) ? "C" : ""); } +static inline void +log_queryerror(ns_client_t *client, isc_result_t result, int line, int level) { + char namebuf[DNS_NAME_FORMATSIZE]; + char typename[DNS_RDATATYPE_FORMATSIZE]; + char classname[DNS_RDATACLASS_FORMATSIZE]; + const char *namep, *typep, *classp, *sep1, *sep2; + dns_rdataset_t *rdataset; + + if (!isc_log_wouldlog(ns_g_lctx, level)) + return; + + namep = typep = classp = sep1 = sep2 = ""; + + /* + * Query errors can happen for various reasons. In some cases we cannot + * even assume the query contains a valid question section, so we should + * expect exceptional cases. + */ + if (client->query.origqname != NULL) { + dns_name_format(client->query.origqname, namebuf, + sizeof(namebuf)); + namep = namebuf; + sep1 = " for "; + + rdataset = ISC_LIST_HEAD(client->query.origqname->list); + if (rdataset != NULL) { + dns_rdataclass_format(rdataset->rdclass, classname, + sizeof(classname)); + classp = classname; + dns_rdatatype_format(rdataset->type, typename, + sizeof(typename)); + typep = typename; + sep2 = "/"; + } + } + + ns_client_log(client, NS_LOGCATEGORY_QUERY_EERRORS, NS_LOGMODULE_QUERY, + level, "query failed (%s)%s%s%s%s%s%s at %s:%d", + isc_result_totext(result), sep1, namep, sep2, + classp, sep2, typep, __FILE__, line); +} + void ns_query_start(ns_client_t *client) { isc_result_t result; @@ -4965,7 +5050,7 @@ ns_query_start(ns_client_t *client) { */ result = dns_message_firstname(message, DNS_SECTION_QUESTION); if (result != ISC_R_SUCCESS) { - query_error(client, result); + query_error(client, result, __LINE__); return; } dns_message_currentname(message, DNS_SECTION_QUESTION, @@ -4978,9 +5063,9 @@ ns_query_start(ns_client_t *client) { * There's more than one QNAME in the question * section. */ - query_error(client, DNS_R_FORMERR); + query_error(client, DNS_R_FORMERR, __LINE__); } else - query_error(client, result); + query_error(client, result, __LINE__); return; } @@ -4991,7 +5076,7 @@ ns_query_start(ns_client_t *client) { * Check for multiple question queries, since edns1 is dead. */ if (message->counts[DNS_SECTION_QUESTION] > 1) { - query_error(client, DNS_R_FORMERR); + query_error(client, DNS_R_FORMERR, __LINE__); return; } @@ -5012,7 +5097,7 @@ ns_query_start(ns_client_t *client) { return; case dns_rdatatype_maila: case dns_rdatatype_mailb: - query_error(client, DNS_R_NOTIMP); + query_error(client, DNS_R_NOTIMP, __LINE__); return; case dns_rdatatype_tkey: result = dns_tkey_processquery(client->message, @@ -5021,10 +5106,10 @@ ns_query_start(ns_client_t *client) { if (result == ISC_R_SUCCESS) query_send(client); else - query_error(client, result); + query_error(client, result, __LINE__); return; default: /* TSIG, etc. */ - query_error(client, DNS_R_FORMERR); + query_error(client, DNS_R_FORMERR, __LINE__); return; } } @@ -5092,5 +5177,5 @@ ns_query_start(ns_client_t *client) { qclient = NULL; ns_client_attach(client, &qclient); - query_find(qclient, NULL, qtype); + (void)query_find(qclient, NULL, qtype); } diff --git a/doc/arm/Bv9ARM-book.xml b/doc/arm/Bv9ARM-book.xml index 8c17589da6..608e9f91ed 100644 --- a/doc/arm/Bv9ARM-book.xml +++ b/doc/arm/Bv9ARM-book.xml @@ -18,7 +18,7 @@ - PERFORMANCE OF THIS SOFTWARE. --> - + BIND 9 Administrator Reference Manual @@ -4287,6 +4287,17 @@ category notify { null; }; + + + query-errors + + + + Information about queries that resulted in some + failure. + + + dispatch @@ -4373,6 +4384,226 @@ category notify { null; }; + + The <command>query-errors</command> Category + + The query-errors category is + specifically intended for debugging purposes: To identify + why and how specific queries result in responses which + indicate an error. + Messages of this category are therefore only logged + with debug levels. + + + + At the debug levels of 1 or higher, each response with the + rcode of SERVFAIL is logged as follows: + + + client 127.0.0.1#61502: query failed (SERVFAIL) for www.example.com/IN/AAAA at query.c:3880 + + + This means an error resulting in SERVFAIL was + detected at line 3880 of source file + query.c. + Log messages of this level will particularly + help identify the cause of SERVFAIL for an + authoritative server. + + + At the debug levels of 2 or higher, detailed context + information of recursive resolutions that resulted in + SERVFAIL is logged. + The log message will look like as follows: + + + fetch completed at resolver.c:2970 for www.example.com/A in 30.000183: timed out/success [domain:example.com,referral:2,restart:7,qrysent:8,timeout:5,lame:0,neterr:0,badresp:1,adberr:0,findfail:0,valfail:0] + + + The first part before the colon shows that a recursive + resolution for AAAA records of www.example.com completed + in 30.000183 seconds and the final result that led to the + SERVFAIL was determined at line 2970 of source file + resolver.c. + + + The following part shows the detected final result and the + latest result of DNSSEC validation. + The latter is always success when no validation attempt + is made. + In this example, this query resulted in SERVFAIL probably + because all name servers are down or unreachable, leading + to a timeout in 30 seconds. + DNSSEC validation was probably not attempted. + + + The last part enclosed in square brackets shows statistics + information collected for this particular resolution + attempt. + The domain field shows the deepest zone + that the resolver reached; + it is the zone where the error was finally detected. + The meaning of the other fields is summarized in the + following table. + + + + + + + + + + referral + + + + The number of referrals the resolver received + throughout the resolution process. + In the above example this is 2, which are most + likely com and example.com. + + + + + + restart + + + + The number of cycles that the resolver tried + remote servers at the domain + zone. + In each cycle the resolver sends one query + (possibly resending it, depending on the response) + to each known name server of + the domain zone. + + + + + + qrysent + + + + The number of queries the resolver sent at the + domain zone. + + + + + + timeout + + + + The number of timeouts since the resolver + received the last response. + + + + + + lame + + + + The number of lame servers the resolver detected + at the domain zone. + A server is detected to be lame either by an + invalid response or as a result of lookup in + BIND9's address database (ADB), where lame + servers are cached. + + + + + + neterr + + + + The number of erroneous results that the + resolver encountered in sending queries + at the domain zone. + One common case is the remote server is + unreachable and the resolver receives an ICMP + unreachable error message. + + + + + + badresp + + + + The number of unexpected responses (other than + lame) to queries sent by the + resolver at the domain zone. + + + + + + adberr + + + + Failures in finding remote server addresses + of the domain zone in the ADB. + One common case of this is that the remote + server's name does not have any address records. + + + + + + findfail + + + + Failures of resolving remote server addresses. + This is a total number of failures throughout + the resolution process. + + + + + + valfail + + + + Failures of DNSSEC validation. + Validation failures are counted throughout + the resolution process (not limited to + the domain zone), but should + only happen in domain. + + + + + + + + At the debug levels of 3 or higher, the same messages + as those at the debug 1 level are logged for other errors + than SERVFAIL. + Note that negative responses such as NXDOMAIN are not + regarded as errors here. + + + At the debug levels of 4 or higher, the same messages + as those at the debug 2 level are logged for other errors + than SERVFAIL. + Unlike the above case of level 3, messages are logged for + negative responses. + This is because any unexpected results can be difficult to + debug in the recursion case. + + diff --git a/lib/dns/include/dns/resolver.h b/lib/dns/include/dns/resolver.h index 2318e206e5..65304ef9c9 100644 --- a/lib/dns/include/dns/resolver.h +++ b/lib/dns/include/dns/resolver.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: resolver.h,v 1.60 2008/09/08 05:41:22 marka Exp $ */ +/* $Id: resolver.h,v 1.60.56.1 2009/01/07 01:50:15 jinmei Exp $ */ #ifndef DNS_RESOLVER_H #define DNS_RESOLVER_H 1 @@ -347,6 +347,23 @@ dns_resolver_destroyfetch(dns_fetch_t **fetchp); *\li *fetchp == NULL. */ +void +dns_resolver_logfetch(dns_fetch_t *fetch, isc_log_t *lctx, + isc_logcategory_t *category, isc_logmodule_t *module, + int level, isc_boolean_t duplicateok); +/*%< + * Dump a log message on internal state at the completion of given 'fetch'. + * 'lctx', 'category', 'module', and 'level' are used to write the log message. + * By default, only one log message is written even if the corresponding fetch + * context serves multiple clients; if 'duplicateok' is true the suppression + * is disabled and the message can be written every time this function is + * called. + * + * Requires: + * + *\li 'fetch' is a valid fetch, and has completed. + */ + dns_dispatchmgr_t * dns_resolver_dispatchmgr(dns_resolver_t *resolver); diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index feebf394e8..71a74eacb2 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: resolver.c,v 1.384.14.2 2009/01/05 23:47:22 tbox Exp $ */ +/* $Id: resolver.c,v 1.384.14.3 2009/01/07 01:50:15 jinmei Exp $ */ /*! \file */ @@ -157,6 +157,12 @@ typedef enum { fetchstate_done /*%< FETCHDONE events posted. */ } fetchstate; +typedef enum { + badns_unreachable = 0, + badns_response, + badns_validation +} badnstype_t; + struct fetchctx { /*% Not locked. */ unsigned int magic; @@ -245,6 +251,24 @@ struct fetchctx { */ isc_uint32_t rand_buf; isc_uint32_t rand_bits; + + /*% + * Fetch-local statistics for detailed logging. + */ + isc_result_t result; /*%< fetch result */ + isc_result_t vresult; /*%< validation result */ + int exitline; + isc_time_t start; + isc_uint64_t duration; + isc_boolean_t logged; + unsigned int querysent; + unsigned int referrals; + unsigned int lamecount; + unsigned int neterr; + unsigned int badresp; + unsigned int adberr; + unsigned int findfail; + unsigned int valfail; }; #define FCTX_MAGIC ISC_MAGIC('F', '!', '!', '!') @@ -395,7 +419,7 @@ static isc_result_t ncache_adderesult(dns_message_t *message, static void validated(isc_task_t *task, isc_event_t *event); static void maybe_destroy(fetchctx_t *fctx); static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, - isc_result_t reason); + isc_result_t reason, badnstype_t badtype); /*% * Increment resolver-related statistics counters. @@ -825,12 +849,13 @@ fctx_stopeverything(fetchctx_t *fctx, isc_boolean_t no_response) { } static inline void -fctx_sendevents(fetchctx_t *fctx, isc_result_t result) { +fctx_sendevents(fetchctx_t *fctx, isc_result_t result, int line) { dns_fetchevent_t *event, *next_event; isc_task_t *task; unsigned int count = 0; isc_interval_t i; isc_boolean_t logit = ISC_FALSE; + isc_time_t now; unsigned int old_spillat; unsigned int new_spillat = 0; /* initialized to silence compiler warnings */ @@ -842,6 +867,14 @@ fctx_sendevents(fetchctx_t *fctx, isc_result_t result) { FCTXTRACE("sendevents"); + /* + * Keep some record of fetch result for logging later (if required). + */ + fctx->result = result; + fctx->exitline = line; + TIME_NOW(&now); + fctx->duration = isc_time_microdiff(&now, &fctx->start); + for (event = ISC_LIST_HEAD(fctx->events); event != NULL; event = next_event) { @@ -917,10 +950,12 @@ log_edns(fetchctx_t *fctx) { } static void -fctx_done(fetchctx_t *fctx, isc_result_t result) { +fctx_done(fetchctx_t *fctx, isc_result_t result, int line) { dns_resolver_t *res; isc_boolean_t no_response; + REQUIRE(line >= 0); + FCTXTRACE("done"); res = fctx->res; @@ -941,7 +976,7 @@ fctx_done(fetchctx_t *fctx, isc_result_t result) { fctx->state = fetchstate_done; fctx->attributes &= ~FCTX_ATTR_ADDRWAIT; - fctx_sendevents(fctx, result); + fctx_sendevents(fctx, result, line); UNLOCK(&res->buckets[fctx->bucketnum].lock); } @@ -979,7 +1014,8 @@ process_sendevent(resquery_t *query, isc_event_t *event) { /* * No route to remote. */ - add_bad(fctx, query->addrinfo, sevent->result); + add_bad(fctx, query->addrinfo, sevent->result, + badns_unreachable); fctx_cancelquery(&query, NULL, NULL, ISC_TRUE); retry = ISC_TRUE; break; @@ -1000,7 +1036,7 @@ process_sendevent(resquery_t *query, isc_event_t *event) { fctx->attributes &= ~FCTX_ATTR_ADDRWAIT; result = fctx_stopidletimer(fctx); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); else fctx_try(fctx, ISC_TRUE); } @@ -1339,6 +1375,7 @@ fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, if (result != ISC_R_SUCCESS) goto cleanup_dispatch; } + fctx->querysent++; ISC_LIST_APPEND(fctx->queries, query, link); query->fctx->nqueries++; @@ -1814,7 +1851,7 @@ resquery_connected(isc_task_t *task, isc_event_t *event) { result = fctx_startidletimer(query->fctx, &interval); if (result != ISC_R_SUCCESS) { fctx_cancelquery(&query, NULL, NULL, ISC_FALSE); - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); break; } /* @@ -1850,7 +1887,7 @@ resquery_connected(isc_task_t *task, isc_event_t *event) { if (result != ISC_R_SUCCESS) { fctx_cancelquery(&query, NULL, NULL, ISC_FALSE); - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); } break; @@ -1885,7 +1922,7 @@ resquery_connected(isc_task_t *task, isc_event_t *event) { fctx->attributes &= ~FCTX_ATTR_ADDRWAIT; result = fctx_stopidletimer(fctx); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); else fctx_try(fctx, ISC_TRUE); } @@ -1921,13 +1958,16 @@ fctx_finddone(isc_task_t *task, isc_event_t *event) { fctx->attributes &= ~FCTX_ATTR_ADDRWAIT; if (event->ev_type == DNS_EVENT_ADBMOREADDRESSES) want_try = ISC_TRUE; - else if (fctx->pending == 0) { - /* - * We've got nothing else to wait for and don't - * know the answer. There's nothing to do but - * fail the fctx. - */ - want_done = ISC_TRUE; + else { + fctx->findfail++; + if (fctx->pending == 0) { + /* + * We've got nothing else to wait for and don't + * know the answer. There's nothing to do but + * fail the fctx. + */ + want_done = ISC_TRUE; + } } } else if (SHUTTINGDOWN(fctx) && fctx->pending == 0 && fctx->nqueries == 0 && ISC_LIST_EMPTY(fctx->validators)) { @@ -1948,7 +1988,7 @@ fctx_finddone(isc_task_t *task, isc_event_t *event) { if (want_try) fctx_try(fctx, ISC_TRUE); else if (want_done) - fctx_done(fctx, ISC_R_FAILURE); + fctx_done(fctx, ISC_R_FAILURE, __LINE__); else if (bucket_empty) empty_bucket(res); } @@ -2036,7 +2076,9 @@ mark_bad(fetchctx_t *fctx) { } static void -add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason) { +add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason, + badnstype_t badtype) +{ char namebuf[DNS_NAME_FORMATSIZE]; char addrbuf[ISC_SOCKADDR_FORMATSIZE]; char classbuf[64]; @@ -2047,6 +2089,21 @@ add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason) { const char *sep1, *sep2; isc_sockaddr_t *address = &addrinfo->sockaddr; + if (reason == DNS_R_LAME) + fctx->lamecount++; + else { + switch (badtype) { + case badns_unreachable: + fctx->neterr++; + break; + case badns_response: + fctx->badresp++; + break; + case badns_validation: + break; /* counted as 'valfail' */ + } + } + if (bad_server(fctx, address)) { /* * We already know this server is bad. @@ -2265,6 +2322,7 @@ findname(fetchctx_t *fctx, dns_name_t *name, in_port_t port, * XXXRTH Follow the CNAME/DNAME chain? */ dns_adb_destroyfind(&find); + fctx->adberr++; } } else if (!ISC_LIST_EMPTY(find->list)) { /* @@ -2308,6 +2366,11 @@ findname(fetchctx_t *fctx, dns_name_t *name, in_port_t port, find->result_v4 != DNS_R_NXDOMAIN))) *need_alternate = ISC_TRUE; } else { + if ((find->options & DNS_ADBFIND_LAMEPRUNED) != 0) + fctx->lamecount++; /* cached lame server */ + else + fctx->adberr++; /* unreachable server, etc. */ + /* * If we know there are no addresses for * the family we are using then try to add @@ -2785,7 +2848,7 @@ fctx_try(fetchctx_t *fctx, isc_boolean_t retrying) { /* * Something bad happened. */ - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); return; } @@ -2795,14 +2858,14 @@ fctx_try(fetchctx_t *fctx, isc_boolean_t retrying) { * might be bad ones. In this case, return SERVFAIL. */ if (addrinfo == NULL) { - fctx_done(fctx, DNS_R_SERVFAIL); + fctx_done(fctx, DNS_R_SERVFAIL, __LINE__); return; } } result = fctx_query(fctx, addrinfo, fctx->options); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); else if (retrying) inc_stats(fctx->res, dns_resstatscounter_retry); } @@ -2904,7 +2967,7 @@ fctx_timeout(isc_task_t *task, isc_event_t *event) { if (event->ev_type == ISC_TIMEREVENT_LIFE) { fctx->reason = NULL; - fctx_done(fctx, ISC_R_TIMEDOUT); + fctx_done(fctx, ISC_R_TIMEDOUT, __LINE__); } else { isc_result_t result; @@ -2930,7 +2993,7 @@ fctx_timeout(isc_task_t *task, isc_event_t *event) { */ result = fctx_starttimer(fctx); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); else /* * Keep trying. @@ -3025,7 +3088,7 @@ fctx_doshutdown(isc_task_t *task, isc_event_t *event) { if (fctx->state != fetchstate_done) { fctx->state = fetchstate_done; - fctx_sendevents(fctx, ISC_R_CANCELED); + fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__); } if (fctx->references == 0 && fctx->pending == 0 && @@ -3064,7 +3127,7 @@ fctx_start(isc_task_t *task, isc_event_t *event) { */ fctx->attributes |= FCTX_ATTR_SHUTTINGDOWN; fctx->state = fetchstate_done; - fctx_sendevents(fctx, ISC_R_CANCELED); + fctx_sendevents(fctx, ISC_R_CANCELED, __LINE__); /* * Since we haven't started, we INSIST that we have no * pending ADB finds and no pending validations. @@ -3103,7 +3166,7 @@ fctx_start(isc_task_t *task, isc_event_t *event) { */ result = fctx_starttimer(fctx); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); else fctx_try(fctx, ISC_FALSE); } else if (bucket_empty) @@ -3235,7 +3298,20 @@ fctx_create(dns_resolver_t *res, dns_name_t *name, dns_rdatatype_t type, fctx->altfind = NULL; fctx->pending = 0; fctx->restarts = 0; + fctx->querysent = 0; + fctx->referrals = 0; + TIME_NOW(&fctx->start); fctx->timeouts = 0; + fctx->lamecount = 0; + fctx->adberr = 0; + fctx->neterr = 0; + fctx->badresp = 0; + fctx->findfail = 0; + fctx->valfail = 0; + fctx->result = ISC_R_FAILURE; + fctx->vresult = ISC_R_SUCCESS; + fctx->exitline = -1; /* sentinel */ + fctx->logged = ISC_FALSE; fctx->attributes = 0; fctx->spilled = ISC_FALSE; fctx->nqueries = 0; @@ -3712,6 +3788,8 @@ validated(isc_task_t *task, isc_event_t *event) { if (vevent->result != ISC_R_SUCCESS) { FCTXTRACE("validation failed"); inc_stats(fctx->res, dns_resstatscounter_valfail); + fctx->valfail++; + fctx->vresult = vevent->result; result = ISC_R_NOTFOUND; if (vevent->rdataset != NULL) result = dns_db_findnode(fctx->cache, vevent->name, @@ -3726,7 +3804,7 @@ validated(isc_task_t *task, isc_event_t *event) { if (result == ISC_R_SUCCESS) dns_db_detachnode(fctx->cache, &node); result = vevent->result; - add_bad(fctx, addrinfo, result); + add_bad(fctx, addrinfo, result, badns_validation); isc_event_free(&event); UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock); INSIST(fctx->validator == NULL); @@ -3734,7 +3812,7 @@ validated(isc_task_t *task, isc_event_t *event) { if (fctx->validator != NULL) { dns_validator_send(fctx->validator); } else if (sentresponse) - fctx_done(fctx, result); /* Locks bucket. */ + fctx_done(fctx, result, __LINE__); /* Locks bucket. */ else fctx_try(fctx, ISC_TRUE); /* Locks bucket. */ return; @@ -3921,7 +3999,7 @@ validated(isc_task_t *task, isc_event_t *event) { UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock); - fctx_done(fctx, result); /* Locks bucket. */ + fctx_done(fctx, result, __LINE__); /* Locks bucket. */ cleanup_event: INSIST(node == NULL); @@ -5497,7 +5575,7 @@ resume_dslookup(isc_task_t *task, isc_event_t *event) { bucketnum = fctx->bucketnum; if (fevent->result == ISC_R_CANCELED) { dns_resolver_destroyfetch(&fctx->nsfetch); - fctx_done(fctx, ISC_R_CANCELED); + fctx_done(fctx, ISC_R_CANCELED, __LINE__); } else if (fevent->result == ISC_R_SUCCESS) { FCTXTRACE("resuming DS lookup"); @@ -5513,7 +5591,7 @@ resume_dslookup(isc_task_t *task, isc_event_t *event) { fctx->res->buckets[bucketnum].mctx, &fctx->domain); if (result != ISC_R_SUCCESS) { - fctx_done(fctx, DNS_R_SERVFAIL); + fctx_done(fctx, DNS_R_SERVFAIL, __LINE__); goto cleanup; } /* @@ -5531,7 +5609,7 @@ resume_dslookup(isc_task_t *task, isc_event_t *event) { domain = dns_fixedname_name(&fixed); dns_name_copy(&fctx->nsfetch->private->domain, domain, NULL); if (dns_name_equal(&fctx->nsname, domain)) { - fctx_done(fctx, DNS_R_SERVFAIL); + fctx_done(fctx, DNS_R_SERVFAIL, __LINE__); dns_resolver_destroyfetch(&fctx->nsfetch); goto cleanup; } @@ -5558,7 +5636,7 @@ resume_dslookup(isc_task_t *task, isc_event_t *event) { &fctx->nsrrset, NULL, &fctx->nsfetch); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); else { LOCK(&res->buckets[bucketnum].lock); locked = ISC_TRUE; @@ -5739,6 +5817,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) { unsigned int options; unsigned int findoptions; isc_result_t broken_server; + badnstype_t broken_type = badns_response; REQUIRE(VALID_QUERY(query)); fctx = query->fctx; @@ -5815,6 +5894,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) { devent->result == ISC_R_CONNREFUSED || devent->result == ISC_R_CANCELED)) { broken_server = devent->result; + broken_type = badns_unreachable; } } goto done; @@ -6213,6 +6293,18 @@ resquery_response(isc_task_t *task, isc_event_t *event) { * has not experienced any restarts yet. */ fctx->restarts = 0; + + /* + * Update local statistics counters collected for each + * new zone. + */ + fctx->referrals++; + fctx->querysent = 0; + fctx->lamecount = 0; + fctx->neterr = 0; + fctx->badresp = 0; + fctx->adberr = 0; + result = ISC_R_SUCCESS; } else if (result != ISC_R_SUCCESS) { /* @@ -6286,7 +6378,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) { * Add this server to the list of bad servers for * this fctx. */ - add_bad(fctx, addrinfo, broken_server); + add_bad(fctx, addrinfo, broken_server, broken_type); } if (get_nameservers) { @@ -6294,7 +6386,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) { dns_fixedname_init(&foundname); fname = dns_fixedname_name(&foundname); if (result != ISC_R_SUCCESS) { - fctx_done(fctx, DNS_R_SERVFAIL); + fctx_done(fctx, DNS_R_SERVFAIL, __LINE__); return; } findoptions = 0; @@ -6312,7 +6404,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) { NULL); if (result != ISC_R_SUCCESS) { FCTXTRACE("couldn't find a zonecut"); - fctx_done(fctx, DNS_R_SERVFAIL); + fctx_done(fctx, DNS_R_SERVFAIL, __LINE__); return; } if (!dns_name_issubdomain(fname, &fctx->domain)) { @@ -6321,7 +6413,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) { * QDOMAIN. */ FCTXTRACE("nameservers now above QDOMAIN"); - fctx_done(fctx, DNS_R_SERVFAIL); + fctx_done(fctx, DNS_R_SERVFAIL, __LINE__); return; } dns_name_free(&fctx->domain, @@ -6331,7 +6423,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) { fctx->res->buckets[fctx->bucketnum].mctx, &fctx->domain); if (result != ISC_R_SUCCESS) { - fctx_done(fctx, DNS_R_SERVFAIL); + fctx_done(fctx, DNS_R_SERVFAIL, __LINE__); return; } fctx_cancelqueries(fctx, ISC_TRUE); @@ -6352,7 +6444,7 @@ resquery_response(isc_task_t *task, isc_event_t *event) { inc_stats(fctx->res, dns_resstatscounter_retry); result = fctx_query(fctx, addrinfo, options); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); } else if (result == ISC_R_SUCCESS && !HAVE_ANSWER(fctx)) { /* * All has gone well so far, but we are waiting for the @@ -6366,10 +6458,10 @@ resquery_response(isc_task_t *task, isc_event_t *event) { */ result = fctx_stopidletimer(fctx); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); } else if (result == DNS_R_CHASEDSSERVERS) { unsigned int n; - add_bad(fctx, addrinfo, result); + add_bad(fctx, addrinfo, result, broken_type); fctx_cancelqueries(fctx, ISC_TRUE); fctx_cleanupfinds(fctx); fctx_cleanupforwaddrs(fctx); @@ -6386,18 +6478,18 @@ resquery_response(isc_task_t *task, isc_event_t *event) { &fctx->nsrrset, NULL, &fctx->nsfetch); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); LOCK(&fctx->res->buckets[fctx->bucketnum].lock); fctx->references++; UNLOCK(&fctx->res->buckets[fctx->bucketnum].lock); result = fctx_stopidletimer(fctx); if (result != ISC_R_SUCCESS) - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); } else { /* * We're done. */ - fctx_done(fctx, result); + fctx_done(fctx, result, __LINE__); } } @@ -7270,6 +7362,47 @@ dns_resolver_destroyfetch(dns_fetch_t **fetchp) { empty_bucket(res); } +void +dns_resolver_logfetch(dns_fetch_t *fetch, isc_log_t *lctx, + isc_logcategory_t *category, isc_logmodule_t *module, + int level, isc_boolean_t duplicateok) +{ + fetchctx_t *fctx; + dns_resolver_t *res; + char domainbuf[DNS_NAME_FORMATSIZE]; + + REQUIRE(DNS_FETCH_VALID(fetch)); + fctx = fetch->private; + REQUIRE(VALID_FCTX(fctx)); + res = fctx->res; + + LOCK(&res->buckets[fctx->bucketnum].lock); + + INSIST(fctx->exitline >= 0); + if (!fctx->logged || duplicateok) { + dns_name_format(&fctx->domain, domainbuf, sizeof(domainbuf)); + isc_log_write(lctx, category, module, level, + "fetch completed at %s:%d for %s in " + "%" ISC_PRINT_QUADFORMAT "u." + "%06" ISC_PRINT_QUADFORMAT "u: %s/%s " + "[domain:%s,referral:%u,restart:%u,qrysent:%u," + "timeout:%u,lame:%u,neterr:%u,badresp:%u," + "adberr:%u,findfail:%u,valfail:%u]", + __FILE__, fctx->exitline, fctx->info, + fctx->duration / 1000000, + fctx->duration % 1000000, + isc_result_totext(fctx->result), + isc_result_totext(fctx->vresult), domainbuf, + fctx->referrals, fctx->restarts, + fctx->querysent, fctx->timeouts, fctx->lamecount, + fctx->neterr, fctx->badresp, fctx->adberr, + fctx->findfail, fctx->valfail); + fctx->logged = ISC_TRUE; + } + + UNLOCK(&res->buckets[fctx->bucketnum].lock); +} + dns_dispatchmgr_t * dns_resolver_dispatchmgr(dns_resolver_t *resolver) { REQUIRE(VALID_RESOLVER(resolver));