diff --git a/bin/named/client.c b/bin/named/client.c index b6fb755b5a..19ea1dbaf9 100644 --- a/bin/named/client.c +++ b/bin/named/client.c @@ -360,7 +360,7 @@ ns_client_send(ns_client_t *client) { } CTRACE("sendto"); result = isc_socket_sendto(socket, &r, client->task, client_senddone, - client, address); + client, address, NULL); if (result == ISC_R_SUCCESS) client->nsends++; diff --git a/bin/tests/dispatch_tcp_test.c b/bin/tests/dispatch_tcp_test.c index 2f9d9f996b..278b4c774e 100644 --- a/bin/tests/dispatch_tcp_test.c +++ b/bin/tests/dispatch_tcp_test.c @@ -243,9 +243,9 @@ start_response(void) dns_message_destroy(&msg); isc_buffer_used(&render, &region); - result = isc_socket_sendto(dns_dispatch_getsocket(disp), &region, - t2, send_done, resp, &from); - CHECKRESULT(result, "isc_socket_sendto()"); + result = isc_socket_send(dns_dispatch_getsocket(disp), &region, + t2, send_done, resp); + CHECKRESULT(result, "isc_socket_send()"); } void diff --git a/bin/tests/dispatch_test.c b/bin/tests/dispatch_test.c index bf5594cff8..4abafc57c3 100644 --- a/bin/tests/dispatch_test.c +++ b/bin/tests/dispatch_test.c @@ -223,7 +223,7 @@ start_response(clictx_t *cli, char *query, isc_task_t *task) isc_buffer_used(&cli->render, &region); result = isc_socket_sendto(dns_dispatch_getsocket(disp), &region, - task, send_done, cli->resp, &from); + task, send_done, cli->resp, &from, NULL); CHECKRESULT(result, "isc_socket_sendto()"); } diff --git a/bin/tests/sdig.c b/bin/tests/sdig.c index 52b169ae22..b0b3e76e7e 100644 --- a/bin/tests/sdig.c +++ b/bin/tests/sdig.c @@ -376,7 +376,7 @@ main(int argc, char *argv[]) { check_result(result, "isc_socket_recvv()"); ISC_LIST_ENQUEUE(bufferlist, &b, link); result = isc_socket_sendtov(sock, &bufferlist, task, send_done, NULL, - &sockaddr); + &sockaddr, NULL); check_result(result, "isc_socket_sendtov()"); isc_app_run(); diff --git a/bin/tests/tkey_test.c b/bin/tests/tkey_test.c index 6abc940808..5a509645bf 100644 --- a/bin/tests/tkey_test.c +++ 
b/bin/tests/tkey_test.c @@ -202,7 +202,8 @@ buildquery(void) { CHECK("dns_message_renderend", result); isc_buffer_used(&qbuffer, &r); - result = isc_socket_sendto(s, &r, task1, senddone, NULL, &address); + result = isc_socket_sendto(s, &r, task1, senddone, NULL, &address, + NULL); CHECK("isc_socket_sendto", result); inr.base = rdata; inr.length = sizeof(rdata); @@ -242,7 +243,8 @@ buildquery2(void) { CHECK("dns_message_renderend", result); isc_buffer_used(&qbuffer, &r); - result = isc_socket_sendto(s, &r, task2, senddone2, NULL, &address); + result = isc_socket_sendto(s, &r, task2, senddone2, NULL, &address, + NULL); CHECK("isc_socket_sendto", result); inr.base = rdata; inr.length = sizeof(rdata); diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 3b195ac168..d7bc9a998e 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -421,6 +421,9 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) disp->recvs--; if (ev->result != ISC_R_SUCCESS) { + XDEBUG(("recv result %d (%s)\n", ev->result, + isc_result_totext(ev->result))); + /* * If the recv() was canceled pass the word on. 
*/ diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index b7790003d5..a2f0e208fc 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -789,7 +789,7 @@ resquery_send(resquery_t *query) { address = query->addrinfo->sockaddr; isc_buffer_used(buffer, &r); result = isc_socket_sendto(socket, &r, task, resquery_senddone, - query, address); + query, address, NULL); if (result != ISC_R_SUCCESS) goto cleanup_message; QTRACE("sent"); diff --git a/lib/isc/include/isc/socket.h b/lib/isc/include/isc/socket.h index 016fbe1a18..93b477a643 100644 --- a/lib/isc/include/isc/socket.h +++ b/lib/isc/include/isc/socket.h @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -97,6 +98,8 @@ struct isc_socketevent { isc_region_t region; /* for single-buffer i/o */ isc_bufferlist_t bufferlist; /* list of buffers */ isc_sockaddr_t address; /* source address */ + isc_time_t timestamp; /* timestamp of packet recv */ + struct in6_pktinfo pktinfo; /* ipv6 pktinfo */ }; typedef struct isc_socket_newconnev isc_socket_newconnev_t; @@ -112,10 +115,21 @@ struct isc_socket_connev { ISC_EVENT_COMMON(isc_socket_connev_t); isc_result_t result; /* OK, EOF, whatever else */ }; + +/* + * _ATTACHED: Internal use only. + * _TRUNC: Packet was truncated on receive. + * _CTRUNC: Packet control information was truncated. This can + * indicate that the packet is not complete, even though + * all the data is valid. + * _TIMESTAMP: The timestamp member is valid. + * _PKTINFO: The pktinfo member is valid. 
+ */ #define ISC_SOCKEVENTATTR_ATTACHED 0x8000000U /* internal */ #define ISC_SOCKEVENTATTR_TRUNC 0x0080000U /* public */ #define ISC_SOCKEVENTATTR_CTRUNC 0x0040000U /* public */ #define ISC_SOCKEVENTATTR_TIMESTAMP 0x0020000U /* public */ +#define ISC_SOCKEVENTATTR_PKTINFO 0x0010000U /* public */ #define ISC_SOCKEVENT_ANYEVENT (0) #define ISC_SOCKEVENT_RECVDONE (ISC_EVENTCLASS_SOCKET + 1) @@ -491,14 +505,14 @@ isc_socket_send(isc_socket_t *sock, isc_region_t *region, isc_result_t isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, isc_taskaction_t action, void *arg, - isc_sockaddr_t *address); + isc_sockaddr_t *address, struct in6_pktinfo *pktinfo); isc_result_t isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist, isc_task_t *task, isc_taskaction_t action, void *arg); isc_result_t isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist, isc_task_t *task, isc_taskaction_t action, void *arg, - isc_sockaddr_t *address); + isc_sockaddr_t *address, struct in6_pktinfo *pktinfo); /* * Send the contents of 'region' to the socket's peer. * diff --git a/lib/isc/unix/socket.c b/lib/isc/unix/socket.c index d351ff3dc2..209abc3dd6 100644 --- a/lib/isc/unix/socket.c +++ b/lib/isc/unix/socket.c @@ -18,6 +18,8 @@ #include #include +#include +#include #include #include @@ -73,7 +75,7 @@ #define TRACE_SEND 0x0010 #define TRACE_MANAGER 0x0020 -int trace_level = TRACE_RECV | TRACE_WATCHER; +int trace_level = TRACE_RECV; #define XTRACE(l, a) do { \ if ((l) & trace_level) { \ printf("[%s:%d] ", __FILE__, __LINE__); \ @@ -100,6 +102,56 @@ typedef isc_event_t intev_t; #define SOCKET_MAGIC 0x494f696fU /* IOio */ #define VALID_SOCKET(t) ((t) != NULL && (t)->magic == SOCKET_MAGIC) +/* + * IPv6 control information. If the socket is an IPv6 socket we want + * to collect the destination address and interface so the client can + * set them on outgoing packets. 
+ */ +#ifdef ISC_PLATFORM_HAVEIPV6 +#define PKTINFO_SPACE CMSG_SPACE(sizeof(struct in6_pktinfo)) +#ifndef USE_CMSG +#define USE_CMSG 1 +#endif +#else +#define PKTINFO_SPACE 0 +#endif + +/* + * NetBSD (and FreeBSD?) can timestamp packets. XXXMLG Should we have + * a setsockopt() like interface to request timestamps, and if the OS + * doesn't do it for us, call gettimeofday() on every UDP receive? + */ +#ifdef SO_TIMESTAMP +#define TIMESTAMP_SPACE CMSG_SPACE(sizeof(struct timeval)) +#ifndef USE_CMSG +#define USE_CMSG 1 +#endif +#else +#define TIMESTAMP_SPACE 0 +#endif + +/* + * Total cmsg space needed for all of the above bits. + */ +#define TOTAL_SPACE (PKTINFO_SPACE + TIMESTAMP_SPACE) + +/* + * At this point, it is possible to have USE_CMSG defined, but the OS + * doesn't provide the CMSG_ macros we need. Rather than toy around with + * things, don't use the CMSG stuff if the macros we need aren't defined. + */ +#ifdef USE_CMSG +#if !defined(CMSG_SPACE) || !defined(CMSG_NXTHDR) || !defined(CMSG_FIRSTHDR) \ + || !defined(CMSG_LEN) || !defined(CMSG_DATA) +#warning Not using ipv6 pktinfo or timestamp because of partial CMSG_ implementation. +#undef USE_CMSG +#endif +#if !defined(ISC_NET_BSD44MSGHDR) +#warning Not using ipv6 pktinfo or timestamp because of lack of BSD44 msghdr +#undef USE_CMSG +#endif +#endif + struct isc_socket { /* Not locked. */ unsigned int magic; @@ -138,8 +190,8 @@ struct isc_socket { #ifdef ISC_NET_RECVOVERFLOW unsigned char overflow; /* used for MSG_TRUNC fake */ #endif -#ifdef notyet - unsigned char cmsg[1024]; /* XXX size? */ +#ifdef USE_CMSG + unsigned char cmsg[TOTAL_SPACE]; #endif }; @@ -273,11 +325,22 @@ make_nonblock(int fd) /* * Process control messages received on a socket. + * XXXMLG This is #ifdef hell. 
*/ static void process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) { +#ifdef USE_CMSG + struct cmsghdr *cmsgp, cmsg; +#ifdef ISC_PLATFORM_HAVEIPV6 + struct in6_pktinfo *pktinfop; +#endif +#ifdef SO_TIMESTAMP + struct timeval *timevalp; +#endif +#endif + (void)sock; #ifdef ISC_NET_BSD44MSGHDR @@ -291,8 +354,52 @@ process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC; #endif +#ifndef USE_CMSG + return; +#else if (msg->msg_controllen == 0 || msg->msg_control == NULL) return; + +#ifdef SO_TIMESTAMP + timevalp = NULL; +#endif +#ifdef ISC_PLATFORM_HAVEIPV6 + pktinfop = NULL; +#endif + + cmsgp = CMSG_FIRSTHDR(msg); + while (cmsgp != NULL) { + cmsg = *cmsgp; + XTRACE(TRACE_RECV, ("Processing cmsg %p\n", cmsgp)); + +#ifdef ISC_PLATFORM_HAVEIPV6 + if (cmsg.cmsg_level == IPPROTO_IPV6 + && cmsg.cmsg_type == IPV6_PKTINFO) { + pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp); + dev->pktinfo = *pktinfop; + dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; + XTRACE(TRACE_RECV, ("Found IPv6 PKTINFO\n")); + goto next; + } +#endif + +#ifdef SO_TIMESTAMP + if (cmsg.cmsg_level == SOL_SOCKET + && cmsg.cmsg_type == SCM_TIMESTAMP) { + timevalp = (struct timeval *)CMSG_DATA(cmsgp); + dev->timestamp.seconds = timevalp->tv_sec; + dev->timestamp.nanoseconds = timevalp->tv_usec * 1000; + dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP; + XTRACE(TRACE_RECV, ("Found UDP timestamp\n")); + goto next; + } +#endif + + next: + cmsgp = CMSG_NXTHDR(msg, cmsgp); + } +#endif /* USE_CMSG */ + #endif /* ISC_NET_BSD44MSGHDR */ } @@ -494,6 +601,12 @@ build_msghdr_recv(isc_socket_t *sock, isc_socketevent_t *dev, #ifdef ISC_NET_BSD44MSGHDR msg->msg_control = NULL; msg->msg_controllen = 0; +#if defined(USE_CMSG) /* XXXMLG implement! 
*/ + if (sock->type == isc_sockettype_udp) { + msg->msg_control = &sock->cmsg[0]; + msg->msg_controllen = sizeof(sock->cmsg); + } +#endif msg->msg_flags = 0; #else msg->msg_accrights = NULL; @@ -552,10 +665,14 @@ dump_msg(struct msghdr *msg) printf("MSGHDR %p\n", msg); printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen); printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen); - for (i = 0 ; i < msg->msg_iovlen ; i++) + for (i = 0 ; i < (unsigned int)msg->msg_iovlen ; i++) printf("\t\t%d\tbase %p, len %d\n", i, msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len); +#ifdef ISC_NET_BSD44MSGHDR + printf("\tcontrol %p, controllen %d\n", msg->msg_control, + msg->msg_controllen); +#endif } #endif @@ -583,13 +700,10 @@ doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) #endif cc = recvmsg(sock->fd, &msghdr, 0); - if (sock->type == isc_sockettype_udp) - dev->address.length = msghdr.msg_namelen; - XTRACE(TRACE_RECV, - ("do_recv: recvmsg(%d) %d bytes, err %d/%s, from %s\n", - sock->fd, cc, errno, strerror(errno), - inet_ntoa(dev->address.type.sin.sin_addr))); + ("doio_recv: recvmsg(%d) %d bytes, err %d/%s\n", + sock->fd, cc, errno, strerror(errno))); + XTRACE(TRACE_RECV, ("errno %d, addr %p\n", errno, &errno)); if (cc < 0) { if (SOFT_ERROR(errno)) @@ -615,6 +729,7 @@ doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) * This might not be a permanent error. */ if (errno == ENOBUFS) { + /* XXXMLG Unexpected error?!? */ send_recvdone_event(sock, &dev, ISC_R_UNEXPECTED); return (DOIO_HARD); } @@ -634,6 +749,9 @@ doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) return (DOIO_EOF); } + if (sock->type == isc_sockettype_udp) + dev->address.length = msghdr.msg_namelen; + /* * Overflow bit detection. If we received MORE bytes than we should, * this indicates an overflow situation. 
Set the flag in the @@ -649,12 +767,9 @@ doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) /* * If there are control messages attached, run through them and pull * out the interesting bits. - * - * Note that for multi-read TCP this isn't as interesting in that - * only the last packet will set some of these, but that is better - * than nothing. */ - process_cmsg(sock, &msghdr, dev); + if (sock->type == isc_sockettype_udp) + process_cmsg(sock, &msghdr, dev); /* * update the buffers (if any) and the i/o count @@ -1406,6 +1521,8 @@ internal_recv(isc_task_t *me, isc_event_t *ev) } if (sock->recv_result != ISC_R_SUCCESS) { + XTRACE(TRACE_RECV, ("STICKY RESULT: %d\n", + sock->recv_result)); send_recvdone_event(sock, &dev, sock->recv_result); goto next; } @@ -2134,13 +2251,14 @@ isc_socket_send(isc_socket_t *sock, isc_region_t *region, /* * REQUIRE() checking performed in isc_socket_sendto() */ - return (isc_socket_sendto(sock, region, task, action, arg, NULL)); + return (isc_socket_sendto(sock, region, task, action, arg, NULL, + NULL)); } isc_result_t isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, isc_task_t *task, isc_taskaction_t action, void *arg, - isc_sockaddr_t *address) + isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) { isc_socketevent_t *dev; isc_socketmgr_t *manager; @@ -2219,13 +2337,14 @@ isc_result_t isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist, isc_task_t *task, isc_taskaction_t action, void *arg) { - return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL)); + return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL, + NULL)); } isc_result_t isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist, isc_task_t *task, isc_taskaction_t action, void *arg, - isc_sockaddr_t *address) + isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) { isc_socketevent_t *dev; isc_socketmgr_t *manager;