From 2d9db0bc63c0aaa20e68254ab9252603e1b93939 Mon Sep 17 00:00:00 2001
From: Eric van Gyzen
Date: Sun, 2 Oct 2016 01:42:45 +0000
Subject: [PATCH] Add GARP retransmit capability

A single gratuitous ARP (GARP) is always transmitted when an IPv4
address is added to an interface, and that is usually sufficient.
However, in some circumstances, such as when a shared address is
passed between cluster nodes, this single GARP may occasionally be
dropped or lost. This can lead to neighbors on the network link
working with a stale ARP cache and sending packets destined for that
address to the node that previously owned the address, which may not
respond.

To avoid this situation, GARP retransmissions can be enabled by setting
the net.link.ether.inet.garp_rexmit_count sysctl to a value greater
than zero. The setting represents the maximum number of
retransmissions. The interval between retransmissions is calculated
using an exponential backoff algorithm, doubling each time, so the
retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).

Due to the exponential backoff algorithm used for the interval
between GARP retransmissions, the maximum number of retransmissions is
limited to 16 for sanity. At this limit the longest interval between
retransmissions is 2^15 seconds (~9 hours), and the whole retransmit
sequence spans about 2^16 seconds ~= 18 hours. Increasing this limit is
possible, but sending out GARPs spaced days apart would be of little
use.

Submitted by:	David A.
Bright
MFC after:	1 month
Relnotes:	yes
Sponsored by:	Dell EMC
Differential Revision:	https://reviews.freebsd.org/D7695
---
 sys/netinet/if_ether.c | 128 +++++++++++++++++++++++++++++++++++++++++
 sys/netinet/in.c       |   8 +++
 sys/netinet/in_var.h   |   2 +
 3 files changed, 138 insertions(+)

diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index 9f36c328d3a..98ccf6a951a 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -137,6 +137,28 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
 	"Maximum number of remotely triggered ARP messages that can be "
 	"logged per second");
 
+/*
+ * Due to the exponential backoff algorithm used for the interval between GARP
+ * retransmissions, the maximum number of retransmissions is limited for
+ * sanity.  At this limit the longest interval is 2^15 seconds (~9 hours) and
+ * the whole retransmit sequence spans about 2^16 seconds (~18 hours).
+ *
+ * Making this limit more dynamic is more complicated than worthwhile,
+ * especially since sending out GARPs spaced days apart would be of little
+ * use. A maximum dynamic limit would look something like:
+ *
+ * const int max = fls(INT_MAX / hz) - 1;
+ */
+#define MAX_GARP_RETRANSMITS 16
+static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS);
+static int garp_rexmit_count = 0; /* GARP retransmission limit; 0 disables. */
+
+SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count,
+    CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
+    &garp_rexmit_count, 0, sysctl_garp_rexmit, "I",
+    "Number of times to retransmit GARP packets;"
+    " 0 to disable, maximum of 16");
+
 #define	ARP_LOG(pri, ...)	do {					\
 	if (ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps))	\
 		log((pri), "arp: " __VA_ARGS__);			\
@@ -1287,6 +1309,109 @@ arp_add_ifa_lle(struct ifnet *ifp, const struct sockaddr *dst)
 		lltable_free_entry(LLTABLE(ifp), lle_tmp);
 }
 
+/*
+ * Handle the garp_rexmit_count sysctl.  Like sysctl_handle_int(), but a newly
+ * written value is clamped to [0, MAX_GARP_RETRANSMITS] instead of rejected.
+ */
+static int
+sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	int rexmit_count = *(int *)arg1;
+
+	error = sysctl_handle_int(oidp, &rexmit_count, 0, req);
+
+	/* Only store (and clamp) when the request succeeded and was a write. */
+	if (!error && req->newptr) {
+		/* A new value was set; pull it back into the valid range. */
+		if (rexmit_count < 0) {
+			rexmit_count = 0;
+		} else if (rexmit_count > MAX_GARP_RETRANSMITS) {
+			rexmit_count = MAX_GARP_RETRANSMITS;
+		}
+		*(int *)arg1 = rexmit_count;
+	}
+
+	return (error);
+}
+
+/*
+ * Retransmit a Gratuitous ARP (GARP) and, if necessary, schedule a callout to
+ * retransmit it again. A pending callout owns a reference to the ifa.
+ */
+static void
+garp_rexmit(void *arg)
+{
+	struct in_ifaddr *ia = arg;
+
+	if (callout_pending(&ia->ia_garp_timer) ||
+	    !callout_active(&ia->ia_garp_timer)) {
+		IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
+		ifa_free(&ia->ia_ifa);
+		return;
+	}
+
+	/*
+	 * Entered with the address lock held; drop it while the GARP is sent.
+	 */
+	IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
+
+	arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr,
+	    &IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp));
+
+	/*
+	 * Count this retransmission.  Once the configured limit is reached,
+	 * release the ifa reference and stop.  Otherwise re-arm the timer with
+	 * a doubled interval; a nonzero callout_reset() result drops one ref.
+	 */
+	++ia->ia_garp_count;
+	if (ia->ia_garp_count >= garp_rexmit_count) {
+		ifa_free(&ia->ia_ifa);
+	} else {
+		int rescheduled;
+		IF_ADDR_WLOCK(ia->ia_ifa.ifa_ifp);
+		rescheduled = callout_reset(&ia->ia_garp_timer,
+		    (1 << ia->ia_garp_count) * hz,
+		    garp_rexmit, ia);
+		IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
+		if (rescheduled) {
+			ifa_free(&ia->ia_ifa);
+		}
+	}
+}
+
+/*
+ * Start (or restart) the GARP retransmit timer; the callout holds an ifa ref.
+ *
+ * A single GARP is always transmitted when an IPv4 address is added
+ * to an interface and that is usually sufficient. However, in some
+ * circumstances, such as when a shared address is passed between
+ * cluster nodes, this single GARP may occasionally be dropped or
+ * lost. This can lead to neighbors on the network link working with a
+ * stale ARP cache and sending packets destined for that address to
+ * the node that previously owned the address, which may not respond.
+ *
+ * To avoid this situation, GARP retransmits can be enabled by setting
+ * the net.link.ether.inet.garp_rexmit_count sysctl to a value greater
+ * than zero. The setting represents the maximum number of
+ * retransmissions. The interval between retransmissions is calculated
+ * using an exponential backoff algorithm, doubling each time, so the
+ * retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).
+ */
+static void
+garp_timer_start(struct ifaddr *ifa)
+{
+	struct in_ifaddr *ia = (struct in_ifaddr *) ifa;
+
+	IF_ADDR_WLOCK(ia->ia_ifa.ifa_ifp);
+	ia->ia_garp_count = 0;
+	if (callout_reset(&ia->ia_garp_timer, (1 << ia->ia_garp_count) * hz,
+		garp_rexmit, ia) == 0) {
+		ifa_ref(ifa);
+	}
+	IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
+}
+
 void
 arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 {
@@ -1302,6 +1427,9 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
 	if (ntohl(dst_in->sin_addr.s_addr) == INADDR_ANY)
 		return;
 	arp_announce_ifaddr(ifp, dst_in->sin_addr, IF_LLADDR(ifp));
+	if (garp_rexmit_count > 0) {
+		garp_timer_start(ifa);
+	}
 	arp_add_ifa_lle(ifp, dst);
 }
 
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index cf1c81489d1..1d98d178469 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -397,6 +397,8 @@ in_aifaddr_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td)
 	ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
 	ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
 	ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
+	callout_init_rw(&ia->ia_garp_timer, &ifp->if_addr_lock,
+	    CALLOUT_RETURNUNLOCKED);
 
 	ia->ia_ifp = ifp;
 	ia->ia_addr = *addr;
@@ -635,6 +637,12 @@ in_difaddr_ioctl(caddr_t data, struct ifnet *ifp, struct thread *td)
 		IN_MULTI_UNLOCK();
 	}
 
+	IF_ADDR_WLOCK(ifp);
+	if (callout_stop(&ia->ia_garp_timer) == 1) {
+		ifa_free(&ia->ia_ifa);
+	}
+	IF_ADDR_WUNLOCK(ifp);
+
 	EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 	ifa_free(&ia->ia_ifa);		/* in_ifaddrhead */
 
diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h
index af83e9a1116..08055c4fad1 100644
--- a/sys/netinet/in_var.h
+++ b/sys/netinet/in_var.h
@@ -82,6 +82,8 @@ struct in_ifaddr {
 	struct	sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */
 #define	ia_broadaddr	ia_dstaddr
 	struct	sockaddr_in ia_sockmask; /* reserve space for general netmask */
+	struct	callout ia_garp_timer;	/* timer for retransmitting GARPs */
+	int	ia_garp_count;		/* count of retransmitted GARPs */
 };
 
 /*