From c7a6a1b32539a0f18f461e2095f79db740b16e4c Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 30 May 2017 02:25:47 +0000 Subject: [PATCH 01/14] mtx: fix whitespace damage in _mtx_trylock_flags_ MFC after: 3 days --- sys/kern/kern_mutex.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index ed9dfbbb725..bc03a722401 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -410,10 +410,10 @@ _mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file, int line) if (v == tid && ((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || (opts & MTX_RECURSE) != 0)) { - m->mtx_recurse++; - atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); - recursed = true; - break; + m->mtx_recurse++; + atomic_set_ptr(&m->mtx_lock, MTX_RECURSED); + recursed = true; + break; } rval = 0; break; From 1e9fb89962095a532753fb194811c323d6ea147b Mon Sep 17 00:00:00 2001 From: Zbigniew Bodek Date: Tue, 30 May 2017 11:53:18 +0000 Subject: [PATCH 02/14] Add mbuf defragmentation to the ENA driver When mbuf chain is too long and device cannot handle that number of segments in DMA transaction, mbuf chain will be defragmented. Initially, driver was dropping all mbuf chains that were exceeding supported number of segments. Submitted by: Michal Krawczyk Obtained from: Semihalf Sponsored by: Amazon.com Inc. Differential revision: https://reviews.freebsd.org/D10923 --- sys/dev/ena/ena.c | 59 +++++++++++++++++++++++++++++----------- sys/dev/ena/ena.h | 2 ++ sys/dev/ena/ena_sysctl.c | 8 ++++++ 3 files changed, 53 insertions(+), 16 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 5eb76250baf..0bc0aa7208f 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -155,7 +155,7 @@ static void ena_update_hwassist(struct ena_adapter *); static int ena_setup_ifnet(device_t, struct ena_adapter *, struct ena_com_dev_get_features_ctx *); static void ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *); -static int ena_xmit_mbuf(struct ena_ring *, struct mbuf *); +static int ena_xmit_mbuf(struct ena_ring *, struct mbuf **); static void ena_start_xmit(struct ena_ring *); static int ena_mq_start(if_t, struct mbuf *); static void ena_deferred_mq_start(void *, int); @@ -2601,7 +2601,34 @@ ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf) } static int -ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf *mbuf) +ena_check_and_defragment_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) +{ + struct ena_adapter *adapter; + struct mbuf *defrag_mbuf; + int num_frags; + + adapter = tx_ring->adapter; + num_frags = ena_mbuf_count(*mbuf); + + /* One segment must be reserved for configuration descriptor. */ + if (num_frags < adapter->max_tx_sgl_size) + return (0); + counter_u64_add(tx_ring->tx_stats.defragment, 1); + + defrag_mbuf = m_defrag(*mbuf, M_NOWAIT); + if (defrag_mbuf == NULL) { + counter_u64_add(tx_ring->tx_stats.defragment_err, 1); + return (ENOMEM); + } + + /* If mbuf was defragmented succesfully, original mbuf is released. */ + *mbuf = defrag_mbuf; + + return (0); +} + +static int +ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf) { struct ena_adapter *adapter; struct ena_tx_buffer *tx_info; @@ -2617,40 +2644,40 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf *mbuf) uint16_t ena_qid; uint32_t len, nsegs, header_len; int i, rc; - int nb_hw_desc, num_frags; + int nb_hw_desc; ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id); adapter = tx_ring->que->adapter; ena_dev = adapter->ena_dev; io_sq = &adapter->ena_dev->io_sq_queues[ena_qid]; - ENA_ASSERT(mbuf, "mbuf is NULL\n"); + ENA_ASSERT(*mbuf, "mbuf is NULL\n"); - num_frags = ena_mbuf_count(mbuf); - if (num_frags > (adapter->max_tx_sgl_size - 2)) { - device_printf(adapter->pdev, - "too many fragments. Last fragment: %d!\n", num_frags); - return (ENA_COM_INVAL); + rc = ena_check_and_defragment_mbuf(tx_ring, mbuf); + if (rc) { + ena_trace(ENA_WARNING, + "Failed to defragment mbuf! err: %d", rc); + return (rc); } next_to_use = tx_ring->next_to_use; req_id = tx_ring->free_tx_ids[next_to_use]; tx_info = &tx_ring->tx_buffer_info[req_id]; - tx_info->mbuf = mbuf; + tx_info->mbuf = *mbuf; tx_info->num_of_bufs = 0; ena_buf = tx_info->bufs; - len = mbuf->m_len; + len = (*mbuf)->m_len; - ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes", mbuf->m_pkthdr.len); + ena_trace(ENA_DBG | ENA_TXPTH, "Tx: %d bytes", (*mbuf)->m_pkthdr.len); push_len = 0; header_len = min_t(uint32_t, len, tx_ring->tx_max_header_size); push_hdr = NULL; rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag, tx_info->map, - mbuf, segs, &nsegs, BUS_DMA_NOWAIT); + *mbuf, segs, &nsegs, BUS_DMA_NOWAIT); if (rc || (nsegs == 0)) { ena_trace(ENA_WARNING, @@ -2678,7 +2705,7 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf *mbuf) ena_tx_ctx.header_len = header_len; /* Set flags and meta data */ - ena_tx_csum(&ena_tx_ctx, mbuf); + ena_tx_csum(&ena_tx_ctx, *mbuf); /* Prepare the packet's descriptors and send them to device */ rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc); if (rc != 0) { @@ -2692,7 +2719,7 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf *mbuf) counter_enter(); counter_u64_add_protected(tx_ring->tx_stats.cnt, 1); - counter_u64_add_protected(tx_ring->tx_stats.bytes, mbuf->m_pkthdr.len); + counter_u64_add_protected(tx_ring->tx_stats.bytes, (*mbuf)->m_pkthdr.len); counter_exit(); tx_info->tx_descs = nb_hw_desc; @@ -2740,7 +2767,7 @@ ena_start_xmit(struct ena_ring *tx_ring) if (ena_com_sq_empty_space(io_sq) < ENA_TX_CLEANUP_TRESHOLD) ena_tx_cleanup(tx_ring); - if ((ret = ena_xmit_mbuf(tx_ring, mbuf)) != 0) { + if ((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0) { if (ret == ENA_COM_NO_MEM) { drbr_putback(adapter->ifp, tx_ring->br, mbuf); } else if (ret == ENA_COM_NO_SPACE) { diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h index 1090f0e62da..abb3871c701 100644 --- a/sys/dev/ena/ena.h +++ b/sys/dev/ena/ena.h @@ -226,6 +226,8 @@ struct ena_stats_tx { counter_u64_t doorbells; counter_u64_t missing_tx_comp; counter_u64_t bad_req_id; + counter_u64_t defragment; + counter_u64_t defragment_err; }; struct ena_stats_rx { diff --git a/sys/dev/ena/ena_sysctl.c b/sys/dev/ena/ena_sysctl.c index 2b55da0711b..5fc1894c967 100644 --- a/sys/dev/ena/ena_sysctl.c +++ b/sys/dev/ena/ena_sysctl.c @@ -155,6 +155,14 @@ ena_sysctl_add_stats(struct ena_adapter *adapter) SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "stops", CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops"); + SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, + "defragmentations", CTLFLAG_RD, + &tx_stats->defragment, + "Mbuf defragmentations"); + SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, + "defragmentation_err", CTLFLAG_RD, + &tx_stats->defragment_err, + "Mbuf defragmentation failures"); /* RX specific stats */ rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, From e67c655431b463c93cb8194a118f8a7cd9dc7dff Mon Sep 17 00:00:00 2001 From: Zbigniew Bodek Date: Tue, 30 May 2017 11:55:02 +0000 Subject: [PATCH 03/14] Add locks before each ena_up and ena_down Lock only ena_up and ena_down calls in ioctl handler, instead of whole ioctl. Locking ioctl with sx lock that is sleepable, is not allowed in some cases, e.g. when multicast options are being changed. Additional locking was added in deatch function to prevent race condition with ioctl function. Submitted by: Michal Krawczyk Obtained from: Semihalf Sponsored by: Amazon.com Inc. Differential revision: https://reviews.freebsd.org/D10924 --- sys/dev/ena/ena.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 0bc0aa7208f..7a9e0350afc 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -2285,16 +2285,16 @@ ena_ioctl(if_t ifp, u_long command, caddr_t data) /* * Acquiring lock to prevent from running up and down routines parallel. */ - sx_xlock(&adapter->ioctl_sx); - rc = 0; switch (command) { case SIOCSIFMTU: + sx_xlock(&adapter->ioctl_sx); ena_down(adapter); ena_change_mtu(ifp, ifr->ifr_mtu); rc = ena_up(adapter); + sx_unlock(&adapter->ioctl_sx); break; case SIOCSIFFLAGS: @@ -2306,11 +2306,16 @@ ena_ioctl(if_t ifp, u_long command, caddr_t data) "ioctl promisc/allmulti\n"); } } else { + sx_xlock(&adapter->ioctl_sx); rc = ena_up(adapter); + sx_unlock(&adapter->ioctl_sx); } } else { - if (ifp->if_drv_flags & IFF_DRV_RUNNING) + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + sx_xlock(&adapter->ioctl_sx); ena_down(adapter); + sx_unlock(&adapter->ioctl_sx); + } } break; @@ -2333,8 +2338,10 @@ ena_ioctl(if_t ifp, u_long command, caddr_t data) } if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { + sx_xlock(&adapter->ioctl_sx); ena_down(adapter); rc = ena_up(adapter); + sx_unlock(&adapter->ioctl_sx); } } @@ -2344,8 +2351,6 @@ ena_ioctl(if_t ifp, u_long command, caddr_t data) break; } - sx_unlock(&adapter->ioctl_sx); - return (rc); } @@ -3666,7 +3671,9 @@ ena_detach(device_t pdev) taskqueue_drain(adapter->reset_tq, &adapter->reset_task); taskqueue_free(adapter->reset_tq); + sx_xlock(&adapter->ioctl_sx); ena_down(adapter); + sx_unlock(&adapter->ioctl_sx); if (adapter->ifp != NULL) { ether_ifdetach(adapter->ifp); From 081169f24c22c9445bfee5c37ff9fcf824206215 Mon Sep 17 00:00:00 2001 From: Zbigniew Bodek Date: Tue, 30 May 2017 11:56:54 +0000 Subject: [PATCH 04/14] Add error handling to the ENA driver if init of the reset task fails Also, to simplify cleaning routine, reset task is initialized before allocating statistics and other resources. Submitted by: Michal Krawczyk Obtained from: Semihalf Sponsored by: Amazon.com Inc. Differential revision: https://reviews.freebsd.org/D10925 --- sys/dev/ena/ena.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 7a9e0350afc..3528f365cc1 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -3604,6 +3604,18 @@ ena_attach(device_t pdev) goto err_ifp_free; } + /* Initialize reset task queue */ + TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter); + adapter->reset_tq = taskqueue_create("ena_reset_enqueue", + M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq); + if (adapter->reset_tq == NULL) { + device_printf(adapter->pdev, + "Unable to create reset task queue\n"); + goto err_reset_tq; + } + taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, + "%s rstq", device_get_nameunit(adapter->pdev)); + /* Initialize statistics */ ena_alloc_counters((counter_u64_t *)&adapter->dev_stats, sizeof(struct ena_stats_dev)); @@ -3613,16 +3625,12 @@ ena_attach(device_t pdev) /* Tell the stack that the interface is not active */ if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - /* Initialize reset task queue */ - TASK_INIT(&adapter->reset_task, 0, ena_reset_task, adapter); - adapter->reset_tq = taskqueue_create("ena_reset_enqueue", - M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->reset_tq); - taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, - "%s rstq", device_get_nameunit(adapter->pdev)); - adapter->running = true; return (0); +err_reset_tq: + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); err_ifp_free: if_detach(adapter->ifp); if_free(adapter->ifp); From b9252a8889bbc8abc10b048203ed91b32c8eae0d Mon Sep 17 00:00:00 2001 From: Zbigniew Bodek Date: Tue, 30 May 2017 11:58:51 +0000 Subject: [PATCH 05/14] Move ENA's hw stats updating routine to separate task Initially, stats were being updated each time OS was requesting for the first statistic. To read statistics from hw, condvar was used. cv_timedwait cannot be called when unsleepable lock is held, and this happens when FreeBSD is requesting statistic. Seperate task is reading statistics from NIC each 1 second. Submitted by: Michal Krawczyk Obtained from: Semihalf Sponsored by: Amazon.com Inc. Differential revision: https://reviews.freebsd.org/D10926 --- sys/dev/ena/ena.c | 58 ++++++++++++++++++++++++++++++++++------------- sys/dev/ena/ena.h | 4 ++++ 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 3528f365cc1..3ef1f41f460 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -141,6 +141,7 @@ static void ena_free_irqs(struct ena_adapter*); static void ena_disable_msix(struct ena_adapter *); static void ena_unmask_all_io_irqs(struct ena_adapter *); static int ena_rss_configure(struct ena_adapter *); +static void ena_update_hw_stats(void *, int); static int ena_up_complete(struct ena_adapter *); static int ena_up(struct ena_adapter *); static void ena_down(struct ena_adapter *); @@ -2058,6 +2059,25 @@ static int ena_rss_configure(struct ena_adapter *adapter) return 0; } +static void +ena_update_hw_stats(void *arg, int pending) +{ + struct ena_adapter *adapter = arg; + int rc; + + for (;;) { + if (!adapter->up) + return; + + rc = ena_update_stats_counters(adapter); + if (rc) + ena_trace(ENA_WARNING, + "Error updating stats counters, rc = %d", rc); + + pause("ena update hw stats", hz); + } +} + static int ena_up_complete(struct ena_adapter *adapter) { @@ -2141,6 +2161,8 @@ ena_up(struct ena_adapter *adapter) callout_reset_sbt(&adapter->timer_service, SBT_1S, SBT_1S, ena_timer_service, (void *)adapter, 0); + taskqueue_enqueue(adapter->stats_tq, &adapter->stats_task); + adapter->up = true; } @@ -2193,24 +2215,8 @@ ena_get_counter(if_t ifp, ift_counter cnt) { struct ena_adapter *adapter; struct ena_hw_stats *stats; - int rc; adapter = if_getsoftc(ifp); - - /* - * Update only when asking for first counter and interface is up. - * Usually asks for all statistics in sequence. - */ - if (adapter->up) { - if (cnt == 0) { - rc = ena_update_stats_counters(adapter); - if (rc) { - ena_trace(ENA_WARNING, - "Error updating stats counters, rc = %d", - rc); - } - } - } stats = &adapter->hw_stats; switch (cnt) { @@ -2501,6 +2507,10 @@ ena_down(struct ena_adapter *adapter) if_setdrvflagbits(adapter->ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); + /* Drain task responsible for updating hw stats */ + while (taskqueue_cancel(adapter->stats_tq, &adapter->stats_task, NULL)) + taskqueue_drain(adapter->stats_tq, &adapter->stats_task); + ena_free_io_irq(adapter); ena_destroy_all_io_queues(adapter); @@ -3616,6 +3626,18 @@ ena_attach(device_t pdev) taskqueue_start_threads(&adapter->reset_tq, 1, PI_NET, "%s rstq", device_get_nameunit(adapter->pdev)); + /* Initialize task queue responsible for updating hw stats */ + TASK_INIT(&adapter->stats_task, 0, ena_update_hw_stats, adapter); + adapter->stats_tq = taskqueue_create_fast("ena_stats_update", + M_WAITOK | M_ZERO, taskqueue_thread_enqueue, &adapter->stats_tq); + if (adapter->stats_tq == NULL) { + device_printf(adapter->pdev, + "Unable to create taskqueue for updating hw stats\n"); + goto err_stats_tq; + } + taskqueue_start_threads(&adapter->stats_tq, 1, PI_REALTIME, + "%s stats tq", device_get_nameunit(adapter->pdev)); + /* Initialize statistics */ ena_alloc_counters((counter_u64_t *)&adapter->dev_stats, sizeof(struct ena_stats_dev)); @@ -3628,6 +3650,8 @@ ena_attach(device_t pdev) adapter->running = true; return (0); +err_stats_tq: + taskqueue_free(adapter->reset_tq); err_reset_tq: ena_free_mgmnt_irq(adapter); ena_disable_msix(adapter); @@ -3683,6 +3707,8 @@ ena_detach(device_t pdev) ena_down(adapter); sx_unlock(&adapter->ioctl_sx); + taskqueue_free(adapter->stats_tq); + if (adapter->ifp != NULL) { ether_ifdetach(adapter->ifp); if_free(adapter->ifp); diff --git a/sys/dev/ena/ena.h b/sys/dev/ena/ena.h index abb3871c701..a20498546db 100644 --- a/sys/dev/ena/ena.h +++ b/sys/dev/ena/ena.h @@ -403,6 +403,10 @@ struct ena_adapter { uint32_t missing_tx_max_queues; uint32_t missing_tx_threshold; + /* Task updating hw stats */ + struct task stats_task; + struct taskqueue *stats_tq; + /* Statistics */ struct ena_stats_dev dev_stats; struct ena_hw_stats hw_stats; From 416e886499fd2fd6f94e4939a6d32211e98c9c95 Mon Sep 17 00:00:00 2001 From: Zbigniew Bodek Date: Tue, 30 May 2017 12:00:56 +0000 Subject: [PATCH 06/14] Introduce additional locks when releasing TX resources and buffers in ENA There could be race condition with TX cleaning routine when cleaning mbufs, when it was called directly from main sending thread (ena_mq_start). Submitted by: Michal Krawczyk Obtained from: Semihalf Sponsored by: Amazon.com Inc. Differential revision: https://reviews.freebsd.org/D10927 --- sys/dev/ena/ena.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sys/dev/ena/ena.c b/sys/dev/ena/ena.c index 3ef1f41f460..fc82ed4aa17 100644 --- a/sys/dev/ena/ena.c +++ b/sys/dev/ena/ena.c @@ -713,6 +713,7 @@ ena_free_tx_resources(struct ena_adapter *adapter, int qid) drbr_flush(adapter->ifp, tx_ring->br); /* Free buffer DMA maps, */ + ENA_RING_MTX_LOCK(tx_ring); for (int i = 0; i < tx_ring->ring_size; i++) { m_freem(tx_ring->tx_buffer_info[i].mbuf); tx_ring->tx_buffer_info[i].mbuf = NULL; @@ -721,6 +722,7 @@ ena_free_tx_resources(struct ena_adapter *adapter, int qid) bus_dmamap_destroy(adapter->tx_buf_tag, tx_ring->tx_buffer_info[i].map); } + ENA_RING_MTX_UNLOCK(tx_ring); /* And free allocated memory. */ ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->tx_buffer_info); @@ -1121,6 +1123,7 @@ ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid) { struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + ENA_RING_MTX_LOCK(tx_ring); for (int i = 0; i < tx_ring->ring_size; i++) { struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; @@ -1134,6 +1137,7 @@ ena_free_tx_bufs(struct ena_adapter *adapter, unsigned int qid) m_free(tx_info->mbuf); tx_info->mbuf = NULL; } + ENA_RING_MTX_UNLOCK(tx_ring); return; } From cae91bbe96be5a42a7dfe5b003b4d79e9d41b0fd Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 30 May 2017 13:53:03 +0000 Subject: [PATCH 07/14] fix indentation MFC after: 4 days --- sys/x86/x86/identcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index 22f39668a24..9cf95e85101 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -906,7 +906,7 @@ printcpuinfo(void) "\033DBE" /* Data Breakpoint extension */ "\034PTSC" /* Performance TSC */ "\035PL2I" /* L2I perf count */ - "\036MWAITX" /* MONITORX/MWAITX instructions */ + "\036MWAITX" /* MONITORX/MWAITX instructions */ "\037" "\040" ); From 382a6bbcf1ac37ba61723f59f8d16e465cc31bd5 Mon Sep 17 00:00:00 2001 From: "Jonathan T. Looney" Date: Tue, 30 May 2017 14:32:44 +0000 Subject: [PATCH 08/14] Enforce the limit on ICMP messages before doing work to formulate the response. Delete an unneeded rate limit for UDP under IPv6. Because ICMP6 messages have their own rate limit, it is unnecessary to apply a second rate limit to UDP messages. Reviewed by: glebius MFC after: 2 weeks Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D10387 --- sys/netinet/ip_icmp.c | 12 +++++------- sys/netinet6/udp6_usrreq.c | 4 ---- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index 9db8c2aa7f8..5983b3386af 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -540,11 +540,10 @@ icmp_input(struct mbuf **mp, int *offp, int proto) ICMPSTAT_INC(icps_bmcastecho); break; } - icp->icmp_type = ICMP_ECHOREPLY; if (badport_bandlim(BANDLIM_ICMP_ECHO) < 0) goto freeit; - else - goto reflect; + icp->icmp_type = ICMP_ECHOREPLY; + goto reflect; case ICMP_TSTAMP: if (V_icmptstamprepl == 0) @@ -558,13 +557,12 @@ icmp_input(struct mbuf **mp, int *offp, int proto) ICMPSTAT_INC(icps_badlen); break; } + if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) + goto freeit; icp->icmp_type = ICMP_TSTAMPREPLY; icp->icmp_rtime = iptime(); icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */ - if (badport_bandlim(BANDLIM_ICMP_TSTAMP) < 0) - goto freeit; - else - goto reflect; + goto reflect; case ICMP_MASKREQ: if (V_icmpmaskrepl == 0) diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c index 1c544e14bae..2a831afa28b 100644 --- a/sys/netinet6/udp6_usrreq.c +++ b/sys/netinet6/udp6_usrreq.c @@ -104,9 +104,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include -#include #include #include #include @@ -481,8 +479,6 @@ udp6_input(struct mbuf **mp, int *offp, int proto) } if (V_udp_blackhole) goto badunlocked; - if (badport_bandlim(BANDLIM_ICMP6_UNREACH) < 0) - goto badunlocked; icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOPORT, 0); return (IPPROTO_DONE); } From fb04394554fade10d2da72e3d166fec59bb7d2bc Mon Sep 17 00:00:00 2001 From: "Jonathan T. Looney" Date: Tue, 30 May 2017 14:41:31 +0000 Subject: [PATCH 09/14] Fix two places in the ICMP6 code where we could dereference a NULL pointer in the icmp6_input() function. When processing an ICMP6_ECHO_REQUEST, if IP6_EXTHDR_GET fails, it will set nicmp6 and n to NULL. Therefore, we should condition our modification to nicmp6 on n being not NULL. And, when processing an ICMP6_WRUREQUEST in the (mode != FQDN) case, if m_dup_pkthdr() fails, the code will set n to NULL. However, the very next line dereferences n. Therefore, when m_dup_pkthdr() fails, we should discontinue further processing and follow the same path as when m_gethdr() fails. Reported by: clang static analyzer Reviewed by: ae MFC after: 2 weeks Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D10941 --- sys/netinet6/icmp6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index bd6e1d373a9..c6084d68b66 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -597,9 +597,9 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) sizeof(*nicmp6)); noff = off; } - nicmp6->icmp6_type = ICMP6_ECHO_REPLY; - nicmp6->icmp6_code = 0; if (n) { + nicmp6->icmp6_type = ICMP6_ECHO_REPLY; + nicmp6->icmp6_code = 0; ICMP6STAT_INC(icp6s_reflect); ICMP6STAT_INC(icp6s_outhist[ICMP6_ECHO_REPLY]); icmp6_reflect(n, noff); @@ -689,6 +689,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) */ m_free(n); n = NULL; + break; } maxhlen = M_TRAILINGSPACE(n) - (sizeof(*nip6) + sizeof(*nicmp6) + 4); From 8b07e00e9972b71957784209fd124dc6d08077f4 Mon Sep 17 00:00:00 2001 From: "Jonathan T. Looney" Date: Tue, 30 May 2017 14:50:28 +0000 Subject: [PATCH 10/14] Fix an unnecessary/incorrect check in the PKTOPT_EXTHDRCPY macro. This macro allocates memory and, if malloc does not return NULL, copies data into the new memory. However, it doesn't just check whether malloc returns NULL. It also checks whether we called malloc with M_NOWAIT. That is not necessary. While it may be that malloc() will only return NULL when the M_NOWAIT flag is set, we don't need to check for this when checking malloc's return value. Further, in this case, the check was not completely accurate, because it checked for flags == M_NOWAIT, rather than treating it as a bit field and checking for (flags & M_NOWAIT). Reviewed by: ae MFC after: 2 weeks Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D10942 --- sys/netinet6/ip6_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index c714a5c1a7c..c8a0a1716b0 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -2465,7 +2465,7 @@ do {\ if (src->type) {\ int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\ dst->type = malloc(hlen, M_IP6OPT, canwait);\ - if (dst->type == NULL && canwait == M_NOWAIT)\ + if (dst->type == NULL)\ goto bad;\ bcopy(src->type, dst->type, hlen);\ }\ From bd1c3bad76c8484ae000d14b6f71daa2be40ac8b Mon Sep 17 00:00:00 2001 From: Ed Maste Date: Tue, 30 May 2017 15:51:48 +0000 Subject: [PATCH 11/14] add a rescue/sh sanity check before installworld on the running system FreeBSD does not guarantee kernel forward compatibility (that is, running a newer userland on an older kernel). The documented upgrade procedure specifies that installkernel should be performed, followed by a reboot and then installworld. As a sanity check when installing onto the running system (DESTDIR is / or unset), attempt to run "sh echo OK" using rescue from the objdir. If rescue fails (e.g., because the system has not been rebooted and the "old" kernel lacks a system call required by the to-be-installed world), abort the installation. This should avoid ino64 foot-shooting when the proper upgrade procedure is not followed. Reviewed by: allanjude, gjb, kib MFC after: 2 weeks Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D10987 --- Makefile.inc1 | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Makefile.inc1 b/Makefile.inc1 index a06b1797e24..e3b778d40dd 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -975,6 +975,22 @@ __installcheck_UGID: .PHONY fi .endfor .endif +# +# If installing over the running system (DESTDIR is / or unset) and the install +# includes rescue, try running rescue from the objdir as a sanity check. If +# rescue is not functional (e.g., because it depends on a system call not +# supported by the currently running kernel), abort the installation. +# +.if !make(distributeworld) && ${MK_RESCUE} != "no" && \ + (empty(DESTDIR) || ${DESTDIR} == "/") && empty(BYPASS_INSTALLCHECK_SH) +_installcheck_world: __installcheck_sh_check +__installcheck_sh_check: .PHONY + @if [ "`${OBJTREE}${.CURDIR}/rescue/rescue/rescue sh -c 'echo OK'`" != \ + OK ]; then \ + echo "rescue/sh check failed, installation aborted" >&2; \ + false; \ + fi +.endif # # Required install tools to be saved in a scratch dir for safety. From cb564d2436494011a27f9cdbdc5424e800b1accd Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Tue, 30 May 2017 17:16:08 +0000 Subject: [PATCH 12/14] Add some miscellaneous definitions to support DRM drivers. Reviewed by: hselasky MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D10985 --- .../linuxkpi/common/include/linux/compiler.h | 1 + .../linuxkpi/common/include/linux/device.h | 17 +++++++++++++++++ sys/compat/linuxkpi/common/include/linux/io.h | 1 + .../linuxkpi/common/include/linux/kernel.h | 6 +++++- .../linuxkpi/common/include/linux/module.h | 2 ++ sys/compat/linuxkpi/common/include/linux/pci.h | 4 ++++ .../linuxkpi/common/include/linux/preempt.h | 3 +++ .../linuxkpi/common/include/linux/types.h | 2 ++ 8 files changed, 35 insertions(+), 1 deletion(-) diff --git a/sys/compat/linuxkpi/common/include/linux/compiler.h b/sys/compat/linuxkpi/common/include/linux/compiler.h index e0916849950..6a1f5286667 100644 --- a/sys/compat/linuxkpi/common/include/linux/compiler.h +++ b/sys/compat/linuxkpi/common/include/linux/compiler.h @@ -56,6 +56,7 @@ #define __devexit #define __exit #define __rcu +#define __malloc #define ___stringify(...) #__VA_ARGS__ #define __stringify(...) ___stringify(__VA_ARGS__) #define __attribute_const__ __attribute__((__const__)) diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h index d85282af4c1..af69fa05e2e 100644 --- a/sys/compat/linuxkpi/common/include/linux/device.h +++ b/sys/compat/linuxkpi/common/include/linux/device.h @@ -61,6 +61,23 @@ struct class { char * (*devnode)(struct device *dev, umode_t *mode); }; +struct dev_pm_ops { + int (*suspend)(struct device *dev); + int (*suspend_late)(struct device *dev); + int (*resume)(struct device *dev); + int (*resume_early)(struct device *dev); + int (*freeze)(struct device *dev); + int (*freeze_late)(struct device *dev); + int (*thaw)(struct device *dev); + int (*poweroff)(struct device *dev); + int (*poweroff_late)(struct device *dev); + int (*restore)(struct device *dev); + int (*restore_early)(struct device *dev); + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + int (*runtime_idle)(struct device *dev); +}; + struct device { struct device *parent; struct list_head irqents; diff --git a/sys/compat/linuxkpi/common/include/linux/io.h b/sys/compat/linuxkpi/common/include/linux/io.h index 09093467568..17647986563 100644 --- a/sys/compat/linuxkpi/common/include/linux/io.h +++ b/sys/compat/linuxkpi/common/include/linux/io.h @@ -36,6 +36,7 @@ #include #include +#include static inline uint32_t __raw_readl(const volatile void *addr) diff --git a/sys/compat/linuxkpi/common/include/linux/kernel.h b/sys/compat/linuxkpi/common/include/linux/kernel.h index d4999e82005..3631ba5f242 100644 --- a/sys/compat/linuxkpi/common/include/linux/kernel.h +++ b/sys/compat/linuxkpi/common/include/linux/kernel.h @@ -87,7 +87,9 @@ #define S64_C(x) x ## LL #define U64_C(x) x ## ULL -#define BUILD_BUG_ON(x) CTASSERT(!(x)) +#define BUILD_BUG_ON(x) CTASSERT(!(x)) +#define BUILD_BUG_ON_MSG(x, msg) BUILD_BUG_ON(x) +#define BUILD_BUG_ON_NOT_POWER_OF_2(x) BUILD_BUG_ON(!powerof2(x)) #define BUG() panic("BUG at %s:%d", __FILE__, __LINE__) #define BUG_ON(cond) do { \ @@ -119,6 +121,8 @@ unlikely(__ret); \ }) +#define oops_in_progress SCHEDULER_STOPPED() + #undef ALIGN #define ALIGN(x, y) roundup2((x), (y)) #undef PTR_ALIGN diff --git a/sys/compat/linuxkpi/common/include/linux/module.h b/sys/compat/linuxkpi/common/include/linux/module.h index 7db9f08c84b..59c30a7ab77 100644 --- a/sys/compat/linuxkpi/common/include/linux/module.h +++ b/sys/compat/linuxkpi/common/include/linux/module.h @@ -45,6 +45,8 @@ #define MODULE_AUTHOR(name) #define MODULE_DESCRIPTION(name) #define MODULE_LICENSE(name) +#define MODULE_INFO(tag, info) +#define MODULE_FIRMWARE(firmware) #define THIS_MODULE ((struct module *)0) diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h index a7fcb04c480..ae96f8c1d9f 100644 --- a/sys/compat/linuxkpi/common/include/linux/pci.h +++ b/sys/compat/linuxkpi/common/include/linux/pci.h @@ -72,16 +72,20 @@ struct pci_device_id { #define PCI_VENDOR_ID_IBM 0x1014 #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_VENDOR_ID_MELLANOX 0x15b3 +#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4 #define PCI_VENDOR_ID_SERVERWORKS 0x1166 #define PCI_VENDOR_ID_SONY 0x104d #define PCI_VENDOR_ID_TOPSPIN 0x1867 #define PCI_VENDOR_ID_VIA 0x1106 +#define PCI_SUBVENDOR_ID_REDHAT_QUMRANET 0x1af4 +#define PCI_DEVICE_ID_ATI_RADEON_QY 0x5159 #define PCI_DEVICE_ID_MELLANOX_TAVOR 0x5a44 #define PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE 0x5a46 #define PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT 0x6278 #define PCI_DEVICE_ID_MELLANOX_ARBEL 0x6282 #define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c #define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274 +#define PCI_SUBDEVICE_ID_QEMU 0x1100 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) diff --git a/sys/compat/linuxkpi/common/include/linux/preempt.h b/sys/compat/linuxkpi/common/include/linux/preempt.h index 2c64166b1c7..4a51e292ef6 100644 --- a/sys/compat/linuxkpi/common/include/linux/preempt.h +++ b/sys/compat/linuxkpi/common/include/linux/preempt.h @@ -34,4 +34,7 @@ #define in_interrupt() \ (curthread->td_intr_nesting_level || curthread->td_critnest) +#define preempt_disable() critical_enter() +#define preempt_enable() critical_exit() + #endif /* _LINUX_PREEMPT_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/types.h b/sys/compat/linuxkpi/common/include/linux/types.h index 41fecfc9a9f..8852a4d3101 100644 --- a/sys/compat/linuxkpi/common/include/linux/types.h +++ b/sys/compat/linuxkpi/common/include/linux/types.h @@ -65,6 +65,8 @@ typedef u64 phys_addr_t; #define DECLARE_BITMAP(n, bits) \ unsigned long n[howmany(bits, sizeof(long) * 8)] +typedef unsigned long irq_hw_number_t; + struct rcu_head { void *raw[2]; } __aligned(sizeof(void *)); From 714d5fbcef84a08adcbc0cbf762a110d0013313f Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Tue, 30 May 2017 18:03:34 +0000 Subject: [PATCH 13/14] Use .Xr to reference libblacklist(3), blacklistctl(8), and blacklistd(8) MFC after: now Sponsored by: Dell EMC Isilon --- tools/build/options/WITHOUT_BLACKLIST | 5 ++++- tools/build/options/WITHOUT_BLACKLIST_SUPPORT | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/build/options/WITHOUT_BLACKLIST b/tools/build/options/WITHOUT_BLACKLIST index 1ab779fab27..39ccd585aaa 100644 --- a/tools/build/options/WITHOUT_BLACKLIST +++ b/tools/build/options/WITHOUT_BLACKLIST @@ -1,2 +1,5 @@ .\" $FreeBSD$ -Set this if you do not want to build blacklistd/blacklistctl. +Set this if you do not want to build +.Xr blacklistd 8 +and +.Xr blacklistctl 8 . diff --git a/tools/build/options/WITHOUT_BLACKLIST_SUPPORT b/tools/build/options/WITHOUT_BLACKLIST_SUPPORT index 674c967a0c6..e190c534e30 100644 --- a/tools/build/options/WITHOUT_BLACKLIST_SUPPORT +++ b/tools/build/options/WITHOUT_BLACKLIST_SUPPORT @@ -1,5 +1,7 @@ .\" $FreeBSD$ -Set to build some programs without blacklistd support, like +Set to build some programs without +.Xr libblacklist 3 +support, like .Xr fingerd 8 , .Xr ftpd 8 , .Xr rlogind 8 , From 4650b8ae52a7002085dca4b00cb9dfa5cbcf8b0d Mon Sep 17 00:00:00 2001 From: Enji Cooper Date: Tue, 30 May 2017 18:06:19 +0000 Subject: [PATCH 14/14] Regenerate src.conf(5) This contains a number of content changes due to src.opts.mk changes. Sponsored by: Dell EMC Isilon --- share/man/man5/src.conf.5 | 66 ++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index 4115ed31267..cccd8d1bc2f 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,6 +1,6 @@ .\" DO NOT EDIT-- this file is generated by tools/build/options/makeman. .\" $FreeBSD$ -.Dd April 21, 2017 +.Dd May 30, 2017 .Dt SRC.CONF 5 .Os .Sh NAME @@ -180,7 +180,10 @@ as part of the bootstrap process. This is a default setting on amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. .It Va WITHOUT_BLACKLIST -Set this if you do not want to build blacklistd/blacklistctl. +Set this if you do not want to build +.Xr blacklistd 8 +and +.Xr blacklistctl 8 . When set, it enforces these options: .Pp .Bl -item -compact @@ -188,7 +191,9 @@ When set, it enforces these options: .Va WITHOUT_BLACKLIST_SUPPORT .El .It Va WITHOUT_BLACKLIST_SUPPORT -Set to build some programs without blacklistd support, like +Set to build some programs without +.Xr libblacklist 3 +support, like .Xr fingerd 8 , .Xr ftpd 8 , .Xr rlogind 8 , @@ -705,13 +710,34 @@ Set to not build .Xr gdb 1 . .Pp This is a default setting on -amd64/amd64, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and riscv/riscv64sf. +arm64/aarch64, riscv/riscv64 and riscv/riscv64sf. .It Va WITH_GDB Set to build .Xr gdb 1 . .Pp This is a default setting on +amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe and sparc64/sparc64. +.It Va WITHOUT_GDB_LIBEXEC +Set to install +.Xr gdb 1 +into +.Pa /usr/bin . +.Pp +This is a default setting on arm/arm, arm/armeb, arm/armv6 and sparc64/sparc64. +.It Va WITH_GDB_LIBEXEC +Set to install +.Xr gdb 1 +into +.Pa /usr/libexec . +This permits +.Xr gdb 1 +to be used as a fallback for +.Xr crashinfo 8 +if a newer version is not installed. +.Pp +This is a default setting on +amd64/amd64, arm64/aarch64, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64 and riscv/riscv64sf. .It Va WITHOUT_GNUCXX Do not build the GNU C++ stack (g++, libstdc++). This is the default on platforms where clang is the system compiler. @@ -943,12 +969,6 @@ Set to not build LLVM's lld linker. .Pp This is a default setting on mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. -When set, it enforces these options: -.Pp -.Bl -item -compact -.It -.Va WITHOUT_LLD_IS_LD -.El .It Va WITH_LLD Set to build LLVM's lld linker. .Pp @@ -987,14 +1007,6 @@ Set to use LLVM's LLD as the system linker, instead of GNU binutils ld. .Pp This is a default setting on arm64/aarch64. -When set, these options are also in effect: -.Pp -.Bl -inset -compact -.It Va WITHOUT_SYSTEM_COMPILER -(unless -.Va WITH_SYSTEM_COMPILER -is set explicitly) -.El .It Va WITHOUT_LLVM_LIBUNWIND Set to use GCC's stack unwinder (instead of LLVM's libunwind). .Pp @@ -1412,24 +1424,6 @@ The and .Va WITHOUT_GCC options control those. -.Pp -This is a default setting on -arm64/aarch64. -.It Va WITH_SYSTEM_COMPILER -Set to opportunistically skip building a cross-compiler during the -bootstrap phase of the build. -If the currently installed compiler matches the planned bootstrap compiler -type and revision, then it will not be built. -This does not prevent a compiler from being built for installation though, -only for building one for the build itself. -The -.Va WITHOUT_CLANG -and -.Va WITHOUT_GCC -options control those. -.Pp -This is a default setting on -amd64/amd64, arm/arm, arm/armeb, arm/armv6, i386/i386, mips/mipsel, mips/mips, mips/mips64el, mips/mips64, mips/mipsn32, mips/mipselhf, mips/mipshf, mips/mips64elhf, mips/mips64hf, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpcspe, riscv/riscv64, riscv/riscv64sf and sparc64/sparc64. .It Va WITHOUT_TALK Set to not build or install .Xr talk 1