mirror of
https://github.com/opnsense/src.git
synced 2026-02-18 18:20:26 -05:00
axgbe: gracefully handle i2c bus failures
In (unknown) situations it seems the i2c bus can have trouble,
while nothing about the current link state has changed, the driver
would react by going into a link down state, and start busylooping
on up to 4 cores. Even if there was a valid link, such spinning
on a cpu by a kernel thread would wreak havoc to existing and
new connections.
This patch does the following:
1. If such a bus failure occurs, we keep the last known link state.
2. Prevent busy looping by implementing the lockmgr() facility to
be able to sleep while the i2c code waits on the i2c ISR. We cap
this with a timeout.
3. Pin the admin queues to the last CPU in the system, to prevent
other scenarios where busy looping might occur from landing on CPU
0, which especially seems to cause a lot of issues.
Given the design constraints both in hardware and in software,
the lockmgr() seems to be the only viable option, even though
FreeBSD explicitly forbids sleeping in callout context, but
fails to explain why this is or offer alternatives.
axgbe: revert allocating admin queues to last CPU
The issue was resolved in 52454a1e5b.
Scheduled threads such as CARP are now no longer pinned to CPU 0, making sure
they always get their time slice even if CPUs are blocked.
This commit is contained in:
parent
699872bcac
commit
2cd816109c
6 changed files with 111 additions and 45 deletions
|
|
@ -835,7 +835,8 @@ xgbe_service(void *ctx, int pending)
|
|||
|
||||
pdata->phy_if.phy_status(pdata);
|
||||
|
||||
if (prev_state != pdata->phy.link) {
|
||||
if (prev_state != pdata->phy.link &&
|
||||
pdata->phy.link != XGBE_LINK_UNKNOWN) {
|
||||
pdata->phy_link = pdata->phy.link;
|
||||
axgbe_if_update_admin_status(sc->ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,6 +115,14 @@
|
|||
#include "xgbe.h"
|
||||
#include "xgbe-common.h"
|
||||
|
||||
#include <sys/systm.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/lockmgr.h>
|
||||
|
||||
struct lock xgbe_i2c_lock;
|
||||
|
||||
#define XGBE_ABORT_COUNT 500
|
||||
#define XGBE_DISABLE_COUNT 1000
|
||||
|
||||
|
|
@ -324,8 +332,12 @@ out:
|
|||
axgbe_printf(3, "%s: ret %d stop %d\n", __func__, state->ret,
|
||||
XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET));
|
||||
|
||||
if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET))
|
||||
if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET)) {
|
||||
pdata->i2c_complete = true;
|
||||
if (!cold) {
|
||||
wakeup_one(&pdata->i2c_complete);
|
||||
}
|
||||
}
|
||||
|
||||
reissue_check:
|
||||
/* Reissue interrupt if status is not clear */
|
||||
|
|
@ -417,23 +429,35 @@ xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op)
|
|||
*/
|
||||
xgbe_i2c_enable_interrupts(pdata);
|
||||
|
||||
timeout = ticks + (20 * hz);
|
||||
while (ticks < timeout) {
|
||||
if (cold) {
|
||||
timeout = ticks + (20 * hz);
|
||||
while (ticks < timeout) {
|
||||
|
||||
if (!pdata->i2c_complete) {
|
||||
DELAY(200);
|
||||
continue;
|
||||
if (!pdata->i2c_complete) {
|
||||
DELAY(200);
|
||||
continue;
|
||||
}
|
||||
|
||||
axgbe_printf(1, "%s: I2C OP complete\n", __func__);
|
||||
break;
|
||||
}
|
||||
|
||||
axgbe_printf(1, "%s: I2C OP complete\n", __func__);
|
||||
break;
|
||||
if ((ticks >= timeout) && !pdata->i2c_complete) {
|
||||
axgbe_error("%s: operation timed out\n", __func__);
|
||||
ret = -ETIMEDOUT;
|
||||
goto disable;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (mtx_sleep(&pdata->i2c_complete, &pdata->i2c_mutex, 0, "i2c_xfer",
|
||||
10*hz) == EWOULDBLOCK) {
|
||||
axgbe_error("%s: operation timed out\n", __func__);
|
||||
ret = (-ETIMEDOUT);
|
||||
goto disable;
|
||||
}
|
||||
}
|
||||
|
||||
if ((ticks >= timeout) && !pdata->i2c_complete) {
|
||||
axgbe_error("%s: operation timed out\n", __func__);
|
||||
ret = -ETIMEDOUT;
|
||||
goto disable;
|
||||
}
|
||||
axgbe_printf(1, "%s: I2C OP complete\n", __func__);
|
||||
|
||||
ret = state->ret;
|
||||
axgbe_printf(3, "%s: i2c xfer ret %d abrt_source 0x%x \n", __func__,
|
||||
|
|
|
|||
|
|
@ -1384,12 +1384,15 @@ xgbe_phy_status(struct xgbe_prv_data *pdata)
|
|||
axgbe_printf(1, "link_status returned Link:%d an_restart:%d aneg:%d\n",
|
||||
pdata->phy.link, an_restart, link_aneg);
|
||||
|
||||
if (pdata->phy.link == XGBE_LINK_UNKNOWN)
|
||||
return;
|
||||
|
||||
if (an_restart) {
|
||||
xgbe_phy_config_aneg(pdata);
|
||||
return;
|
||||
}
|
||||
|
||||
if (pdata->phy.link) {
|
||||
if (pdata->phy.link == XGBE_LINK_UP) {
|
||||
axgbe_printf(2, "Link Active\n");
|
||||
if (link_aneg && !xgbe_phy_aneg_done(pdata)) {
|
||||
axgbe_printf(1, "phy_link set check timeout\n");
|
||||
|
|
|
|||
|
|
@ -594,7 +594,7 @@ xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
|
|||
reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
|
||||
reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
|
||||
|
||||
return ((reg & MDIO_STAT1_LSTATUS) ? 1 : 0);
|
||||
return ((reg & MDIO_STAT1_LSTATUS) ? XGBE_LINK_UP : XGBE_LINK_DOWN);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -115,7 +115,12 @@
|
|||
#include "xgbe.h"
|
||||
#include "xgbe-common.h"
|
||||
|
||||
struct mtx xgbe_phy_comm_lock;
|
||||
#include <sys/types.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/lockmgr.h>
|
||||
|
||||
struct mtx xgbe_cold_comm_lock;
|
||||
struct lock xgbe_comm_lock;
|
||||
|
||||
#define XGBE_PHY_PORT_SPEED_100 BIT(0)
|
||||
#define XGBE_PHY_PORT_SPEED_1000 BIT(1)
|
||||
|
|
@ -669,7 +674,11 @@ xgbe_phy_sfp_get_mux(struct xgbe_prv_data *pdata)
|
|||
static void
|
||||
xgbe_phy_put_comm_ownership(struct xgbe_prv_data *pdata)
|
||||
{
|
||||
mtx_unlock(&xgbe_phy_comm_lock);
|
||||
if (cold) {
|
||||
mtx_unlock(&xgbe_cold_comm_lock);
|
||||
} else {
|
||||
lockmgr(&xgbe_comm_lock, LK_RELEASE, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -683,7 +692,12 @@ xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata)
|
|||
* the driver needs to take the software mutex and then the hardware
|
||||
* mutexes before being able to use the busses.
|
||||
*/
|
||||
mtx_lock(&xgbe_phy_comm_lock);
|
||||
if (cold) {
|
||||
mtx_lock(&xgbe_cold_comm_lock);
|
||||
} else if (lockmgr(&xgbe_comm_lock, LK_EXCLUSIVE, NULL) == EWOULDBLOCK) {
|
||||
axgbe_error("unable to grab lockmgr comms lock\n");
|
||||
return (-ETIMEDOUT);
|
||||
}
|
||||
|
||||
/* Clear the mutexes */
|
||||
XP_IOWRITE(pdata, XP_I2C_MUTEX, XGBE_MUTEX_RELEASE);
|
||||
|
|
@ -710,7 +724,7 @@ xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata)
|
|||
return (0);
|
||||
}
|
||||
|
||||
mtx_unlock(&xgbe_phy_comm_lock);
|
||||
xgbe_phy_put_comm_ownership(pdata);
|
||||
|
||||
axgbe_error("unable to obtain hardware mutexes\n");
|
||||
|
||||
|
|
@ -1573,12 +1587,13 @@ put:
|
|||
return (ret);
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
|
||||
{
|
||||
struct xgbe_phy_data *phy_data = pdata->phy_data;
|
||||
uint8_t gpio_reg, gpio_ports[2];
|
||||
int ret, prev_sfp_inputs = phy_data->port_sfp_inputs;
|
||||
int ret = 0;
|
||||
int prev_sfp_inputs = phy_data->port_sfp_inputs;
|
||||
int shift = GPIO_MASK_WIDTH * (3 - phy_data->port_id);
|
||||
|
||||
/* Read the input port registers */
|
||||
|
|
@ -1588,7 +1603,7 @@ xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
|
|||
ret = xgbe_phy_sfp_get_mux(pdata);
|
||||
if (ret) {
|
||||
axgbe_error("I2C error setting SFP MUX\n");
|
||||
return;
|
||||
return (-EIO);
|
||||
}
|
||||
|
||||
gpio_reg = 0;
|
||||
|
|
@ -1613,7 +1628,13 @@ xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
|
|||
__func__, phy_data->sfp_mod_absent, phy_data->sfp_gpio_inputs);
|
||||
|
||||
put_mux:
|
||||
xgbe_phy_sfp_put_mux(pdata);
|
||||
ret = xgbe_phy_sfp_put_mux(pdata);
|
||||
if (ret) {
|
||||
axgbe_error("I2C error putting SFP MUX\n");
|
||||
ret = (-EIO);
|
||||
}
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -1765,36 +1786,40 @@ xgbe_phy_sfp_reset(struct xgbe_phy_data *phy_data)
|
|||
phy_data->sfp_speed = XGBE_SFP_SPEED_UNKNOWN;
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata)
|
||||
{
|
||||
struct xgbe_phy_data *phy_data = pdata->phy_data;
|
||||
int ret, prev_sfp_state = phy_data->sfp_mod_absent;
|
||||
int prev_sfp_state = phy_data->sfp_mod_absent;
|
||||
int ret = 0;
|
||||
|
||||
ret = xgbe_phy_get_comm_ownership(pdata);
|
||||
if (ret)
|
||||
return;
|
||||
return (-EIO);
|
||||
|
||||
/* Read the SFP signals and check for module presence */
|
||||
xgbe_phy_sfp_signals(pdata);
|
||||
ret = xgbe_phy_sfp_signals(pdata);
|
||||
if (ret) {
|
||||
ret = (-EIO);
|
||||
goto put;
|
||||
}
|
||||
|
||||
if (phy_data->sfp_mod_absent) {
|
||||
if (prev_sfp_state != phy_data->sfp_mod_absent)
|
||||
axgbe_error("%s: mod absent\n", __func__);
|
||||
xgbe_phy_sfp_mod_absent(pdata);
|
||||
goto put;
|
||||
goto phy_settings;
|
||||
}
|
||||
|
||||
ret = xgbe_phy_sfp_read_eeprom(pdata);
|
||||
if (ret) {
|
||||
/* Treat any error as if there isn't an SFP plugged in */
|
||||
/* Assume last known state when an error occurs */
|
||||
axgbe_error("%s: eeprom read failed\n", __func__);
|
||||
ret = xgbe_read_gpio_expander(pdata);
|
||||
|
||||
if (!ret)
|
||||
xgbe_log_gpio_expander(pdata);
|
||||
|
||||
xgbe_phy_sfp_reset(phy_data);
|
||||
xgbe_phy_sfp_mod_absent(pdata);
|
||||
goto put;
|
||||
}
|
||||
|
||||
|
|
@ -1802,14 +1827,16 @@ xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata)
|
|||
|
||||
xgbe_phy_sfp_external_phy(pdata);
|
||||
|
||||
put:
|
||||
phy_settings:
|
||||
xgbe_phy_sfp_phy_settings(pdata);
|
||||
|
||||
axgbe_printf(3, "%s: phy speed: 0x%x duplex: 0x%x autoneg: 0x%x "
|
||||
"pause_autoneg: 0x%x\n", __func__, pdata->phy.speed,
|
||||
pdata->phy.duplex, pdata->phy.autoneg, pdata->phy.pause_autoneg);
|
||||
|
||||
put:
|
||||
xgbe_phy_put_comm_ownership(pdata);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
|
|
@ -3231,12 +3258,16 @@ xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
|
|||
if (phy_data->port_mode == XGBE_PORT_MODE_SFP) {
|
||||
/* Check SFP signals */
|
||||
axgbe_printf(3, "%s: calling phy detect\n", __func__);
|
||||
xgbe_phy_sfp_detect(pdata);
|
||||
ret = xgbe_phy_sfp_detect(pdata);
|
||||
|
||||
if (ret) {
|
||||
return (XGBE_LINK_UNKNOWN);
|
||||
}
|
||||
|
||||
if (phy_data->sfp_changed) {
|
||||
axgbe_printf(1, "%s: SFP changed observed\n", __func__);
|
||||
*an_restart = 1;
|
||||
return (0);
|
||||
return (XGBE_LINK_DOWN);
|
||||
}
|
||||
|
||||
if (phy_data->sfp_mod_absent || phy_data->sfp_rx_los) {
|
||||
|
|
@ -3248,7 +3279,7 @@ xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
|
|||
xgbe_rrc(pdata);
|
||||
}
|
||||
|
||||
return (0);
|
||||
return (XGBE_LINK_DOWN);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3264,7 +3295,7 @@ xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
|
|||
ret = xgbe_phy_read_status(pdata);
|
||||
if (ret) {
|
||||
axgbe_error("Link: Read status returned %d\n", ret);
|
||||
return (0);
|
||||
return (XGBE_LINK_UNKNOWN);
|
||||
}
|
||||
|
||||
axgbe_printf(2, "%s: link speed %#x duplex %#x media %#x "
|
||||
|
|
@ -3274,10 +3305,10 @@ xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
|
|||
ret = (ret < 0) ? ret : (ret & BMSR_ACOMP);
|
||||
axgbe_printf(2, "Link: BMCR returned %d\n", ret);
|
||||
if ((pdata->phy.autoneg == AUTONEG_ENABLE) && !ret)
|
||||
return (0);
|
||||
return (XGBE_LINK_DOWN);
|
||||
|
||||
if (pdata->phy.link)
|
||||
return (1);
|
||||
return (XGBE_LINK_UP);
|
||||
|
||||
xgbe_rrc(pdata);
|
||||
}
|
||||
|
|
@ -3291,12 +3322,12 @@ mdio_read:
|
|||
reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
|
||||
axgbe_printf(1, "%s: link_status reg: 0x%x\n", __func__, reg);
|
||||
if (reg & MDIO_STAT1_LSTATUS)
|
||||
return (1);
|
||||
return (XGBE_LINK_UP);
|
||||
|
||||
/* No link, attempt a receiver reset cycle */
|
||||
xgbe_rrc(pdata);
|
||||
|
||||
return (0);
|
||||
return (XGBE_LINK_DOWN);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -3886,9 +3917,11 @@ xgbe_phy_init(struct xgbe_prv_data *pdata)
|
|||
struct xgbe_phy_data *phy_data;
|
||||
int ret;
|
||||
|
||||
/* Initialize the global lock */
|
||||
if (!mtx_initialized(&xgbe_phy_comm_lock))
|
||||
mtx_init(&xgbe_phy_comm_lock, "xgbe phy common lock", NULL, MTX_DEF);
|
||||
/* Initialize the global locks */
|
||||
if (!mtx_initialized(&xgbe_cold_comm_lock))
|
||||
mtx_init(&xgbe_cold_comm_lock, "xgbe cold boot common lock",
|
||||
NULL, MTX_DEF | MTX_RECURSE);
|
||||
lockinit(&xgbe_comm_lock, 0, "xgbe", 500, LK_TIMELOCK | LK_CANRECURSE);
|
||||
|
||||
/* Check if enabled */
|
||||
if (!xgbe_phy_port_enabled(pdata)) {
|
||||
|
|
|
|||
|
|
@ -128,6 +128,11 @@
|
|||
|
||||
#include "xgbe_osdep.h"
|
||||
|
||||
/* Link states */
|
||||
#define XGBE_LINK_DOWN 0
|
||||
#define XGBE_LINK_UP 1
|
||||
#define XGBE_LINK_UNKNOWN 2
|
||||
|
||||
/* From linux/dcbnl.h */
|
||||
#define IEEE_8021QAZ_MAX_TCS 8
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue