diff --git a/sys/conf/files b/sys/conf/files
index abe8da6a5d8..1dc9e132059 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4732,6 +4732,8 @@ dev/mlx5/mlx5_core/mlx5_cmd.c optional mlx5 pci \
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_cq.c optional mlx5 pci \
 	compile-with "${OFED_C}"
+dev/mlx5/mlx5_core/mlx5_crspace.c optional mlx5 pci \
+	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_diagnostics.c optional mlx5 pci \
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_core/mlx5_eq.c optional mlx5 pci \
diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
index da1ada1dd05..8bcc64726be 100644
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -651,6 +651,7 @@ struct mlx5_core_dev {
 	struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
 	u32 num_q_counter_allocated[MLX5_INTERFACE_NUMBER];
 	struct mlx5_dump_data *dump_data;
+	u32 vsec_addr;
 };
 
 enum {
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_core.h b/sys/dev/mlx5/mlx5_core/mlx5_core.h
index 572abceed72..650dd457430 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_core.h
+++ b/sys/dev/mlx5/mlx5_core/mlx5_core.h
@@ -64,6 +64,16 @@ enum {
 	MLX5_CMD_TIME, /* print command execution time */
 };
 
+enum mlx5_semaphore_space_address {
+	MLX5_SEMAPHORE_SW_RESET = 0x20,
+};
+
+enum {
+	UNLOCK = 0,
+	LOCK = 1,
+	CAP_ID = 0x9,
+};
+
 struct mlx5_core_dev;
 
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
@@ -95,4 +105,8 @@ struct mlx5_crspace_regmap {
 
 extern struct pci_driver mlx5_core_driver;
 
+void mlx5_vsec_init(struct mlx5_core_dev *dev);
+int mlx5_pciconf_cap9_sem(struct mlx5_core_dev *dev, int state);
+int mlx5_pciconf_set_sem_addr_space(struct mlx5_core_dev *dev,
+    u32 sem_space_address, int state);
 #endif /* __MLX5_CORE_H__ */
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_crspace.c b/sys/dev/mlx5/mlx5_core/mlx5_crspace.c
new file mode 100644
index 00000000000..ee4fefda59d
--- /dev/null
+++ b/sys/dev/mlx5/mlx5_core/mlx5_crspace.c
@@ -0,0 +1,296 @@
+/*-
+ * Copyright (c) 2013-2018, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include
+#include
+#include
+#include "mlx5_core.h"
+
+/* Register offsets relative to dev->vsec_addr, and bit-field positions. */
+enum {
+	PCI_CTRL_OFFSET = 0x4,
+	PCI_COUNTER_OFFSET = 0x8,
+	PCI_SEMAPHORE_OFFSET = 0xc,
+
+	PCI_ADDR_OFFSET = 0x10,
+	PCI_DATA_OFFSET = 0x14,
+
+	PCI_FLAG_BIT_OFFS = 31,
+	PCI_SPACE_BIT_OFFS = 0,
+	PCI_SPACE_BIT_LEN = 16,
+	PCI_SIZE_VLD_BIT_OFFS = 28,
+	PCI_SIZE_VLD_BIT_LEN = 1,
+	PCI_STATUS_BIT_OFFS = 29,
+	PCI_STATUS_BIT_LEN = 3,
+};
+
+enum {
+	IFC_MAX_RETRIES = 2048
+};
+
+#define MLX5_EXTRACT_C(source, offset, size) \
+	((((unsigned)(source)) >> (offset)) & MLX5_ONES32(size))
+#define MLX5_EXTRACT(src, start, len) \
+	(((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len))
+#define MLX5_ONES32(size) \
+	((size) ? (0xffffffff >> (32 - (size))) : 0)
+#define MLX5_MASK32(offset, size) \
+	(MLX5_ONES32(size) << (offset))
+#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \
+	((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \
+	((rsrc1) & (~MLX5_MASK32((start), (len)))))
+#define MLX5_MERGE(rsrc1, rsrc2, start, len) \
+	(((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len))
+
+/*
+ * Poll the flag bit (PCI_FLAG_BIT_OFFS) of the address register until it
+ * equals expected_val.  Sleeps 1-2 ms on every 16th miss and returns
+ * -EBUSY once more than IFC_MAX_RETRIES reads have missed.
+ */
+static int mlx5_pciconf_wait_on_flag(struct mlx5_core_dev *dev,
+    u8 expected_val)
+{
+	int retries = 0;
+	u32 flag;
+
+	for(;;) {
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+		    PCI_ADDR_OFFSET, &flag);
+		flag = MLX5_EXTRACT(flag, PCI_FLAG_BIT_OFFS, 1);
+		if (flag == expected_val)
+			return (0);
+		retries++;
+		if (retries > IFC_MAX_RETRIES)
+			return (-EBUSY);
+		if ((retries & 0xf) == 0)
+			usleep_range(1000, 2000);
+	}
+}
+
+/*
+ * Read one dword from 'offset' through the address/data register pair.
+ * Bit 31 of 'offset' carries the flag, so it must be clear (-EINVAL).
+ */
+static int mlx5_pciconf_read(struct mlx5_core_dev *dev,
+    unsigned int offset, u32 *data)
+{
+	u32 address;
+	int ret;
+
+	if (MLX5_EXTRACT(offset, 31, 1))
+		return -EINVAL;
+	address = MLX5_MERGE(offset, 0, PCI_FLAG_BIT_OFFS, 1);
+	pci_write_config_dword(dev->pdev, dev->vsec_addr +
+	    PCI_ADDR_OFFSET, address);
+	ret = mlx5_pciconf_wait_on_flag(dev, 1);
+	if (ret)
+		return (ret);
+	return pci_read_config_dword(dev->pdev, dev->vsec_addr +
+	    PCI_DATA_OFFSET, data);
+}
+
+/*
+ * Write 'data' to 'offset' through the address/data register pair and
+ * wait for the flag bit to clear.  Bit 31 of 'offset' must be clear.
+ */
+static int mlx5_pciconf_write(struct mlx5_core_dev *dev,
+    unsigned int offset, u32 data)
+{
+	u32 address;
+
+	if (MLX5_EXTRACT(offset, 31, 1))
+		return -EINVAL;
+
+	/* Set flag to 0x1 */
+	address = MLX5_MERGE(offset, 1, PCI_FLAG_BIT_OFFS, 1);
+
+	pci_write_config_dword(dev->pdev, dev->vsec_addr +
+	    PCI_DATA_OFFSET, data);
+
+	pci_write_config_dword(dev->pdev, dev->vsec_addr +
+	    PCI_ADDR_OFFSET, address);
+
+	/* Wait for the flag to be cleared */
+	return mlx5_pciconf_wait_on_flag(dev, 0);
+
+}
+
+/*
+ * Take (LOCK) or release (UNLOCK) the semaphore at PCI_SEMAPHORE_OFFSET.
+ * Locking waits for the register to read zero, writes the current
+ * counter value and checks that it reads back.  Returns 0 on success,
+ * -EINVAL if no vendor capability was found (dev->vsec_addr == 0) and
+ * -EBUSY if the lock could not be taken within the retry limit.
+ */
+int mlx5_pciconf_cap9_sem(struct mlx5_core_dev *dev, int state)
+{
+	u32 counter = 0;
+	int retries = 0;
+	u32 lock_val;
+
+	/*
+	 * Without the capability the offsets below would be applied to
+	 * offset 0 of PCI configuration space, i.e. the standard header.
+	 */
+	if (dev->vsec_addr == 0)
+		return -EINVAL;
+
+	if (state == UNLOCK) {
+		pci_write_config_dword(dev->pdev, dev->vsec_addr +
+		    PCI_SEMAPHORE_OFFSET, 0);
+		return (0);
+	}
+	do {
+		if (retries > IFC_MAX_RETRIES * 10)
+			return -EBUSY;
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+		    PCI_SEMAPHORE_OFFSET, &lock_val);
+		if (lock_val != 0) {
+			retries++;
+			if (retries > IFC_MAX_RETRIES * 10)
+				return -EBUSY;
+			usleep_range(1000, 2000);
+			continue;
+		}
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+		    PCI_COUNTER_OFFSET, &counter);
+		pci_write_config_dword(dev->pdev, dev->vsec_addr +
+		    PCI_SEMAPHORE_OFFSET, counter);
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+		    PCI_SEMAPHORE_OFFSET, &lock_val);
+		retries++;
+	} while (counter != lock_val);
+	return 0;
+}
+
+#define MLX5_PROTECTED_CR_SPACE_DOMAIN 0x6
+/*
+ * Select address space 'space' in the control register.  Returns -EINVAL
+ * if the status field reads back as zero afterwards.
+ */
+static int mlx5_pciconf_set_addr_space(struct mlx5_core_dev *dev,
+    u16 space)
+{
+	u32 val;
+
+	pci_read_config_dword(dev->pdev, dev->vsec_addr +
+	    PCI_CTRL_OFFSET, &val);
+
+	val = MLX5_MERGE(val, space, PCI_SPACE_BIT_OFFS,
+	    PCI_SPACE_BIT_LEN);
+	pci_write_config_dword(dev->pdev, dev->vsec_addr +
+	    PCI_CTRL_OFFSET, val);
+
+	pci_read_config_dword(dev->pdev, dev->vsec_addr +
+	    PCI_CTRL_OFFSET, &val);
+
+	if (MLX5_EXTRACT(val, PCI_STATUS_BIT_OFFS,
+	    PCI_STATUS_BIT_LEN) == 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+#define MLX5_CR_SPACE_DOMAIN 0x2
+/*
+ * Locate capability CAP_ID and, holding the semaphore, check that
+ * MLX5_CR_SPACE_DOMAIN can be selected.  Sets dev->vsec_addr when found
+ * and returns the capability offset, or 0 if missing or not usable.
+ */
+static int mlx5_get_vendor_cap_addr(struct mlx5_core_dev *dev)
+{
+	int vend_cap;
+	int ret;
+
+	vend_cap = pci_find_capability(dev->pdev, CAP_ID);
+	if (!vend_cap)
+		return 0;
+	dev->vsec_addr = vend_cap;
+	ret = mlx5_pciconf_cap9_sem(dev, LOCK);
+	if (ret) {
+		mlx5_core_warn(dev,
+		    "pciconf_cap9_sem locking failure\n");
+		return 0;
+	}
+	if (mlx5_pciconf_set_addr_space(dev, MLX5_CR_SPACE_DOMAIN))
+		vend_cap = 0;
+	ret = mlx5_pciconf_cap9_sem(dev, UNLOCK);
+	if (ret)
+		mlx5_core_warn(dev,
+		    "pciconf_cap9_sem unlocking failure\n");
+	return vend_cap;
+}
+
+#define MLX5_SEMAPHORE_SPACE_DOMAIN 0xA
+/*
+ * Lock or unlock the semaphore at 'sem_space_address' in
+ * MLX5_SEMAPHORE_SPACE_DOMAIN: for LOCK the current counter value is
+ * written, otherwise zero, and the value is read back to verify.
+ * Returns 0 on success and a negative errno otherwise; -EBUSY means
+ * the read-back differed or the write timed out.
+ */
+int mlx5_pciconf_set_sem_addr_space(struct mlx5_core_dev *dev,
+    u32 sem_space_address, int state)
+{
+	u32 data, id = 0;
+	int ret;
+
+	if (dev->vsec_addr == 0)
+		return -EINVAL;
+
+	ret = mlx5_pciconf_set_addr_space(dev,
+	    MLX5_SEMAPHORE_SPACE_DOMAIN);
+	if (ret)
+		return (ret);
+
+	if (state == LOCK)
+		/* Get a unique ID based on the counter */
+		pci_read_config_dword(dev->pdev, dev->vsec_addr +
+		    PCI_COUNTER_OFFSET, &id);
+
+	/* Try to modify lock */
+	ret = mlx5_pciconf_write(dev, sem_space_address, id);
+	if (ret)
+		return (ret);
+
+	/* Verify lock was modified */
+	ret = mlx5_pciconf_read(dev, sem_space_address, &data);
+	if (ret)
+		return -EINVAL;
+
+	if (data != id)
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Probe the vendor capability and cache its offset (0 if unavailable). */
+void mlx5_vsec_init(struct mlx5_core_dev *dev)
+{
+	dev->vsec_addr = mlx5_get_vendor_cap_addr(dev);
+}
+
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_health.c b/sys/dev/mlx5/mlx5_core/mlx5_health.c
index 98e91245ca7..dbfcf02ab54 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_health.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_health.c
@@ -48,6 +48,8 @@ enum {
 enum {
 	MLX5_DROP_NEW_HEALTH_WORK,
 	MLX5_DROP_NEW_RECOVERY_WORK,
+	MLX5_SKIP_SW_RESET,
+	MLX5_SW_RESET_SEM_LOCKED,
 };
 
 enum {
@@ -59,6 +61,33 @@ enum {
 	MLX5_SENSOR_FW_SYND_RFR = 5,
 };
 
+static int lock_sem_sw_reset(struct mlx5_core_dev *dev, int state)
+{
+	int ret, err;
+
+	/* Lock GW access */
+	ret = mlx5_pciconf_cap9_sem(dev, LOCK);
+	if (ret) {
+		mlx5_core_warn(dev, "Timed out locking gateway %d, %d\n", state, ret);
+		return ret;
+	}
+
+	ret = mlx5_pciconf_set_sem_addr_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
+	if (ret && state == LOCK) {
+		if (ret == -EBUSY)
+			mlx5_core_dbg(dev, "SW reset FW semaphore already locked, another function will handle the reset\n");
+		else
+			mlx5_core_warn(dev, "SW reset semaphore lock return %d\n", ret);
+	}
+
+	/* Unlock GW access */
+	err = mlx5_pciconf_cap9_sem(dev, UNLOCK);
+	if (err)
+		mlx5_core_warn(dev, "Timed out unlocking gateway: state %d, err %d\n", state, err);
+
+	return ret;
+}
+
 static u8 get_nic_mode(struct mlx5_core_dev *dev)
 {
 	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
@@ -138,6 +167,7 @@ static void reset_fw_if_needed(struct mlx5_core_dev *dev)
 {
 	bool supported = (ioread32be(&dev->iseg->initializing) >>
 			  MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
+	struct mlx5_core_health *health = &dev->priv.health;
 	u32 cmdq_addr, fatal_error;
 
 	if (!supported)
@@ -151,7 +181,8 @@ static void reset_fw_if_needed(struct mlx5_core_dev *dev)
 	fatal_error = check_fatal_sensors(dev);
 	if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
 	    fatal_error == MLX5_SENSOR_NIC_DISABLED ||
-	    fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
+	    fatal_error == MLX5_SENSOR_NIC_SW_RESET ||
+	    test_bit(MLX5_SKIP_SW_RESET, &health->flags)) {
 		mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.");
 		return;
 	}
@@ -223,6 +254,7 @@ static void health_recover(struct work_struct *work)
 	struct delayed_work *dwork;
 	struct mlx5_core_dev *dev;
 	struct mlx5_priv *priv;
+	bool recover = true;
 	u8 nic_mode;
 
 	dwork = container_of(work, struct delayed_work, work);
@@ -232,7 +264,8 @@ static void health_recover(struct work_struct *work)
 
 	if (sensor_pci_no_comm(dev)) {
 		dev_err(&dev->pdev->dev, "health recovery flow aborted, PCI reads still not working\n");
-		return;
+		recover = false;
+		goto clear_sem;
 	}
 
 	nic_mode = get_nic_mode(dev);
@@ -245,11 +278,21 @@ static void health_recover(struct work_struct *work)
 	if (nic_mode != MLX5_NIC_IFC_DISABLED) {
 		dev_err(&dev->pdev->dev, "health recovery flow aborted, unexpected NIC IFC mode %d.\n",
 			nic_mode);
-		return;
+		recover = false;
 	}
 
-	dev_err(&dev->pdev->dev, "starting health recovery flow\n");
-	mlx5_recover_device(dev);
+clear_sem:
+	if (test_and_clear_bit(MLX5_SW_RESET_SEM_LOCKED, &health->flags)) {
+		mlx5_core_dbg(dev, "Unlocking FW reset semaphore\n");
+		lock_sem_sw_reset(dev, UNLOCK);
+	}
+
+	test_and_clear_bit(MLX5_SKIP_SW_RESET, &health->flags);
+
+	if (recover) {
+		dev_err(&dev->pdev->dev, "starting health recovery flow\n");
+		mlx5_recover_device(dev);
+	}
 }
 
 /* How much time to wait until health resetting the driver (in msecs) */
@@ -269,10 +312,29 @@ static void health_care(struct work_struct *work)
 	struct mlx5_core_dev *dev;
 	struct mlx5_priv *priv;
 	unsigned long flags;
+	int ret;
 
 	health = container_of(work, struct mlx5_core_health, work);
 	priv = container_of(health, struct mlx5_priv, health);
 	dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	if (mlx5_core_is_pf(dev)) {
+		ret = lock_sem_sw_reset(dev, LOCK);
+		if (!ret) {
+			mlx5_core_warn(dev, "Locked FW reset semaphore\n");
+			set_bit(MLX5_SW_RESET_SEM_LOCKED, &health->flags);
+		}
+		else if (ret == -EBUSY) {
+			/* sw reset will be skipped only in case we detect the
+			 * semaphore was already taken. In case of an error
+			 * while taking the semaphore we prefer to issue a
+			 * reset since longer cr-dump time and multiple resets
+			 * are better than a stuck fw.
+			 */
+			set_bit(MLX5_SKIP_SW_RESET, &health->flags);
+		}
+	}
+
 	mlx5_core_warn(dev, "handling bad device here\n");
 	mlx5_handle_bad_state(dev);
 	recover_delay = msecs_to_jiffies(get_recovery_delay(dev));
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c
index 8715924d336..e80d844610e 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_main.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c
@@ -873,6 +873,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 	struct pci_dev *pdev = dev->pdev;
 	int err;
 
+	mlx5_vsec_init(dev);
+
 	err = mlx5_query_hca_caps(dev);
 	if (err) {
 		dev_err(&pdev->dev, "query hca failed\n");
diff --git a/sys/modules/mlx5/Makefile b/sys/modules/mlx5/Makefile
index 573561e3a52..cb7d2c68d50 100644
--- a/sys/modules/mlx5/Makefile
+++ b/sys/modules/mlx5/Makefile
@@ -6,6 +6,7 @@ SRCS= \
 mlx5_alloc.c \
 mlx5_cmd.c \
 mlx5_cq.c \
+mlx5_crspace.c \
 mlx5_diagnostics.c \
 mlx5_eq.c \
 mlx5_fs_cmd.c \