diff --git a/sys/dev/mlx5/cq.h b/sys/dev/mlx5/cq.h
index 55386422efc..d5e167498fd 100644
--- a/sys/dev/mlx5/cq.h
+++ b/sys/dev/mlx5/cq.h
@@ -42,7 +42,7 @@ struct mlx5_core_cq {
 	int irqn;
 	void (*comp) (struct mlx5_core_cq *, struct mlx5_eqe *);
 	void (*event) (struct mlx5_core_cq *, int);
-	struct mlx5_uar *uar;
+	struct mlx5_uars_page *uar;
 	u32 cons_index;
 	unsigned arm_sn;
 	struct mlx5_rsc_debug *dbg;
diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h
index 3431193cdaf..69057d5f247 100644
--- a/sys/dev/mlx5/device.h
+++ b/sys/dev/mlx5/device.h
@@ -245,10 +245,22 @@ enum {
 };
 
 enum {
-	MLX5_BF_REGS_PER_PAGE = 4,
-	MLX5_MAX_UAR_PAGES = 1 << 8,
-	MLX5_NON_FP_BF_REGS_PER_PAGE = 2,
-	MLX5_MAX_UUARS = MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE,
+	MLX5_ADAPTER_PAGE_SHIFT = 12,
+	MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT,
+};
+
+enum {
+	MLX5_BFREGS_PER_UAR = 4,
+	MLX5_MAX_UARS = 1 << 8,
+	MLX5_NON_FP_BFREGS_PER_UAR = 2,
+	MLX5_FP_BFREGS_PER_UAR = MLX5_BFREGS_PER_UAR -
+				 MLX5_NON_FP_BFREGS_PER_UAR,
+	MLX5_MAX_BFREGS = MLX5_MAX_UARS *
+			  MLX5_NON_FP_BFREGS_PER_UAR,
+	MLX5_UARS_IN_PAGE = PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
+	MLX5_NON_FP_BFREGS_IN_PAGE = MLX5_NON_FP_BFREGS_PER_UAR * MLX5_UARS_IN_PAGE,
+	MLX5_MIN_DYN_BFREGS = 512,
+	MLX5_MAX_DYN_BFREGS = 1024,
 };
 
 enum {
@@ -407,11 +419,6 @@ enum {
 	MLX5_MAX_PAGE_SHIFT = 31
 };
 
-enum {
-	MLX5_ADAPTER_PAGE_SHIFT = 12,
-	MLX5_ADAPTER_PAGE_SIZE = 1 << MLX5_ADAPTER_PAGE_SHIFT,
-};
-
 enum {
 	MLX5_CAP_OFF_CMDIF_CSUM = 46,
 };
@@ -1146,16 +1153,6 @@ enum {
 	MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2,
 };
 
-enum {
-	MLX5_NUM_UUARS_PER_PAGE = MLX5_NON_FP_BF_REGS_PER_PAGE,
-	MLX5_DEF_TOT_UUARS = 8 * MLX5_NUM_UUARS_PER_PAGE,
-};
-
-enum {
-	NUM_DRIVER_UARS = 4,
-	NUM_LOW_LAT_UUARS = 4,
-};
-
 enum {
 	MLX5_CAP_PORT_TYPE_IB = 0x0,
 	MLX5_CAP_PORT_TYPE_ETH = 0x1,
diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h
index ec9c4f618f6..5d4f58d7e1f 100644
--- a/sys/dev/mlx5/driver.h
+++ b/sys/dev/mlx5/driver.h
@@ -219,38 +219,6 @@ enum {
 
 #define MLX5_PROT_MASK(link_mode) (1 << link_mode)
 
-struct mlx5_uuar_info {
-	struct mlx5_uar *uars;
-	int num_uars;
-	int num_low_latency_uuars;
-	unsigned long *bitmap;
-	unsigned int *count;
-	struct mlx5_bf *bfs;
-
-	/*
-	 * protect uuar allocation data structs
-	 */
-	struct mutex lock;
-	u32 ver;
-};
-
-struct mlx5_bf {
-	void __iomem *reg;
-	void __iomem *regreg;
-	int buf_size;
-	struct mlx5_uar *uar;
-	unsigned long offset;
-	int need_lock;
-	/* protect blue flame buffer selection when needed
-	 */
-	spinlock_t lock;
-
-	/* serialize 64 bit writes when done as two 32 bit accesses
-	 */
-	spinlock_t lock32;
-	int uuarn;
-};
-
 struct mlx5_cmd_first {
 	__be32 data[4];
 };
@@ -464,6 +432,39 @@ struct mlx5_core_rsc_common {
 	struct completion free;
 };
 
+struct mlx5_uars_page {
+	void __iomem *map;
+	bool wc;
+	u32 index;
+	struct list_head list;
+	unsigned int bfregs;
+	unsigned long *reg_bitmap; /* for non fast path bf regs */
+	unsigned long *fp_bitmap;
+	unsigned int reg_avail;
+	unsigned int fp_avail;
+	struct kref ref_count;
+	struct mlx5_core_dev *mdev;
+};
+
+struct mlx5_bfreg_head {
+	/* protect blue flame registers allocations */
+	struct mutex lock;
+	struct list_head list;
+};
+
+struct mlx5_bfreg_data {
+	struct mlx5_bfreg_head reg_head;
+	struct mlx5_bfreg_head wc_head;
+};
+
+struct mlx5_sq_bfreg {
+	void __iomem *map;
+	struct mlx5_uars_page *up;
+	bool wc;
+	u32 index;
+	unsigned int offset;
+};
+
 struct mlx5_core_srq {
 	struct mlx5_core_rsc_common common; /* must be first */
 	u32 srqn;
@@ -489,13 +490,6 @@ struct mlx5_eq_table {
 	spinlock_t lock;
 };
 
-struct mlx5_uar {
-	u32 index;
-	void __iomem *bf_map;
-	void __iomem *map;
-};
-
-
 struct mlx5_core_health {
 	struct mlx5_health_buffer __iomem *health;
 	__be32 __iomem *health_counter;
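/*
 * Editor's sketch (not part of the change): the geometry encoded by the new
 * constants. Each UAR carries MLX5_BFREGS_PER_UAR (4) blue flame registers;
 * the first MLX5_NON_FP_BFREGS_PER_UAR (2) are regular doorbell registers and
 * the remainder are fast path registers. The helper names below are
 * hypothetical.
 */
static inline unsigned int bfreg_to_uar_slot(unsigned int dbi)
{
	/* which UAR within the system page owns blue flame register 'dbi' */
	return dbi / MLX5_BFREGS_PER_UAR;
}

static inline bool bfreg_is_fast_path(unsigned int dbi)
{
	/* fast path registers occupy the upper slots of each UAR */
	return (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
}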
@@ -578,12 +572,9 @@ struct mlx5_priv {
 	char name[MLX5_MAX_NAME_LEN];
 	struct mlx5_eq_table eq_table;
 	struct msix_entry *msix_arr;
-	struct mlx5_uuar_info uuari;
 	MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock);
 	int disable_irqs;
-	struct io_mapping *bf_mapping;
-
 	/* pages stuff */
 	struct workqueue_struct *pg_wq;
 	struct rb_root page_root;
@@ -632,6 +623,9 @@ struct mlx5_priv {
 
 	struct mlx5_pme_stats pme_stats;
 	struct mlx5_eswitch *eswitch;
+
+	struct mlx5_bfreg_data bfregs;
+	struct mlx5_uars_page *uar;
 };
 
 enum mlx5_device_state {
@@ -976,10 +970,11 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
 			  void *out, int out_size);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
-int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
-int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
-int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
-void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar);
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+		     bool map_wc, bool fast_path);
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
+struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
+void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
@@ -1049,7 +1044,7 @@ struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vector, enum mlx5_cmd_mode mode);
 void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type);
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-		       int nent, u64 mask, struct mlx5_uar *uar);
+		       int nent, u64 mask);
 int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_start_eqs(struct mlx5_core_dev *dev);
 int mlx5_stop_eqs(struct mlx5_core_dev *dev);
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_cq.c b/sys/dev/mlx5/mlx5_core/mlx5_cq.c
index 929fd1d6bc3..248d8c9d75c 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_cq.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_cq.c
@@ -149,6 +149,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 		goto err_cmd;
 
 	cq->pid = curthread->td_proc->p_pid;
+	cq->uar = dev->priv.uar;
 
 	return 0;
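/*
 * Usage sketch for the replacement API (illustrative only; the functions and
 * their error handling are hypothetical): a consumer allocates one doorbell
 * register at attach time, programs bfreg.index into its WQ context, writes
 * doorbells through bfreg.map, and frees it on detach.
 */
static int example_setup_doorbell(struct mlx5_core_dev *mdev,
				  struct mlx5_sq_bfreg *bfreg)
{
	/* request a write combining mapping; mlx5_alloc_bfreg() itself
	 * falls back to a non-WC mapping on -EAGAIN
	 */
	return mlx5_alloc_bfreg(mdev, bfreg, true, false);
}

static void example_teardown_doorbell(struct mlx5_core_dev *mdev,
				      struct mlx5_sq_bfreg *bfreg)
{
	mlx5_free_bfreg(mdev, bfreg);
}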
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_eq.c b/sys/dev/mlx5/mlx5_core/mlx5_eq.c
index 9e744e132c3..2dc134d0f9c 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_eq.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_eq.c
@@ -419,7 +419,7 @@ static void init_eq_buf(struct mlx5_eq *eq)
 }
 
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
-		       int nent, u64 mask, struct mlx5_uar *uar)
+		       int nent, u64 mask)
 {
 	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 	struct mlx5_priv *priv = &dev->priv;
@@ -454,7 +454,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
 	MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
-	MLX5_SET(eqc, eqc, uar_page, uar->index);
+	MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
 	MLX5_SET(eqc, eqc, intr, vecidx);
 	MLX5_SET(eqc, eqc, log_page_size,
 		 eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
@@ -466,7 +466,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
 	eq->irqn = vecidx;
 	eq->dev = dev;
-	eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
+	eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
 	err = request_irq(priv->msix_arr[vecidx].vector, mlx5_msix_handler, 0,
 			  "mlx5_core", eq);
 	if (err)
@@ -569,8 +569,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	}
 
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
-				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
-				 &dev->priv.uuari.uars[0]);
+				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
 		return err;
@@ -579,8 +578,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	mlx5_cmd_use_events(dev);
 
 	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-				 MLX5_NUM_ASYNC_EQE, async_event_mask,
-				 &dev->priv.uuari.uars[0]);
+				 MLX5_NUM_ASYNC_EQE, async_event_mask);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
 		goto err1;
@@ -589,8 +587,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	err = mlx5_create_map_eq(dev, &table->pages_eq, MLX5_EQ_VEC_PAGES,
 				 /* TODO: sriov max_vf + */ 1,
-				 1 << MLX5_EVENT_TYPE_PAGE_REQUEST,
-				 &dev->priv.uuari.uars[0]);
+				 1 << MLX5_EVENT_TYPE_PAGE_REQUEST);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
 		goto err2;
 	}
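/*
 * Sketch of the invariant the EQ changes rely on (editor's illustration):
 * every EQ now uses the single driver-owned UARs page, dev->priv.uar,
 * instead of a caller-supplied struct mlx5_uar.
 */
static inline void __iomem *example_eq_doorbell(struct mlx5_core_dev *dev)
{
	/* same expression as in mlx5_create_map_eq() above */
	return dev->priv.uar->map + MLX5_EQ_DOORBEL_OFFSET;
}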
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c
index 843227f928f..27326d69226 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_main.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2013-2019, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -455,6 +455,12 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 	/* disable cmdif checksum */
 	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
 
+	/* Enable 4K UAR only when HCA supports it and page size is bigger
+	 * than 4K.
+	 */
+	if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096)
+		MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
+
 	/* enable drain sigerr */
 	MLX5_SET(cmd_hca_cap, set_hca_cap, drain_sigerr, 1);
 
@@ -650,8 +656,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev)
 
 		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
 		err = mlx5_create_map_eq(dev, eq,
-					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
-					 &dev->priv.uuari.uars[0]);
+					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0);
 		if (err) {
 			kfree(eq);
 			goto clean;
@@ -670,22 +675,6 @@ clean:
 	return err;
 }
 
-static int map_bf_area(struct mlx5_core_dev *dev)
-{
-	resource_size_t bf_start = pci_resource_start(dev->pdev, 0);
-	resource_size_t bf_len = pci_resource_len(dev->pdev, 0);
-
-	dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len);
-
-	return dev->priv.bf_mapping ? 0 : -ENOMEM;
-}
-
-static void unmap_bf_area(struct mlx5_core_dev *dev)
-{
-	if (dev->priv.bf_mapping)
-		io_mapping_free(dev->priv.bf_mapping);
-}
-
 static inline int fw_initializing(struct mlx5_core_dev *dev)
 {
 	return ioread32be(&dev->iseg->initializing) >> 31;
@@ -1117,22 +1106,23 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_stop_poll;
 	}
 
-	err = mlx5_enable_msix(dev);
-	if (err) {
-		mlx5_core_err(dev, "enable msix failed\n");
+	dev->priv.uar = mlx5_get_uars_page(dev);
+	if (IS_ERR(dev->priv.uar)) {
+		mlx5_core_err(dev, "Failed allocating uar, aborting\n");
+		err = PTR_ERR(dev->priv.uar);
 		goto err_cleanup_once;
 	}
 
-	err = mlx5_alloc_uuars(dev, &priv->uuari);
+	err = mlx5_enable_msix(dev);
 	if (err) {
-		mlx5_core_err(dev, "Failed allocating uar, aborting\n");
-		goto err_disable_msix;
+		mlx5_core_err(dev, "enable msix failed\n");
+		goto err_cleanup_uar;
 	}
 
 	err = mlx5_start_eqs(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to start pages and async EQs\n");
-		goto err_free_uar;
+		goto err_disable_msix;
 	}
 
 	err = alloc_comp_eqs(dev);
@@ -1141,9 +1131,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_stop_eqs;
 	}
 
-	if (map_bf_area(dev))
-		mlx5_core_err(dev, "Failed to map blue flame area\n");
-
 	err = mlx5_init_fs(dev);
 	if (err) {
 		mlx5_core_err(dev, "flow steering init %d\n", err);
@@ -1185,17 +1172,16 @@ err_fs:
 
 err_free_comp_eqs:
 	free_comp_eqs(dev);
-	unmap_bf_area(dev);
 
 err_stop_eqs:
 	mlx5_stop_eqs(dev);
 
-err_free_uar:
-	mlx5_free_uuars(dev, &priv->uuari);
-
 err_disable_msix:
 	mlx5_disable_msix(dev);
 
+err_cleanup_uar:
+	mlx5_put_uars_page(dev, dev->priv.uar);
+
 err_cleanup_once:
 	if (boot)
 		mlx5_cleanup_once(dev);
@@ -1248,12 +1234,11 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	mlx5_fpga_device_stop(dev);
 	mlx5_mpfs_destroy(dev);
 	mlx5_cleanup_fs(dev);
-	unmap_bf_area(dev);
 	mlx5_wait_for_reclaim_vfs_pages(dev);
 	free_comp_eqs(dev);
 	mlx5_stop_eqs(dev);
-	mlx5_free_uuars(dev, &priv->uuari);
 	mlx5_disable_msix(dev);
+	mlx5_put_uars_page(dev, dev->priv.uar);
 	if (cleanup)
 		mlx5_cleanup_once(dev);
 	mlx5_stop_health_poll(dev, cleanup);
@@ -1586,6 +1571,12 @@ static int init_one(struct pci_dev *pdev,
 	spin_lock_init(&priv->ctx_lock);
 	mutex_init(&dev->pci_status_mutex);
 	mutex_init(&dev->intf_state_mutex);
+
+	mutex_init(&priv->bfregs.reg_head.lock);
+	mutex_init(&priv->bfregs.wc_head.lock);
+	INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
+	INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
+
 	mtx_init(&dev->dump_lock, "mlx5dmp", NULL, MTX_DEF | MTX_NEW);
 	err = mlx5_pci_init(dev, priv);
 	if (err) {
diff --git a/sys/dev/mlx5/mlx5_core/mlx5_uar.c b/sys/dev/mlx5/mlx5_core/mlx5_uar.c
index 43a95d64bc8..1a662ade0ae 100644
--- a/sys/dev/mlx5/mlx5_core/mlx5_uar.c
+++ b/sys/dev/mlx5/mlx5_core/mlx5_uar.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2013-2017, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -33,171 +33,291 @@
 
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
-	u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0};
 	u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0};
 	int err;
 
 	MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
-
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (err)
-		return err;
-
-	*uarn = MLX5_GET(alloc_uar_out, out, uar);
-
-	return 0;
+	if (!err)
+		*uarn = MLX5_GET(alloc_uar_out, out, uar);
+	return err;
 }
 EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
 
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0};
 	u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0};
 
 	MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
 	MLX5_SET(dealloc_uar_in, in, uar, uarn);
-
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_cmd_free_uar);
 
-static int need_uuar_lock(int uuarn)
+static int uars_per_sys_page(struct mlx5_core_dev *mdev)
 {
-	int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
-
-	if (uuarn == 0 || tot_uuars - NUM_LOW_LAT_UUARS)
-		return 0;
+	if (MLX5_CAP_GEN(mdev, uar_4k))
+		return MLX5_CAP_GEN(mdev, num_of_uars_per_page);
 
 	return 1;
 }
 
-int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
+static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
 {
-	int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
-	struct mlx5_bf *bf;
-	phys_addr_t addr;
-	int err;
+	u32 system_page_index;
+
+	if (MLX5_CAP_GEN(mdev, uar_4k))
+		system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT);
+	else
+		system_page_index = index;
+
+	return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index;
+}
+
+static void up_rel_func(struct kref *kref)
+{
+	struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
+
+	list_del(&up->list);
+	iounmap(up->map);
+	if (mlx5_cmd_free_uar(up->mdev, up->index))
+		mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
+	bitmap_free(up->reg_bitmap);
+	bitmap_free(up->fp_bitmap);
+	kfree(up);
+}
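/*
 * Worked example for uar2pfn() (editor's illustration): on a host with 64K
 * pages (PAGE_SHIFT == 16) and 4K HCA UARs (MLX5_ADAPTER_PAGE_SHIFT == 12),
 * sixteen consecutive UAR indices share one system page, so UAR index 35
 * lives in system page 35 >> 4 == 2 of BAR 0. Without uar_4k the UAR index
 * is itself the page index.
 */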
+
+static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
+					      bool map_wc)
+{
+	struct mlx5_uars_page *up;
+	int err = -ENOMEM;
+	phys_addr_t pfn;
+	int bfregs;
 	int i;
 
-	uuari->num_uars = NUM_DRIVER_UARS;
-	uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS;
+	bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
+	up = kzalloc(sizeof(*up), GFP_KERNEL);
+	if (!up)
+		return ERR_PTR(err);
 
-	mutex_init(&uuari->lock);
-	uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL);
+	up->mdev = mdev;
+	up->reg_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
+	if (!up->reg_bitmap)
+		goto error1;
 
-	uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL);
+	up->fp_bitmap = bitmap_zalloc(bfregs, GFP_KERNEL);
+	if (!up->fp_bitmap)
+		goto error1;
 
-	uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap),
-				GFP_KERNEL);
+	for (i = 0; i < bfregs; i++)
+		if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR)
+			set_bit(i, up->reg_bitmap);
+		else
+			set_bit(i, up->fp_bitmap);
 
-	uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL);
+	up->bfregs = bfregs;
+	up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+	up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
 
-	for (i = 0; i < uuari->num_uars; i++) {
-		err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index);
-		if (err)
-			goto out_count;
-
-		addr = pci_resource_start(dev->pdev, 0) +
-		       ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT);
-		uuari->uars[i].map = ioremap(addr, PAGE_SIZE);
-		if (!uuari->uars[i].map) {
-			mlx5_cmd_free_uar(dev, uuari->uars[i].index);
-			err = -ENOMEM;
-			goto out_count;
-		}
-		mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n",
-			      uuari->uars[i].index, uuari->uars[i].map);
-	}
-
-	for (i = 0; i < tot_uuars; i++) {
-		bf = &uuari->bfs[i];
-
-		bf->buf_size = (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) / 2;
-		bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE];
-		bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map;
-		bf->reg = NULL; /* Add WC support */
-		bf->offset = (i % MLX5_BF_REGS_PER_PAGE) *
-			     (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) +
-			     MLX5_BF_OFFSET;
-		bf->need_lock = need_uuar_lock(i);
-		spin_lock_init(&bf->lock);
-		spin_lock_init(&bf->lock32);
-		bf->uuarn = i;
-	}
-
-	return 0;
-
-out_count:
-	for (i--; i >= 0; i--) {
-		iounmap(uuari->uars[i].map);
-		mlx5_cmd_free_uar(dev, uuari->uars[i].index);
-	}
-	kfree(uuari->count);
-
-	kfree(uuari->bitmap);
-
-	kfree(uuari->bfs);
-
-	kfree(uuari->uars);
-	return err;
-}
-
-int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
-{
-	int i = uuari->num_uars;
-
-	for (i--; i >= 0; i--) {
-		iounmap(uuari->uars[i].map);
-		mlx5_cmd_free_uar(dev, uuari->uars[i].index);
-	}
-
-	kfree(uuari->count);
-	kfree(uuari->bitmap);
-	kfree(uuari->bfs);
-	kfree(uuari->uars);
-
-	return 0;
-}
-
-int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar)
-{
-	phys_addr_t pfn;
-	phys_addr_t uar_bar_start;
-	int err;
-
-	err = mlx5_cmd_alloc_uar(mdev, &uar->index);
+	err = mlx5_cmd_alloc_uar(mdev, &up->index);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
-		return err;
+		goto error1;
 	}
 
-	uar_bar_start = pci_resource_start(mdev->pdev, 0);
-	pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index;
-	uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
-	if (!uar->map) {
-		mlx5_core_warn(mdev, "ioremap() failed, %d\n", err);
-		err = -ENOMEM;
-		goto err_free_uar;
+	pfn = uar2pfn(mdev, up->index);
+	if (map_wc) {
+		up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
+		if (!up->map) {
+			err = -EAGAIN;
+			goto error2;
+		}
+	} else {
+		up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+		if (!up->map) {
+			err = -ENOMEM;
+			goto error2;
+		}
 	}
+	kref_init(&up->ref_count);
+	mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n",
+		      up->index, up->bfregs);
+	return up;
 
-	if (mdev->priv.bf_mapping)
-		uar->bf_map = io_mapping_map_wc(mdev->priv.bf_mapping,
-						uar->index << PAGE_SHIFT,
-						PAGE_SIZE);
+error2:
+	if (mlx5_cmd_free_uar(mdev, up->index))
+		mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index);
+error1:
+	bitmap_free(up->fp_bitmap);
+	bitmap_free(up->reg_bitmap);
+	kfree(up);
+	return ERR_PTR(err);
+}
+
+struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_uars_page *ret;
+
+	mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+	if (!list_empty(&mdev->priv.bfregs.reg_head.list)) {
+		ret = list_first_entry(&mdev->priv.bfregs.reg_head.list,
+				       struct mlx5_uars_page, list);
+		kref_get(&ret->ref_count);
+		goto out;
+	}
+	ret = alloc_uars_page(mdev, false);
+	if (IS_ERR(ret))
+		goto out;
+	list_add(&ret->list, &mdev->priv.bfregs.reg_head.list);
+out:
+	mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(mlx5_get_uars_page);
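/*
 * Lifetime sketch (illustrative; error handling elided): the UARs page is
 * reference counted, so nested users simply pair get with put, and the last
 * put triggers up_rel_func() to unmap and free the UAR.
 */
static int example_use_uars_page(struct mlx5_core_dev *mdev)
{
	struct mlx5_uars_page *up;

	up = mlx5_get_uars_page(mdev);
	if (IS_ERR(up))
		return PTR_ERR(up);
	/* ... program uar_page fields from up->index, ring via up->map ... */
	mlx5_put_uars_page(mdev, up);
	return 0;
}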
+
+void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up)
+{
+	mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+	kref_put(&up->ref_count, up_rel_func);
+	mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
+}
+EXPORT_SYMBOL(mlx5_put_uars_page);
+
+static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi)
+{
+	/* return the offset in bytes from the start of the page to the
+	 * blue flame area of the UAR
+	 */
+	return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE +
+	       (dbi % MLX5_BFREGS_PER_UAR) *
+	       (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET;
+}
+
+static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+		       bool map_wc, bool fast_path)
+{
+	struct mlx5_bfreg_data *bfregs;
+	struct mlx5_uars_page *up;
+	struct list_head *head;
+	unsigned long *bitmap;
+	unsigned int *avail;
+	struct mutex *lock; /* pointer to right mutex */
+	int dbi;
+
+	bfregs = &mdev->priv.bfregs;
+	if (map_wc) {
+		head = &bfregs->wc_head.list;
+		lock = &bfregs->wc_head.lock;
+	} else {
+		head = &bfregs->reg_head.list;
+		lock = &bfregs->reg_head.lock;
+	}
+	mutex_lock(lock);
+	if (list_empty(head)) {
+		up = alloc_uars_page(mdev, map_wc);
+		if (IS_ERR(up)) {
+			mutex_unlock(lock);
+			return PTR_ERR(up);
+		}
+		list_add(&up->list, head);
+	} else {
+		up = list_entry(head->next, struct mlx5_uars_page, list);
+		kref_get(&up->ref_count);
+	}
+	if (fast_path) {
+		bitmap = up->fp_bitmap;
+		avail = &up->fp_avail;
+	} else {
+		bitmap = up->reg_bitmap;
+		avail = &up->reg_avail;
+	}
+	dbi = find_first_bit(bitmap, up->bfregs);
+	clear_bit(dbi, bitmap);
+	(*avail)--;
+	if (!(*avail))
+		list_del(&up->list);
+
+	bfreg->map = up->map + map_offset(mdev, dbi);
+	bfreg->up = up;
+	bfreg->wc = map_wc;
+	bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR;
+	mutex_unlock(lock);
 
 	return 0;
+}
 
-err_free_uar:
-	mlx5_cmd_free_uar(mdev, uar->index);
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+		     bool map_wc, bool fast_path)
+{
+	int err;
+
+	err = alloc_bfreg(mdev, bfreg, map_wc, fast_path);
+	if (!err)
+		return 0;
+
+	if (err == -EAGAIN && map_wc)
+		return alloc_bfreg(mdev, bfreg, false, fast_path);
 
 	return err;
 }
-EXPORT_SYMBOL(mlx5_alloc_map_uar);
+EXPORT_SYMBOL(mlx5_alloc_bfreg);
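/*
 * Worked example for map_offset() and the index math in alloc_bfreg()
 * (editor's illustration, assuming log_bf_reg_size == 10): bfreg dbi == 5
 * sits in UAR 5 / 4 == 1 of the system page, so its doorbell address is
 * up->map + 1 * MLX5_ADAPTER_PAGE_SIZE + (5 % 4) * 1024 + MLX5_BF_OFFSET,
 * and the index written into WQ contexts is up->index + 1.
 */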
 
-void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar)
+static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev,
+					   struct mlx5_uars_page *up,
+					   struct mlx5_sq_bfreg *bfreg)
 {
-	io_mapping_unmap(uar->bf_map);
-	iounmap(uar->map);
-	mlx5_cmd_free_uar(mdev, uar->index);
+	unsigned int uar_idx;
+	unsigned int bfreg_idx;
+	unsigned int bf_reg_size;
+
+	bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size);
+
+	uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT;
+	bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size;
+
+	return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx;
 }
-EXPORT_SYMBOL(mlx5_unmap_free_uar);
+
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg)
+{
+	struct mlx5_bfreg_data *bfregs;
+	struct mlx5_uars_page *up;
+	struct mutex *lock; /* pointer to right mutex */
+	unsigned int dbi;
+	bool fp;
+	unsigned int *avail;
+	unsigned long *bitmap;
+	struct list_head *head;
+
+	bfregs = &mdev->priv.bfregs;
+	if (bfreg->wc) {
+		head = &bfregs->wc_head.list;
+		lock = &bfregs->wc_head.lock;
+	} else {
+		head = &bfregs->reg_head.list;
+		lock = &bfregs->reg_head.lock;
+	}
+	up = bfreg->up;
+	dbi = addr_to_dbi_in_syspage(mdev, up, bfreg);
+	fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
+	if (fp) {
+		avail = &up->fp_avail;
+		bitmap = up->fp_bitmap;
+	} else {
+		avail = &up->reg_avail;
+		bitmap = up->reg_bitmap;
+	}
+	mutex_lock(lock);
+	(*avail)++;
+	set_bit(dbi, bitmap);
+	if (*avail == 1)
+		list_add_tail(&up->list, head);
+
+	kref_put(&up->ref_count, up_rel_func);
+	mutex_unlock(lock);
+}
+EXPORT_SYMBOL(mlx5_free_bfreg);
diff --git a/sys/dev/mlx5/mlx5_en/en.h b/sys/dev/mlx5/mlx5_en/en.h
index 4c3d696ed41..b7c05264008 100644
--- a/sys/dev/mlx5/mlx5_en/en.h
+++ b/sys/dev/mlx5/mlx5_en/en.h
@@ -827,7 +827,7 @@ struct mlx5e_sq {
 
 	/* read only */
 	struct mlx5_wq_cyc wq;
-	struct mlx5_uar uar;
+	void __iomem *uar_map;
 	struct ifnet *ifp;
 	u32 sqn;
 	u32 bf_buf_size;
@@ -1001,7 +1001,6 @@ struct mlx5e_priv {
 #define	PRIV_LOCKED(priv) sx_xlocked(&(priv)->state_lock)
 #define	PRIV_ASSERT_LOCKED(priv) sx_assert(&(priv)->state_lock, SA_XLOCKED)
 	struct sx state_lock; /* Protects Interface state */
-	struct mlx5_uar cq_uar;
 	u32 pdn;
 	u32 tdn;
 	struct mlx5_core_mr mr;
@@ -1055,6 +1054,8 @@ struct mlx5e_priv {
 	struct mlx5e_dcbx dcbx;
 	bool sw_is_port_buf_owner;
 
+	struct mlx5_sq_bfreg bfreg;
+
 	struct pfil_head *pfil;
 	struct mlx5e_channel channel[];
 };
@@ -1127,7 +1128,7 @@ int mlx5e_add_all_vlan_rules(struct mlx5e_priv *priv);
 void mlx5e_del_all_vlan_rules(struct mlx5e_priv *priv);
 
 static inline void
-mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe, int bf_sz)
+mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe)
 {
 	u16 ofst = MLX5_BF_OFFSET + sq->bf_offset;
 
@@ -1142,16 +1143,8 @@ mlx5e_tx_notify_hw(struct mlx5e_sq *sq, u32 *wqe, int bf_sz)
 	 */
 	wmb();
 
-	if (bf_sz) {
-		__iowrite64_copy(sq->uar.bf_map + ofst, wqe, bf_sz);
-
-		/* flush the write-combining mapped buffer */
-		wmb();
-
-	} else {
-		mlx5_write64(wqe, sq->uar.map + ofst,
-		    MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
-	}
+	mlx5_write64(wqe, sq->uar_map + ofst,
+	    MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
 
 	sq->bf_offset ^= sq->bf_buf_size;
 }
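/*
 * Editor's note on the hunk above: the blue flame copy path
 * (__iowrite64_copy() into sq->uar.bf_map) is removed, so every ring is now
 * a plain 64 bit doorbell write. Sketch of the call site pattern repeated
 * in the TX paths below (hypothetical helper):
 */
static inline void example_ring_doorbell(struct mlx5e_sq *sq)
{
	if (sq->doorbell.d64 != 0) {
		mlx5e_tx_notify_hw(sq, sq->doorbell.d32);
		sq->doorbell.d64 = 0;
	}
}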
diff --git a/sys/dev/mlx5/mlx5_en/en_rl.h b/sys/dev/mlx5/mlx5_en/en_rl.h
index 6ad2f26e5a1..f30e8ba8cc0 100644
--- a/sys/dev/mlx5/mlx5_en/en_rl.h
+++ b/sys/dev/mlx5/mlx5_en/en_rl.h
@@ -156,7 +156,6 @@ struct mlx5e_rl_priv_data {
 	struct mlx5e_rl_channel_param chan_param;
 	struct mlx5e_rl_params param;
 	struct mlx5e_rl_stats stats;
-	struct mlx5_uar sq_uar;
 	struct mlx5e_rl_worker *workers;
 	struct mlx5e_priv *priv;
 	uint64_t *rate_limit_table;
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
index 1e6562fed66..9fc3af186c0 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
@@ -1660,14 +1660,12 @@ mlx5e_create_sq(struct mlx5e_channel *c,
 	    &sq->dma_tag)))
 		goto done;
 
-	err = mlx5_alloc_map_uar(mdev, &sq->uar);
-	if (err)
-		goto err_free_dma_tag;
+	sq->uar_map = priv->bfreg.map;
 
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
 	    &sq->wq_ctrl);
 	if (err)
-		goto err_unmap_free_uar;
+		goto err_free_dma_tag;
 
 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
@@ -1693,9 +1691,6 @@ mlx5e_create_sq(struct mlx5e_channel *c,
 err_sq_wq_destroy:
 	mlx5_wq_destroy(&sq->wq_ctrl);
 
-err_unmap_free_uar:
-	mlx5_unmap_free_uar(mdev, &sq->uar);
-
 err_free_dma_tag:
 	bus_dma_tag_destroy(sq->dma_tag);
 done:
@@ -1710,7 +1705,6 @@ mlx5e_destroy_sq(struct mlx5e_sq *sq)
 
 	mlx5e_free_sq_db(sq);
 	mlx5_wq_destroy(&sq->wq_ctrl);
-	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
 	bus_dma_tag_destroy(sq->dma_tag);
 }
 
@@ -1742,7 +1736,7 @@ mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
 
 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
-	MLX5_SET(wq, wq, uar_page, sq->uar.index);
+	MLX5_SET(wq, wq, uar_page, sq->priv->bfreg.index);
 	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
 	    PAGE_SHIFT);
 	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
@@ -1849,7 +1843,7 @@ mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
 done:
 	/* Check if we need to write the doorbell */
 	if (likely(sq->doorbell.d64 != 0)) {
-		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
+		mlx5e_tx_notify_hw(sq, sq->doorbell.d32);
 		sq->doorbell.d64 = 0;
 	}
 }
@@ -1990,7 +1984,6 @@ mlx5e_create_cq(struct mlx5e_priv *priv,
 	mcq->comp = comp;
 	mcq->event = mlx5e_cq_error_event;
 	mcq->irqn = irqn;
-	mcq->uar = &priv->cq_uar;
 
 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
@@ -2037,7 +2030,6 @@ mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
 
 	mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
 
 	MLX5_SET(cqc, cqc, c_eqn, eqn);
-	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
 	    PAGE_SHIFT);
 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
@@ -2363,7 +2355,7 @@ mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 {
 	void *cqc = param->cqc;
 
-	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
+	MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
 }
 
 static void
@@ -3802,7 +3794,7 @@ mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
 
 	sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
 	sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
-	mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
+	mlx5e_tx_notify_hw(sq, sq->doorbell.d32);
 	sq->doorbell.d64 = 0;
 }
 
@@ -4467,15 +4459,10 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 	/* reuse mlx5core's watchdog workqueue */
 	priv->wq = mdev->priv.health.wq_watchdog;
 
-	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
-	if (err) {
-		mlx5_en_err(ifp, "mlx5_alloc_map_uar failed, %d\n", err);
-		goto err_free_wq;
-	}
 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
 	if (err) {
 		mlx5_en_err(ifp, "mlx5_core_alloc_pd failed, %d\n", err);
-		goto err_unmap_free_uar;
+		goto err_free_wq;
 	}
 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
 	if (err) {
@@ -4488,6 +4475,11 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 		mlx5_en_err(ifp, "mlx5e_create_mkey failed, %d\n", err);
 		goto err_dealloc_transport_domain;
 	}
+	err = mlx5_alloc_bfreg(mdev, &priv->bfreg, false, false);
+	if (err) {
+		mlx5_en_err(ifp, "alloc bfreg failed, %d\n", err);
+		goto err_create_mkey;
+	}
 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
 
 	/* check if we should generate a random MAC address */
@@ -4500,7 +4492,7 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 	err = mlx5e_rl_init(priv);
 	if (err) {
 		mlx5_en_err(ifp, "mlx5e_rl_init failed, %d\n", err);
-		goto err_create_mkey;
+		goto err_alloc_bfreg;
 	}
 
 	err = mlx5e_tls_init(priv);
@@ -4604,6 +4596,9 @@ mlx5e_create_ifp(struct mlx5_core_dev *mdev)
 err_rl_init:
 	mlx5e_rl_cleanup(priv);
 
+err_alloc_bfreg:
+	mlx5_free_bfreg(mdev, &priv->bfreg);
+
 err_create_mkey:
 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
 
@@ -4613,9 +4608,6 @@ err_dealloc_transport_domain:
 
 err_dealloc_pd:
 	mlx5_core_dealloc_pd(mdev, priv->pdn);
 
-err_unmap_free_uar:
-	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
-
 err_free_wq:
 	flush_workqueue(priv->wq);
@@ -4693,10 +4685,10 @@ mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
 	sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
 	sysctl_ctx_free(&priv->sysctl_ctx);
 
+	mlx5_free_bfreg(priv->mdev, &priv->bfreg);
 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
-	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
 	mlx5e_disable_async_events(priv);
 	flush_workqueue(priv->wq);
 	mlx5e_priv_static_destroy(priv, mdev->priv.eq_table.num_comp_vectors);
@@ -4748,7 +4740,7 @@ mlx5_en_debugnet_transmit(struct ifnet *dev, struct mbuf *m)
 	}
 
 	if (likely(sq->doorbell.d64 != 0)) {
-		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
+		mlx5e_tx_notify_hw(sq, sq->doorbell.d32);
 		sq->doorbell.d64 = 0;
 	}
 
 	return (err);
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
index 0e34403f2bf..7291253b816 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rl.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016-2020 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -116,7 +116,7 @@ mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
 		goto done;
 
 	/* use shared UAR */
-	sq->uar = priv->rl.sq_uar;
+	sq->uar_map = priv->bfreg.map;
 
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
 	    &sq->wq_ctrl);
@@ -751,15 +751,10 @@ mlx5e_rl_init(struct mlx5e_priv *priv)
 
 	sx_init(&rl->rl_sxlock, "ratelimit-sxlock");
 
-	/* allocate shared UAR for SQs */
-	error = mlx5_alloc_map_uar(priv->mdev, &rl->sq_uar);
-	if (error)
-		goto done;
-
 	/* open own TIS domain for ratelimit SQs */
 	error = mlx5e_rl_open_tis(priv);
 	if (error)
-		goto err_uar;
+		goto done;
 
 	/* setup default value for parameters */
 	mlx5e_rl_set_default_params(&rl->param, priv->mdev);
@@ -861,8 +856,6 @@ mlx5e_rl_init(struct mlx5e_priv *priv)
 
 	return (0);
 
-err_uar:
-	mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar);
 done:
 	sysctl_ctx_free(&rl->ctx);
 	sx_destroy(&rl->rl_sxlock);
@@ -974,9 +967,6 @@ mlx5e_rl_cleanup(struct mlx5e_priv *priv)
 
 	mlx5e_rl_reset_rates(rl);
 
-	/* free shared UAR for SQs */
-	mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar);
-
 	/* close TIS domain */
 	mlx5e_rl_close_tis(priv);
 
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
index 0da68fc5ac2..c8dd21f6aad 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c
@@ -820,7 +820,7 @@ mlx5e_xmit_locked(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb)
 
 	/* Check if we need to write the doorbell */
 	if (likely(sq->doorbell.d64 != 0)) {
-		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
+		mlx5e_tx_notify_hw(sq, sq->doorbell.d32);
 		sq->doorbell.d64 = 0;
 	}
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
index 974d39308c7..b9828f1da46 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
@@ -93,7 +93,7 @@ enum mlx5_ib_mad_ifc_flags {
 };
 
 enum {
-	MLX5_CROSS_CHANNEL_UUAR = 0,
+	MLX5_CROSS_CHANNEL_BFREG = 0,
 };
 
 enum {
@@ -101,11 +101,34 @@ enum {
 	MLX5_CQE_VERSION_V1,
 };
 
+enum {
+	MLX5_IB_INVALID_UAR_INDEX = BIT(31),
+	MLX5_IB_INVALID_BFREG = BIT(31),
+};
+
 struct mlx5_ib_vma_private_data {
 	struct list_head list;
 	struct vm_area_struct *vma;
 };
 
+struct mlx5_bfreg_info {
+	u32 *sys_pages;
+	int num_low_latency_bfregs;
+	unsigned int *count;
+
+	/*
+	 * protect bfreg allocation data structs
+	 */
+	struct mutex lock;
+	u32 ver;
+	u8 lib_uar_4k : 1;
+	u8 lib_uar_dyn : 1;
+	u32 num_sys_pages;
+	u32 num_static_sys_pages;
+	u32 total_num_bfregs;
+	u32 num_dyn_bfregs;
+};
+
 struct mlx5_ib_ucontext {
 	struct ib_ucontext ibucontext;
 	struct list_head db_page_list;
@@ -113,7 +136,7 @@ struct mlx5_ib_ucontext {
 	/* protect doorbell record alloc/free */
 	struct mutex db_page_mutex;
-	struct mlx5_uuar_info uuari;
+	struct mlx5_bfreg_info bfregi;
 	u8 cqe_version;
 	/* Transport Domain number */
 	u32 tdn;
@@ -188,12 +211,8 @@ struct mlx5_ib_flow_db {
 * These flags are intended for internal use by the mlx5_ib driver, and they
 * rely on the range reserved for that use in the ib_qp_create_flags enum.
 */
-
-/* Create a UD QP whose source QP number is 1 */
-static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void)
-{
-	return IB_QP_CREATE_RESERVED_START;
-}
+#define MLX5_IB_QP_CREATE_SQPN_QP1	IB_QP_CREATE_RESERVED_START
+#define MLX5_IB_QP_CREATE_WC_TEST	(IB_QP_CREATE_RESERVED_START << 1)
 
 struct wr_list {
 	u16 opcode;
@@ -327,6 +346,13 @@ struct mlx5_ib_raw_packet_qp {
 	struct mlx5_ib_rq rq;
 };
 
+struct mlx5_bf {
+	int buf_size;
+	unsigned long offset;
+	struct mlx5_sq_bfreg *bfreg;
+	spinlock_t lock32;
+};
+
 struct mlx5_ib_qp {
 	struct ib_qp ibqp;
 	union {
@@ -352,13 +378,13 @@ struct mlx5_ib_qp {
 	int wq_sig;
 	int scat_cqe;
 	int max_inline_data;
-	struct mlx5_bf *bf;
+	struct mlx5_bf bf;
 	int has_rq;
 
 	/* only for user space QPs. For kernel
 	 * we have it from the bf object
 	 */
-	int uuarn;
+	int bfregn;
 
 	int create_type;
@@ -693,6 +719,9 @@ struct mlx5_ib_dev {
 	struct list_head qp_list;
 	/* Array with num_ports elements */
 	struct mlx5_ib_port *port;
+	struct mlx5_sq_bfreg bfreg;
+	struct mlx5_sq_bfreg wc_bfreg;
+	struct mlx5_sq_bfreg fp_bfreg;
 	struct mlx5_ib_congestion congestion;
 
 	struct mlx5_async_ctx async_ctx;
@@ -966,6 +995,8 @@ void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
 
 int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
 
+void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi,
+			int bfregn);
 static inline void init_query_mad(struct ib_smp *mad)
 {
 	mad->base_version = 1;
@@ -1055,4 +1086,20 @@ static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
 void mlx5_ib_cleanup_congestion(struct mlx5_ib_dev *);
 int mlx5_ib_init_congestion(struct mlx5_ib_dev *);
 
+static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_support)
+{
+	return lib_support && MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+	       MLX5_UARS_IN_PAGE : 1;
+}
+
+static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
+				      struct mlx5_bfreg_info *bfregi)
+{
+	return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages;
+}
+
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+			struct mlx5_bfreg_info *bfregi, u32 bfregn,
+			bool dyn_bfreg);
+
 #endif /* MLX5_IB_H */
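/*
 * Editor's illustration of the helpers above: with uar_4k granted to the
 * library on a 64K page kernel, get_uars_per_sys_page() returns
 * MLX5_UARS_IN_PAGE == 16, so one mapped system page carries
 * 16 * MLX5_NON_FP_BFREGS_PER_UAR == 32 general purpose bfregs; without 4K
 * UAR support it degenerates to one UAR (two bfregs) per page.
 */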
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c
index 78d0518fe2b..15c33fdee44 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cq.c
@@ -698,7 +698,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
 	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
 	struct mlx5_ib_cq *cq = to_mcq(ibcq);
-	void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
+	void __iomem *uar_page = mdev->priv.uar->map;
 	unsigned long irq_flags;
 	int ret = 0;
 
@@ -753,16 +753,14 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 	void *cqc;
 	int err;
 
-	ucmdlen =
-		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
-		 sizeof(ucmd)) ? (sizeof(ucmd) -
-				  sizeof(ucmd.reserved)) : sizeof(ucmd);
+	ucmdlen = min(udata->inlen, sizeof(ucmd));
+	if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
+		return -EINVAL;
 
 	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
 		return -EFAULT;
 
-	if (ucmdlen == sizeof(ucmd) &&
-	    ucmd.reserved != 0)
+	if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)))
 		return -EINVAL;
 
 	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
@@ -803,10 +801,20 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 	MLX5_SET(cqc, cqc, log_page_size,
 		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
-	*index = to_mucontext(context)->uuari.uars[0].index;
+	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
+		*index = ucmd.uar_page_index;
+	} else if (to_mucontext(context)->bfregi.lib_uar_dyn) {
+		err = -EINVAL;
+		goto err_cqb;
+	} else {
+		*index = to_mucontext(context)->bfregi.sys_pages[0];
+	}
 
 	return 0;
 
+err_cqb:
+	kvfree(*cqb);
+
 err_db:
 	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
 
@@ -871,7 +879,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	MLX5_SET(cqc, cqc, log_page_size,
 		 cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
-	*index = dev->mdev->priv.uuari.uars[0].index;
+	*index = dev->mdev->priv.uar->index;
 
 	return 0;
 
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c
index 31d1c11a2d3..6c041785166 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_gsi.c
@@ -258,7 +258,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
 		},
 		.sq_sig_type = gsi->sq_sig_type,
 		.qp_type = IB_QPT_UD,
-		.create_flags = mlx5_ib_create_qp_sqpn_qp1(),
+		.create_flags = MLX5_IB_QP_CREATE_SQPN_QP1,
 	};
 
 	return ib_create_qp(pd, &init_attr);
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
index 9f891d5e959..58f9379e867 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c
@@ -1105,6 +1105,94 @@ out:
 	return err;
 }
 
+static u16 calc_dynamic_bfregs(int uars_per_sys_page)
+{
+	/* Large page with non 4k uar support might limit the dynamic size */
+	if (uars_per_sys_page == 1 && PAGE_SIZE > 4096)
+		return MLX5_MIN_DYN_BFREGS;
+
+	return MLX5_MAX_DYN_BFREGS;
+}
+
+static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
+			     struct mlx5_ib_alloc_ucontext_req_v2 *req,
+			     struct mlx5_bfreg_info *bfregi)
+{
+	int uars_per_sys_page;
+	int bfregs_per_sys_page;
+	int ref_bfregs = req->total_num_bfregs;
+
+	if (req->total_num_bfregs == 0)
+		return -EINVAL;
+
+	BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
+	BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
+
+	if (req->total_num_bfregs > MLX5_MAX_BFREGS)
+		return -ENOMEM;
+
+	uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
+	bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
+	/* This holds the required static allocation asked by the user */
+	req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
+	if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
+		return -EINVAL;
+
+	bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
+	bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page);
+	bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs;
+	bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page;
+
+	mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n",
+		    MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
+		    lib_uar_4k ? "yes" : "no", ref_bfregs,
+		    req->total_num_bfregs, bfregi->total_num_bfregs,
+		    bfregi->num_sys_pages);
+
+	return 0;
+}
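/*
 * Worked example for calc_total_bfregs() (illustrative numbers): a 4K UAR
 * library on a 4K page host requesting total_num_bfregs == 16 gets
 * bfregs_per_sys_page == 2, so num_static_sys_pages == 8; the dynamic
 * region adds ALIGN(MLX5_MAX_DYN_BFREGS, 2) == 1024 bfregs on top, giving
 * total_num_bfregs == 1040 spread over num_sys_pages == 520.
 */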
+
+static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+	struct mlx5_bfreg_info *bfregi;
+	int err;
+	int i;
+
+	bfregi = &context->bfregi;
+	for (i = 0; i < bfregi->num_static_sys_pages; i++) {
+		err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
+		if (err)
+			goto error;
+
+		mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
+	}
+
+	for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++)
+		bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX;
+
+	return 0;
+
+error:
+	for (--i; i >= 0; i--)
+		if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
+			mlx5_ib_warn(dev, "failed to free uar %d\n", i);
+
+	return err;
+}
+
+static void deallocate_uars(struct mlx5_ib_dev *dev,
+			    struct mlx5_ib_ucontext *context)
+{
+	struct mlx5_bfreg_info *bfregi;
+	int i;
+
+	bfregi = &context->bfregi;
+	for (i = 0; i < bfregi->num_sys_pages; i++)
+		if (i < bfregi->num_static_sys_pages ||
+		    bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
+			mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+}
+
 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 						  struct ib_udata *udata)
 {
@@ -1112,17 +1200,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	struct mlx5_ib_alloc_ucontext_req_v2 req = {};
 	struct mlx5_ib_alloc_ucontext_resp resp = {};
 	struct mlx5_ib_ucontext *context;
-	struct mlx5_uuar_info *uuari;
-	struct mlx5_uar *uars;
-	int gross_uuars;
-	int num_uars;
+	struct mlx5_bfreg_info *bfregi;
 	int ver;
-	int uuarn;
 	int err;
-	int i;
 	size_t reqlen;
 	size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
 				     max_cqe_version);
+	bool lib_uar_4k;
+	bool lib_uar_dyn;
 
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
@@ -1145,27 +1230,19 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (req.flags)
 		return ERR_PTR(-EINVAL);
 
-	if (req.total_num_uuars > MLX5_MAX_UUARS)
-		return ERR_PTR(-ENOMEM);
-
-	if (req.total_num_uuars == 0)
-		return ERR_PTR(-EINVAL);
-
 	if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
 		return ERR_PTR(-EOPNOTSUPP);
 
+	req.total_num_bfregs = ALIGN(req.total_num_bfregs,
+				     MLX5_NON_FP_BFREGS_PER_UAR);
+	if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
+		return ERR_PTR(-EINVAL);
+
 	if (reqlen > sizeof(req) &&
 	    !ib_is_udata_cleared(udata, sizeof(req),
 				 reqlen - sizeof(req)))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	req.total_num_uuars = ALIGN(req.total_num_uuars,
-				    MLX5_NON_FP_BF_REGS_PER_PAGE);
-	if (req.num_low_latency_uuars > req.total_num_uuars - 1)
-		return ERR_PTR(-EINVAL);
-
-	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
-	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
 	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
 	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
 		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
@@ -1178,6 +1255,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	resp.cqe_version = min_t(__u8,
 				 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
 				 req.max_cqe_version);
+	resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+				MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+	resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+				MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
 	resp.response_length = min(offsetof(typeof(resp), response_length) +
 				   sizeof(resp.response_length), udata->outlen);
@@ -1185,41 +1266,42 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (!context)
 		return ERR_PTR(-ENOMEM);
 
-	uuari = &context->uuari;
-	mutex_init(&uuari->lock);
-	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
-	if (!uars) {
+	lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
+	lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
+	bfregi = &context->bfregi;
+
+	if (lib_uar_dyn) {
+		bfregi->lib_uar_dyn = lib_uar_dyn;
+		goto uar_done;
+	}
+
+	/* updates req->total_num_bfregs */
+	err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
+	if (err)
+		goto out_ctx;
+
+	mutex_init(&bfregi->lock);
+	bfregi->lib_uar_4k = lib_uar_4k;
+	bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count),
+				GFP_KERNEL);
+	if (!bfregi->count) {
 		err = -ENOMEM;
 		goto out_ctx;
 	}
 
-	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
-				sizeof(*uuari->bitmap),
-				GFP_KERNEL);
-	if (!uuari->bitmap) {
+	bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
+				    sizeof(*bfregi->sys_pages),
+				    GFP_KERNEL);
+	if (!bfregi->sys_pages) {
 		err = -ENOMEM;
-		goto out_uar_ctx;
-	}
-	/*
-	 * clear all fast path uuars
-	 */
-	for (i = 0; i < gross_uuars; i++) {
-		uuarn = i & 3;
-		if (uuarn == 2 || uuarn == 3)
-			set_bit(i, uuari->bitmap);
+		goto out_count;
 	}
 
-	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
-	if (!uuari->count) {
-		err = -ENOMEM;
-		goto out_bitmap;
-	}
+	err = allocate_uars(dev, context);
+	if (err)
+		goto out_sys_pages;
 
-	for (i = 0; i < num_uars; i++) {
-		err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
-		if (err)
-			goto out_count;
-	}
+uar_done:
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
 #endif
@@ -1236,7 +1318,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
-	resp.tot_uuars = req.total_num_uuars;
+	resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
 	resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
 
 	if (field_avail(typeof(resp), cqe_version, udata->outlen))
@@ -1254,25 +1336,33 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	 * pretend we don't support reading the HCA's core clock. This is also
 	 * forced by mmap function.
 	 */
-	if (PAGE_SIZE <= 4096 &&
-	    field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
-		resp.comp_mask |=
-			MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
-		resp.hca_core_clock_offset =
-			offsetof(struct mlx5_init_seg, internal_timer_h) %
-			PAGE_SIZE;
-		resp.response_length += sizeof(resp.hca_core_clock_offset) +
-			sizeof(resp.reserved2);
+	if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) {
+		if (PAGE_SIZE <= 4096) {
+			resp.comp_mask |=
+				MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+			resp.hca_core_clock_offset =
+				offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
+		}
+		resp.response_length += sizeof(resp.hca_core_clock_offset);
+	}
+
+	if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen)
+		resp.response_length += sizeof(resp.log_uar_size);
+
+	if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen)
+		resp.response_length += sizeof(resp.num_uars_per_page);
+
+	if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) {
+		resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
+		resp.response_length += sizeof(resp.num_dyn_bfregs);
 	}
 
 	err = ib_copy_to_udata(udata, &resp, resp.response_length);
 	if (err)
 		goto out_td;
 
-	uuari->ver = ver;
-	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
-	uuari->uars = uars;
-	uuari->num_uars = num_uars;
+	bfregi->ver = ver;
+	bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
 	context->cqe_version = resp.cqe_version;
 
 	return &context->ibucontext;
@@ -1282,16 +1372,13 @@ out_td:
 	mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
 
 out_uars:
-	for (i--; i >= 0; i--)
-		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
+	deallocate_uars(dev, context);
+
+out_sys_pages:
+	kfree(bfregi->sys_pages);
+
 out_count:
-	kfree(uuari->count);
-
-out_bitmap:
-	kfree(uuari->bitmap);
-
-out_uar_ctx:
-	kfree(uars);
+	kfree(bfregi->count);
 
 out_ctx:
 	kfree(context);
@@ -1302,28 +1389,28 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
-	struct mlx5_uuar_info *uuari = &context->uuari;
-	int i;
+	struct mlx5_bfreg_info *bfregi;
 
+	bfregi = &context->bfregi;
 	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
 		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
 
-	for (i = 0; i < uuari->num_uars; i++) {
-		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
-			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
-	}
-
-	kfree(uuari->count);
-	kfree(uuari->bitmap);
-	kfree(uuari->uars);
+	deallocate_uars(dev, context);
+	kfree(bfregi->sys_pages);
+	kfree(bfregi->count);
 	kfree(context);
 
 	return 0;
 }
 
-static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
+static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
+				 int uar_idx)
 {
-	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
+	int fw_uars_per_page;
+
+	fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
+
+	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
 }
 
 static int get_command(unsigned long offset)
@@ -1341,6 +1428,12 @@ static int get_index(unsigned long offset)
 	return get_arg(offset);
 }
 
+/* Index resides in an extra byte to enable larger values than 255 */
+static int get_extended_index(unsigned long offset)
+{
+	return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
+}
+
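/*
 * Editor's note on get_extended_index() above: the extra byte sits at bits
 * 16..23 of the mmap offset and is spliced in as bits 8..15 of the index,
 * so a dynamic UAR index of 0x123 is encoded with 0x01 in the extra byte
 * and 0x23 in the low index byte (assuming get_arg() yields the low byte).
 */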
 static void mlx5_ib_vma_open(struct vm_area_struct *area)
 {
 	/* vma_open is called when a new VMA is created on top of our VMA.  This
@@ -1486,22 +1579,36 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 		    struct vm_area_struct *vma,
 		    struct mlx5_ib_ucontext *context)
 {
-	struct mlx5_uuar_info *uuari = &context->uuari;
+	struct mlx5_bfreg_info *bfregi = &context->bfregi;
 	int err;
 	unsigned long idx;
-	phys_addr_t pfn, pa;
+	phys_addr_t pfn;
 	pgprot_t prot;
+	u32 bfreg_dyn_idx = 0;
+	u32 uar_index;
+	int dyn_uar = (cmd == MLX5_IB_MMAP_WC_PAGE);
+	int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
+				bfregi->num_static_sys_pages;
+
+	if (bfregi->lib_uar_dyn)
+		return -EINVAL;
+
+	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+		return -EINVAL;
+
+	if (dyn_uar)
+		idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages;
+	else
+		idx = get_index(vma->vm_pgoff);
+
+	if (idx >= max_valid_idx) {
+		mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n",
+			     idx, max_valid_idx);
+		return -EINVAL;
+	}
 
 	switch (cmd) {
 	case MLX5_IB_MMAP_WC_PAGE:
-/* Some architectures don't support WC memory */
-#if defined(CONFIG_X86)
-		if (!pat_enabled())
-			return -EPERM;
-#elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
-	return -EPERM;
-#endif
-		/* fall through */
 	case MLX5_IB_MMAP_REGULAR_PAGE:
 		/* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
 		prot = pgprot_writecombine(vma->vm_page_prot);
@@ -1513,14 +1620,40 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 		return -EINVAL;
 	}
 
-	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
-		return -EINVAL;
+	if (dyn_uar) {
+		int uars_per_page;
 
-	idx = get_index(vma->vm_pgoff);
-	if (idx >= uuari->num_uars)
-		return -EINVAL;
+		uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
+		bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR);
+		if (bfreg_dyn_idx >= bfregi->total_num_bfregs) {
+			mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n",
+				     bfreg_dyn_idx, bfregi->total_num_bfregs);
+			return -EINVAL;
+		}
 
-	pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+		mutex_lock(&bfregi->lock);
+		/* Fail if uar already allocated, first bfreg index of each
+		 * page holds its count.
+		 */
+		if (bfregi->count[bfreg_dyn_idx]) {
+			mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx);
+			mutex_unlock(&bfregi->lock);
+			return -EINVAL;
+		}
+
+		bfregi->count[bfreg_dyn_idx]++;
+		mutex_unlock(&bfregi->lock);
+
+		err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+		if (err) {
+			mlx5_ib_warn(dev, "UAR alloc failed\n");
+			goto free_bfreg;
+		}
+	} else {
+		uar_index = bfregi->sys_pages[idx];
+	}
+
+	pfn = uar_index2pfn(dev, uar_index);
 	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
 
 	vma->vm_page_prot = prot;
@@ -1529,14 +1662,23 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 	if (err) {
 		mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%llx, pfn=%pa, mmap_cmd=%s\n",
 			    err, (unsigned long long)vma->vm_start, &pfn, mmap_cmd2str(cmd));
-		return -EAGAIN;
+		goto err;
 	}
 
-	pa = pfn << PAGE_SHIFT;
-	mlx5_ib_dbg(dev, "mapped %s at 0x%llx, PA %pa\n", mmap_cmd2str(cmd),
-		    (unsigned long long)vma->vm_start, &pa);
-
+	if (dyn_uar)
+		bfregi->sys_pages[idx] = uar_index;
 	return mlx5_ib_set_vma_data(vma, context);
+
+err:
+	if (!dyn_uar)
+		return err;
+
+	mlx5_cmd_free_uar(dev->mdev, uar_index);
+
+free_bfreg:
+	mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
+
+	return err;
 }
 
 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
index 68a1c8cc25d..94561d32acc 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_qp.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -467,60 +467,54 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
 	return 1;
 }
 
-static int first_med_uuar(void)
+enum {
+	/* this is the first blue flame register in the array of bfregs assigned
+	 * to a process. Since we do not use it for blue flame but rather
+	 * regular 64 bit doorbells, we do not need a lock for maintaining
+	 * "odd/even" order
+	 */
+	NUM_NON_BLUE_FLAME_BFREGS = 1,
+};
+
+static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
 {
-	return 1;
+	return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
 }
 
-static int next_uuar(int n)
-{
-	n++;
-
-	while (((n % 4) & 2))
-		n++;
-
-	return n;
-}
-
-static int num_med_uuar(struct mlx5_uuar_info *uuari)
+static int num_med_bfreg(struct mlx5_ib_dev *dev,
+			 struct mlx5_bfreg_info *bfregi)
 {
 	int n;
 
-	n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
-		uuari->num_low_latency_uuars - 1;
+	n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
+	    NUM_NON_BLUE_FLAME_BFREGS;
 
 	return n >= 0 ? n : 0;
 }
 
-static int max_uuari(struct mlx5_uuar_info *uuari)
+static int first_med_bfreg(struct mlx5_ib_dev *dev,
+			   struct mlx5_bfreg_info *bfregi)
 {
-	return uuari->num_uars * 4;
+	return num_med_bfreg(dev, bfregi) ? 1 : -ENOMEM;
 }
 
-static int first_hi_uuar(struct mlx5_uuar_info *uuari)
+static int first_hi_bfreg(struct mlx5_ib_dev *dev,
+			  struct mlx5_bfreg_info *bfregi)
 {
 	int med;
-	int i;
-	int t;
 
-	med = num_med_uuar(uuari);
-	for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
-		t++;
-		if (t == med)
-			return next_uuar(i);
-	}
-
-	return 0;
+	med = num_med_bfreg(dev, bfregi);
+	return ++med;
 }
 
-static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
+				  struct mlx5_bfreg_info *bfregi)
 {
 	int i;
 
-	for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
-		if (!test_bit(i, uuari->bitmap)) {
-			set_bit(i, uuari->bitmap);
-			uuari->count[i]++;
+	for (i = first_hi_bfreg(dev, bfregi); i < max_bfregs(dev, bfregi); i++) {
+		if (!bfregi->count[i]) {
+			bfregi->count[i]++;
 			return i;
 		}
 	}
@@ -528,87 +522,56 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
 	return -ENOMEM;
 }
 
-static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
+				 struct mlx5_bfreg_info *bfregi)
 {
-	int minidx = first_med_uuar();
+	int minidx = first_med_bfreg(dev, bfregi);
 	int i;
 
-	for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
-		if (uuari->count[i] < uuari->count[minidx])
+	if (minidx < 0)
+		return minidx;
+
+	for (i = minidx; i < first_hi_bfreg(dev, bfregi); i++) {
+		if (bfregi->count[i] < bfregi->count[minidx])
 			minidx = i;
+		if (!bfregi->count[minidx])
+			break;
 	}
 
-	uuari->count[minidx]++;
+	bfregi->count[minidx]++;
 	return minidx;
 }
 
-static int alloc_uuar(struct mlx5_uuar_info *uuari,
-		      enum mlx5_ib_latency_class lat)
+static int alloc_bfreg(struct mlx5_ib_dev *dev,
+		       struct mlx5_bfreg_info *bfregi)
 {
-	int uuarn = -EINVAL;
+	int bfregn = -ENOMEM;
 
-	mutex_lock(&uuari->lock);
-	switch (lat) {
-	case MLX5_IB_LATENCY_CLASS_LOW:
-		uuarn = 0;
-		uuari->count[uuarn]++;
-		break;
+	if (bfregi->lib_uar_dyn)
+		return -EINVAL;
 
-	case MLX5_IB_LATENCY_CLASS_MEDIUM:
-		if (uuari->ver < 2)
-			uuarn = -ENOMEM;
-		else
-			uuarn = alloc_med_class_uuar(uuari);
-		break;
-
-	case MLX5_IB_LATENCY_CLASS_HIGH:
-		if (uuari->ver < 2)
-			uuarn = -ENOMEM;
-		else
-			uuarn = alloc_high_class_uuar(uuari);
-		break;
-
-	case MLX5_IB_LATENCY_CLASS_FAST_PATH:
-		uuarn = 2;
-		break;
-	}
-	mutex_unlock(&uuari->lock);
-
-	return uuarn;
-}
-
-static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
-	clear_bit(uuarn, uuari->bitmap);
-	--uuari->count[uuarn];
-}
-
-static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
-	clear_bit(uuarn, uuari->bitmap);
-	--uuari->count[uuarn];
-}
-
-static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
-	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
-	int high_uuar = nuuars - uuari->num_low_latency_uuars;
-
-	mutex_lock(&uuari->lock);
-	if (uuarn == 0) {
-		--uuari->count[uuarn];
-		goto out;
+	mutex_lock(&bfregi->lock);
+	if (bfregi->ver >= 2) {
+		bfregn = alloc_high_class_bfreg(dev, bfregi);
+		if (bfregn < 0)
+			bfregn = alloc_med_class_bfreg(dev, bfregi);
 	}
 
-	if (uuarn < high_uuar) {
-		free_med_class_uuar(uuari, uuarn);
-		goto out;
+	if (bfregn < 0) {
+		BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
+		bfregn = 0;
+		bfregi->count[bfregn]++;
 	}
+	mutex_unlock(&bfregi->lock);
 
-	free_high_class_uuar(uuari, uuarn);
+	return bfregn;
+}
 
-out:
-	mutex_unlock(&uuari->lock);
+void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
+{
+	mutex_lock(&bfregi->lock);
+	bfregi->count[bfregn]--;
+	mutex_unlock(&bfregi->lock);
 }
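/*
 * Allocation policy sketch for alloc_bfreg() above (editor's summary):
 * protocol v2 contexts first try a dedicated high class bfreg, then fall
 * back to a shared medium class one; if both fail (or on v1) everyone
 * shares bfreg 0, which is never used for blue flame and therefore only
 * needs a use count, not ownership tracking.
 */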
@@ -649,9 +612,36 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
 			       struct mlx5_ib_cq *recv_cq);
 
-static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+			struct mlx5_bfreg_info *bfregi, u32 bfregn,
+			bool dyn_bfreg)
 {
-	return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
+	unsigned int bfregs_per_sys_page;
+	u32 index_of_sys_page;
+	u32 offset;
+
+	if (bfregi->lib_uar_dyn)
+		return -EINVAL;
+
+	bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
+				MLX5_NON_FP_BFREGS_PER_UAR;
+	index_of_sys_page = bfregn / bfregs_per_sys_page;
+
+	if (dyn_bfreg) {
+		index_of_sys_page += bfregi->num_static_sys_pages;
+
+		if (index_of_sys_page >= bfregi->num_sys_pages)
+			return -EINVAL;
+
+		if (bfregn > bfregi->num_dyn_bfregs ||
+		    bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
+			mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
+			return -EINVAL;
+		}
+	}
+
+	offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
+	return bfregi->sys_pages[index_of_sys_page] + offset;
 }
 
 static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
@@ -754,6 +744,13 @@ err_umem:
 	return err;
 }
 
+static int adjust_bfregn(struct mlx5_ib_dev *dev,
+			 struct mlx5_bfreg_info *bfregi, int bfregn)
+{
+	return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR +
+				bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
+}
+
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 			  struct mlx5_ib_qp *qp, struct ib_udata *udata,
 			  struct ib_qp_init_attr *attr,
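The arithmetic in bfregn_to_uar_index() and adjust_bfregn() is easy to check in isolation. A standalone sketch, assuming 4 KB system pages and lib_uar_4k == false, so each system page holds one UAR and bfregs_per_sys_page == 2 (MLX5_BFREGS_PER_UAR and MLX5_NON_FP_BFREGS_PER_UAR are the real constants from device.h; everything else is illustrative):

    #include <stdio.h>

    #define MLX5_BFREGS_PER_UAR        4
    #define MLX5_NON_FP_BFREGS_PER_UAR 2

    /* driver-internal numbering -> on-UAR numbering that skips fast-path slots */
    static int adjust_bfregn(int bfregn)
    {
        return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR +
               bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
    }

    int main(void)
    {
        int uars_per_sys_page = 1;  /* assumed: 4 KB pages, no 4K-UAR mode */
        int bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;

        for (int bfregn = 0; bfregn < 6; bfregn++)
            printf("bfregn %d -> sys page %d, uar offset %d, adjusted index %d\n",
                   bfregn,
                   bfregn / bfregs_per_sys_page,
                   bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR,
                   adjust_bfregn(bfregn));
        return 0;
    }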
@@ -765,14 +762,15 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	struct mlx5_ib_create_qp ucmd;
 	struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
 	int page_shift = 0;
-	int uar_index;
+	int uar_index = 0;
 	int npages;
 	u32 offset = 0;
-	int uuarn;
+	int bfregn;
 	int ncont = 0;
 	__be64 *pas;
 	void *qpc;
 	int err;
+	u32 uar_flags;
 
 	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
 	if (err) {
@@ -781,32 +779,35 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	}
 
 	context = to_mucontext(pd->uobject->context);
-	/*
-	 * TBD: should come from the verbs when we have the API
-	 */
-	if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
-		/* In CROSS_CHANNEL CQ and QP must use the same UAR */
-		uuarn = MLX5_CROSS_CHANNEL_UUAR;
-	else {
-		uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
-		if (uuarn < 0) {
-			mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
-			mlx5_ib_dbg(dev, "reverting to medium latency\n");
-			uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
-			if (uuarn < 0) {
-				mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
-				mlx5_ib_dbg(dev, "reverting to high latency\n");
-				uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
-				if (uuarn < 0) {
-					mlx5_ib_warn(dev, "uuar allocation failed\n");
-					return uuarn;
-				}
-			}
-		}
+	uar_flags = ucmd.flags & (MLX5_QP_FLAG_UAR_PAGE_INDEX |
+				  MLX5_QP_FLAG_BFREG_INDEX);
+	switch (uar_flags) {
+	case MLX5_QP_FLAG_UAR_PAGE_INDEX:
+		uar_index = ucmd.bfreg_index;
+		bfregn = MLX5_IB_INVALID_BFREG;
+		break;
+	case MLX5_QP_FLAG_BFREG_INDEX:
+		uar_index = bfregn_to_uar_index(dev, &context->bfregi,
+						ucmd.bfreg_index, true);
+		if (uar_index < 0)
+			return uar_index;
+		bfregn = MLX5_IB_INVALID_BFREG;
+		break;
+	case 0:
+		if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+			return -EINVAL;
+		bfregn = alloc_bfreg(dev, &context->bfregi);
+		if (bfregn < 0)
+			return bfregn;
+		break;
+	default:
+		return -EINVAL;
 	}
 
-	uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
-	mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+	mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
+	if (bfregn != MLX5_IB_INVALID_BFREG)
+		uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn,
+						false);
 
 	qp->rq.offset = 0;
 	qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
@@ -814,7 +815,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
 	err = set_user_buf_size(dev, qp, &ucmd, base, attr);
 	if (err)
-		goto err_uuar;
+		goto err_bfreg;
 
 	if (ucmd.buf_addr && ubuffer->buf_size) {
 		ubuffer->buf_addr = ucmd.buf_addr;
@@ -823,7 +824,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 				       &ubuffer->umem, &npages, &page_shift,
 				       &ncont, &offset);
 		if (err)
-			goto err_uuar;
+			goto err_bfreg;
 	} else {
 		ubuffer->umem = NULL;
 	}
@@ -846,8 +847,11 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	MLX5_SET(qpc, qpc, page_offset, offset);
 
 	MLX5_SET(qpc, qpc, uar_page, uar_index);
-	resp->uuar_index = uuarn;
-	qp->uuarn = uuarn;
+	if (bfregn != MLX5_IB_INVALID_BFREG)
+		resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
+	else
+		resp->bfreg_index = MLX5_IB_INVALID_BFREG;
+	qp->bfregn = bfregn;
 
 	err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
 	if (err) {
@@ -874,12 +878,13 @@ err_umem:
 	if (ubuffer->umem)
 		ib_umem_release(ubuffer->umem);
 
-err_uuar:
-	free_uuar(&context->uuari, uuarn);
+err_bfreg:
+	if (bfregn != MLX5_IB_INVALID_BFREG)
+		mlx5_ib_free_bfreg(dev, &context->bfregi, bfregn);
 	return err;
 }
 
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp,
 			    struct mlx5_ib_qp_base *base)
 {
 	struct mlx5_ib_ucontext *context;
@@ -888,7 +893,13 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
 	mlx5_ib_db_unmap_user(context, &qp->db);
 	if (base->ubuffer.umem)
 		ib_umem_release(base->ubuffer.umem);
-	free_uuar(&context->uuari, qp->uuarn);
+
+	/*
+	 * Free only the BFREGs which are handled by the kernel.
+	 * BFREGs of UARs allocated dynamically are handled by userspace.
+	 */
+	if (qp->bfregn != MLX5_IB_INVALID_BFREG)
+		mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
 }
 
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
@@ -897,36 +908,36 @@
 			    u32 **in, int *inlen,
 			    struct mlx5_ib_qp_base *base)
 {
-	enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
-	struct mlx5_uuar_info *uuari;
 	int uar_index;
 	void *qpc;
-	int uuarn;
 	int err;
 
-	uuari = &dev->mdev->priv.uuari;
 	if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
 					IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
 					IB_QP_CREATE_IPOIB_UD_LSO |
-					mlx5_ib_create_qp_sqpn_qp1()))
+					MLX5_IB_QP_CREATE_SQPN_QP1 |
+					MLX5_IB_QP_CREATE_WC_TEST))
 		return -EINVAL;
 
+	spin_lock_init(&qp->bf.lock32);
+
 	if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
-		lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
+		qp->bf.bfreg = &dev->fp_bfreg;
+	else if (init_attr->create_flags & MLX5_IB_QP_CREATE_WC_TEST)
+		qp->bf.bfreg = &dev->wc_bfreg;
+	else
+		qp->bf.bfreg = &dev->bfreg;
 
-	uuarn = alloc_uuar(uuari, lc);
-	if (uuarn < 0) {
-		mlx5_ib_dbg(dev, "\n");
-		return -ENOMEM;
-	}
-
-	qp->bf = &uuari->bfs[uuarn];
-	uar_index = qp->bf->uar->index;
+	/* We need to divide by two since each register consists of
+	 * two buffers of identical size, namely odd and even
+	 */
+	qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
+	uar_index = qp->bf.bfreg->index;
 
 	err = calc_sq_size(dev, init_attr, qp);
 	if (err < 0) {
 		mlx5_ib_dbg(dev, "err %d\n", err);
-		goto err_uuar;
+		return err;
 	}
 
 	qp->rq.offset = 0;
@@ -937,7 +948,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 			2 * PAGE_SIZE, &qp->buf);
 	if (err) {
 		mlx5_ib_dbg(dev, "err %d\n", err);
-		goto err_uuar;
+		return err;
 	}
 
 	qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
@@ -957,7 +968,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 	MLX5_SET(qpc, qpc, fre, 1);
 	MLX5_SET(qpc, qpc, rlky, 1);
 
-	if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
+	if (init_attr->create_flags & MLX5_IB_QP_CREATE_SQPN_QP1) {
 		MLX5_SET(qpc, qpc, deth_sqpn, 1);
 		qp->flags |= MLX5_IB_QP_SQPN_QP1;
 	}
@@ -999,9 +1010,6 @@ err_free:
 
 err_buf:
 	mlx5_buf_free(dev->mdev, &qp->buf);
-
-err_uuar:
-	free_uuar(&dev->mdev->priv.uuari, uuarn);
 	return err;
 }
 
@@ -1014,7 +1022,6 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 	kfree(qp->rq.wrid);
 	mlx5_db_free(dev->mdev, &qp->db);
 	mlx5_buf_free(dev->mdev, &qp->buf);
-	free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
 }
 
 static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
@@ -1346,7 +1353,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (init_attr->create_flags || init_attr->send_cq)
 		return -EINVAL;
 
-	min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
+	min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index);
 	if (udata->outlen < min_resp_len)
 		return -EINVAL;
 
@@ -1623,7 +1630,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 			return -EINVAL;
 		}
 		if (init_attr->create_flags &
-		    mlx5_ib_create_qp_sqpn_qp1()) {
+		    MLX5_IB_QP_CREATE_SQPN_QP1) {
 			mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
 			return -EINVAL;
 		}
@@ -1788,7 +1795,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
 err_create:
 	if (qp->create_type == MLX5_QP_USER)
-		destroy_qp_user(pd, qp, base);
+		destroy_qp_user(dev, pd, qp, base);
 	else if (qp->create_type == MLX5_QP_KERNEL)
 		destroy_qp_kernel(dev, qp);
 
@@ -1967,7 +1974,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
 	if (qp->create_type == MLX5_QP_KERNEL)
 		destroy_qp_kernel(dev, qp);
 	else if (qp->create_type == MLX5_QP_USER)
-		destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
+		destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base);
 }
 
 static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -3717,24 +3724,6 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 	}
 }
 
-static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
-			 unsigned bytecnt, struct mlx5_ib_qp *qp)
-{
-	while (bytecnt > 0) {
-		__iowrite64_copy(dst++, src++, 8);
-		__iowrite64_copy(dst++, src++, 8);
-		__iowrite64_copy(dst++, src++, 8);
-		__iowrite64_copy(dst++, src++, 8);
-		__iowrite64_copy(dst++, src++, 8);
-		__iowrite64_copy(dst++, src++, 8);
-		__iowrite64_copy(dst++, src++, 8);
-		__iowrite64_copy(dst++, src++, 8);
-		bytecnt -= 64;
-		if (unlikely(src == qp->sq.qend))
-			src = mlx5_get_send_wqe(qp, 0);
-	}
-}
-
 static u8 get_fence(u8 fence, struct ib_send_wr *wr)
 {
 	if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
@@ -3830,7 +3819,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
 
 	qp = to_mqp(ibqp);
-	bf = qp->bf;
+	bf = &qp->bf;
 	qend = qp->sq.qend;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
@@ -4103,28 +4092,12 @@ out:
 		 * we hit doorbell */
 		wmb();
 
-		if (bf->need_lock)
-			spin_lock(&bf->lock);
-		else
-			__acquire(&bf->lock);
-
-		/* TBD enable WC */
-		if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
-			mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
-			/* wc_wmb(); */
-		} else {
-			mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
-				     MLX5_GET_DOORBELL_LOCK(&bf->lock32));
-			/* Make sure doorbells don't leak out of SQ spinlock
-			 * and reach the HCA out of order.
-			 */
-			mmiowb();
-		}
+		mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset,
+			     MLX5_GET_DOORBELL_LOCK(&bf->lock32));
+		/* Make sure doorbells don't leak out of SQ spinlock
+		 * and reach the HCA out of order.
+		 */
 		bf->offset ^= bf->buf_size;
-		if (bf->need_lock)
-			spin_unlock(&bf->lock);
-		else
-			__release(&bf->lock);
 	}
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
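The bf->offset toggle above alternates between the two halves of the blue-flame register whose size create_kernel_qp() halves when it sets bf.buf_size, so a new doorbell never overwrites one the HCA may still be reading. A standalone sketch of that alternation, with an assumed log_bf_reg_size of 10 (the real value comes from MLX5_CAP_GEN):

    #include <stdio.h>

    int main(void)
    {
        unsigned int log_bf_reg_size = 10;                   /* assumed cap value */
        unsigned int buf_size = (1u << log_bf_reg_size) / 2; /* odd/even halves */
        unsigned int offset = 0;

        for (int doorbell = 0; doorbell < 4; doorbell++) {
            printf("doorbell %d written at bfreg offset %u\n", doorbell, offset);
            offset ^= buf_size;  /* same toggle as bf->offset ^= bf->buf_size */
        }
        return 0;
    }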
@@ -4547,7 +4520,7 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
 		qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
 	if (qp->flags & MLX5_IB_QP_SQPN_QP1)
-		qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
+		qp_init_attr->create_flags |= MLX5_IB_QP_CREATE_SQPN_QP1;
 
 	qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
 		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 
diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h
index 17fa0ca01bd..f748ec876ef 100644
--- a/sys/dev/mlx5/mlx5_ifc.h
+++ b/sys/dev/mlx5/mlx5_ifc.h
@@ -1192,7 +1192,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         uc[0x1];
 	u8         rc[0x1];
 
-	u8         reserved_34[0xa];
+	u8         uar_4k[0x1];
+	u8         reserved_at_241[0x9];
 	u8         uar_sz[0x6];
 	u8         reserved_35[0x8];
 	u8         log_pg_sz[0x8];
@@ -1295,7 +1296,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         device_frequency_khz[0x20];
 
-	u8         reserved_66[0x80];
+	u8         reserved_at_500[0x20];
+	u8         num_of_uars_per_page[0x20];
+	u8         reserved_at_540[0x40];
 
 	u8         log_max_atomic_size_qp[0x8];
 	u8         reserved_67[0x10];
diff --git a/sys/ofed/include/uapi/rdma/mlx5-abi.h b/sys/ofed/include/uapi/rdma/mlx5-abi.h
index 3ce4afeea60..abbab91c8af 100644
--- a/sys/ofed/include/uapi/rdma/mlx5-abi.h
+++ b/sys/ofed/include/uapi/rdma/mlx5-abi.h
@@ -46,6 +46,8 @@ enum {
 	MLX5_QP_FLAG_SIGNATURE		= 1 << 0,
 	MLX5_QP_FLAG_SCATTER_CQE	= 1 << 1,
+	MLX5_QP_FLAG_BFREG_INDEX	= 1 << 3,
+	MLX5_QP_FLAG_UAR_PAGE_INDEX	= 1 << 10,
 };
 
 enum {
@@ -69,19 +71,28 @@ enum {
  */
 
 struct mlx5_ib_alloc_ucontext_req {
-	__u32	total_num_uuars;
-	__u32	num_low_latency_uuars;
+	__u32	total_num_bfregs;
+	__u32	num_low_latency_bfregs;
 };
 
+enum mlx5_lib_caps {
+	MLX5_LIB_CAP_4K_UAR	= (__u64)1 << 0,
+	MLX5_LIB_CAP_DYN_UAR	= (__u64)1 << 1,
+};
+
+enum mlx5_ib_alloc_uctx_v2_flags {
+	MLX5_IB_ALLOC_UCTX_DEVX	= 1 << 0,
+};
+
 struct mlx5_ib_alloc_ucontext_req_v2 {
-	__u32	total_num_uuars;
-	__u32	num_low_latency_uuars;
+	__u32	total_num_bfregs;
+	__u32	num_low_latency_bfregs;
 	__u32	flags;
 	__u32	comp_mask;
 	__u8	max_cqe_version;
 	__u8	reserved0;
 	__u16	reserved1;
 	__u32	reserved2;
+	__aligned_u64 lib_caps;
 };
 
 enum mlx5_ib_alloc_ucontext_resp_mask {
@@ -96,7 +107,7 @@ enum mlx5_user_cmds_supp_uhw {
 struct mlx5_ib_alloc_ucontext_resp {
 	__u32	qp_tab_size;
 	__u32	bf_reg_size;
-	__u32	tot_uuars;
+	__u32	tot_bfregs;
 	__u32	cache_line_size;
 	__u16	max_sq_desc_sz;
 	__u16	max_rq_desc_sz;
@@ -111,6 +122,9 @@ struct mlx5_ib_alloc_ucontext_resp {
 	__u8	cmds_supp_uhw;
 	__u16	reserved2;
 	__u64	hca_core_clock_offset;
+	__u32	log_uar_size;
+	__u32	num_uars_per_page;
+	__u32	num_dyn_bfregs;
 };
 
 struct mlx5_ib_alloc_pd_resp {
@@ -140,11 +154,17 @@ struct mlx5_ib_query_device_resp {
 	struct mlx5_ib_rss_caps rss_caps;
 };
 
+enum mlx5_ib_create_cq_flags {
+	MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD	= 1 << 0,
+	MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX	= 1 << 1,
+};
+
 struct mlx5_ib_create_cq {
 	__u64	buf_addr;
 	__u64	db_addr;
 	__u32	cqe_size;
-	__u32	reserved; /* explicit padding (optional on i386) */
+	__u16	flags;
+	__u16	uar_page_index;
 };
 
 struct mlx5_ib_create_cq_resp {
@@ -181,7 +201,7 @@ struct mlx5_ib_create_qp {
 	__u32	rq_wqe_shift;
 	__u32	flags;
 	__u32	uidx;
-	__u32	reserved0;
+	__u32	bfreg_index;
 	__u64	sq_buf_addr;
 };
 
@@ -220,7 +240,7 @@ struct mlx5_ib_create_qp_rss {
 };
 
 struct mlx5_ib_create_qp_resp {
-	__u32	uuar_index;
+	__u32	bfreg_index;
 };
 
 struct mlx5_ib_alloc_mw {
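A userspace view of the new create-QP ABI, illustrative only: a provider that learned num_dyn_bfregs from struct mlx5_ib_alloc_ucontext_resp can point a new QP at a specific doorbell register instead of letting the kernel pick one. The command struct below is a simplified stand-in (the real struct mlx5_ib_create_qp has more fields); the flag value is the one added above:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define MLX5_QP_FLAG_BFREG_INDEX (1u << 3)  /* from the patched mlx5-abi.h */

    /* simplified stand-in for struct mlx5_ib_create_qp */
    struct create_qp_cmd {
        uint32_t flags;
        uint32_t bfreg_index;
    };

    static void request_dyn_bfreg(struct create_qp_cmd *cmd, uint32_t dyn_bfreg)
    {
        memset(cmd, 0, sizeof(*cmd));
        /* the kernel takes the MLX5_QP_FLAG_BFREG_INDEX case in create_user_qp()
         * and validates the index via bfregn_to_uar_index(..., true) */
        cmd->flags |= MLX5_QP_FLAG_BFREG_INDEX;
        cmd->bfreg_index = dyn_bfreg;
    }

    int main(void)
    {
        struct create_qp_cmd cmd;

        request_dyn_bfreg(&cmd, 3);  /* 3: arbitrary dynamic bfreg index */
        printf("flags=0x%x bfreg_index=%u\n", cmd.flags, cmd.bfreg_index);
        return 0;
    }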