From 0fd4cd405bbf6b9dcc3d4fc3ddf37c876ba97c11 Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Fri, 15 Apr 2022 14:41:59 -0600 Subject: [PATCH] nvme: Use controller's page size instead of PAGE_SIZE to create qpair When constructing qpair, use the controller's notion of page size rather than the host's PAGE_SIZE. Currently, these are both 4k, but the arm 16k page size support requires decoupling. There's a "hidden" PAGE_SIZE in btoc, so we must change btoc(x) to howmany(x, ctrlr->page_size) to properly count the number of pages (in the drive's world view) that are needed for various calculations. With these changes, the nvme driver operates at production level load for both host 4k and host 16k page size. Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D34873 --- sys/dev/nvme/nvme_qpair.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index 175975b2b3a..3b20a7e209f 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -702,9 +702,10 @@ nvme_qpair_construct(struct nvme_qpair *qpair, /* Note: NVMe PRP format is restricted to 4-byte alignment. */ err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), - 4, PAGE_SIZE, BUS_SPACE_MAXADDR, + 4, ctrlr->page_size, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, ctrlr->max_xfer_size, - btoc(ctrlr->max_xfer_size) + 1, PAGE_SIZE, 0, + howmany(ctrlr->max_xfer_size, ctrlr->page_size) + 1, + ctrlr->page_size, 0, NULL, NULL, &qpair->dma_tag_payload); if (err != 0) { nvme_printf(ctrlr, "payload tag create failed %d\n", err); @@ -716,20 +717,21 @@ nvme_qpair_construct(struct nvme_qpair *qpair, * cannot cross a page boundary. 
*/ cmdsz = qpair->num_entries * sizeof(struct nvme_command); - cmdsz = roundup2(cmdsz, PAGE_SIZE); + cmdsz = roundup2(cmdsz, ctrlr->page_size); cplsz = qpair->num_entries * sizeof(struct nvme_completion); - cplsz = roundup2(cplsz, PAGE_SIZE); + cplsz = roundup2(cplsz, ctrlr->page_size); /* * For commands requiring more than 2 PRP entries, one PRP will be * embedded in the command (prp1), and the rest of the PRP entries * will be in a list pointed to by the command (prp2). */ - prpsz = sizeof(uint64_t) * btoc(ctrlr->max_xfer_size); + prpsz = sizeof(uint64_t) * + howmany(ctrlr->max_xfer_size, ctrlr->page_size); prpmemsz = qpair->num_trackers * prpsz; allocsz = cmdsz + cplsz + prpmemsz; err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev), - PAGE_SIZE, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, + ctrlr->page_size, 0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, allocsz, 1, allocsz, 0, NULL, NULL, &qpair->dma_tag); if (err != 0) { nvme_printf(ctrlr, "tag create failed %d\n", err); @@ -791,13 +793,13 @@ nvme_qpair_construct(struct nvme_qpair *qpair, /* * Make sure that the PRP list for this tracker doesn't - * overflow to another page. + * overflow to another nvme page. */ if (trunc_page(list_phys) != trunc_page(list_phys + prpsz - 1)) { - list_phys = roundup2(list_phys, PAGE_SIZE); + list_phys = roundup2(list_phys, ctrlr->page_size); prp_list = - (uint8_t *)roundup2((uintptr_t)prp_list, PAGE_SIZE); + (uint8_t *)roundup2((uintptr_t)prp_list, ctrlr->page_size); } tr = malloc_domainset(sizeof(*tr), M_NVME, @@ -1101,10 +1103,9 @@ nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) } /* - * Note that we specified PAGE_SIZE for alignment and max - * segment size when creating the bus dma tags. So here - * we can safely just transfer each segment to its - * associated PRP entry. + * Note that we specified ctrlr->page_size for alignment and max + * segment size when creating the bus dma tags. 
So here we can safely + * just transfer each segment to its associated PRP entry. */ tr->req->cmd.prp1 = htole64(seg[0].ds_addr);