From 365b89e8ea4af34a05f68aa28e77573e89fa00b2 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Mon, 30 Dec 2024 13:52:21 -0500 Subject: [PATCH] nvmf: Switch several ioctls to using nvlists For requests that handoff queues from userspace to the kernel as well as the request to fetch reconnect parameters from the kernel, switch from using flat structures to nvlists. In particular, this will permit adding support for additional transports in the future without breaking the ABI of the structures. Note that this is an ABI break for the ioctls used by nvmf(4) and nvmft(4). Since this is only present in main I did not bother implementing compatability shims. Inspired by: imp (suggestion on a different review) Reviewed by: imp Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D48230 --- lib/libnvmf/Makefile | 2 + lib/libnvmf/internal.h | 10 +- lib/libnvmf/libnvmf.h | 9 +- lib/libnvmf/nvmf_controller.c | 21 ++- lib/libnvmf/nvmf_host.c | 87 ++++++--- lib/libnvmf/nvmf_tcp.c | 23 +-- lib/libnvmf/nvmf_transport.c | 47 ++++- rescue/rescue/Makefile | 2 +- sbin/nvmecontrol/reconnect.c | 18 +- share/mk/src.libnames.mk | 1 + sys/cam/ctl/ctl_ioctl.h | 2 +- sys/dev/nvmf/controller/ctl_frontend_nvmf.c | 58 ++++-- sys/dev/nvmf/controller/nvmft_controller.c | 19 +- sys/dev/nvmf/controller/nvmft_qpair.c | 15 +- sys/dev/nvmf/controller/nvmft_var.h | 11 +- sys/dev/nvmf/host/nvmf.c | 197 ++++++++++++-------- sys/dev/nvmf/host/nvmf_ctldev.c | 13 +- sys/dev/nvmf/host/nvmf_qpair.c | 21 ++- sys/dev/nvmf/host/nvmf_var.h | 13 +- sys/dev/nvmf/nvmf.h | 100 ++++++---- sys/dev/nvmf/nvmf_tcp.c | 40 ++-- sys/dev/nvmf/nvmf_transport.c | 92 ++++++++- sys/dev/nvmf/nvmf_transport.h | 24 ++- sys/dev/nvmf/nvmf_transport_internal.h | 3 +- usr.sbin/nvmfd/ctl.c | 4 +- 25 files changed, 557 insertions(+), 275 deletions(-) diff --git a/lib/libnvmf/Makefile b/lib/libnvmf/Makefile index dbba6b47651..b01f5ab82ca 100644 --- a/lib/libnvmf/Makefile +++ b/lib/libnvmf/Makefile @@ -14,6 +14,8 @@ SRCS= gsb_crc32.c \ nvmf_transport.c \ nvmft_subr.c +LIBADD= nv + CFLAGS+= -I${SRCTOP}/sys/dev/nvmf/controller CFLAGS+= -I${SRCTOP}/sys/dev/nvmf diff --git a/lib/libnvmf/internal.h b/lib/libnvmf/internal.h index cf45c15ba2f..7b3d4fbb03e 100644 --- a/lib/libnvmf/internal.h +++ b/lib/libnvmf/internal.h @@ -8,6 +8,7 @@ #ifndef __LIBNVMF_INTERNAL_H__ #define __LIBNVMF_INTERNAL_H__ +#include #include struct nvmf_transport_ops { @@ -23,9 +24,8 @@ struct nvmf_transport_ops { const struct nvmf_qpair_params *params); void (*free_qpair)(struct nvmf_qpair *qp); - /* Create params for kernel handoff. */ - int (*kernel_handoff_params)(struct nvmf_qpair *qp, - struct nvmf_handoff_qpair_params *qparams); + /* Add params for kernel handoff. */ + void (*kernel_handoff_params)(struct nvmf_qpair *qp, nvlist_t *nvl); /* Capsule operations. */ struct nvmf_capsule *(*allocate_capsule)(struct nvmf_qpair *qp); @@ -110,7 +110,7 @@ extern struct nvmf_transport_ops tcp_ops; void na_clear_error(struct nvmf_association *na); void na_error(struct nvmf_association *na, const char *fmt, ...); -int nvmf_kernel_handoff_params(struct nvmf_qpair *qp, - struct nvmf_handoff_qpair_params *qparams); +int nvmf_kernel_handoff_params(struct nvmf_qpair *qp, nvlist_t **nvlp); +int nvmf_pack_ioc_nvlist(struct nvmf_ioc_nv *nv, nvlist_t *nvl); #endif /* !__LIBNVMF_INTERNAL_H__ */ diff --git a/lib/libnvmf/libnvmf.h b/lib/libnvmf/libnvmf.h index f15277a0262..44f13fda5dd 100644 --- a/lib/libnvmf/libnvmf.h +++ b/lib/libnvmf/libnvmf.h @@ -8,6 +8,7 @@ #ifndef __LIBNVMF_H__ #define __LIBNVMF_H__ +#include #include #include #include @@ -249,7 +250,8 @@ uint64_t nvmf_get_log_page_offset(const struct nvme_command *cmd); /* Prepare to handoff a controller qpair. */ int nvmf_handoff_controller_qpair(struct nvmf_qpair *qp, - struct nvmf_handoff_controller_qpair *h); + const struct nvmf_fabric_connect_cmd *cmd, + const struct nvmf_fabric_connect_data *data, struct nvmf_ioc_nv *nv); /* Host-specific APIs. */ @@ -348,9 +350,10 @@ int nvmf_disconnect_all(void); /* * Fetch reconnect parameters from an existing kernel host to use for - * establishing a new association. + * establishing a new association. The caller must destroy the + * returned nvlist. */ -int nvmf_reconnect_params(int fd, struct nvmf_reconnect_params *rparams); +int nvmf_reconnect_params(int fd, nvlist_t **nvlp); /* * Handoff active host association to an existing host in the kernel. diff --git a/lib/libnvmf/nvmf_controller.c b/lib/libnvmf/nvmf_controller.c index 0e0126040ee..971dccbe039 100644 --- a/lib/libnvmf/nvmf_controller.c +++ b/lib/libnvmf/nvmf_controller.c @@ -457,8 +457,23 @@ nvmf_get_log_page_offset(const struct nvme_command *cmd) int nvmf_handoff_controller_qpair(struct nvmf_qpair *qp, - struct nvmf_handoff_controller_qpair *h) + const struct nvmf_fabric_connect_cmd *cmd, + const struct nvmf_fabric_connect_data *data, struct nvmf_ioc_nv *nv) { - h->trtype = qp->nq_association->na_trtype; - return (nvmf_kernel_handoff_params(qp, &h->params)); + nvlist_t *nvl, *nvl_qp; + int error; + + error = nvmf_kernel_handoff_params(qp, &nvl_qp); + if (error) + return (error); + + nvl = nvlist_create(0); + nvlist_add_number(nvl, "trtype", qp->nq_association->na_trtype); + nvlist_move_nvlist(nvl, "params", nvl_qp); + nvlist_add_binary(nvl, "cmd", cmd, sizeof(*cmd)); + nvlist_add_binary(nvl, "data", data, sizeof(*data)); + + error = nvmf_pack_ioc_nvlist(nv, nvl); + nvlist_destroy(nvl); + return (error); } diff --git a/lib/libnvmf/nvmf_host.c b/lib/libnvmf/nvmf_host.c index a0d95470d8e..c3668600c46 100644 --- a/lib/libnvmf/nvmf_host.c +++ b/lib/libnvmf/nvmf_host.c @@ -767,15 +767,16 @@ is_queue_pair_idle(struct nvmf_qpair *qp) } static int -prepare_queues_for_handoff(struct nvmf_handoff_host *hh, - struct nvmf_qpair *admin_qp, u_int num_queues, - struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata) +prepare_queues_for_handoff(struct nvmf_ioc_nv *nv, struct nvmf_qpair *admin_qp, + u_int num_queues, struct nvmf_qpair **io_queues, + const struct nvme_controller_data *cdata) { - struct nvmf_handoff_qpair_params *io; + nvlist_t *nvl, *nvl_qp; u_int i; int error; - memset(hh, 0, sizeof(*hh)); + if (num_queues == 0) + return (EINVAL); /* All queue pairs must be idle. */ if (!is_queue_pair_idle(admin_qp)) @@ -785,34 +786,40 @@ prepare_queues_for_handoff(struct nvmf_handoff_host *hh, return (EBUSY); } + nvl = nvlist_create(0); + nvlist_add_number(nvl, "trtype", admin_qp->nq_association->na_trtype); + nvlist_add_number(nvl, "kato", admin_qp->nq_kato); + /* First, the admin queue. */ - hh->trtype = admin_qp->nq_association->na_trtype; - hh->kato = admin_qp->nq_kato; - error = nvmf_kernel_handoff_params(admin_qp, &hh->admin); - if (error) + error = nvmf_kernel_handoff_params(admin_qp, &nvl_qp); + if (error) { + nvlist_destroy(nvl); return (error); + } + nvlist_move_nvlist(nvl, "admin", nvl_qp); /* Next, the I/O queues. */ - hh->num_io_queues = num_queues; - io = calloc(num_queues, sizeof(*io)); for (i = 0; i < num_queues; i++) { - error = nvmf_kernel_handoff_params(io_queues[i], &io[i]); + error = nvmf_kernel_handoff_params(io_queues[i], &nvl_qp); if (error) { - free(io); + nvlist_destroy(nvl); return (error); } + nvlist_append_nvlist_array(nvl, "io", nvl_qp); } - hh->io = io; - hh->cdata = cdata; - return (0); + nvlist_add_binary(nvl, "cdata", cdata, sizeof(*cdata)); + + error = nvmf_pack_ioc_nvlist(nv, nvl); + nvlist_destroy(nvl); + return (error); } int nvmf_handoff_host(struct nvmf_qpair *admin_qp, u_int num_queues, struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata) { - struct nvmf_handoff_host hh; + struct nvmf_ioc_nv nv; u_int i; int error, fd; @@ -822,14 +829,14 @@ nvmf_handoff_host(struct nvmf_qpair *admin_qp, u_int num_queues, goto out; } - error = prepare_queues_for_handoff(&hh, admin_qp, num_queues, io_queues, + error = prepare_queues_for_handoff(&nv, admin_qp, num_queues, io_queues, cdata); if (error != 0) goto out; - if (ioctl(fd, NVMF_HANDOFF_HOST, &hh) == -1) + if (ioctl(fd, NVMF_HANDOFF_HOST, &nv) == -1) error = errno; - free(hh.io); + free(nv.data); out: if (fd >= 0) @@ -882,30 +889,56 @@ out: return (error); } -int -nvmf_reconnect_params(int fd, struct nvmf_reconnect_params *rparams) +static int +nvmf_read_ioc_nv(int fd, u_long com, nvlist_t **nvlp) { - if (ioctl(fd, NVMF_RECONNECT_PARAMS, rparams) == -1) + struct nvmf_ioc_nv nv; + nvlist_t *nvl; + int error; + + memset(&nv, 0, sizeof(nv)); + if (ioctl(fd, com, &nv) == -1) return (errno); + + nv.data = malloc(nv.len); + nv.size = nv.len; + if (ioctl(fd, com, &nv) == -1) { + error = errno; + free(nv.data); + return (error); + } + + nvl = nvlist_unpack(nv.data, nv.len, 0); + free(nv.data); + if (nvl == NULL) + return (EINVAL); + + *nvlp = nvl; return (0); } +int +nvmf_reconnect_params(int fd, nvlist_t **nvlp) +{ + return (nvmf_read_ioc_nv(fd, NVMF_RECONNECT_PARAMS, nvlp)); +} + int nvmf_reconnect_host(int fd, struct nvmf_qpair *admin_qp, u_int num_queues, struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata) { - struct nvmf_handoff_host hh; + struct nvmf_ioc_nv nv; u_int i; int error; - error = prepare_queues_for_handoff(&hh, admin_qp, num_queues, io_queues, + error = prepare_queues_for_handoff(&nv, admin_qp, num_queues, io_queues, cdata); if (error != 0) goto out; - if (ioctl(fd, NVMF_RECONNECT_HOST, &hh) == -1) + if (ioctl(fd, NVMF_RECONNECT_HOST, &nv) == -1) error = errno; - free(hh.io); + free(nv.data); out: for (i = 0; i < num_queues; i++) diff --git a/lib/libnvmf/nvmf_tcp.c b/lib/libnvmf/nvmf_tcp.c index 264a5bb154a..3f794b5d975 100644 --- a/lib/libnvmf/nvmf_tcp.c +++ b/lib/libnvmf/nvmf_tcp.c @@ -1129,22 +1129,19 @@ tcp_free_qpair(struct nvmf_qpair *nq) free(qp); } -static int -tcp_kernel_handoff_params(struct nvmf_qpair *nq, - struct nvmf_handoff_qpair_params *qparams) +static void +tcp_kernel_handoff_params(struct nvmf_qpair *nq, nvlist_t *nvl) { struct nvmf_tcp_qpair *qp = TQP(nq); - qparams->tcp.fd = qp->s; - qparams->tcp.rxpda = qp->rxpda; - qparams->tcp.txpda = qp->txpda; - qparams->tcp.header_digests = qp->header_digests; - qparams->tcp.data_digests = qp->data_digests; - qparams->tcp.maxr2t = qp->maxr2t; - qparams->tcp.maxh2cdata = qp->maxh2cdata; - qparams->tcp.max_icd = qp->max_icd; - - return (0); + nvlist_add_number(nvl, "fd", qp->s); + nvlist_add_number(nvl, "rxpda", qp->rxpda); + nvlist_add_number(nvl, "txpda", qp->txpda); + nvlist_add_bool(nvl, "header_digests", qp->header_digests); + nvlist_add_bool(nvl, "data_digests", qp->data_digests); + nvlist_add_number(nvl, "maxr2t", qp->maxr2t); + nvlist_add_number(nvl, "maxh2cdata", qp->maxh2cdata); + nvlist_add_number(nvl, "max_icd", qp->max_icd); } static struct nvmf_capsule * diff --git a/lib/libnvmf/nvmf_transport.c b/lib/libnvmf/nvmf_transport.c index 1a8505f2a99..fa3826b8c50 100644 --- a/lib/libnvmf/nvmf_transport.c +++ b/lib/libnvmf/nvmf_transport.c @@ -236,16 +236,27 @@ nvmf_send_controller_data(const struct nvmf_capsule *nc, const void *buf, } int -nvmf_kernel_handoff_params(struct nvmf_qpair *qp, - struct nvmf_handoff_qpair_params *qparams) +nvmf_kernel_handoff_params(struct nvmf_qpair *qp, nvlist_t **nvlp) { - memset(qparams, 0, sizeof(*qparams)); - qparams->admin = qp->nq_admin; - qparams->sq_flow_control = qp->nq_flow_control; - qparams->qsize = qp->nq_qsize; - qparams->sqhd = qp->nq_sqhd; - qparams->sqtail = qp->nq_sqtail; - return (qp->nq_association->na_ops->kernel_handoff_params(qp, qparams)); + nvlist_t *nvl; + int error; + + nvl = nvlist_create(0); + nvlist_add_bool(nvl, "admin", qp->nq_admin); + nvlist_add_bool(nvl, "sq_flow_control", qp->nq_flow_control); + nvlist_add_number(nvl, "qsize", qp->nq_qsize); + nvlist_add_number(nvl, "sqhd", qp->nq_sqhd); + if (!qp->nq_association->na_controller) + nvlist_add_number(nvl, "sqtail", qp->nq_sqtail); + qp->nq_association->na_ops->kernel_handoff_params(qp, nvl); + error = nvlist_error(nvl); + if (error != 0) { + nvlist_destroy(nvl); + return (error); + } + + *nvlp = nvl; + return (0); } const char * @@ -267,3 +278,21 @@ nvmf_transport_type(uint8_t trtype) return (buf); } } + +int +nvmf_pack_ioc_nvlist(struct nvmf_ioc_nv *nv, nvlist_t *nvl) +{ + int error; + + memset(nv, 0, sizeof(*nv)); + + error = nvlist_error(nvl); + if (error) + return (error); + + nv->data = nvlist_pack(nvl, &nv->size); + if (nv->data == NULL) + return (ENOMEM); + + return (0); +} diff --git a/rescue/rescue/Makefile b/rescue/rescue/Makefile index 4474a0af050..797daf3d2f1 100644 --- a/rescue/rescue/Makefile +++ b/rescue/rescue/Makefile @@ -143,7 +143,7 @@ CRUNCH_PROGS_usr.sbin+= zdb # CRUNCH_PROGS+= devd CRUNCH_LIBS+= -l80211 -lalias -lcam -lncursesw -ldevstat -lipsec -llzma -CRUNCH_LIBS_camcontrol+= ${LIBNVMF} +CRUNCH_LIBS_camcontrol+= ${LIBNVMF} ${LIBNV} .if ${MK_ZFS} != "no" CRUNCH_LIBS+= -lavl -lpthread -luutil -lumem -ltpool -lspl -lrt CRUNCH_LIBS_zfs+= ${LIBBE} \ diff --git a/sbin/nvmecontrol/reconnect.c b/sbin/nvmecontrol/reconnect.c index b606409eea9..4c9277bd34c 100644 --- a/sbin/nvmecontrol/reconnect.c +++ b/sbin/nvmecontrol/reconnect.c @@ -5,6 +5,7 @@ * Written by: John Baldwin */ +#include #include #include #include @@ -60,7 +61,7 @@ reconnect_nvm_controller(int fd, enum nvmf_trtype trtype, int adrfam, { struct nvme_controller_data cdata; struct nvmf_association_params aparams; - struct nvmf_reconnect_params rparams; + nvlist_t *rparams; struct nvmf_qpair *admin, **io; int error; @@ -70,6 +71,13 @@ reconnect_nvm_controller(int fd, enum nvmf_trtype trtype, int adrfam, return (EX_IOERR); } + if (!nvlist_exists_number(rparams, "cntlid") || + !nvlist_exists_string(rparams, "subnqn")) { + nvlist_destroy(rparams); + warnx("Missing required reconnect parameters"); + return (EX_IOERR); + } + memset(&aparams, 0, sizeof(aparams)); aparams.sq_flow_control = opt.flow_control; switch (trtype) { @@ -77,18 +85,22 @@ reconnect_nvm_controller(int fd, enum nvmf_trtype trtype, int adrfam, tcp_association_params(&aparams); break; default: + nvlist_destroy(rparams); warnx("Unsupported transport %s", nvmf_transport_type(trtype)); return (EX_UNAVAILABLE); } io = calloc(opt.num_io_queues, sizeof(*io)); error = connect_nvm_queues(&aparams, trtype, adrfam, address, port, - rparams.cntlid, rparams.subnqn, opt.hostnqn, opt.kato, &admin, io, - opt.num_io_queues, opt.queue_size, &cdata); + nvlist_get_number(rparams, "cntlid"), + nvlist_get_string(rparams, "subnqn"), opt.hostnqn, opt.kato, + &admin, io, opt.num_io_queues, opt.queue_size, &cdata); if (error != 0) { free(io); + nvlist_destroy(rparams); return (error); } + nvlist_destroy(rparams); error = nvmf_reconnect_host(fd, admin, opt.num_io_queues, io, &cdata); if (error != 0) { diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk index 588291d8ec9..786ad9a6f9a 100644 --- a/share/mk/src.libnames.mk +++ b/share/mk/src.libnames.mk @@ -346,6 +346,7 @@ _DP_mp= crypto _DP_memstat= kvm _DP_magic= z _DP_mt= sbuf bsdxml +_DP_nvmf= nv _DP_ldns= ssl crypto _DP_lua= m _DP_lutok= lua diff --git a/sys/cam/ctl/ctl_ioctl.h b/sys/cam/ctl/ctl_ioctl.h index 326e4c931f9..c7070b63eb0 100644 --- a/sys/cam/ctl/ctl_ioctl.h +++ b/sys/cam/ctl/ctl_ioctl.h @@ -800,7 +800,7 @@ struct ctl_nvmf_terminate_params { }; union ctl_nvmf_data { - struct nvmf_handoff_controller_qpair handoff; + struct nvmf_ioc_nv handoff; struct ctl_nvmf_list_params list; struct ctl_nvmf_terminate_params terminate; }; diff --git a/sys/dev/nvmf/controller/ctl_frontend_nvmf.c b/sys/dev/nvmf/controller/ctl_frontend_nvmf.c index 8a4538e5056..75b36b4834f 100644 --- a/sys/dev/nvmf/controller/ctl_frontend_nvmf.c +++ b/sys/dev/nvmf/controller/ctl_frontend_nvmf.c @@ -923,29 +923,55 @@ nvmft_port_remove(struct ctl_req *req) static void nvmft_handoff(struct ctl_nvmf *cn) { - struct nvmf_fabric_connect_cmd cmd; - struct nvmf_handoff_controller_qpair *handoff; - struct nvmf_fabric_connect_data *data; + const struct nvmf_fabric_connect_cmd *cmd; + const struct nvmf_fabric_connect_data *data; + const nvlist_t *params; struct nvmft_port *np; + nvlist_t *nvl; + size_t len; + enum nvmf_trtype trtype; int error; np = NULL; - data = NULL; - handoff = &cn->data.handoff; - error = copyin(handoff->cmd, &cmd, sizeof(cmd)); + error = nvmf_unpack_ioc_nvlist(&cn->data.handoff, &nvl); if (error != 0) { cn->status = CTL_NVMF_ERROR; snprintf(cn->error_str, sizeof(cn->error_str), - "Failed to copyin CONNECT SQE"); + "Failed to copyin and unpack handoff arguments"); return; } - data = malloc(sizeof(*data), M_NVMFT, M_WAITOK); - error = copyin(handoff->data, data, sizeof(*data)); - if (error != 0) { + if (!nvlist_exists_number(nvl, "trtype") || + !nvlist_exists_nvlist(nvl, "params") || + !nvlist_exists_binary(nvl, "cmd") || + !nvlist_exists_binary(nvl, "data")) { cn->status = CTL_NVMF_ERROR; snprintf(cn->error_str, sizeof(cn->error_str), - "Failed to copyin CONNECT data"); + "Handoff arguments missing required value"); + goto out; + } + + params = nvlist_get_nvlist(nvl, "params"); + if (!nvmf_validate_qpair_nvlist(params, true)) { + cn->status = CTL_NVMF_ERROR; + snprintf(cn->error_str, sizeof(cn->error_str), + "Invalid queue pair parameters"); + goto out; + } + + cmd = nvlist_get_binary(nvl, "cmd", &len); + if (len != sizeof(*cmd)) { + cn->status = CTL_NVMF_ERROR; + snprintf(cn->error_str, sizeof(cn->error_str), + "Wrong size for CONNECT SQE"); + goto out; + } + + data = nvlist_get_binary(nvl, "data", &len); + if (len != sizeof(*data)) { + cn->status = CTL_NVMF_ERROR; + snprintf(cn->error_str, sizeof(cn->error_str), + "Wrong size for CONNECT data"); goto out; } @@ -976,8 +1002,10 @@ nvmft_handoff(struct ctl_nvmf *cn) nvmft_port_ref(np); sx_sunlock(&nvmft_ports_lock); - if (handoff->params.admin) { - error = nvmft_handoff_admin_queue(np, handoff, &cmd, data); + trtype = nvlist_get_number(nvl, "trtype"); + if (nvlist_get_bool(params, "admin")) { + error = nvmft_handoff_admin_queue(np, trtype, params, cmd, + data); if (error != 0) { cn->status = CTL_NVMF_ERROR; snprintf(cn->error_str, sizeof(cn->error_str), @@ -985,7 +1013,7 @@ nvmft_handoff(struct ctl_nvmf *cn) goto out; } } else { - error = nvmft_handoff_io_queue(np, handoff, &cmd, data); + error = nvmft_handoff_io_queue(np, trtype, params, cmd, data); if (error != 0) { cn->status = CTL_NVMF_ERROR; snprintf(cn->error_str, sizeof(cn->error_str), @@ -998,7 +1026,7 @@ nvmft_handoff(struct ctl_nvmf *cn) out: if (np != NULL) nvmft_port_rele(np); - free(data, M_NVMFT); + nvlist_destroy(nvl); } static void diff --git a/sys/dev/nvmf/controller/nvmft_controller.c b/sys/dev/nvmf/controller/nvmft_controller.c index 3c10fea75c9..83a156d9b92 100644 --- a/sys/dev/nvmf/controller/nvmft_controller.c +++ b/sys/dev/nvmf/controller/nvmft_controller.c @@ -107,9 +107,8 @@ nvmft_keep_alive_timer(void *arg) } int -nvmft_handoff_admin_queue(struct nvmft_port *np, - const struct nvmf_handoff_controller_qpair *handoff, - const struct nvmf_fabric_connect_cmd *cmd, +nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype, + const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd, const struct nvmf_fabric_connect_data *data) { struct nvmft_controller *ctrlr; @@ -120,8 +119,7 @@ nvmft_handoff_admin_queue(struct nvmft_port *np, if (cmd->qid != htole16(0)) return (EINVAL); - qp = nvmft_qpair_init(handoff->trtype, &handoff->params, 0, - "admin queue"); + qp = nvmft_qpair_init(trtype, params, 0, "admin queue"); if (qp == NULL) { printf("NVMFT: Failed to setup admin queue from %.*s\n", (int)sizeof(data->hostnqn), data->hostnqn); @@ -151,7 +149,7 @@ nvmft_handoff_admin_queue(struct nvmft_port *np, nvmft_printf(ctrlr, "associated with %.*s\n", (int)sizeof(data->hostnqn), data->hostnqn); ctrlr->admin = qp; - ctrlr->trtype = handoff->trtype; + ctrlr->trtype = trtype; /* * The spec requires a non-zero KeepAlive timer, but allow a @@ -175,9 +173,8 @@ nvmft_handoff_admin_queue(struct nvmft_port *np, } int -nvmft_handoff_io_queue(struct nvmft_port *np, - const struct nvmf_handoff_controller_qpair *handoff, - const struct nvmf_fabric_connect_cmd *cmd, +nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype, + const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd, const struct nvmf_fabric_connect_data *data) { struct nvmft_controller *ctrlr; @@ -191,7 +188,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np, cntlid = le16toh(data->cntlid); snprintf(name, sizeof(name), "I/O queue %u", qid); - qp = nvmft_qpair_init(handoff->trtype, &handoff->params, qid, name); + qp = nvmft_qpair_init(trtype, params, qid, name); if (qp == NULL) { printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid, (int)sizeof(data->hostnqn), data->hostnqn); @@ -235,7 +232,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np, return (EINVAL); } - /* XXX: Require handoff->trtype == ctrlr->trtype? */ + /* XXX: Require trtype == ctrlr->trtype? */ mtx_lock(&ctrlr->lock); if (ctrlr->shutdown) { diff --git a/sys/dev/nvmf/controller/nvmft_qpair.c b/sys/dev/nvmf/controller/nvmft_qpair.c index e66d98f3822..73c7bb28078 100644 --- a/sys/dev/nvmf/controller/nvmft_qpair.c +++ b/sys/dev/nvmf/controller/nvmft_qpair.c @@ -31,7 +31,6 @@ struct nvmft_qpair { uint16_t qid; u_int qsize; uint16_t sqhd; - uint16_t sqtail; volatile u_int qp_refs; /* Internal references on 'qp'. */ struct task datamove_task; @@ -102,26 +101,24 @@ nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc) } struct nvmft_qpair * -nvmft_qpair_init(enum nvmf_trtype trtype, - const struct nvmf_handoff_qpair_params *handoff, uint16_t qid, +nvmft_qpair_init(enum nvmf_trtype trtype, const nvlist_t *params, uint16_t qid, const char *name) { struct nvmft_qpair *qp; qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO); - qp->admin = handoff->admin; - qp->sq_flow_control = handoff->sq_flow_control; - qp->qsize = handoff->qsize; + qp->admin = nvlist_get_bool(params, "admin"); + qp->sq_flow_control = nvlist_get_bool(params, "sq_flow_control"); + qp->qsize = nvlist_get_number(params, "qsize"); qp->qid = qid; - qp->sqhd = handoff->sqhd; - qp->sqtail = handoff->sqtail; + qp->sqhd = nvlist_get_number(params, "sqhd"); strlcpy(qp->name, name, sizeof(qp->name)); mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF); qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO); STAILQ_INIT(&qp->datamove_queue); TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp); - qp->qp = nvmf_allocate_qpair(trtype, true, handoff, nvmft_qpair_error, + qp->qp = nvmf_allocate_qpair(trtype, true, params, nvmft_qpair_error, qp, nvmft_receive_capsule, qp); if (qp->qp == NULL) { mtx_destroy(&qp->lock); diff --git a/sys/dev/nvmf/controller/nvmft_var.h b/sys/dev/nvmf/controller/nvmft_var.h index 4fda297c8a8..7a1748d5999 100644 --- a/sys/dev/nvmf/controller/nvmft_var.h +++ b/sys/dev/nvmf/controller/nvmft_var.h @@ -9,6 +9,7 @@ #define __NVMFT_VAR_H__ #include +#include #include #include @@ -125,20 +126,18 @@ void nvmft_handle_admin_command(struct nvmft_controller *ctrlr, void nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid, struct nvmf_capsule *nc); int nvmft_handoff_admin_queue(struct nvmft_port *np, - const struct nvmf_handoff_controller_qpair *handoff, + enum nvmf_trtype trtype, const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd, const struct nvmf_fabric_connect_data *data); -int nvmft_handoff_io_queue(struct nvmft_port *np, - const struct nvmf_handoff_controller_qpair *handoff, - const struct nvmf_fabric_connect_cmd *cmd, +int nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype, + const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd, const struct nvmf_fabric_connect_data *data); int nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...) __printflike(2, 3); /* nvmft_qpair.c */ struct nvmft_qpair *nvmft_qpair_init(enum nvmf_trtype trtype, - const struct nvmf_handoff_qpair_params *handoff, uint16_t qid, - const char *name); + const nvlist_t *params, uint16_t qid, const char *name); void nvmft_qpair_shutdown(struct nvmft_qpair *qp); void nvmft_qpair_destroy(struct nvmft_qpair *qp); struct nvmft_controller *nvmft_qpair_ctrlr(struct nvmft_qpair *qp); diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c index c726e36e1fa..09d5cecdfad 100644 --- a/sys/dev/nvmf/host/nvmf.c +++ b/sys/dev/nvmf/host/nvmf.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -196,90 +198,100 @@ nvmf_send_keep_alive(void *arg) } int -nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh) +nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp) { - size_t len; - u_int i; + const nvlist_t *const *io; + const nvlist_t *admin; + nvlist_t *nvl; + size_t i, num_io_queues; + uint32_t qsize; int error; - memset(ivars, 0, sizeof(*ivars)); - - if (!hh->admin.admin || hh->num_io_queues < 1) - return (EINVAL); - - ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK); - error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata)); + error = nvmf_unpack_ioc_nvlist(nv, &nvl); if (error != 0) - goto out; - nvme_controller_data_swapbytes(ivars->cdata); + return (error); - len = hh->num_io_queues * sizeof(*ivars->io_params); - ivars->io_params = malloc(len, M_NVMF, M_WAITOK); - error = copyin(hh->io, ivars->io_params, len); - if (error != 0) - goto out; - for (i = 0; i < hh->num_io_queues; i++) { - if (ivars->io_params[i].admin) { - error = EINVAL; - goto out; - } + if (!nvlist_exists_number(nvl, "trtype") || + !nvlist_exists_nvlist(nvl, "admin") || + !nvlist_exists_nvlist_array(nvl, "io") || + !nvlist_exists_binary(nvl, "cdata")) + goto invalid; - /* Require all I/O queues to be the same size. */ - if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) { - error = EINVAL; - goto out; - } + admin = nvlist_get_nvlist(nvl, "admin"); + if (!nvmf_validate_qpair_nvlist(admin, false)) + goto invalid; + if (!nvlist_get_bool(admin, "admin")) + goto invalid; + + io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues); + if (num_io_queues < 1) + goto invalid; + for (i = 0; i < num_io_queues; i++) { + if (!nvmf_validate_qpair_nvlist(io[i], false)) + goto invalid; } - ivars->hh = hh; + /* Require all I/O queues to be the same size. */ + qsize = nvlist_get_number(io[0], "qsize"); + for (i = 1; i < num_io_queues; i++) { + if (nvlist_get_number(io[i], "qsize") != qsize) + goto invalid; + } + + nvlist_get_binary(nvl, "cdata", &i); + if (i != sizeof(struct nvme_controller_data)) + goto invalid; + + *nvlp = nvl; return (0); - -out: - free(ivars->io_params, M_NVMF); - free(ivars->cdata, M_NVMF); - return (error); -} - -void -nvmf_free_ivars(struct nvmf_ivars *ivars) -{ - free(ivars->io_params, M_NVMF); - free(ivars->cdata, M_NVMF); +invalid: + nvlist_destroy(nvl); + return (EINVAL); } static int nvmf_probe(device_t dev) { - struct nvmf_ivars *ivars = device_get_ivars(dev); + const nvlist_t *nvl = device_get_ivars(dev); + const struct nvme_controller_data *cdata; - if (ivars == NULL) + if (nvl == NULL) return (ENXIO); - device_set_descf(dev, "Fabrics: %.256s", ivars->cdata->subnqn); + cdata = nvlist_get_binary(nvl, "cdata", NULL); + device_set_descf(dev, "Fabrics: %.256s", cdata->subnqn); return (BUS_PROBE_DEFAULT); } static int -nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars) +nvmf_establish_connection(struct nvmf_softc *sc, const nvlist_t *nvl) { + const nvlist_t *const *io; + const nvlist_t *admin; + uint64_t kato; + size_t num_io_queues; + enum nvmf_trtype trtype; char name[16]; + trtype = nvlist_get_number(nvl, "trtype"); + admin = nvlist_get_nvlist(nvl, "admin"); + io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues); + kato = dnvlist_get_number(nvl, "kato", 0); + /* Setup the admin queue. */ - sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin, - "admin queue", 0); + sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0); if (sc->admin == NULL) { device_printf(sc->dev, "Failed to setup admin queue\n"); return (ENXIO); } /* Setup I/O queues. */ - sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF, + sc->io = malloc(num_io_queues * sizeof(*sc->io), M_NVMF, M_WAITOK | M_ZERO); - sc->num_io_queues = ivars->hh->num_io_queues; + sc->num_io_queues = num_io_queues; for (u_int i = 0; i < sc->num_io_queues; i++) { snprintf(name, sizeof(name), "I/O queue %u", i); - sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype, - &ivars->io_params[i], name, i); + sc->io[i] = nvmf_init_qp(sc, trtype, io[i], name, i); if (sc->io[i] == NULL) { device_printf(sc->dev, "Failed to setup I/O queue %u\n", i + 1); @@ -288,10 +300,10 @@ nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars) } /* Start KeepAlive timers. */ - if (ivars->hh->kato != 0) { + if (kato != 0) { sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS, sc->cdata->ctratt) != 0; - sc->ka_rx_sbt = mstosbt(ivars->hh->kato); + sc->ka_rx_sbt = mstosbt(kato); sc->ka_tx_sbt = sc->ka_rx_sbt / 2; callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, nvmf_check_keep_alive, sc, C_HARDCLOCK); @@ -299,6 +311,9 @@ nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars) nvmf_send_keep_alive, sc, C_HARDCLOCK); } + memcpy(sc->cdata, nvlist_get_binary(nvl, "cdata", NULL), + sizeof(*sc->cdata)); + return (0); } @@ -452,17 +467,18 @@ nvmf_attach(device_t dev) { struct make_dev_args mda; struct nvmf_softc *sc = device_get_softc(dev); - struct nvmf_ivars *ivars = device_get_ivars(dev); + const nvlist_t *nvl = device_get_ivars(dev); + const nvlist_t * const *io; struct sysctl_oid *oid; uint64_t val; u_int i; int error; - if (ivars == NULL) + if (nvl == NULL) return (ENXIO); sc->dev = dev; - sc->trtype = ivars->hh->trtype; + sc->trtype = nvlist_get_number(nvl, "trtype"); callout_init(&sc->ka_rx_timer, 1); callout_init(&sc->ka_tx_timer, 1); sx_init(&sc->connection_lock, "nvmf connection"); @@ -473,13 +489,11 @@ nvmf_attach(device_t dev) CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues"); sc->ioq_oid_list = SYSCTL_CHILDREN(oid); - /* Claim the cdata pointer from ivars. */ - sc->cdata = ivars->cdata; - ivars->cdata = NULL; + sc->cdata = malloc(sizeof(*sc->cdata), M_NVMF, M_WAITOK); nvmf_init_aer(sc); - error = nvmf_establish_connection(sc, ivars); + error = nvmf_establish_connection(sc, nvl); if (error != 0) goto out; @@ -506,7 +520,9 @@ nvmf_attach(device_t dev) NVME_CAP_HI_MPSMIN(sc->cap >> 32))); } - sc->max_pending_io = ivars->io_params[0].qsize * sc->num_io_queues; + io = nvlist_get_nvlist_array(nvl, "io", NULL); + sc->max_pending_io = nvlist_get_number(io[0], "qsize") * + sc->num_io_queues; error = nvmf_init_sim(sc); if (error != 0) @@ -641,23 +657,24 @@ nvmf_disconnect_task(void *arg, int pending __unused) } static int -nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh) +nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv) { - struct nvmf_ivars ivars; + const struct nvme_controller_data *cdata; + nvlist_t *nvl; u_int i; int error; + error = nvmf_copyin_handoff(nv, &nvl); + if (error != 0) + return (error); + /* XXX: Should we permit changing the transport type? */ - if (sc->trtype != hh->trtype) { + if (sc->trtype != nvlist_get_number(nvl, "trtype")) { device_printf(sc->dev, "transport type mismatch on reconnect\n"); return (EINVAL); } - error = nvmf_init_ivars(&ivars, hh); - if (error != 0) - return (error); - sx_xlock(&sc->connection_lock); if (sc->admin != NULL || sc->detaching) { error = EBUSY; @@ -671,8 +688,9 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh) * ensures the new association is connected to the same NVMe * subsystem. */ - if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn, - sizeof(ivars.cdata->subnqn)) != 0) { + cdata = nvlist_get_binary(nvl, "cdata", NULL); + if (memcmp(sc->cdata->subnqn, cdata->subnqn, + sizeof(cdata->subnqn)) != 0) { device_printf(sc->dev, "controller subsystem NQN mismatch on reconnect\n"); error = EINVAL; @@ -684,7 +702,7 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh) * max_pending_io is still correct? */ - error = nvmf_establish_connection(sc, &ivars); + error = nvmf_establish_connection(sc, nvl); if (error != 0) goto out; @@ -706,7 +724,7 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh) nvmf_rescan_all_ns(sc); out: sx_xunlock(&sc->connection_lock); - nvmf_free_ivars(&ivars); + nvlist_destroy(nvl); return (error); } @@ -1020,6 +1038,27 @@ error: return (error); } +static int +nvmf_reconnect_params(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv) +{ + nvlist_t *nvl; + int error; + + nvl = nvlist_create(0); + + sx_slock(&sc->connection_lock); + if ((sc->cdata->fcatt & 1) == 0) + nvlist_add_number(nvl, "cntlid", NVMF_CNTLID_DYNAMIC); + else + nvlist_add_number(nvl, "cntlid", sc->cdata->ctrlr_id); + nvlist_add_stringf(nvl, "subnqn", "%.256s", sc->cdata->subnqn); + sx_sunlock(&sc->connection_lock); + + error = nvmf_pack_ioc_nvlist(nvl, nv); + nvlist_destroy(nvl); + return (error); +} + static int nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, struct thread *td) @@ -1027,8 +1066,7 @@ nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, struct nvmf_softc *sc = cdev->si_drv1; struct nvme_get_nsid *gnsid; struct nvme_pt_command *pt; - struct nvmf_reconnect_params *rp; - struct nvmf_handoff_host *hh; + struct nvmf_ioc_nv *nv; switch (cmd) { case NVME_PASSTHROUGH_CMD: @@ -1044,16 +1082,11 @@ nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, *(uint64_t *)arg = sc->max_xfer_size; return (0); case NVMF_RECONNECT_PARAMS: - rp = (struct nvmf_reconnect_params *)arg; - if ((sc->cdata->fcatt & 1) == 0) - rp->cntlid = NVMF_CNTLID_DYNAMIC; - else - rp->cntlid = sc->cdata->ctrlr_id; - memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn)); - return (0); + nv = (struct nvmf_ioc_nv *)arg; + return (nvmf_reconnect_params(sc, nv)); case NVMF_RECONNECT_HOST: - hh = (struct nvmf_handoff_host *)arg; - return (nvmf_reconnect_host(sc, hh)); + nv = (struct nvmf_ioc_nv *)arg; + return (nvmf_reconnect_host(sc, nv)); default: return (ENOTTY); } diff --git a/sys/dev/nvmf/host/nvmf_ctldev.c b/sys/dev/nvmf/host/nvmf_ctldev.c index f40005a2a66..bc79bd99f63 100644 --- a/sys/dev/nvmf/host/nvmf_ctldev.c +++ b/sys/dev/nvmf/host/nvmf_ctldev.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -17,13 +18,13 @@ static struct cdev *nvmf_cdev; static int -nvmf_handoff_host(struct nvmf_handoff_host *hh) +nvmf_handoff_host(struct nvmf_ioc_nv *nv) { - struct nvmf_ivars ivars; + nvlist_t *nvl; device_t dev; int error; - error = nvmf_init_ivars(&ivars, hh); + error = nvmf_copyin_handoff(nv, &nvl); if (error != 0) return (error); @@ -35,7 +36,7 @@ nvmf_handoff_host(struct nvmf_handoff_host *hh) goto out; } - device_set_ivars(dev, &ivars); + device_set_ivars(dev, nvl); error = device_probe_and_attach(dev); device_set_ivars(dev, NULL); if (error != 0) @@ -43,7 +44,7 @@ nvmf_handoff_host(struct nvmf_handoff_host *hh) bus_topo_unlock(); out: - nvmf_free_ivars(&ivars); + nvlist_destroy(nvl); return (error); } @@ -117,7 +118,7 @@ nvmf_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, { switch (cmd) { case NVMF_HANDOFF_HOST: - return (nvmf_handoff_host((struct nvmf_handoff_host *)arg)); + return (nvmf_handoff_host((struct nvmf_ioc_nv *)arg)); case NVMF_DISCONNECT_HOST: return (nvmf_disconnect_host((const char **)arg)); case NVMF_DISCONNECT_ALL: diff --git a/sys/dev/nvmf/host/nvmf_qpair.c b/sys/dev/nvmf/host/nvmf_qpair.c index b03ecfa081d..2f511cf0406 100644 --- a/sys/dev/nvmf/host/nvmf_qpair.c +++ b/sys/dev/nvmf/host/nvmf_qpair.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -282,17 +283,19 @@ nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp, struct nvmf_host_qpair * nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype, - struct nvmf_handoff_qpair_params *handoff, const char *name, u_int qid) + const nvlist_t *nvl, const char *name, u_int qid) { struct nvmf_host_command *cmd, *ncmd; struct nvmf_host_qpair *qp; u_int i; + bool admin; + admin = nvlist_get_bool(nvl, "admin"); qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO); qp->sc = sc; - qp->sq_flow_control = handoff->sq_flow_control; - qp->sqhd = handoff->sqhd; - qp->sqtail = handoff->sqtail; + qp->sq_flow_control = nvlist_get_bool(nvl, "sq_flow_control"); + qp->sqhd = nvlist_get_number(nvl, "sqhd"); + qp->sqtail = nvlist_get_number(nvl, "sqtail"); strlcpy(qp->name, name, sizeof(qp->name)); mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF); (void)sysctl_ctx_init(&qp->sysctl_ctx); @@ -301,8 +304,8 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype, * Allocate a spare command slot for each pending AER command * on the admin queue. */ - qp->num_commands = handoff->qsize - 1; - if (handoff->admin) + qp->num_commands = nvlist_get_number(nvl, "qsize") - 1; + if (admin) qp->num_commands += sc->num_aer; qp->active_commands = malloc(sizeof(*qp->active_commands) * @@ -315,8 +318,8 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype, } STAILQ_INIT(&qp->pending_requests); - qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error, - qp, nvmf_receive_capsule, qp); + qp->qp = nvmf_allocate_qpair(trtype, false, nvl, nvmf_qp_error, qp, + nvmf_receive_capsule, qp); if (qp->qp == NULL) { (void)sysctl_ctx_free(&qp->sysctl_ctx); TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) { @@ -329,7 +332,7 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype, return (NULL); } - nvmf_sysctls_qp(sc, qp, handoff->admin, qid); + nvmf_sysctls_qp(sc, qp, admin, qid); return (qp); } diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h index e9f33207fea..14bdf4abb1b 100644 --- a/sys/dev/nvmf/host/nvmf_var.h +++ b/sys/dev/nvmf/host/nvmf_var.h @@ -12,6 +12,7 @@ #include #include #include +//#include #include #include #include @@ -27,12 +28,6 @@ struct sysctl_oid_list; typedef void nvmf_request_complete_t(void *, const struct nvme_completion *); -struct nvmf_ivars { - struct nvmf_handoff_host *hh; - struct nvmf_handoff_qpair_params *io_params; - struct nvme_controller_data *cdata; -}; - struct nvmf_softc { device_t dev; @@ -156,8 +151,7 @@ extern bool nvmf_fail_disconnect; void nvmf_complete(void *arg, const struct nvme_completion *cqe); void nvmf_io_complete(void *arg, size_t xfered, int error); void nvmf_wait_for_reply(struct nvmf_completion_status *status); -int nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh); -void nvmf_free_ivars(struct nvmf_ivars *ivars); +int nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp); void nvmf_disconnect(struct nvmf_softc *sc); void nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid); void nvmf_rescan_all_ns(struct nvmf_softc *sc); @@ -203,8 +197,7 @@ bool nvmf_update_ns(struct nvmf_namespace *ns, /* nvmf_qpair.c */ struct nvmf_host_qpair *nvmf_init_qp(struct nvmf_softc *sc, - enum nvmf_trtype trtype, struct nvmf_handoff_qpair_params *handoff, - const char *name, u_int qid); + enum nvmf_trtype trtype, const nvlist_t *nvl, const char *name, u_int qid); void nvmf_shutdown_qp(struct nvmf_host_qpair *qp); void nvmf_destroy_qp(struct nvmf_host_qpair *qp); struct nvmf_request *nvmf_allocate_request(struct nvmf_host_qpair *qp, diff --git a/sys/dev/nvmf/nvmf.h b/sys/dev/nvmf/nvmf.h index 1f1ecd437c7..316bd80d493 100644 --- a/sys/dev/nvmf/nvmf.h +++ b/sys/dev/nvmf/nvmf.h @@ -26,54 +26,76 @@ #define NVMF_NN (1024) -struct nvmf_handoff_qpair_params { - bool admin; - bool sq_flow_control; - u_int qsize; - uint16_t sqhd; - uint16_t sqtail; /* host only */ - union { - struct { - int fd; - uint8_t rxpda; - uint8_t txpda; - bool header_digests; - bool data_digests; - uint32_t maxr2t; - uint32_t maxh2cdata; - uint32_t max_icd; - } tcp; - }; +/* + * (data, size) is the userspace buffer for a packed nvlist. + * + * For requests that copyout an nvlist, len is the amount of data + * copied out to *data. If size is zero, no data is copied and len is + * set to the required buffer size. + */ +struct nvmf_ioc_nv { + void *data; + size_t len; + size_t size; }; -struct nvmf_handoff_host { - u_int trtype; - u_int num_io_queues; - u_int kato; - struct nvmf_handoff_qpair_params admin; - struct nvmf_handoff_qpair_params *io; - const struct nvme_controller_data *cdata; -}; +/* + * The fields in a qpair handoff nvlist are: + * + * Transport independent: + * + * bool admin + * bool sq_flow_control + * number qsize + * number sqhd + * number sqtail host only + * + * TCP transport: + * + * number fd + * number rxpda + * number txpda + * bool header_digests + * bool data_digests + * number maxr2t + * number maxh2cdata + * number max_icd + */ -struct nvmf_reconnect_params { - uint16_t cntlid; - char subnqn[256]; -}; +/* + * The fields in the nvlist for NVMF_HANDOFF_HOST and + * NVMF_RECONNECT_HOST are: + * + * number trtype + * number kato (optional) + * qpair handoff nvlist admin + * qpair handoff nvlist array io + * binary cdata struct nvme_controller_data + */ -struct nvmf_handoff_controller_qpair { - u_int trtype; - struct nvmf_handoff_qpair_params params; - const struct nvmf_fabric_connect_cmd *cmd; - const struct nvmf_fabric_connect_data *data; -}; +/* + * The fields in the nvlist for NVMF_RECONNECT_PARAMS are: + * + * number cntlid + * string subnqn + */ + +/* + * The fields in the nvlist for handing off a controller qpair are: + * + * number trtype + * qpair handoff nvlist params + * binary cmd struct nvmf_fabric_connect_cmd + * binary data struct nvmf_fabric_connect_data + */ /* Operations on /dev/nvmf */ -#define NVMF_HANDOFF_HOST _IOW('n', 200, struct nvmf_handoff_host) +#define NVMF_HANDOFF_HOST _IOW('n', 200, struct nvmf_ioc_nv) #define NVMF_DISCONNECT_HOST _IOW('n', 201, const char *) #define NVMF_DISCONNECT_ALL _IO('n', 202) /* Operations on /dev/nvmeX */ -#define NVMF_RECONNECT_PARAMS _IOR('n', 203, struct nvmf_reconnect_params) -#define NVMF_RECONNECT_HOST _IOW('n', 204, struct nvmf_handoff_host) +#define NVMF_RECONNECT_PARAMS _IOWR('n', 203, struct nvmf_ioc_nv) +#define NVMF_RECONNECT_HOST _IOW('n', 204, struct nvmf_ioc_nv) #endif /* !__NVMF_H__ */ diff --git a/sys/dev/nvmf/nvmf_tcp.c b/sys/dev/nvmf/nvmf_tcp.c index 50adbfdd291..6ad5229f604 100644 --- a/sys/dev/nvmf/nvmf_tcp.c +++ b/sys/dev/nvmf/nvmf_tcp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -1413,8 +1414,7 @@ nvmf_soupcall_send(struct socket *so, void *arg, int waitflag) } static struct nvmf_qpair * -tcp_allocate_qpair(bool controller, - const struct nvmf_handoff_qpair_params *params) +tcp_allocate_qpair(bool controller, const nvlist_t *nvl) { struct nvmf_tcp_qpair *qp; struct socket *so; @@ -1422,8 +1422,18 @@ tcp_allocate_qpair(bool controller, cap_rights_t rights; int error; - error = fget(curthread, params->tcp.fd, cap_rights_init_one(&rights, - CAP_SOCK_CLIENT), &fp); + if (!nvlist_exists_number(nvl, "fd") || + !nvlist_exists_number(nvl, "rxpda") || + !nvlist_exists_number(nvl, "txpda") || + !nvlist_exists_bool(nvl, "header_digests") || + !nvlist_exists_bool(nvl, "data_digests") || + !nvlist_exists_number(nvl, "maxr2t") || + !nvlist_exists_number(nvl, "maxh2cdata") || + !nvlist_exists_number(nvl, "max_icd")) + return (NULL); + + error = fget(curthread, nvlist_get_number(nvl, "fd"), + cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp); if (error != 0) return (NULL); if (fp->f_type != DTYPE_SOCKET) { @@ -1445,26 +1455,28 @@ tcp_allocate_qpair(bool controller, qp = malloc(sizeof(*qp), M_NVMF_TCP, M_WAITOK | M_ZERO); qp->so = so; refcount_init(&qp->refs, 1); - qp->txpda = params->tcp.txpda; - qp->rxpda = params->tcp.rxpda; - qp->header_digests = params->tcp.header_digests; - qp->data_digests = params->tcp.data_digests; - qp->maxr2t = params->tcp.maxr2t; + qp->txpda = nvlist_get_number(nvl, "txpda"); + qp->rxpda = nvlist_get_number(nvl, "rxpda"); + qp->header_digests = nvlist_get_bool(nvl, "header_digests"); + qp->data_digests = nvlist_get_bool(nvl, "data_digests"); + qp->maxr2t = nvlist_get_number(nvl, "maxr2t"); if (controller) - qp->maxh2cdata = params->tcp.maxh2cdata; + qp->maxh2cdata = nvlist_get_number(nvl, "maxh2cdata"); qp->max_tx_data = tcp_max_transmit_data; if (!controller) { - qp->max_tx_data = min(qp->max_tx_data, params->tcp.maxh2cdata); - qp->max_icd = params->tcp.max_icd; + qp->max_tx_data = min(qp->max_tx_data, + nvlist_get_number(nvl, "maxh2cdata")); + qp->max_icd = nvlist_get_number(nvl, "max_icd"); } if (controller) { /* Use the SUCCESS flag if SQ flow control is disabled. */ - qp->send_success = !params->sq_flow_control; + qp->send_success = !nvlist_get_bool(nvl, "sq_flow_control"); /* NB: maxr2t is 0's based. */ qp->num_ttags = MIN((u_int)UINT16_MAX + 1, - (uint64_t)params->qsize * ((uint64_t)qp->maxr2t + 1)); + nvlist_get_number(nvl, "qsize") * + ((uint64_t)qp->maxr2t + 1)); qp->open_ttags = mallocarray(qp->num_ttags, sizeof(*qp->open_ttags), M_NVMF_TCP, M_WAITOK | M_ZERO); } diff --git a/sys/dev/nvmf/nvmf_transport.c b/sys/dev/nvmf/nvmf_transport.c index 316d1571e61..1d3f5ea4cf6 100644 --- a/sys/dev/nvmf/nvmf_transport.c +++ b/sys/dev/nvmf/nvmf_transport.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -47,8 +48,7 @@ nvmf_supported_trtype(enum nvmf_trtype trtype) struct nvmf_qpair * nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller, - const struct nvmf_handoff_qpair_params *params, - nvmf_qpair_error_t *error_cb, void *error_cb_arg, + const nvlist_t *params, nvmf_qpair_error_t *error_cb, void *error_cb_arg, nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg) { struct nvmf_transport *nt; @@ -76,7 +76,7 @@ nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller, qp->nq_error_arg = error_cb_arg; qp->nq_receive = receive_cb; qp->nq_receive_arg = receive_cb_arg; - qp->nq_admin = params->admin; + qp->nq_admin = nvlist_get_bool(params, "admin"); return (qp); } @@ -230,6 +230,92 @@ nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset, len)); } +int +nvmf_pack_ioc_nvlist(const nvlist_t *nvl, struct nvmf_ioc_nv *nv) +{ + void *packed; + int error; + + error = nvlist_error(nvl); + if (error != 0) + return (error); + + if (nv->size == 0) { + nv->len = nvlist_size(nvl); + } else { + packed = nvlist_pack(nvl, &nv->len); + if (packed == NULL) + error = ENOMEM; + else if (nv->len > nv->size) + error = EFBIG; + else + error = copyout(packed, nv->data, nv->len); + free(packed, M_NVLIST); + } + return (error); +} + +int +nvmf_unpack_ioc_nvlist(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp) +{ + void *packed; + nvlist_t *nvl; + int error; + + packed = malloc(nv->size, M_NVMF_TRANSPORT, M_WAITOK); + error = copyin(nv->data, packed, nv->size); + if (error != 0) { + free(packed, M_NVMF_TRANSPORT); + return (error); + } + + nvl = nvlist_unpack(packed, nv->size, 0); + free(packed, M_NVMF_TRANSPORT); + if (nvl == NULL) + return (EINVAL); + + *nvlp = nvl; + return (0); +} + +bool +nvmf_validate_qpair_nvlist(const nvlist_t *nvl, bool controller) +{ + uint64_t value, qsize; + bool admin, valid; + + valid = true; + valid &= nvlist_exists_bool(nvl, "admin"); + valid &= nvlist_exists_bool(nvl, "sq_flow_control"); + valid &= nvlist_exists_number(nvl, "qsize"); + valid &= nvlist_exists_number(nvl, "sqhd"); + if (!controller) + valid &= nvlist_exists_number(nvl, "sqtail"); + if (!valid) + return (false); + + admin = nvlist_get_bool(nvl, "admin"); + qsize = nvlist_get_number(nvl, "qsize"); + if (admin) { + if (qsize < NVME_MIN_ADMIN_ENTRIES || + qsize > NVME_MAX_ADMIN_ENTRIES) + return (false); + } else { + if (qsize < NVME_MIN_IO_ENTRIES || qsize > NVME_MAX_IO_ENTRIES) + return (false); + } + value = nvlist_get_number(nvl, "sqhd"); + if (value > qsize - 1) + return (false); + if (!controller) { + value = nvlist_get_number(nvl, "sqtail"); + if (value > qsize - 1) + return (false); + } + + return (true); +} + int nvmf_transport_module_handler(struct module *mod, int what, void *arg) { diff --git a/sys/dev/nvmf/nvmf_transport.h b/sys/dev/nvmf/nvmf_transport.h index bbd830eba57..b192baeaccc 100644 --- a/sys/dev/nvmf/nvmf_transport.h +++ b/sys/dev/nvmf/nvmf_transport.h @@ -13,6 +13,7 @@ * (target) to send and receive capsules and associated data. */ +#include #include #include @@ -20,8 +21,8 @@ struct mbuf; struct memdesc; struct nvmf_capsule; struct nvmf_connection; +struct nvmf_ioc_nv; struct nvmf_qpair; -struct nvmf_handoff_qpair_params; SYSCTL_DECL(_kern_nvmf); @@ -54,7 +55,7 @@ typedef void nvmf_io_complete_t(void *, size_t, int); * independent. */ struct nvmf_qpair *nvmf_allocate_qpair(enum nvmf_trtype trtype, - bool controller, const struct nvmf_handoff_qpair_params *params, + bool controller, const nvlist_t *params, nvmf_qpair_error_t *error_cb, void *error_cb_arg, nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg); void nvmf_free_qpair(struct nvmf_qpair *qp); @@ -138,4 +139,23 @@ u_int nvmf_send_controller_data(struct nvmf_capsule *nc, #define NVMF_SUCCESS_SENT 0x100 #define NVMF_MORE 0x101 +/* Helper APIs for nvlists used in icotls. */ + +/* + * Pack the nvlist nvl and copyout to the buffer described by nv. + */ +int nvmf_pack_ioc_nvlist(const nvlist_t *nvl, struct nvmf_ioc_nv *nv); + +/* + * Copyin and unpack an nvlist described by nv. The unpacked nvlist + * is returned in *nvlp on success. + */ +int nvmf_unpack_ioc_nvlist(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp); + +/* + * Returns true if a qpair handoff nvlist has all the required + * transport-independent values. + */ +bool nvmf_validate_qpair_nvlist(const nvlist_t *nvl, bool controller); + #endif /* !__NVMF_TRANSPORT_H__ */ diff --git a/sys/dev/nvmf/nvmf_transport_internal.h b/sys/dev/nvmf/nvmf_transport_internal.h index 0be427ee069..eb819a5c83b 100644 --- a/sys/dev/nvmf/nvmf_transport_internal.h +++ b/sys/dev/nvmf/nvmf_transport_internal.h @@ -8,6 +8,7 @@ #ifndef __NVMF_TRANSPORT_INTERNAL_H__ #define __NVMF_TRANSPORT_INTERNAL_H__ +#include #include /* @@ -21,7 +22,7 @@ struct nvmf_io_request; struct nvmf_transport_ops { /* Queue pair management. */ struct nvmf_qpair *(*allocate_qpair)(bool controller, - const struct nvmf_handoff_qpair_params *params); + const nvlist_t *nvl); void (*free_qpair)(struct nvmf_qpair *qp); /* Capsule operations. */ diff --git a/usr.sbin/nvmfd/ctl.c b/usr.sbin/nvmfd/ctl.c index 5f01ec8e5bc..73e90e1411b 100644 --- a/usr.sbin/nvmfd/ctl.c +++ b/usr.sbin/nvmfd/ctl.c @@ -126,14 +126,12 @@ ctl_handoff_qpair(struct nvmf_qpair *qp, memset(&req, 0, sizeof(req)); req.type = CTL_NVMF_HANDOFF; - error = nvmf_handoff_controller_qpair(qp, &req.data.handoff); + error = nvmf_handoff_controller_qpair(qp, cmd, data, &req.data.handoff); if (error != 0) { warnc(error, "Failed to prepare qpair for handoff"); return; } - req.data.handoff.cmd = cmd; - req.data.handoff.data = data; if (ioctl(ctl_fd, CTL_NVMF, &req) != 0) warn("ioctl(CTL_NVMF/CTL_NVMF_HANDOFF)"); }