Diffstat (limited to 'sys/dev/nvmf')
-rw-r--r--  sys/dev/nvmf/controller/ctl_frontend_nvmf.c  | 196
-rw-r--r--  sys/dev/nvmf/controller/nvmft_controller.c   |  78
-rw-r--r--  sys/dev/nvmf/controller/nvmft_qpair.c        |  72
-rw-r--r--  sys/dev/nvmf/controller/nvmft_subr.c         |  40
-rw-r--r--  sys/dev/nvmf/controller/nvmft_var.h          |  19
-rw-r--r--  sys/dev/nvmf/host/nvmf.c                     | 640
-rw-r--r--  sys/dev/nvmf/host/nvmf_aer.c                 |   2
-rw-r--r--  sys/dev/nvmf/host/nvmf_ctldev.c              |  15
-rw-r--r--  sys/dev/nvmf/host/nvmf_ns.c                  |  66
-rw-r--r--  sys/dev/nvmf/host/nvmf_qpair.c               |  88
-rw-r--r--  sys/dev/nvmf/host/nvmf_sim.c                 |  29
-rw-r--r--  sys/dev/nvmf/host/nvmf_var.h                 |  46
-rw-r--r--  sys/dev/nvmf/nvmf.h                          | 131
-rw-r--r--  sys/dev/nvmf/nvmf_proto.h                    |   6
-rw-r--r--  sys/dev/nvmf/nvmf_tcp.c                      |  93
-rw-r--r--  sys/dev/nvmf/nvmf_tcp.h                      |  27
-rw-r--r--  sys/dev/nvmf/nvmf_transport.c                | 102
-rw-r--r--  sys/dev/nvmf/nvmf_transport.h                |  25
-rw-r--r--  sys/dev/nvmf/nvmf_transport_internal.h       |   3
19 files changed, 1257 insertions(+), 421 deletions(-)
diff --git a/sys/dev/nvmf/controller/ctl_frontend_nvmf.c b/sys/dev/nvmf/controller/ctl_frontend_nvmf.c
index a203bb1c90a6..658b47699c1d 100644
--- a/sys/dev/nvmf/controller/ctl_frontend_nvmf.c
+++ b/sys/dev/nvmf/controller/ctl_frontend_nvmf.c
@@ -19,7 +19,9 @@
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
+#include <sys/smp.h>
#include <sys/sx.h>
+#include <sys/taskqueue.h>
#include <machine/bus.h>
#include <machine/bus_dma.h>
@@ -31,8 +33,10 @@
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_error.h>
+#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl_frontend.h>
+#include <cam/ctl/ctl_private.h>
/*
* Store pointers to the capsule and qpair in the two pointer members
@@ -47,6 +51,7 @@ static int nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data,
int flag, struct thread *td);
static int nvmft_shutdown(void);
+static struct taskqueue *nvmft_taskq;
static TAILQ_HEAD(, nvmft_port) nvmft_ports;
static struct sx nvmft_ports_lock;
@@ -65,9 +70,9 @@ nvmft_online(void *arg)
{
struct nvmft_port *np = arg;
- sx_xlock(&np->lock);
+ mtx_lock(&np->lock);
np->online = true;
- sx_xunlock(&np->lock);
+ mtx_unlock(&np->lock);
}
static void
@@ -76,7 +81,7 @@ nvmft_offline(void *arg)
struct nvmft_port *np = arg;
struct nvmft_controller *ctrlr;
- sx_xlock(&np->lock);
+ mtx_lock(&np->lock);
np->online = false;
TAILQ_FOREACH(ctrlr, &np->controllers, link) {
@@ -86,8 +91,32 @@ nvmft_offline(void *arg)
}
while (!TAILQ_EMPTY(&np->controllers))
- sx_sleep(np, &np->lock, 0, "nvmfoff", 0);
- sx_xunlock(&np->lock);
+ mtx_sleep(np, &np->lock, 0, "nvmfoff", 0);
+ mtx_unlock(&np->lock);
+}
+
+static int
+nvmft_info(void *arg, struct sbuf *sb)
+{
+ struct nvmft_port *np = arg;
+ struct nvmft_controller *ctrlr;
+ int retval;
+
+ mtx_lock(&np->lock);
+ retval = sbuf_printf(sb, "\t<port>%s,p,%u</port>\n", np->cdata.subnqn,
+ np->portid);
+ if (retval != 0)
+ goto out;
+
+ TAILQ_FOREACH(ctrlr, &np->controllers, link) {
+ retval = sbuf_printf(sb, "\t<host id=\"%u\">%s</host>\n",
+ ctrlr->cntlid, ctrlr->hostnqn);
+ if (retval != 0)
+ break;
+ }
+out:
+ mtx_unlock(&np->lock);
+ return (retval);
}
static int
@@ -97,7 +126,7 @@ nvmft_lun_enable(void *arg, int lun_id)
struct nvmft_controller *ctrlr;
uint32_t *old_ns, *new_ns;
uint32_t nsid;
- u_int i;
+ u_int i, new_count;
if (lun_id >= le32toh(np->cdata.nn)) {
printf("NVMFT: %s lun %d larger than maximum nsid %u\n",
@@ -106,14 +135,22 @@ nvmft_lun_enable(void *arg, int lun_id)
}
nsid = lun_id + 1;
- sx_xlock(&np->lock);
- new_ns = mallocarray(np->num_ns + 1, sizeof(*new_ns), M_NVMFT,
- M_WAITOK);
+ mtx_lock(&np->lock);
+ for (;;) {
+ new_count = np->num_ns + 1;
+ mtx_unlock(&np->lock);
+ new_ns = mallocarray(new_count, sizeof(*new_ns), M_NVMFT,
+ M_WAITOK);
+ mtx_lock(&np->lock);
+ if (np->num_ns + 1 <= new_count)
+ break;
+ free(new_ns, M_NVMFT);
+ }
for (i = 0; i < np->num_ns; i++) {
if (np->active_ns[i] < nsid)
continue;
if (np->active_ns[i] == nsid) {
- sx_xunlock(&np->lock);
+ mtx_unlock(&np->lock);
free(new_ns, M_NVMFT);
printf("NVMFT: %s duplicate lun %d\n",
np->cdata.subnqn, lun_id);
@@ -140,7 +177,7 @@ nvmft_lun_enable(void *arg, int lun_id)
nvmft_controller_lun_changed(ctrlr, lun_id);
}
- sx_xunlock(&np->lock);
+ mtx_unlock(&np->lock);
free(old_ns, M_NVMFT);
return (0);
@@ -158,12 +195,12 @@ nvmft_lun_disable(void *arg, int lun_id)
return (0);
nsid = lun_id + 1;
- sx_xlock(&np->lock);
+ mtx_lock(&np->lock);
for (i = 0; i < np->num_ns; i++) {
if (np->active_ns[i] == nsid)
goto found;
}
- sx_xunlock(&np->lock);
+ mtx_unlock(&np->lock);
printf("NVMFT: %s request to disable nonexistent lun %d\n",
np->cdata.subnqn, lun_id);
return (EINVAL);
@@ -180,7 +217,7 @@ found:
nvmft_controller_lun_changed(ctrlr, lun_id);
}
- sx_xunlock(&np->lock);
+ mtx_unlock(&np->lock);
return (0);
}
@@ -191,7 +228,7 @@ nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
{
u_int i, count;
- sx_slock(&np->lock);
+ mtx_lock(&np->lock);
count = 0;
for (i = 0; i < np->num_ns; i++) {
if (np->active_ns[i] <= nsid)
@@ -201,7 +238,7 @@ nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
if (count == nitems(nslist->ns))
break;
}
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
}
void
@@ -458,8 +495,8 @@ nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
ctl_datamove_done((union ctl_io *)ctnio, true);
}
-static void
-nvmft_datamove(union ctl_io *io)
+void
+nvmft_handle_datamove(union ctl_io *io)
{
struct nvmf_capsule *nc;
struct nvmft_qpair *qp;
@@ -478,6 +515,35 @@ nvmft_datamove(union ctl_io *io)
nvmft_datamove_out(&io->nvmeio, qp, nc);
}
+void
+nvmft_abort_datamove(union ctl_io *io)
+{
+ io->io_hdr.port_status = 1;
+ io->io_hdr.flags |= CTL_FLAG_ABORT;
+ ctl_datamove_done(io, true);
+}
+
+static void
+nvmft_datamove(union ctl_io *io)
+{
+ struct nvmft_qpair *qp;
+
+ qp = NVMFT_QP(io);
+ nvmft_qpair_datamove(qp, io);
+}
+
+void
+nvmft_enqueue_task(struct task *task)
+{
+ taskqueue_enqueue(nvmft_taskq, task);
+}
+
+void
+nvmft_drain_task(struct task *task)
+{
+ taskqueue_drain(nvmft_taskq, task);
+}
+
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
@@ -561,6 +627,17 @@ end:
static int
nvmft_init(void)
{
+ int error;
+
+ nvmft_taskq = taskqueue_create("nvmft", M_WAITOK,
+ taskqueue_thread_enqueue, &nvmft_taskq);
+ error = taskqueue_start_threads_in_proc(&nvmft_taskq, mp_ncpus, PWAIT,
+ control_softc->ctl_proc, "nvmft");
+ if (error != 0) {
+ taskqueue_free(nvmft_taskq);
+ return (error);
+ }
+
TAILQ_INIT(&nvmft_ports);
sx_init(&nvmft_ports_lock, "nvmft ports");
return (0);
@@ -580,7 +657,7 @@ nvmft_port_free(struct nvmft_port *np)
free(np->active_ns, M_NVMFT);
clean_unrhdr(np->ids);
delete_unrhdr(np->ids);
- sx_destroy(&np->lock);
+ mtx_destroy(&np->lock);
free(np, M_NVMFT);
}
@@ -750,9 +827,10 @@ nvmft_port_create(struct ctl_req *req)
np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO);
refcount_init(&np->refs, 1);
+ np->portid = portid;
np->max_io_qsize = max_io_qsize;
np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500);
- sx_init(&np->lock, "nvmft port");
+ mtx_init(&np->lock, "nvmft port", NULL, MTX_DEF);
np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1,
NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX);
TAILQ_INIT(&np->controllers);
@@ -781,6 +859,7 @@ nvmft_port_create(struct ctl_req *req)
port->virtual_port = 0;
port->port_online = nvmft_online;
port->port_offline = nvmft_offline;
+ port->port_info = nvmft_info;
port->onoff_arg = np;
port->lun_enable = nvmft_lun_enable;
port->lun_disable = nvmft_lun_disable;
@@ -870,7 +949,13 @@ nvmft_port_remove(struct ctl_req *req)
TAILQ_REMOVE(&nvmft_ports, np, link);
sx_xunlock(&nvmft_ports_lock);
- ctl_port_offline(&np->port);
+ mtx_lock(&np->lock);
+ if (np->online) {
+ mtx_unlock(&np->lock);
+ ctl_port_offline(&np->port);
+ } else
+ mtx_unlock(&np->lock);
+
nvmft_port_rele(np);
req->status = CTL_LUN_OK;
}
@@ -878,29 +963,55 @@ nvmft_port_remove(struct ctl_req *req)
static void
nvmft_handoff(struct ctl_nvmf *cn)
{
- struct nvmf_fabric_connect_cmd cmd;
- struct nvmf_handoff_controller_qpair *handoff;
- struct nvmf_fabric_connect_data *data;
+ const struct nvmf_fabric_connect_cmd *cmd;
+ const struct nvmf_fabric_connect_data *data;
+ const nvlist_t *params;
struct nvmft_port *np;
+ nvlist_t *nvl;
+ size_t len;
+ enum nvmf_trtype trtype;
int error;
np = NULL;
- data = NULL;
- handoff = &cn->data.handoff;
- error = copyin(handoff->cmd, &cmd, sizeof(cmd));
+ error = nvmf_unpack_ioc_nvlist(&cn->data.handoff, &nvl);
if (error != 0) {
cn->status = CTL_NVMF_ERROR;
snprintf(cn->error_str, sizeof(cn->error_str),
- "Failed to copyin CONNECT SQE");
+ "Failed to copyin and unpack handoff arguments");
return;
}
- data = malloc(sizeof(*data), M_NVMFT, M_WAITOK);
- error = copyin(handoff->data, data, sizeof(*data));
- if (error != 0) {
+ if (!nvlist_exists_number(nvl, "trtype") ||
+ !nvlist_exists_nvlist(nvl, "params") ||
+ !nvlist_exists_binary(nvl, "cmd") ||
+ !nvlist_exists_binary(nvl, "data")) {
cn->status = CTL_NVMF_ERROR;
snprintf(cn->error_str, sizeof(cn->error_str),
- "Failed to copyin CONNECT data");
+ "Handoff arguments missing required value");
+ goto out;
+ }
+
+ params = nvlist_get_nvlist(nvl, "params");
+ if (!nvmf_validate_qpair_nvlist(params, true)) {
+ cn->status = CTL_NVMF_ERROR;
+ snprintf(cn->error_str, sizeof(cn->error_str),
+ "Invalid queue pair parameters");
+ goto out;
+ }
+
+ cmd = nvlist_get_binary(nvl, "cmd", &len);
+ if (len != sizeof(*cmd)) {
+ cn->status = CTL_NVMF_ERROR;
+ snprintf(cn->error_str, sizeof(cn->error_str),
+ "Wrong size for CONNECT SQE");
+ goto out;
+ }
+
+ data = nvlist_get_binary(nvl, "data", &len);
+ if (len != sizeof(*data)) {
+ cn->status = CTL_NVMF_ERROR;
+ snprintf(cn->error_str, sizeof(cn->error_str),
+ "Wrong size for CONNECT data");
goto out;
}
@@ -931,8 +1042,10 @@ nvmft_handoff(struct ctl_nvmf *cn)
nvmft_port_ref(np);
sx_sunlock(&nvmft_ports_lock);
- if (handoff->params.admin) {
- error = nvmft_handoff_admin_queue(np, handoff, &cmd, data);
+ trtype = nvlist_get_number(nvl, "trtype");
+ if (nvlist_get_bool(params, "admin")) {
+ error = nvmft_handoff_admin_queue(np, trtype, params, cmd,
+ data);
if (error != 0) {
cn->status = CTL_NVMF_ERROR;
snprintf(cn->error_str, sizeof(cn->error_str),
@@ -940,11 +1053,11 @@ nvmft_handoff(struct ctl_nvmf *cn)
goto out;
}
} else {
- error = nvmft_handoff_io_queue(np, handoff, &cmd, data);
+ error = nvmft_handoff_io_queue(np, trtype, params, cmd, data);
if (error != 0) {
cn->status = CTL_NVMF_ERROR;
snprintf(cn->error_str, sizeof(cn->error_str),
- "Failed to handoff admin queue: %d", error);
+ "Failed to handoff I/O queue: %d", error);
goto out;
}
}
@@ -953,7 +1066,7 @@ nvmft_handoff(struct ctl_nvmf *cn)
out:
if (np != NULL)
nvmft_port_rele(np);
- free(data, M_NVMFT);
+ nvlist_destroy(nvl);
}
static void
@@ -979,7 +1092,7 @@ nvmft_list(struct ctl_nvmf *cn)
sbuf_printf(sb, "<ctlnvmflist>\n");
sx_slock(&nvmft_ports_lock);
TAILQ_FOREACH(np, &nvmft_ports, link) {
- sx_slock(&np->lock);
+ mtx_lock(&np->lock);
TAILQ_FOREACH(ctrlr, &np->controllers, link) {
sbuf_printf(sb, "<connection id=\"%d\">"
"<hostnqn>%s</hostnqn>"
@@ -991,7 +1104,7 @@ nvmft_list(struct ctl_nvmf *cn)
np->cdata.subnqn,
ctrlr->trtype);
}
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
}
sx_sunlock(&nvmft_ports_lock);
sbuf_printf(sb, "</ctlnvmflist>\n");
@@ -1029,7 +1142,7 @@ nvmft_terminate(struct ctl_nvmf *cn)
found = false;
sx_slock(&nvmft_ports_lock);
TAILQ_FOREACH(np, &nvmft_ports, link) {
- sx_slock(&np->lock);
+ mtx_lock(&np->lock);
TAILQ_FOREACH(ctrlr, &np->controllers, link) {
if (tp->all != 0)
match = true;
@@ -1047,7 +1160,7 @@ nvmft_terminate(struct ctl_nvmf *cn)
nvmft_controller_error(ctrlr, NULL, ECONNABORTED);
found = true;
}
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
}
sx_sunlock(&nvmft_ports_lock);
@@ -1115,6 +1228,7 @@ nvmft_shutdown(void)
if (!TAILQ_EMPTY(&nvmft_ports))
return (EBUSY);
+ taskqueue_free(nvmft_taskq);
sx_destroy(&nvmft_ports_lock);
return (0);
}
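
Note: the datamove change above makes nvmft_datamove() queue the request on its qpair and defer the actual transfer to the new "nvmft" taskqueue (one thread per CPU, started in nvmft_init() within CTL's process). A minimal kernel-style sketch of the same defer-to-taskqueue idiom; the example_* names are hypothetical, not driver code:

	#include <sys/param.h>
	#include <sys/kernel.h>
	#include <sys/malloc.h>
	#include <sys/smp.h>
	#include <sys/taskqueue.h>

	static struct taskqueue *example_tq;
	static struct task example_task;

	static void
	example_task_fn(void *ctx, int pending __unused)
	{
		/* Runs in a taskqueue thread; sleeping is allowed here. */
	}

	static int
	example_init(void)
	{
		int error;

		example_tq = taskqueue_create("example", M_WAITOK,
		    taskqueue_thread_enqueue, &example_tq);
		error = taskqueue_start_threads(&example_tq, mp_ncpus, PWAIT,
		    "example taskq");
		if (error != 0) {
			taskqueue_free(example_tq);
			return (error);
		}
		TASK_INIT(&example_task, 0, example_task_fn, NULL);
		taskqueue_enqueue(example_tq, &example_task);
		return (0);
	}
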
diff --git a/sys/dev/nvmf/controller/nvmft_controller.c b/sys/dev/nvmf/controller/nvmft_controller.c
index f3783eac1275..390467534ca2 100644
--- a/sys/dev/nvmf/controller/nvmft_controller.c
+++ b/sys/dev/nvmf/controller/nvmft_controller.c
@@ -14,7 +14,6 @@
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
-#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <dev/nvmf/nvmf_transport.h>
@@ -55,8 +54,6 @@ nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
ctrlr->cntlid = cntlid;
- nvmft_port_ref(np);
- TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);
ctrlr->np = np;
mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
callout_init(&ctrlr->ka_timer, 1);
@@ -107,9 +104,8 @@ nvmft_keep_alive_timer(void *arg)
}
int
-nvmft_handoff_admin_queue(struct nvmft_port *np,
- const struct nvmf_handoff_controller_qpair *handoff,
- const struct nvmf_fabric_connect_cmd *cmd,
+nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
+ const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
const struct nvmf_fabric_connect_data *data)
{
struct nvmft_controller *ctrlr;
@@ -120,13 +116,17 @@ nvmft_handoff_admin_queue(struct nvmft_port *np,
if (cmd->qid != htole16(0))
return (EINVAL);
- qp = nvmft_qpair_init(handoff->trtype, &handoff->params, 0,
- "admin queue");
+ qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
+ if (qp == NULL) {
+ printf("NVMFT: Failed to setup admin queue from %.*s\n",
+ (int)sizeof(data->hostnqn), data->hostnqn);
+ return (ENXIO);
+ }
- sx_xlock(&np->lock);
+ mtx_lock(&np->lock);
cntlid = alloc_unr(np->ids);
if (cntlid == -1) {
- sx_xunlock(&np->lock);
+ mtx_unlock(&np->lock);
printf("NVMFT: Unable to allocate controller for %.*s\n",
(int)sizeof(data->hostnqn), data->hostnqn);
nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
@@ -141,12 +141,25 @@ nvmft_handoff_admin_queue(struct nvmft_port *np,
("%s: duplicate controllers with id %d", __func__, cntlid));
}
#endif
+ mtx_unlock(&np->lock);
ctrlr = nvmft_controller_alloc(np, cntlid, data);
+
+ mtx_lock(&np->lock);
+ if (!np->online) {
+ mtx_unlock(&np->lock);
+ nvmft_controller_free(ctrlr);
+ free_unr(np->ids, cntlid);
+ nvmft_qpair_destroy(qp);
+ return (ENXIO);
+ }
+ nvmft_port_ref(np);
+ TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);
+
nvmft_printf(ctrlr, "associated with %.*s\n",
(int)sizeof(data->hostnqn), data->hostnqn);
ctrlr->admin = qp;
- ctrlr->trtype = handoff->trtype;
+ ctrlr->trtype = trtype;
/*
* The spec requires a non-zero KeepAlive timer, but allow a
@@ -162,17 +175,16 @@ nvmft_handoff_admin_queue(struct nvmft_port *np,
callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
}
+ mtx_unlock(&np->lock);
nvmft_finish_accept(qp, cmd, ctrlr);
- sx_xunlock(&np->lock);
return (0);
}
int
-nvmft_handoff_io_queue(struct nvmft_port *np,
- const struct nvmf_handoff_controller_qpair *handoff,
- const struct nvmf_fabric_connect_cmd *cmd,
+nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
+ const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
const struct nvmf_fabric_connect_data *data)
{
struct nvmft_controller *ctrlr;
@@ -186,15 +198,20 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
cntlid = le16toh(data->cntlid);
snprintf(name, sizeof(name), "I/O queue %u", qid);
- qp = nvmft_qpair_init(handoff->trtype, &handoff->params, qid, name);
+ qp = nvmft_qpair_init(trtype, params, qid, name);
+ if (qp == NULL) {
+ printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
+ (int)sizeof(data->hostnqn), data->hostnqn);
+ return (ENXIO);
+ }
- sx_slock(&np->lock);
+ mtx_lock(&np->lock);
TAILQ_FOREACH(ctrlr, &np->controllers, link) {
if (ctrlr->cntlid == cntlid)
break;
}
if (ctrlr == NULL) {
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
cntlid, qid, (int)sizeof(data->hostnqn),
data->hostnqn);
@@ -205,7 +222,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
}
if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
nvmft_printf(ctrlr,
"hostid mismatch for I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
@@ -215,7 +232,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
return (EINVAL);
}
if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
nvmft_printf(ctrlr,
"hostnqn mismatch for I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
@@ -225,12 +242,12 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
return (EINVAL);
}
- /* XXX: Require handoff->trtype == ctrlr->trtype? */
+ /* XXX: Require trtype == ctrlr->trtype? */
mtx_lock(&ctrlr->lock);
if (ctrlr->shutdown) {
mtx_unlock(&ctrlr->lock);
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
nvmft_printf(ctrlr,
"attempt to create I/O queue %u on disabled controller from %.*s\n",
qid, (int)sizeof(data->hostnqn), data->hostnqn);
@@ -241,7 +258,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
}
if (ctrlr->num_io_queues == 0) {
mtx_unlock(&ctrlr->lock);
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
nvmft_printf(ctrlr,
"attempt to create I/O queue %u without enabled queues from %.*s\n",
qid, (int)sizeof(data->hostnqn), data->hostnqn);
@@ -252,7 +269,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
}
if (cmd->qid > ctrlr->num_io_queues) {
mtx_unlock(&ctrlr->lock);
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
nvmft_printf(ctrlr,
"attempt to create invalid I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
@@ -263,7 +280,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
}
if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
mtx_unlock(&ctrlr->lock);
- sx_sunlock(&np->lock);
+ mtx_unlock(&np->lock);
nvmft_printf(ctrlr,
"attempt to re-create I/O queue %u from %.*s\n", qid,
(int)sizeof(data->hostnqn), data->hostnqn);
@@ -275,8 +292,8 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
ctrlr->io_qpairs[qid - 1].qp = qp;
mtx_unlock(&ctrlr->lock);
+ mtx_unlock(&np->lock);
nvmft_finish_accept(qp, cmd, ctrlr);
- sx_sunlock(&np->lock);
return (0);
}
@@ -375,11 +392,11 @@ nvmft_controller_terminate(void *arg, int pending)
/* Remove association (CNTLID). */
np = ctrlr->np;
- sx_xlock(&np->lock);
+ mtx_lock(&np->lock);
TAILQ_REMOVE(&np->controllers, ctrlr, link);
- free_unr(np->ids, ctrlr->cntlid);
wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
- sx_xunlock(&np->lock);
+ mtx_unlock(&np->lock);
+ free_unr(np->ids, ctrlr->cntlid);
if (wakeup_np)
wakeup(np);
@@ -770,6 +787,7 @@ handle_set_features(struct nvmft_controller *ctrlr,
ctrlr->aer_mask = aer_mask;
mtx_unlock(&ctrlr->lock);
nvmft_send_success(ctrlr->admin, nc);
+ nvmf_free_capsule(nc);
return;
}
default:
@@ -944,7 +962,7 @@ nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
nvmft_printf(ctrlr,
- "Unsupported admin opcode %#x whiled disabled\n", cmd->opc);
+ "Unsupported admin opcode %#x while disabled\n", cmd->opc);
nvmft_send_generic_error(ctrlr->admin, nc,
NVME_SC_COMMAND_SEQUENCE_ERROR);
nvmf_free_capsule(nc);
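
Note on the reordered controller setup above: np->lock is now a mutex, so the driver may no longer hold it across a sleeping M_WAITOK allocation. The controller is therefore allocated unlocked, and np->online is re-checked under the lock before the new controller is linked in, undoing the work if the port went offline in the meantime. The idiom in isolation, a sketch with hypothetical names:

	obj = malloc(sizeof(*obj), M_EXAMPLE, M_WAITOK | M_ZERO);
	mtx_lock(&sc->lock);
	if (!sc->online) {
		/* State changed while malloc() slept; undo and bail. */
		mtx_unlock(&sc->lock);
		free(obj, M_EXAMPLE);
		return (ENXIO);
	}
	TAILQ_INSERT_TAIL(&sc->list, obj, link);
	mtx_unlock(&sc->lock);
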
diff --git a/sys/dev/nvmf/controller/nvmft_qpair.c b/sys/dev/nvmf/controller/nvmft_qpair.c
index 6cb3ebd76884..73c7bb280780 100644
--- a/sys/dev/nvmf/controller/nvmft_qpair.c
+++ b/sys/dev/nvmf/controller/nvmft_qpair.c
@@ -31,9 +31,11 @@ struct nvmft_qpair {
uint16_t qid;
u_int qsize;
uint16_t sqhd;
- uint16_t sqtail;
volatile u_int qp_refs; /* Internal references on 'qp'. */
+ struct task datamove_task;
+ STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
+
struct mtx lock;
char name[16];
@@ -41,6 +43,7 @@ struct nvmft_qpair {
static int _nvmft_send_generic_error(struct nvmft_qpair *qp,
struct nvmf_capsule *nc, uint8_t sc_status);
+static void nvmft_datamove_task(void *context, int pending);
static void
nvmft_qpair_error(void *arg, int error)
@@ -98,24 +101,24 @@ nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
}
struct nvmft_qpair *
-nvmft_qpair_init(enum nvmf_trtype trtype,
- const struct nvmf_handoff_qpair_params *handoff, uint16_t qid,
+nvmft_qpair_init(enum nvmf_trtype trtype, const nvlist_t *params, uint16_t qid,
const char *name)
{
struct nvmft_qpair *qp;
qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
- qp->admin = handoff->admin;
- qp->sq_flow_control = handoff->sq_flow_control;
- qp->qsize = handoff->qsize;
+ qp->admin = nvlist_get_bool(params, "admin");
+ qp->sq_flow_control = nvlist_get_bool(params, "sq_flow_control");
+ qp->qsize = nvlist_get_number(params, "qsize");
qp->qid = qid;
- qp->sqhd = handoff->sqhd;
- qp->sqtail = handoff->sqtail;
+ qp->sqhd = nvlist_get_number(params, "sqhd");
strlcpy(qp->name, name, sizeof(qp->name));
mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
+ STAILQ_INIT(&qp->datamove_queue);
+ TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);
- qp->qp = nvmf_allocate_qpair(trtype, true, handoff, nvmft_qpair_error,
+ qp->qp = nvmf_allocate_qpair(trtype, true, params, nvmft_qpair_error,
qp, nvmft_receive_capsule, qp);
if (qp->qp == NULL) {
mtx_destroy(&qp->lock);
@@ -131,14 +134,25 @@ nvmft_qpair_init(enum nvmf_trtype trtype,
void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
+ STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
struct nvmf_qpair *nq;
+ union ctl_io *io;
+ STAILQ_INIT(&datamove_queue);
mtx_lock(&qp->lock);
nq = qp->qp;
qp->qp = NULL;
+ STAILQ_CONCAT(&datamove_queue, &qp->datamove_queue);
mtx_unlock(&qp->lock);
if (nq != NULL && refcount_release(&qp->qp_refs))
nvmf_free_qpair(nq);
+
+ while (!STAILQ_EMPTY(&datamove_queue)) {
+ io = (union ctl_io *)STAILQ_FIRST(&datamove_queue);
+ STAILQ_REMOVE_HEAD(&datamove_queue, links);
+ nvmft_abort_datamove(io);
+ }
+ nvmft_drain_task(&qp->datamove_task);
}
void
@@ -359,3 +373,43 @@ nvmft_finish_accept(struct nvmft_qpair *qp,
rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
return (nvmft_send_connect_response(qp, &rsp));
}
+
+void
+nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io)
+{
+ bool enqueue_task;
+
+ mtx_lock(&qp->lock);
+ if (qp->qp == NULL) {
+ mtx_unlock(&qp->lock);
+ nvmft_abort_datamove(io);
+ return;
+ }
+ enqueue_task = STAILQ_EMPTY(&qp->datamove_queue);
+ STAILQ_INSERT_TAIL(&qp->datamove_queue, &io->io_hdr, links);
+ mtx_unlock(&qp->lock);
+ if (enqueue_task)
+ nvmft_enqueue_task(&qp->datamove_task);
+}
+
+static void
+nvmft_datamove_task(void *context, int pending __unused)
+{
+ struct nvmft_qpair *qp = context;
+ union ctl_io *io;
+ bool abort;
+
+ mtx_lock(&qp->lock);
+ while (!STAILQ_EMPTY(&qp->datamove_queue)) {
+ io = (union ctl_io *)STAILQ_FIRST(&qp->datamove_queue);
+ STAILQ_REMOVE_HEAD(&qp->datamove_queue, links);
+ abort = (qp->qp == NULL);
+ mtx_unlock(&qp->lock);
+ if (abort)
+ nvmft_abort_datamove(io);
+ else
+ nvmft_handle_datamove(io);
+ mtx_lock(&qp->lock);
+ }
+ mtx_unlock(&qp->lock);
+}
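
Note: nvmft_qpair_datamove() above enqueues the datamove task only on the empty-to-non-empty transition, so at most one task is ever outstanding per qpair no matter how many requests pile up, while nvmft_qpair_shutdown() steals the whole queue with STAILQ_CONCAT() under the lock and aborts the entries without holding it. The enqueue-on-transition idiom in isolation (hypothetical names):

	mtx_lock(&q->lock);
	was_empty = STAILQ_EMPTY(&q->items);
	STAILQ_INSERT_TAIL(&q->items, item, link);
	mtx_unlock(&q->lock);
	if (was_empty)
		taskqueue_enqueue(q->tq, &q->task);
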
diff --git a/sys/dev/nvmf/controller/nvmft_subr.c b/sys/dev/nvmf/controller/nvmft_subr.c
index bb2bc0988e81..245971813854 100644
--- a/sys/dev/nvmf/controller/nvmft_subr.c
+++ b/sys/dev/nvmf/controller/nvmft_subr.c
@@ -26,46 +26,6 @@ nvmf_nqn_valid(const char *nqn)
len = strnlen(nqn, NVME_NQN_FIELD_SIZE);
if (len == 0 || len > NVMF_NQN_MAX_LEN)
return (false);
-
-#ifdef STRICT_CHECKS
- /*
- * Stricter checks from the spec. Linux does not seem to
- * require these.
- */
-
- /*
- * NVMF_NQN_MIN_LEN does not include '.', and require at least
- * one character of a domain name.
- */
- if (len < NVMF_NQN_MIN_LEN + 2)
- return (false);
- if (memcmp("nqn.", nqn, strlen("nqn.")) != 0)
- return (false);
- nqn += strlen("nqn.");
-
- /* Next 4 digits must be a year. */
- for (u_int i = 0; i < 4; i++) {
- if (!isdigit(nqn[i]))
- return (false);
- }
- nqn += 4;
-
- /* '-' between year and month. */
- if (nqn[0] != '-')
- return (false);
- nqn++;
-
- /* 2 digit month. */
- for (u_int i = 0; i < 2; i++) {
- if (!isdigit(nqn[i]))
- return (false);
- }
- nqn += 2;
-
- /* '.' between month and reverse domain name. */
- if (nqn[0] != '.')
- return (false);
-#endif
return (true);
}
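
Note: the deleted STRICT_CHECKS block enforced the spec's NQN layout ("nqn." + four-digit year + "-" + two-digit month + "." + reverse domain name); as the removed comment observes, Linux does not require this, so only the length check remains. A conforming NQN for reference is the well-known discovery NQN, nqn.2014-08.org.nvmexpress.discovery.
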
diff --git a/sys/dev/nvmf/controller/nvmft_var.h b/sys/dev/nvmf/controller/nvmft_var.h
index fc1f86754382..85032b2dc55f 100644
--- a/sys/dev/nvmf/controller/nvmft_var.h
+++ b/sys/dev/nvmf/controller/nvmft_var.h
@@ -9,6 +9,7 @@
#define __NVMFT_VAR_H__
#include <sys/_callout.h>
+#include <sys/_nv.h>
#include <sys/refcount.h>
#include <sys/taskqueue.h>
@@ -32,9 +33,10 @@ struct nvmft_port {
struct nvme_firmware_page fp;
uint64_t cap;
uint32_t max_io_qsize;
+ uint16_t portid;
bool online;
- struct sx lock;
+ struct mtx lock;
struct unrhdr *ids;
TAILQ_HEAD(, nvmft_controller) controllers;
@@ -110,6 +112,10 @@ void nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
void nvmft_dispatch_command(struct nvmft_qpair *qp,
struct nvmf_capsule *nc, bool admin);
void nvmft_terminate_commands(struct nvmft_controller *ctrlr);
+void nvmft_abort_datamove(union ctl_io *io);
+void nvmft_handle_datamove(union ctl_io *io);
+void nvmft_drain_task(struct task *task);
+void nvmft_enqueue_task(struct task *task);
/* nvmft_controller.c */
void nvmft_controller_error(struct nvmft_controller *ctrlr,
@@ -121,23 +127,22 @@ void nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
void nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
struct nvmf_capsule *nc);
int nvmft_handoff_admin_queue(struct nvmft_port *np,
- const struct nvmf_handoff_controller_qpair *handoff,
+ enum nvmf_trtype trtype, const nvlist_t *params,
const struct nvmf_fabric_connect_cmd *cmd,
const struct nvmf_fabric_connect_data *data);
-int nvmft_handoff_io_queue(struct nvmft_port *np,
- const struct nvmf_handoff_controller_qpair *handoff,
- const struct nvmf_fabric_connect_cmd *cmd,
+int nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
+ const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
const struct nvmf_fabric_connect_data *data);
int nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
__printflike(2, 3);
/* nvmft_qpair.c */
struct nvmft_qpair *nvmft_qpair_init(enum nvmf_trtype trtype,
- const struct nvmf_handoff_qpair_params *handoff, uint16_t qid,
- const char *name);
+ const nvlist_t *params, uint16_t qid, const char *name);
void nvmft_qpair_shutdown(struct nvmft_qpair *qp);
void nvmft_qpair_destroy(struct nvmft_qpair *qp);
struct nvmft_controller *nvmft_qpair_ctrlr(struct nvmft_qpair *qp);
+void nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io);
uint16_t nvmft_qpair_id(struct nvmft_qpair *qp);
const char *nvmft_qpair_name(struct nvmft_qpair *qp);
void nvmft_command_completed(struct nvmft_qpair *qp,
diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c
index 0902bc78a7b5..1ac0d142443b 100644
--- a/sys/dev/nvmf/host/nvmf.c
+++ b/sys/dev/nvmf/host/nvmf.c
@@ -8,13 +8,18 @@
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
+#include <sys/dnv.h>
+#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
+#include <sys/nv.h>
+#include <sys/reboot.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
@@ -22,10 +27,20 @@
#include <dev/nvmf/host/nvmf_var.h>
static struct cdevsw nvmf_cdevsw;
+static struct taskqueue *nvmf_tq;
+
+bool nvmf_fail_disconnect = false;
+SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
+ &nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure");
MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
+static void nvmf_controller_loss_task(void *arg, int pending);
static void nvmf_disconnect_task(void *arg, int pending);
+static void nvmf_request_reconnect(struct nvmf_softc *sc);
+static void nvmf_request_reconnect_task(void *arg, int pending);
+static void nvmf_shutdown_pre_sync(void *arg, int howto);
+static void nvmf_shutdown_post_sync(void *arg, int howto);
void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
@@ -187,104 +202,132 @@ nvmf_send_keep_alive(void *arg)
}
int
-nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
+nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp)
{
- size_t len;
- u_int i;
+ const struct nvme_discovery_log_entry *dle;
+ const struct nvme_controller_data *cdata;
+ const nvlist_t *const *io;
+ const nvlist_t *admin, *rparams;
+ nvlist_t *nvl;
+ size_t i, num_io_queues;
+ uint32_t qsize;
int error;
- memset(ivars, 0, sizeof(*ivars));
-
- if (!hh->admin.admin || hh->num_io_queues < 1)
- return (EINVAL);
-
- ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
- error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
+ error = nvmf_unpack_ioc_nvlist(nv, &nvl);
if (error != 0)
- goto out;
- nvme_controller_data_swapbytes(ivars->cdata);
+ return (error);
- len = hh->num_io_queues * sizeof(*ivars->io_params);
- ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
- error = copyin(hh->io, ivars->io_params, len);
- if (error != 0)
- goto out;
- for (i = 0; i < hh->num_io_queues; i++) {
- if (ivars->io_params[i].admin) {
- error = EINVAL;
- goto out;
- }
+ if (!nvlist_exists_number(nvl, "trtype") ||
+ !nvlist_exists_nvlist(nvl, "admin") ||
+ !nvlist_exists_nvlist_array(nvl, "io") ||
+ !nvlist_exists_binary(nvl, "cdata") ||
+ !nvlist_exists_nvlist(nvl, "rparams"))
+ goto invalid;
+
+ rparams = nvlist_get_nvlist(nvl, "rparams");
+ if (!nvlist_exists_binary(rparams, "dle") ||
+ !nvlist_exists_string(rparams, "hostnqn") ||
+ !nvlist_exists_number(rparams, "num_io_queues") ||
+ !nvlist_exists_number(rparams, "io_qsize"))
+ goto invalid;
+
+ admin = nvlist_get_nvlist(nvl, "admin");
+ if (!nvmf_validate_qpair_nvlist(admin, false))
+ goto invalid;
+ if (!nvlist_get_bool(admin, "admin"))
+ goto invalid;
+
+ io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
+ if (num_io_queues < 1 ||
+ num_io_queues != nvlist_get_number(rparams, "num_io_queues"))
+ goto invalid;
+ for (i = 0; i < num_io_queues; i++) {
+ if (!nvmf_validate_qpair_nvlist(io[i], false))
+ goto invalid;
+ }
- /* Require all I/O queues to be the same size. */
- if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
- error = EINVAL;
- goto out;
- }
+ /* Require all I/O queues to be the same size. */
+ qsize = nvlist_get_number(rparams, "io_qsize");
+ for (i = 0; i < num_io_queues; i++) {
+ if (nvlist_get_number(io[i], "qsize") != qsize)
+ goto invalid;
}
- ivars->hh = hh;
- return (0);
+ cdata = nvlist_get_binary(nvl, "cdata", &i);
+ if (i != sizeof(*cdata))
+ goto invalid;
+ dle = nvlist_get_binary(rparams, "dle", &i);
+ if (i != sizeof(*dle))
+ goto invalid;
-out:
- free(ivars->io_params, M_NVMF);
- free(ivars->cdata, M_NVMF);
- return (error);
-}
+ if (memcmp(dle->subnqn, cdata->subnqn, sizeof(cdata->subnqn)) != 0)
+ goto invalid;
-void
-nvmf_free_ivars(struct nvmf_ivars *ivars)
-{
- free(ivars->io_params, M_NVMF);
- free(ivars->cdata, M_NVMF);
+ *nvlp = nvl;
+ return (0);
+invalid:
+ nvlist_destroy(nvl);
+ return (EINVAL);
}
static int
nvmf_probe(device_t dev)
{
- struct nvmf_ivars *ivars = device_get_ivars(dev);
- char desc[260];
+ const nvlist_t *nvl = device_get_ivars(dev);
+ const struct nvme_controller_data *cdata;
- if (ivars == NULL)
+ if (nvl == NULL)
return (ENXIO);
- snprintf(desc, sizeof(desc), "Fabrics: %.256s", ivars->cdata->subnqn);
- device_set_desc_copy(dev, desc);
+ cdata = nvlist_get_binary(nvl, "cdata", NULL);
+ device_set_descf(dev, "Fabrics: %.256s", cdata->subnqn);
return (BUS_PROBE_DEFAULT);
}
static int
-nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
+nvmf_establish_connection(struct nvmf_softc *sc, nvlist_t *nvl)
{
+ const nvlist_t *const *io;
+ const nvlist_t *admin;
+ uint64_t kato;
+ size_t num_io_queues;
+ enum nvmf_trtype trtype;
char name[16];
+ trtype = nvlist_get_number(nvl, "trtype");
+ admin = nvlist_get_nvlist(nvl, "admin");
+ io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
+ kato = dnvlist_get_number(nvl, "kato", 0);
+ sc->reconnect_delay = dnvlist_get_number(nvl, "reconnect_delay", 0);
+ sc->controller_loss_timeout = dnvlist_get_number(nvl,
+ "controller_loss_timeout", 0);
+
/* Setup the admin queue. */
- sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
- "admin queue");
+ sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0);
if (sc->admin == NULL) {
device_printf(sc->dev, "Failed to setup admin queue\n");
return (ENXIO);
}
/* Setup I/O queues. */
- sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
+ sc->io = malloc(num_io_queues * sizeof(*sc->io), M_NVMF,
M_WAITOK | M_ZERO);
- sc->num_io_queues = ivars->hh->num_io_queues;
+ sc->num_io_queues = num_io_queues;
for (u_int i = 0; i < sc->num_io_queues; i++) {
snprintf(name, sizeof(name), "I/O queue %u", i);
- sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
- &ivars->io_params[i], name);
+ sc->io[i] = nvmf_init_qp(sc, trtype, io[i], name, i);
if (sc->io[i] == NULL) {
device_printf(sc->dev, "Failed to setup I/O queue %u\n",
- i + 1);
+ i);
return (ENXIO);
}
}
/* Start KeepAlive timers. */
- if (ivars->hh->kato != 0) {
+ if (kato != 0) {
sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
sc->cdata->ctratt) != 0;
- sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
+ sc->ka_rx_sbt = mstosbt(kato);
sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
nvmf_check_keep_alive, sc, C_HARDCLOCK);
@@ -292,12 +335,23 @@ nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
nvmf_send_keep_alive, sc, C_HARDCLOCK);
}
+ memcpy(sc->cdata, nvlist_get_binary(nvl, "cdata", NULL),
+ sizeof(*sc->cdata));
+
+ /* Save reconnect parameters. */
+ nvlist_destroy(sc->rparams);
+ sc->rparams = nvlist_take_nvlist(nvl, "rparams");
+
return (0);
}
+typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t,
+ const struct nvme_namespace_data *, void *);
+
static bool
-nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
- struct nvme_namespace_data *data, uint32_t *nsidp)
+nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
+ struct nvme_namespace_data *data, uint32_t *nsidp,
+ nvmf_scan_active_ns_cb *cb, void *cb_arg)
{
struct nvmf_completion_status status;
uint32_t nsid;
@@ -333,13 +387,6 @@ nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
return (true);
}
- if (sc->ns[nsid - 1] != NULL) {
- device_printf(sc->dev,
- "duplicate namespace %u in active namespace list\n",
- nsid);
- return (false);
- }
-
nvmf_status_init(&status);
nvmf_status_wait_io(&status);
if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
@@ -365,49 +412,37 @@ nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
return (false);
}
- /*
- * As in nvme_ns_construct, a size of zero indicates an
- * invalid namespace.
- */
nvme_namespace_data_swapbytes(data);
- if (data->nsze == 0) {
- device_printf(sc->dev,
- "ignoring active namespace %u with zero size\n",
- nsid);
- continue;
- }
-
- sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
-
- nvmf_sim_rescan_ns(sc, nsid);
+ if (!cb(sc, nsid, data, cb_arg))
+ return (false);
}
MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);
- if (nsid >= 0xfffffffd)
+ if (nsid >= NVME_GLOBAL_NAMESPACE_TAG - 1)
*nsidp = 0;
else
- *nsidp = nsid + 1;
+ *nsidp = nsid;
return (true);
}
static bool
-nvmf_add_namespaces(struct nvmf_softc *sc)
+nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb,
+ void *cb_arg)
{
struct nvme_namespace_data *data;
struct nvme_ns_list *nslist;
uint32_t nsid;
bool retval;
- sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
- M_WAITOK | M_ZERO);
nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
data = malloc(sizeof(*data), M_NVMF, M_WAITOK);
nsid = 0;
retval = true;
for (;;) {
- if (!nvmf_scan_nslist(sc, nslist, data, &nsid)) {
+ if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb,
+ cb_arg)) {
retval = false;
break;
}
@@ -420,36 +455,77 @@ nvmf_add_namespaces(struct nvmf_softc *sc)
return (retval);
}
+static bool
+nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid,
+ const struct nvme_namespace_data *data, void *arg __unused)
+{
+ if (sc->ns[nsid - 1] != NULL) {
+ device_printf(sc->dev,
+ "duplicate namespace %u in active namespace list\n",
+ nsid);
+ return (false);
+ }
+
+ /*
+ * As in nvme_ns_construct, a size of zero indicates an
+ * invalid namespace.
+ */
+ if (data->nsze == 0) {
+ device_printf(sc->dev,
+ "ignoring active namespace %u with zero size\n", nsid);
+ return (true);
+ }
+
+ sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
+
+ nvmf_sim_rescan_ns(sc, nsid);
+ return (true);
+}
+
+static bool
+nvmf_add_namespaces(struct nvmf_softc *sc)
+{
+ sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
+ M_WAITOK | M_ZERO);
+ return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL));
+}
+
static int
nvmf_attach(device_t dev)
{
struct make_dev_args mda;
struct nvmf_softc *sc = device_get_softc(dev);
- struct nvmf_ivars *ivars = device_get_ivars(dev);
+ nvlist_t *nvl = device_get_ivars(dev);
+ const nvlist_t * const *io;
+ struct sysctl_oid *oid;
uint64_t val;
u_int i;
int error;
- if (ivars == NULL)
+ if (nvl == NULL)
return (ENXIO);
sc->dev = dev;
- sc->trtype = ivars->hh->trtype;
+ sc->trtype = nvlist_get_number(nvl, "trtype");
callout_init(&sc->ka_rx_timer, 1);
callout_init(&sc->ka_tx_timer, 1);
sx_init(&sc->connection_lock, "nvmf connection");
TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);
+ TIMEOUT_TASK_INIT(nvmf_tq, &sc->controller_loss_task, 0,
+ nvmf_controller_loss_task, sc);
+ TIMEOUT_TASK_INIT(nvmf_tq, &sc->request_reconnect_task, 0,
+ nvmf_request_reconnect_task, sc);
- /* Claim the cdata pointer from ivars. */
- sc->cdata = ivars->cdata;
- ivars->cdata = NULL;
+ oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues");
+ sc->ioq_oid_list = SYSCTL_CHILDREN(oid);
- nvmf_init_aer(sc);
+ sc->cdata = malloc(sizeof(*sc->cdata), M_NVMF, M_WAITOK);
- /* TODO: Multiqueue support. */
- sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;
+ nvmf_init_aer(sc);
- error = nvmf_establish_connection(sc, ivars);
+ error = nvmf_establish_connection(sc, nvl);
if (error != 0)
goto out;
@@ -476,6 +552,10 @@ nvmf_attach(device_t dev)
NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
}
+ io = nvlist_get_nvlist_array(nvl, "io", NULL);
+ sc->max_pending_io = nvlist_get_number(io[0], "qsize") *
+ sc->num_io_queues;
+
error = nvmf_init_sim(sc);
if (error != 0)
goto out;
@@ -503,6 +583,11 @@ nvmf_attach(device_t dev)
goto out;
}
+ sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
+ nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
+ sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
+ nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_LAST);
+
return (0);
out:
if (sc->ns != NULL) {
@@ -529,8 +614,11 @@ out:
nvmf_destroy_aer(sc);
- taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+ taskqueue_drain_timeout(nvmf_tq, &sc->controller_loss_task);
+ taskqueue_drain(nvmf_tq, &sc->disconnect_task);
sx_destroy(&sc->connection_lock);
+ nvlist_destroy(sc->rparams);
free(sc->cdata, M_NVMF);
return (error);
}
@@ -538,7 +626,7 @@ out:
void
nvmf_disconnect(struct nvmf_softc *sc)
{
- taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_enqueue(nvmf_tq, &sc->disconnect_task);
}
static void
@@ -579,6 +667,7 @@ nvmf_disconnect_task(void *arg, int pending __unused)
return;
}
+ nanotime(&sc->last_disconnect);
callout_drain(&sc->ka_tx_timer);
callout_drain(&sc->ka_rx_timer);
sc->ka_traffic = false;
@@ -600,29 +689,98 @@ nvmf_disconnect_task(void *arg, int pending __unused)
nvmf_destroy_qp(sc->admin);
sc->admin = NULL;
+ if (sc->reconnect_delay != 0)
+ nvmf_request_reconnect(sc);
+ if (sc->controller_loss_timeout != 0)
+ taskqueue_enqueue_timeout(nvmf_tq,
+ &sc->controller_loss_task, sc->controller_loss_timeout *
+ hz);
+
+ sx_xunlock(&sc->connection_lock);
+}
+
+static void
+nvmf_controller_loss_task(void *arg, int pending)
+{
+ struct nvmf_softc *sc = arg;
+ device_t dev;
+ int error;
+
+ bus_topo_lock();
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin != NULL || sc->detaching) {
+ /* Reconnected or already detaching. */
+ sx_xunlock(&sc->connection_lock);
+ bus_topo_unlock();
+ return;
+ }
+
+ sc->controller_timedout = true;
+ sx_xunlock(&sc->connection_lock);
+
+ /*
+ * XXX: Doing this from here is a bit ugly. We don't have an
+ * extra reference on `dev` but bus_topo_lock should block any
+ * concurrent device_delete_child invocations.
+ */
+ dev = sc->dev;
+ error = device_delete_child(root_bus, dev);
+ if (error != 0)
+ device_printf(dev,
+ "failed to detach after controller loss: %d\n", error);
+ bus_topo_unlock();
+}
+
+static void
+nvmf_request_reconnect(struct nvmf_softc *sc)
+{
+ char buf[64];
+
+ sx_assert(&sc->connection_lock, SX_LOCKED);
+
+ snprintf(buf, sizeof(buf), "name=\"%s\"", device_get_nameunit(sc->dev));
+ devctl_notify("nvme", "controller", "RECONNECT", buf);
+ taskqueue_enqueue_timeout(nvmf_tq, &sc->request_reconnect_task,
+ sc->reconnect_delay * hz);
+}
+
+static void
+nvmf_request_reconnect_task(void *arg, int pending)
+{
+ struct nvmf_softc *sc = arg;
+
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
+ /* Reconnected or already detaching. */
+ sx_xunlock(&sc->connection_lock);
+ return;
+ }
+
+ nvmf_request_reconnect(sc);
sx_xunlock(&sc->connection_lock);
}
static int
-nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
+nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
{
- struct nvmf_ivars ivars;
+ const struct nvme_controller_data *cdata;
+ nvlist_t *nvl;
u_int i;
int error;
+ error = nvmf_copyin_handoff(nv, &nvl);
+ if (error != 0)
+ return (error);
+
/* XXX: Should we permit changing the transport type? */
- if (sc->trtype != hh->trtype) {
+ if (sc->trtype != nvlist_get_number(nvl, "trtype")) {
device_printf(sc->dev,
"transport type mismatch on reconnect\n");
+ nvlist_destroy(nvl);
return (EINVAL);
}
- error = nvmf_init_ivars(&ivars, hh);
- if (error != 0)
- return (error);
-
sx_xlock(&sc->connection_lock);
- if (sc->admin != NULL || sc->detaching) {
+ if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
error = EBUSY;
goto out;
}
@@ -634,8 +792,9 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
* ensures the new association is connected to the same NVMe
* subsystem.
*/
- if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
- sizeof(ivars.cdata->subnqn)) != 0) {
+ cdata = nvlist_get_binary(nvl, "cdata", NULL);
+ if (memcmp(sc->cdata->subnqn, cdata->subnqn,
+ sizeof(cdata->subnqn)) != 0) {
device_printf(sc->dev,
"controller subsystem NQN mismatch on reconnect\n");
error = EINVAL;
@@ -647,7 +806,7 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
* max_pending_io is still correct?
*/
- error = nvmf_establish_connection(sc, &ivars);
+ error = nvmf_establish_connection(sc, nvl);
if (error != 0)
goto out;
@@ -665,12 +824,85 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
nvmf_reconnect_ns(sc->ns[i]);
}
nvmf_reconnect_sim(sc);
+
+ nvmf_rescan_all_ns(sc);
+
+ taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL);
+ taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL);
out:
sx_xunlock(&sc->connection_lock);
- nvmf_free_ivars(&ivars);
+ nvlist_destroy(nvl);
return (error);
}
+static void
+nvmf_shutdown_pre_sync(void *arg, int howto)
+{
+ struct nvmf_softc *sc = arg;
+
+ if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
+ return;
+
+ /*
+ * If this association is disconnected, abort any pending
+ * requests with an error to permit filesystems to unmount
+ * without hanging.
+ */
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin != NULL || sc->detaching) {
+ sx_xunlock(&sc->connection_lock);
+ return;
+ }
+
+ for (u_int i = 0; i < sc->cdata->nn; i++) {
+ if (sc->ns[i] != NULL)
+ nvmf_shutdown_ns(sc->ns[i]);
+ }
+ nvmf_shutdown_sim(sc);
+ sx_xunlock(&sc->connection_lock);
+}
+
+static void
+nvmf_shutdown_post_sync(void *arg, int howto)
+{
+ struct nvmf_softc *sc = arg;
+
+ if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
+ return;
+
+ /*
+ * If this association is connected, disconnect gracefully.
+ */
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin == NULL || sc->detaching) {
+ sx_xunlock(&sc->connection_lock);
+ return;
+ }
+
+ callout_drain(&sc->ka_tx_timer);
+ callout_drain(&sc->ka_rx_timer);
+
+ nvmf_shutdown_controller(sc);
+
+ /*
+ * Quiesce consumers so that any commands submitted after this
+ * fail with an error. Notably, nda(4) calls nda_flush() from
+ * a post_sync handler that might be ordered after this one.
+ */
+ for (u_int i = 0; i < sc->cdata->nn; i++) {
+ if (sc->ns[i] != NULL)
+ nvmf_shutdown_ns(sc->ns[i]);
+ }
+ nvmf_shutdown_sim(sc);
+
+ for (u_int i = 0; i < sc->num_io_queues; i++) {
+ nvmf_destroy_qp(sc->io[i]);
+ }
+ nvmf_destroy_qp(sc->admin);
+ sc->admin = NULL;
+ sx_xunlock(&sc->connection_lock);
+}
+
static int
nvmf_detach(device_t dev)
{
@@ -683,6 +915,9 @@ nvmf_detach(device_t dev)
sc->detaching = true;
sx_xunlock(&sc->connection_lock);
+ EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
+ EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh);
+
nvmf_destroy_sim(sc);
for (i = 0; i < sc->cdata->nn; i++) {
if (sc->ns[i] != NULL)
@@ -701,7 +936,21 @@ nvmf_detach(device_t dev)
}
free(sc->io, M_NVMF);
- taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_drain(nvmf_tq, &sc->disconnect_task);
+ if (taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task,
+ NULL) != 0)
+ taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+
+ /*
+ * Don't cancel/drain the controller loss task if that task
+ * has fired and is triggering the detach.
+ */
+ if (!sc->controller_timedout) {
+ if (taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task,
+ NULL) != 0)
+ taskqueue_drain_timeout(nvmf_tq,
+ &sc->controller_loss_task);
+ }
if (sc->admin != NULL)
nvmf_destroy_qp(sc->admin);
@@ -709,16 +958,45 @@ nvmf_detach(device_t dev)
nvmf_destroy_aer(sc);
sx_destroy(&sc->connection_lock);
+ nvlist_destroy(sc->rparams);
free(sc->cdata, M_NVMF);
return (0);
}
+static void
+nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid,
+ const struct nvme_namespace_data *data)
+{
+ struct nvmf_namespace *ns;
+
+ /* XXX: Needs locking around sc->ns[]. */
+ ns = sc->ns[nsid - 1];
+ if (data->nsze == 0) {
+ /* XXX: Needs locking */
+ if (ns != NULL) {
+ nvmf_destroy_ns(ns);
+ sc->ns[nsid - 1] = NULL;
+ }
+ } else {
+ /* XXX: Needs locking */
+ if (ns == NULL) {
+ sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
+ } else {
+ if (!nvmf_update_ns(ns, data)) {
+ nvmf_destroy_ns(ns);
+ sc->ns[nsid - 1] = NULL;
+ }
+ }
+ }
+
+ nvmf_sim_rescan_ns(sc, nsid);
+}
+
void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
struct nvmf_completion_status status;
struct nvme_namespace_data *data;
- struct nvmf_namespace *ns;
data = malloc(sizeof(*data), M_NVMF, M_WAITOK);
@@ -751,29 +1029,58 @@ nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
nvme_namespace_data_swapbytes(data);
- /* XXX: Needs locking around sc->ns[]. */
- ns = sc->ns[nsid - 1];
- if (data->nsze == 0) {
- /* XXX: Needs locking */
+ nvmf_rescan_ns_1(sc, nsid, data);
+
+ free(data, M_NVMF);
+}
+
+static void
+nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid,
+ uint32_t next_valid_nsid)
+{
+ struct nvmf_namespace *ns;
+
+ for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++) {
+ /* XXX: Needs locking around sc->ns[]. */
+ ns = sc->ns[nsid - 1];
if (ns != NULL) {
nvmf_destroy_ns(ns);
sc->ns[nsid - 1] = NULL;
- }
- } else {
- /* XXX: Needs locking */
- if (ns == NULL) {
- sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
- } else {
- if (!nvmf_update_ns(ns, data)) {
- nvmf_destroy_ns(ns);
- sc->ns[nsid - 1] = NULL;
- }
+
+ nvmf_sim_rescan_ns(sc, nsid);
}
}
+}
- free(data, M_NVMF);
+static bool
+nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid,
+ const struct nvme_namespace_data *data, void *arg)
+{
+ uint32_t *last_nsid = arg;
- nvmf_sim_rescan_ns(sc, nsid);
+ /* Check for any gaps prior to this namespace. */
+ nvmf_purge_namespaces(sc, *last_nsid + 1, nsid);
+ *last_nsid = nsid;
+
+ nvmf_rescan_ns_1(sc, nsid, data);
+ return (true);
+}
+
+void
+nvmf_rescan_all_ns(struct nvmf_softc *sc)
+{
+ uint32_t last_nsid;
+
+ last_nsid = 0;
+ if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid))
+ return;
+
+ /*
+ * Check for any namespace devices after the last active
+ * namespace.
+ */
+ nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1);
}
int
@@ -822,12 +1129,21 @@ nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
cmd.cdw14 = pt->cmd.cdw14;
cmd.cdw15 = pt->cmd.cdw15;
+ sx_slock(&sc->connection_lock);
+ if (sc->admin == NULL || sc->detaching) {
+ device_printf(sc->dev,
+ "failed to send passthrough command\n");
+ error = ECONNABORTED;
+ sx_sunlock(&sc->connection_lock);
+ goto error;
+ }
if (admin)
qp = sc->admin;
else
qp = nvmf_select_io_queue(sc);
nvmf_status_init(&status);
req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
+ sx_sunlock(&sc->connection_lock);
if (req == NULL) {
device_printf(sc->dev, "failed to send passthrough command\n");
error = ECONNABORTED;
@@ -857,14 +1173,46 @@ error:
}
static int
+nvmf_reconnect_params(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
+{
+ int error;
+
+ sx_slock(&sc->connection_lock);
+ error = nvmf_pack_ioc_nvlist(sc->rparams, nv);
+ sx_sunlock(&sc->connection_lock);
+
+ return (error);
+}
+
+static int
+nvmf_connection_status(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
+{
+ nvlist_t *nvl, *nvl_ts;
+ int error;
+
+ nvl = nvlist_create(0);
+ nvl_ts = nvlist_create(0);
+
+ sx_slock(&sc->connection_lock);
+ nvlist_add_bool(nvl, "connected", sc->admin != NULL);
+ nvlist_add_number(nvl_ts, "tv_sec", sc->last_disconnect.tv_sec);
+ nvlist_add_number(nvl_ts, "tv_nsec", sc->last_disconnect.tv_nsec);
+ sx_sunlock(&sc->connection_lock);
+ nvlist_move_nvlist(nvl, "last_disconnect", nvl_ts);
+
+ error = nvmf_pack_ioc_nvlist(nvl, nv);
+ nvlist_destroy(nvl);
+ return (error);
+}
+
+static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
struct thread *td)
{
struct nvmf_softc *sc = cdev->si_drv1;
struct nvme_get_nsid *gnsid;
struct nvme_pt_command *pt;
- struct nvmf_reconnect_params *rp;
- struct nvmf_handoff_host *hh;
+ struct nvmf_ioc_nv *nv;
switch (cmd) {
case NVME_PASSTHROUGH_CMD:
@@ -872,25 +1220,25 @@ nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
return (nvmf_passthrough_cmd(sc, pt, true));
case NVME_GET_NSID:
gnsid = (struct nvme_get_nsid *)arg;
- strncpy(gnsid->cdev, device_get_nameunit(sc->dev),
+ strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
sizeof(gnsid->cdev));
- gnsid->cdev[sizeof(gnsid->cdev) - 1] = '\0';
gnsid->nsid = 0;
return (0);
case NVME_GET_MAX_XFER_SIZE:
*(uint64_t *)arg = sc->max_xfer_size;
return (0);
- case NVMF_RECONNECT_PARAMS:
- rp = (struct nvmf_reconnect_params *)arg;
- if ((sc->cdata->fcatt & 1) == 0)
- rp->cntlid = NVMF_CNTLID_DYNAMIC;
- else
- rp->cntlid = sc->cdata->ctrlr_id;
- memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
+ case NVME_GET_CONTROLLER_DATA:
+ memcpy(arg, sc->cdata, sizeof(*sc->cdata));
return (0);
+ case NVMF_RECONNECT_PARAMS:
+ nv = (struct nvmf_ioc_nv *)arg;
+ return (nvmf_reconnect_params(sc, nv));
case NVMF_RECONNECT_HOST:
- hh = (struct nvmf_handoff_host *)arg;
- return (nvmf_reconnect_host(sc, hh));
+ nv = (struct nvmf_ioc_nv *)arg;
+ return (nvmf_reconnect_host(sc, nv));
+ case NVMF_CONNECTION_STATUS:
+ nv = (struct nvmf_ioc_nv *)arg;
+ return (nvmf_connection_status(sc, nv));
default:
return (ENOTTY);
}
@@ -904,14 +1252,25 @@ static struct cdevsw nvmf_cdevsw = {
static int
nvmf_modevent(module_t mod, int what, void *arg)
{
+ int error;
+
switch (what) {
case MOD_LOAD:
- return (nvmf_ctl_load());
+ error = nvmf_ctl_load();
+ if (error != 0)
+ return (error);
+
+ nvmf_tq = taskqueue_create("nvmf", M_WAITOK | M_ZERO,
+ taskqueue_thread_enqueue, &nvmf_tq);
+ taskqueue_start_threads(&nvmf_tq, 1, PWAIT, "nvmf taskq");
+ return (0);
case MOD_QUIESCE:
return (0);
case MOD_UNLOAD:
nvmf_ctl_unload();
destroy_dev_drain(&nvmf_cdevsw);
+ if (nvmf_tq != NULL)
+ taskqueue_free(nvmf_tq);
return (0);
default:
return (EOPNOTSUPP);
@@ -923,9 +1282,6 @@ static device_method_t nvmf_methods[] = {
DEVMETHOD(device_probe, nvmf_probe),
DEVMETHOD(device_attach, nvmf_attach),
DEVMETHOD(device_detach, nvmf_detach),
-#if 0
- DEVMETHOD(device_shutdown, nvmf_shutdown),
-#endif
DEVMETHOD_END
};
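
Note on the host reconnect logic above: a disconnection now arms two timeout tasks on the shared "nvmf" taskqueue. request_reconnect_task re-posts a devctl RECONNECT notification every reconnect_delay seconds, and controller_loss_task detaches the device once controller_loss_timeout seconds elapse without a reconnect. nvmf_detach() must tear these down without deadlocking against a task that is already running; the cancel-or-drain idiom it uses, sketched in isolation with a hypothetical timeout_task name:

	/*
	 * taskqueue_cancel_timeout() returns non-zero when the task could
	 * not be removed (e.g. it is currently running); in that case wait
	 * for it to finish before freeing anything it touches.
	 */
	if (taskqueue_cancel_timeout(tq, &sc->timeout_task, NULL) != 0)
		taskqueue_drain_timeout(tq, &sc->timeout_task);
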
diff --git a/sys/dev/nvmf/host/nvmf_aer.c b/sys/dev/nvmf/host/nvmf_aer.c
index 4c950f1518d0..2f7f177d0421 100644
--- a/sys/dev/nvmf/host/nvmf_aer.c
+++ b/sys/dev/nvmf/host/nvmf_aer.c
@@ -62,7 +62,7 @@ nvmf_handle_changed_namespaces(struct nvmf_softc *sc,
* probably just rescan the entire set of namespaces.
*/
if (ns_list->ns[0] == 0xffffffff) {
- device_printf(sc->dev, "too many changed namespaces\n");
+ nvmf_rescan_all_ns(sc);
return;
}
diff --git a/sys/dev/nvmf/host/nvmf_ctldev.c b/sys/dev/nvmf/host/nvmf_ctldev.c
index f40005a2a666..275d5e9c932a 100644
--- a/sys/dev/nvmf/host/nvmf_ctldev.c
+++ b/sys/dev/nvmf/host/nvmf_ctldev.c
@@ -9,6 +9,7 @@
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/malloc.h>
+#include <sys/nv.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
@@ -17,25 +18,25 @@
static struct cdev *nvmf_cdev;
static int
-nvmf_handoff_host(struct nvmf_handoff_host *hh)
+nvmf_handoff_host(struct nvmf_ioc_nv *nv)
{
- struct nvmf_ivars ivars;
+ nvlist_t *nvl;
device_t dev;
int error;
- error = nvmf_init_ivars(&ivars, hh);
+ error = nvmf_copyin_handoff(nv, &nvl);
if (error != 0)
return (error);
bus_topo_lock();
- dev = device_add_child(root_bus, "nvme", -1);
+ dev = device_add_child(root_bus, "nvme", DEVICE_UNIT_ANY);
if (dev == NULL) {
bus_topo_unlock();
error = ENXIO;
goto out;
}
- device_set_ivars(dev, &ivars);
+ device_set_ivars(dev, nvl);
error = device_probe_and_attach(dev);
device_set_ivars(dev, NULL);
if (error != 0)
@@ -43,7 +44,7 @@ nvmf_handoff_host(struct nvmf_handoff_host *hh)
bus_topo_unlock();
out:
- nvmf_free_ivars(&ivars);
+ nvlist_destroy(nvl);
return (error);
}
@@ -117,7 +118,7 @@ nvmf_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag,
{
switch (cmd) {
case NVMF_HANDOFF_HOST:
- return (nvmf_handoff_host((struct nvmf_handoff_host *)arg));
+ return (nvmf_handoff_host((struct nvmf_ioc_nv *)arg));
case NVMF_DISCONNECT_HOST:
return (nvmf_disconnect_host((const char **)arg));
case NVMF_DISCONNECT_ALL:
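
Note: the ioctls above now move a packed nvlist across the user/kernel boundary via struct nvmf_ioc_nv instead of fixed-layout handoff structures. A sketch of what the userland side might look like, assuming libnv; the nvmf_ioc_nv field names (data/len/size) are assumptions here, while the nvlist keys match the checks in nvmf_copyin_handoff():

	nvlist_t *nvl;
	struct nvmf_ioc_nv nv;

	nvl = nvlist_create(0);
	nvlist_add_number(nvl, "trtype", NVMF_TRTYPE_TCP);
	nvlist_add_nvlist(nvl, "admin", admin_params);
	nvlist_add_nvlist_array(nvl, "io", io_params, num_io_queues);
	nvlist_add_binary(nvl, "cdata", cdata, sizeof(*cdata));
	nvlist_add_nvlist(nvl, "rparams", rparams);

	nv.data = nvlist_pack(nvl, &nv.len);	/* serialized blob */
	nv.size = nv.len;
	error = ioctl(fd, NVMF_HANDOFF_HOST, &nv);
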
diff --git a/sys/dev/nvmf/host/nvmf_ns.c b/sys/dev/nvmf/host/nvmf_ns.c
index 3ce434bf7c50..4215c8295d2e 100644
--- a/sys/dev/nvmf/host/nvmf_ns.c
+++ b/sys/dev/nvmf/host/nvmf_ns.c
@@ -18,7 +18,7 @@
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
-#include <machine/stdarg.h>
+#include <sys/stdarg.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/host/nvmf_var.h>
@@ -29,6 +29,7 @@ struct nvmf_namespace {
u_int flags;
uint32_t lba_size;
bool disconnected;
+ bool shutdown;
TAILQ_HEAD(, bio) pending_bios;
struct mtx lock;
@@ -49,7 +50,7 @@ ns_printf(struct nvmf_namespace *ns, const char *fmt, ...)
sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
- sbuf_printf(&sb, "%sns%u: ", device_get_nameunit(ns->sc->dev),
+ sbuf_printf(&sb, "%sn%u: ", device_get_nameunit(ns->sc->dev),
ns->id);
va_start(ap, fmt);
@@ -84,13 +85,22 @@ nvmf_ns_biodone(struct bio *bio)
ns = bio->bio_dev->si_drv1;
/* If a request is aborted, resubmit or queue it for resubmission. */
- if (bio->bio_error == ECONNABORTED) {
+ if (bio->bio_error == ECONNABORTED && !nvmf_fail_disconnect) {
bio->bio_error = 0;
bio->bio_driver2 = 0;
mtx_lock(&ns->lock);
if (ns->disconnected) {
- TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
- mtx_unlock(&ns->lock);
+ if (nvmf_fail_disconnect || ns->shutdown) {
+ mtx_unlock(&ns->lock);
+ bio->bio_error = ECONNABORTED;
+ bio->bio_flags |= BIO_ERROR;
+ bio->bio_resid = bio->bio_bcount;
+ biodone(bio);
+ } else {
+ TAILQ_INSERT_TAIL(&ns->pending_bios, bio,
+ bio_queue);
+ mtx_unlock(&ns->lock);
+ }
} else {
mtx_unlock(&ns->lock);
nvmf_ns_strategy(bio);
@@ -163,6 +173,7 @@ nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio)
struct nvme_dsm_range *dsm_range;
struct memdesc mem;
uint64_t lba, lba_count;
+ int error;
dsm_range = NULL;
memset(&cmd, 0, sizeof(cmd));
@@ -201,10 +212,15 @@ nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio)
mtx_lock(&ns->lock);
if (ns->disconnected) {
- TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
+ if (nvmf_fail_disconnect || ns->shutdown) {
+ error = ECONNABORTED;
+ } else {
+ TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
+ error = 0;
+ }
mtx_unlock(&ns->lock);
free(dsm_range, M_NVMF);
- return (0);
+ return (error);
}
req = nvmf_allocate_request(nvmf_select_io_queue(ns->sc), &cmd,
@@ -258,9 +274,8 @@ nvmf_ns_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag,
return (nvmf_passthrough_cmd(ns->sc, pt, false));
case NVME_GET_NSID:
gnsid = (struct nvme_get_nsid *)arg;
- strncpy(gnsid->cdev, device_get_nameunit(ns->sc->dev),
+ strlcpy(gnsid->cdev, device_get_nameunit(ns->sc->dev),
sizeof(gnsid->cdev));
- gnsid->cdev[sizeof(gnsid->cdev) - 1] = '\0';
gnsid->nsid = ns->id;
return (0);
case DIOCGMEDIASIZE:
@@ -314,7 +329,7 @@ static struct cdevsw nvmf_ns_cdevsw = {
struct nvmf_namespace *
nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
- struct nvme_namespace_data *data)
+ const struct nvme_namespace_data *data)
{
struct make_dev_args mda;
struct nvmf_namespace *ns;
@@ -372,10 +387,12 @@ nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
mda.mda_gid = GID_WHEEL;
mda.mda_mode = 0600;
mda.mda_si_drv1 = ns;
- error = make_dev_s(&mda, &ns->cdev, "%sns%u",
+ error = make_dev_s(&mda, &ns->cdev, "%sn%u",
device_get_nameunit(sc->dev), id);
if (error != 0)
goto fail;
+ ns->cdev->si_drv2 = make_dev_alias(ns->cdev, "%sns%u",
+ device_get_nameunit(sc->dev), id);
ns->cdev->si_flags |= SI_UNMAPPED;
@@ -414,11 +431,35 @@ nvmf_reconnect_ns(struct nvmf_namespace *ns)
}
void
+nvmf_shutdown_ns(struct nvmf_namespace *ns)
+{
+ TAILQ_HEAD(, bio) bios;
+ struct bio *bio;
+
+ mtx_lock(&ns->lock);
+ ns->shutdown = true;
+ TAILQ_INIT(&bios);
+ TAILQ_CONCAT(&bios, &ns->pending_bios, bio_queue);
+ mtx_unlock(&ns->lock);
+
+ while (!TAILQ_EMPTY(&bios)) {
+ bio = TAILQ_FIRST(&bios);
+ TAILQ_REMOVE(&bios, bio, bio_queue);
+ bio->bio_error = ECONNABORTED;
+ bio->bio_flags |= BIO_ERROR;
+ bio->bio_resid = bio->bio_bcount;
+ biodone(bio);
+ }
+}
+
+void
nvmf_destroy_ns(struct nvmf_namespace *ns)
{
TAILQ_HEAD(, bio) bios;
struct bio *bio;
+ if (ns->cdev->si_drv2 != NULL)
+ destroy_dev(ns->cdev->si_drv2);
destroy_dev(ns->cdev);
/*
@@ -451,7 +492,8 @@ nvmf_destroy_ns(struct nvmf_namespace *ns)
}
bool
-nvmf_update_ns(struct nvmf_namespace *ns, struct nvme_namespace_data *data)
+nvmf_update_ns(struct nvmf_namespace *ns,
+ const struct nvme_namespace_data *data)
{
uint8_t lbads, lbaf;
diff --git a/sys/dev/nvmf/host/nvmf_qpair.c b/sys/dev/nvmf/host/nvmf_qpair.c
index 96cb5a8b0465..2f511cf0406d 100644
--- a/sys/dev/nvmf/host/nvmf_qpair.c
+++ b/sys/dev/nvmf/host/nvmf_qpair.c
@@ -10,6 +10,8 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/nv.h>
+#include <sys/sysctl.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
@@ -31,6 +33,7 @@ struct nvmf_host_qpair {
u_int num_commands;
uint16_t sqhd;
uint16_t sqtail;
+ uint64_t submitted;
struct mtx lock;
@@ -41,6 +44,7 @@ struct nvmf_host_qpair {
struct nvmf_host_command **active_commands;
char name[16];
+ struct sysctl_ctx_list sysctl_ctx;
};
struct nvmf_request *
@@ -112,8 +116,23 @@ nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
struct nvmf_softc *sc = qp->sc;
struct nvme_command *sqe;
struct nvmf_capsule *nc;
+ uint16_t new_sqtail;
int error;
+ mtx_assert(&qp->lock, MA_OWNED);
+
+ qp->submitted++;
+
+ /*
+ * Update flow control tracking.  The KASSERT below is just a
+ * sanity check: since num_commands == qsize - 1, there can
+ * never be too many commands in flight.
+ */
+ new_sqtail = (qp->sqtail + 1) % (qp->num_commands + 1);
+ KASSERT(new_sqtail != qp->sqhd, ("%s: qp %p is full", __func__, qp));
+ qp->sqtail = new_sqtail;
+ mtx_unlock(&qp->lock);
+
nc = cmd->req->nc;
sqe = nvmf_capsule_sqe(nc);
@@ -177,11 +196,23 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
return;
}
+ /* Update flow control tracking. */
+ mtx_lock(&qp->lock);
+ if (qp->sq_flow_control) {
+ if (nvmf_sqhd_valid(nc))
+ qp->sqhd = le16toh(cqe->sqhd);
+ } else {
+ /*
+ * If SQ FC is disabled, just advance the head for
+ * each response capsule received.
+ */
+ qp->sqhd = (qp->sqhd + 1) % (qp->num_commands + 1);
+ }
+
/*
* If the queue has been shutdown due to an error, silently
* drop the response.
*/
- mtx_lock(&qp->lock);
if (qp->qp == NULL) {
device_printf(sc->dev,
"received completion for CID %u on shutdown %s\n", cid,
@@ -212,7 +243,6 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
} else {
cmd->req = STAILQ_FIRST(&qp->pending_requests);
STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
- mtx_unlock(&qp->lock);
nvmf_dispatch_command(qp, cmd);
}
@@ -221,28 +251,61 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
nvmf_free_request(req);
}
+static void
+nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp,
+ bool admin, u_int qid)
+{
+ struct sysctl_ctx_list *ctx = &qp->sysctl_ctx;
+ struct sysctl_oid *oid;
+ struct sysctl_oid_list *list;
+ char name[8];
+
+ if (admin) {
+ oid = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO,
+ "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
+ } else {
+ snprintf(name, sizeof(name), "%u", qid);
+ oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name,
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue");
+ }
+ list = SYSCTL_CHILDREN(oid);
+
+ SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD,
+ NULL, qp->num_commands + 1, "Number of entries in queue");
+ SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd,
+ 0, "Current head of submission queue (as observed by driver)");
+ SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail,
+ 0, "Current tail of submission queue (as observed by driver)");
+ SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD,
+ &qp->submitted, 0, "Number of commands submitted");
+}
+
struct nvmf_host_qpair *
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
- struct nvmf_handoff_qpair_params *handoff, const char *name)
+ const nvlist_t *nvl, const char *name, u_int qid)
{
struct nvmf_host_command *cmd, *ncmd;
struct nvmf_host_qpair *qp;
u_int i;
+ bool admin;
+ admin = nvlist_get_bool(nvl, "admin");
qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
qp->sc = sc;
- qp->sq_flow_control = handoff->sq_flow_control;
- qp->sqhd = handoff->sqhd;
- qp->sqtail = handoff->sqtail;
+ qp->sq_flow_control = nvlist_get_bool(nvl, "sq_flow_control");
+ qp->sqhd = nvlist_get_number(nvl, "sqhd");
+ qp->sqtail = nvlist_get_number(nvl, "sqtail");
strlcpy(qp->name, name, sizeof(qp->name));
mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);
+ (void)sysctl_ctx_init(&qp->sysctl_ctx);
/*
* Allocate a spare command slot for each pending AER command
* on the admin queue.
*/
- qp->num_commands = handoff->qsize - 1;
- if (handoff->admin)
+ qp->num_commands = nvlist_get_number(nvl, "qsize") - 1;
+ if (admin)
qp->num_commands += sc->num_aer;
qp->active_commands = malloc(sizeof(*qp->active_commands) *
@@ -255,9 +318,10 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
}
STAILQ_INIT(&qp->pending_requests);
- qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error,
- qp, nvmf_receive_capsule, qp);
+ qp->qp = nvmf_allocate_qpair(trtype, false, nvl, nvmf_qp_error, qp,
+ nvmf_receive_capsule, qp);
if (qp->qp == NULL) {
+ (void)sysctl_ctx_free(&qp->sysctl_ctx);
TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
TAILQ_REMOVE(&qp->free_commands, cmd, link);
free(cmd, M_NVMF);
@@ -268,6 +332,8 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
return (NULL);
}
+ nvmf_sysctls_qp(sc, qp, admin, qid);
+
return (qp);
}
@@ -339,6 +405,7 @@ nvmf_destroy_qp(struct nvmf_host_qpair *qp)
struct nvmf_host_command *cmd, *ncmd;
nvmf_shutdown_qp(qp);
+ (void)sysctl_ctx_free(&qp->sysctl_ctx);
TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
TAILQ_REMOVE(&qp->free_commands, cmd, link);
@@ -381,6 +448,5 @@ nvmf_submit_request(struct nvmf_request *req)
("%s: CID already busy", __func__));
qp->active_commands[cmd->cid] = cmd;
cmd->req = req;
- mtx_unlock(&qp->lock);
nvmf_dispatch_command(qp, cmd);
}
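The head/tail updates above are ordinary circular-buffer arithmetic over qsize == num_commands + 1 slots: the tail advances on submit, the head on completion, and the KASSERT in nvmf_dispatch_command() is the standard full-queue check (advancing the tail must never reach the head). A toy userspace model of the invariant, assuming a qsize of 4:

    #include <assert.h>

    static unsigned
    sq_advance(unsigned ptr, unsigned qsize)
    {
            return ((ptr + 1) % qsize);
    }

    int
    main(void)
    {
            unsigned qsize = 4, head = 0, tail = 0, submitted = 0;

            /* With qsize slots, at most qsize - 1 commands fit. */
            while (sq_advance(tail, qsize) != head) {
                    tail = sq_advance(tail, qsize);
                    submitted++;
            }
            assert(submitted == qsize - 1);
            return (0);
    }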
diff --git a/sys/dev/nvmf/host/nvmf_sim.c b/sys/dev/nvmf/host/nvmf_sim.c
index b097b04d64c3..de9e958d8afd 100644
--- a/sys/dev/nvmf/host/nvmf_sim.c
+++ b/sys/dev/nvmf/host/nvmf_sim.c
@@ -40,7 +40,13 @@ nvmf_ccb_done(union ccb *ccb)
return;
if (nvmf_cqe_aborted(&ccb->nvmeio.cpl)) {
- ccb->ccb_h.status = CAM_REQUEUE_REQ;
+ struct cam_sim *sim = xpt_path_sim(ccb->ccb_h.path);
+ struct nvmf_softc *sc = cam_sim_softc(sim);
+
+ if (nvmf_fail_disconnect || sc->sim_shutdown)
+ ccb->ccb_h.status = CAM_DEV_NOT_THERE;
+ else
+ ccb->ccb_h.status = CAM_REQUEUE_REQ;
xpt_done(ccb);
} else if (ccb->nvmeio.cpl.status != 0) {
ccb->ccb_h.status = CAM_NVME_STATUS_ERROR;
@@ -52,7 +58,7 @@ nvmf_ccb_done(union ccb *ccb)
xpt_done(ccb);
} else {
ccb->ccb_h.status = CAM_REQ_CMP;
- xpt_done_direct(ccb);
+ xpt_done(ccb);
}
}
@@ -106,7 +112,10 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
mtx_lock(&sc->sim_mtx);
if (sc->sim_disconnected) {
mtx_unlock(&sc->sim_mtx);
- nvmeio->ccb_h.status = CAM_REQUEUE_REQ;
+ if (nvmf_fail_disconnect || sc->sim_shutdown)
+ nvmeio->ccb_h.status = CAM_DEV_NOT_THERE;
+ else
+ nvmeio->ccb_h.status = CAM_REQUEUE_REQ;
xpt_done(ccb);
return;
}
@@ -116,8 +125,8 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
qp = sc->admin;
req = nvmf_allocate_request(qp, &nvmeio->cmd, nvmf_ccb_complete,
ccb, M_NOWAIT);
+ mtx_unlock(&sc->sim_mtx);
if (req == NULL) {
- mtx_unlock(&sc->sim_mtx);
nvmeio->ccb_h.status = CAM_RESRC_UNAVAIL;
xpt_done(ccb);
return;
@@ -141,7 +150,6 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
("%s: incoming CCB is not in-progress", __func__));
ccb->ccb_h.status |= CAM_SIM_QUEUED;
nvmf_submit_request(req);
- mtx_unlock(&sc->sim_mtx);
}
static void
@@ -183,7 +191,7 @@ nvmf_sim_action(struct cam_sim *sim, union ccb *ccb)
cpi->xport_specific.nvmf.nsid =
xpt_path_lun_id(ccb->ccb_h.path);
cpi->xport_specific.nvmf.trtype = sc->trtype;
- strncpy(cpi->xport_specific.nvmf.dev_name,
+ strlcpy(cpi->xport_specific.nvmf.dev_name,
device_get_nameunit(sc->dev),
sizeof(cpi->xport_specific.nvmf.dev_name));
cpi->maxio = sc->max_xfer_size;
@@ -320,6 +328,15 @@ nvmf_reconnect_sim(struct nvmf_softc *sc)
}
void
+nvmf_shutdown_sim(struct nvmf_softc *sc)
+{
+ mtx_lock(&sc->sim_mtx);
+ sc->sim_shutdown = true;
+ mtx_unlock(&sc->sim_mtx);
+ xpt_release_simq(sc->sim, 1);
+}
+
+void
nvmf_destroy_sim(struct nvmf_softc *sc)
{
xpt_async(AC_LOST_DEVICE, sc->path, NULL);
diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h
index 64525851631e..606245b3969c 100644
--- a/sys/dev/nvmf/host/nvmf_var.h
+++ b/sys/dev/nvmf/host/nvmf_var.h
@@ -9,10 +9,13 @@
#define __NVMF_VAR_H__
#include <sys/_callout.h>
+#include <sys/_eventhandler.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/_nv.h>
#include <sys/_sx.h>
#include <sys/_task.h>
+#include <sys/smp.h>
#include <sys/queue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf_transport.h>
@@ -21,15 +24,10 @@ struct nvmf_aer;
struct nvmf_capsule;
struct nvmf_host_qpair;
struct nvmf_namespace;
+struct sysctl_oid_list;
typedef void nvmf_request_complete_t(void *, const struct nvme_completion *);
-struct nvmf_ivars {
- struct nvmf_handoff_host *hh;
- struct nvmf_handoff_qpair_params *io_params;
- struct nvme_controller_data *cdata;
-};
-
struct nvmf_softc {
device_t dev;
@@ -42,6 +40,7 @@ struct nvmf_softc {
struct cam_path *path;
struct mtx sim_mtx;
bool sim_disconnected;
+ bool sim_shutdown;
struct nvmf_namespace **ns;
@@ -76,12 +75,27 @@ struct nvmf_softc {
struct callout ka_rx_timer;
sbintime_t ka_rx_sbt;
+ struct timeout_task request_reconnect_task;
+ struct timeout_task controller_loss_task;
+ uint32_t reconnect_delay;
+ uint32_t controller_loss_timeout;
+
struct sx connection_lock;
struct task disconnect_task;
bool detaching;
+ bool controller_timedout;
u_int num_aer;
struct nvmf_aer *aer;
+
+ struct sysctl_oid_list *ioq_oid_list;
+
+ nvlist_t *rparams;
+
+ struct timespec last_disconnect;
+
+ eventhandler_tag shutdown_pre_sync_eh;
+ eventhandler_tag shutdown_post_sync_eh;
};
struct nvmf_request {
@@ -104,8 +118,8 @@ struct nvmf_completion_status {
static __inline struct nvmf_host_qpair *
nvmf_select_io_queue(struct nvmf_softc *sc)
{
- /* TODO: Support multiple queues? */
- return (sc->io[0]);
+ u_int idx = curcpu * sc->num_io_queues / (mp_maxid + 1);
+ return (sc->io[idx]);
}
static __inline bool
@@ -140,14 +154,17 @@ extern driver_t nvme_nvmf_driver;
MALLOC_DECLARE(M_NVMF);
#endif
+/* If true, I/O requests will fail while the host is disconnected. */
+extern bool nvmf_fail_disconnect;
+
/* nvmf.c */
void nvmf_complete(void *arg, const struct nvme_completion *cqe);
void nvmf_io_complete(void *arg, size_t xfered, int error);
void nvmf_wait_for_reply(struct nvmf_completion_status *status);
-int nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh);
-void nvmf_free_ivars(struct nvmf_ivars *ivars);
+int nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp);
void nvmf_disconnect(struct nvmf_softc *sc);
void nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid);
+void nvmf_rescan_all_ns(struct nvmf_softc *sc);
int nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
bool admin);
@@ -180,17 +197,17 @@ void nvmf_ctl_unload(void);
/* nvmf_ns.c */
struct nvmf_namespace *nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
- struct nvme_namespace_data *data);
+ const struct nvme_namespace_data *data);
void nvmf_disconnect_ns(struct nvmf_namespace *ns);
void nvmf_reconnect_ns(struct nvmf_namespace *ns);
+void nvmf_shutdown_ns(struct nvmf_namespace *ns);
void nvmf_destroy_ns(struct nvmf_namespace *ns);
bool nvmf_update_ns(struct nvmf_namespace *ns,
- struct nvme_namespace_data *data);
+ const struct nvme_namespace_data *data);
/* nvmf_qpair.c */
struct nvmf_host_qpair *nvmf_init_qp(struct nvmf_softc *sc,
- enum nvmf_trtype trtype, struct nvmf_handoff_qpair_params *handoff,
- const char *name);
+ enum nvmf_trtype trtype, const nvlist_t *nvl, const char *name, u_int qid);
void nvmf_shutdown_qp(struct nvmf_host_qpair *qp);
void nvmf_destroy_qp(struct nvmf_host_qpair *qp);
struct nvmf_request *nvmf_allocate_request(struct nvmf_host_qpair *qp,
@@ -202,6 +219,7 @@ void nvmf_free_request(struct nvmf_request *req);
int nvmf_init_sim(struct nvmf_softc *sc);
void nvmf_disconnect_sim(struct nvmf_softc *sc);
void nvmf_reconnect_sim(struct nvmf_softc *sc);
+void nvmf_shutdown_sim(struct nvmf_softc *sc);
void nvmf_destroy_sim(struct nvmf_softc *sc);
void nvmf_sim_rescan_ns(struct nvmf_softc *sc, uint32_t id);
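The new nvmf_select_io_queue() replaces the single-queue TODO with a simple linear mapping: CPU c is assigned queue c * num_io_queues / (mp_maxid + 1). A quick userspace check of the arithmetic, with assumed counts of 8 CPUs and 3 queues:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned ncpu = 8, nq = 3;      /* assumed example values */

            /* CPUs 0-2 -> queue 0, 3-5 -> queue 1, 6-7 -> queue 2. */
            for (unsigned cpu = 0; cpu < ncpu; cpu++)
                    printf("cpu %u -> ioq %u\n", cpu, cpu * nq / ncpu);
            return (0);
    }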
diff --git a/sys/dev/nvmf/nvmf.h b/sys/dev/nvmf/nvmf.h
index 1f1ecd437c7e..9b2b4c1dea40 100644
--- a/sys/dev/nvmf/nvmf.h
+++ b/sys/dev/nvmf/nvmf.h
@@ -26,54 +26,107 @@
#define NVMF_NN (1024)
-struct nvmf_handoff_qpair_params {
- bool admin;
- bool sq_flow_control;
- u_int qsize;
- uint16_t sqhd;
- uint16_t sqtail; /* host only */
- union {
- struct {
- int fd;
- uint8_t rxpda;
- uint8_t txpda;
- bool header_digests;
- bool data_digests;
- uint32_t maxr2t;
- uint32_t maxh2cdata;
- uint32_t max_icd;
- } tcp;
- };
-};
+/*
+ * Default timeouts for Fabrics hosts. These match values used by
+ * Linux.
+ */
+#define NVMF_DEFAULT_RECONNECT_DELAY 10
+#define NVMF_DEFAULT_CONTROLLER_LOSS 600
-struct nvmf_handoff_host {
- u_int trtype;
- u_int num_io_queues;
- u_int kato;
- struct nvmf_handoff_qpair_params admin;
- struct nvmf_handoff_qpair_params *io;
- const struct nvme_controller_data *cdata;
+/*
+ * (data, size) is the userspace buffer for a packed nvlist.
+ *
+ * For requests that copyout an nvlist, len is the amount of data
+ * copied out to *data. If size is zero, no data is copied and len is
+ * set to the required buffer size.
+ */
+struct nvmf_ioc_nv {
+ void *data;
+ size_t len;
+ size_t size;
};
-struct nvmf_reconnect_params {
- uint16_t cntlid;
- char subnqn[256];
-};
+/*
+ * The fields in a qpair handoff nvlist are:
+ *
+ * Transport independent:
+ *
+ * bool admin
+ * bool sq_flow_control
+ * number qsize
+ * number sqhd
+ * number sqtail host only
+ *
+ * TCP transport:
+ *
+ * number fd
+ * number rxpda
+ * number txpda
+ * bool header_digests
+ * bool data_digests
+ * number maxr2t
+ * number maxh2cdata
+ * number max_icd
+ */
-struct nvmf_handoff_controller_qpair {
- u_int trtype;
- struct nvmf_handoff_qpair_params params;
- const struct nvmf_fabric_connect_cmd *cmd;
- const struct nvmf_fabric_connect_data *data;
-};
+/*
+ * The fields in the nvlist for NVMF_HANDOFF_HOST and
+ * NVMF_RECONNECT_HOST are:
+ *
+ * number trtype
+ * number kato (optional)
+ * number reconnect_delay (optional)
+ * number controller_loss_timeout (optional)
+ * qpair handoff nvlist admin
+ * qpair handoff nvlist array io
+ * binary cdata struct nvme_controller_data
+ * NVMF_RECONNECT_PARAMS nvlist rparams
+ */
+
+/*
+ * The fields in the nvlist for NVMF_RECONNECT_PARAMS are:
+ *
+ * binary dle struct nvme_discovery_log_entry
+ * string hostnqn
+ * number num_io_queues
+ * number kato (optional)
+ * number reconnect_delay (optional)
+ * number controller_loss_timeout (optional)
+ * number io_qsize
+ * bool sq_flow_control
+ *
+ * TCP transport:
+ *
+ * bool header_digests
+ * bool data_digests
+ */
+
+/*
+ * The fields in the nvlist for NVMF_CONNECTION_STATUS are:
+ *
+ * bool connected
+ * timespec nvlist last_disconnect
+ * number tv_sec
+ * number tv_nsec
+ */
+
+/*
+ * The fields in the nvlist for handing off a controller qpair are:
+ *
+ * number trtype
+ * qpair handoff nvlist params
+ * binary cmd struct nvmf_fabric_connect_cmd
+ * binary data struct nvmf_fabric_connect_data
+ */
/* Operations on /dev/nvmf */
-#define NVMF_HANDOFF_HOST _IOW('n', 200, struct nvmf_handoff_host)
+#define NVMF_HANDOFF_HOST _IOW('n', 200, struct nvmf_ioc_nv)
#define NVMF_DISCONNECT_HOST _IOW('n', 201, const char *)
#define NVMF_DISCONNECT_ALL _IO('n', 202)
/* Operations on /dev/nvmeX */
-#define NVMF_RECONNECT_PARAMS _IOR('n', 203, struct nvmf_reconnect_params)
-#define NVMF_RECONNECT_HOST _IOW('n', 204, struct nvmf_handoff_host)
+#define NVMF_RECONNECT_PARAMS _IOWR('n', 203, struct nvmf_ioc_nv)
+#define NVMF_RECONNECT_HOST _IOW('n', 204, struct nvmf_ioc_nv)
+#define NVMF_CONNECTION_STATUS _IOWR('n', 205, struct nvmf_ioc_nv)
#endif /* !__NVMF_H__ */
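The ioctls that return an nvlist (NVMF_RECONNECT_PARAMS, NVMF_CONNECTION_STATUS) follow the size-query convention described above: call once with size == 0 so the kernel reports the required length in len, then call again with a real buffer. A userspace sketch, assuming an open /dev/nvmeX descriptor:

    #include <sys/ioctl.h>
    #include <sys/nv.h>
    #include <dev/nvmf/nvmf.h>
    #include <stdlib.h>

    static nvlist_t *
    connection_status(int fd)
    {
            struct nvmf_ioc_nv nv;
            nvlist_t *nvl;

            /* Pass 1: size == 0 asks the kernel for the packed length. */
            nv.data = NULL;
            nv.size = 0;
            if (ioctl(fd, NVMF_CONNECTION_STATUS, &nv) == -1)
                    return (NULL);

            /* Pass 2: copy out and unpack the nvlist. */
            nv.size = nv.len;
            nv.data = malloc(nv.size);
            if (nv.data == NULL ||
                ioctl(fd, NVMF_CONNECTION_STATUS, &nv) == -1) {
                    free(nv.data);
                    return (NULL);
            }
            nvl = nvlist_unpack(nv.data, nv.len, 0);
            free(nv.data);
            return (nvl);
    }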
diff --git a/sys/dev/nvmf/nvmf_proto.h b/sys/dev/nvmf/nvmf_proto.h
index b0be236f77fa..f67c34acbf95 100644
--- a/sys/dev/nvmf/nvmf_proto.h
+++ b/sys/dev/nvmf/nvmf_proto.h
@@ -22,8 +22,6 @@
* NVMe over Fabrics specification definitions
*/
-#pragma pack(push, 1)
-
#define NVME_NQN_FIELD_SIZE 256
struct nvmf_capsule_cmd {
@@ -174,7 +172,7 @@ struct nvmf_fabric_cmd {
uint16_t cid;
uint8_t fctype;
uint8_t reserved2[59];
-};
+} __aligned(8);
struct nvmf_fabric_auth_recv_cmd {
uint8_t opcode;
@@ -764,6 +762,4 @@ _Static_assert(offsetof(struct nvme_tcp_r2t_hdr, ttag) == 10, "Incorrect offset"
_Static_assert(offsetof(struct nvme_tcp_r2t_hdr, r2to) == 12, "Incorrect offset");
_Static_assert(offsetof(struct nvme_tcp_r2t_hdr, r2tl) == 16, "Incorrect offset");
-#pragma pack(pop)
-
#endif /* __NVMF_PROTO_H__ */
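Dropping #pragma pack works because the wire structures are already naturally laid out with no implicit padding; __aligned(8) only raises the alignment of the SQE-sized fabrics commands, and the existing _Static_asserts keep pinning the offsets. A standalone illustration of the pattern, using a made-up header struct (assumes <sys/cdefs.h> for __aligned):

    #include <sys/cdefs.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical wire header with no implicit padding. */
    struct example_wire_hdr {
            uint8_t         opcode;
            uint8_t         flags;
            uint16_t        cid;
            uint32_t        length;
    } __aligned(8);

    _Static_assert(sizeof(struct example_wire_hdr) == 8, "Incorrect size");
    _Static_assert(offsetof(struct example_wire_hdr, cid) == 2,
        "Incorrect offset");
    _Static_assert(offsetof(struct example_wire_hdr, length) == 4,
        "Incorrect offset");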
diff --git a/sys/dev/nvmf/nvmf_tcp.c b/sys/dev/nvmf/nvmf_tcp.c
index 57c81eceee02..6ad5229f6043 100644
--- a/sys/dev/nvmf/nvmf_tcp.c
+++ b/sys/dev/nvmf/nvmf_tcp.c
@@ -18,6 +18,7 @@
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
+#include <sys/nv.h>
#include <sys/protosw.h>
#include <sys/refcount.h>
#include <sys/socket.h>
@@ -138,7 +139,7 @@ static void tcp_free_qpair(struct nvmf_qpair *nq);
SYSCTL_NODE(_kern_nvmf, OID_AUTO, tcp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"TCP transport");
static u_int tcp_max_transmit_data = 256 * 1024;
-SYSCTL_UINT(_kern_nvmf_tcp, OID_AUTO, max_c2hdata, CTLFLAG_RWTUN,
+SYSCTL_UINT(_kern_nvmf_tcp, OID_AUTO, max_transmit_data, CTLFLAG_RWTUN,
&tcp_max_transmit_data, 0,
"Maximum size of data payload in a transmitted PDU");
@@ -442,7 +443,7 @@ nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen,
plen += sizeof(digest);
if (data_len != 0) {
KASSERT(m_length(data, NULL) == data_len, ("length mismatch"));
- pdo = roundup2(plen, qp->txpda);
+ pdo = roundup(plen, qp->txpda);
pad = pdo - plen;
plen = pdo + data_len;
if (qp->data_digests)
@@ -623,10 +624,7 @@ mbuf_copyto_io(struct mbuf *m, u_int skip, u_int len,
while (len != 0) {
MPASS((m->m_flags & M_EXTPG) == 0);
- todo = m->m_len - skip;
- if (todo > len)
- todo = len;
-
+ todo = min(m->m_len - skip, len);
memdesc_copyback(&io->io_mem, io_offset, todo, mtodo(m, skip));
skip = 0;
io_offset += todo;
@@ -887,7 +885,7 @@ nvmf_tcp_mext_pg(void *arg, int how)
struct nvmf_tcp_command_buffer *cb = arg;
struct mbuf *m;
- m = mb_alloc_ext_pgs(how, nvmf_tcp_free_mext_pg);
+ m = mb_alloc_ext_pgs(how, nvmf_tcp_free_mext_pg, M_RDONLY);
m->m_ext.ext_arg1 = cb;
tcp_hold_command_buffer(cb);
return (m);
@@ -1000,9 +998,7 @@ nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
struct mbuf *m;
uint32_t sent, todo;
- todo = data_len;
- if (todo > qp->max_tx_data)
- todo = qp->max_tx_data;
+ todo = min(data_len, qp->max_tx_data);
m = nvmf_tcp_command_buffer_mbuf(cb, data_offset, todo, &sent,
todo < data_len);
tcp_send_h2c_pdu(qp, r2t->cccid, r2t->ttag, data_offset, m,
@@ -1418,8 +1414,7 @@ nvmf_soupcall_send(struct socket *so, void *arg, int waitflag)
}
static struct nvmf_qpair *
-tcp_allocate_qpair(bool controller,
- const struct nvmf_handoff_qpair_params *params)
+tcp_allocate_qpair(bool controller, const nvlist_t *nvl)
{
struct nvmf_tcp_qpair *qp;
struct socket *so;
@@ -1427,8 +1422,18 @@ tcp_allocate_qpair(bool controller,
cap_rights_t rights;
int error;
- error = fget(curthread, params->tcp.fd, cap_rights_init_one(&rights,
- CAP_SOCK_CLIENT), &fp);
+ if (!nvlist_exists_number(nvl, "fd") ||
+ !nvlist_exists_number(nvl, "rxpda") ||
+ !nvlist_exists_number(nvl, "txpda") ||
+ !nvlist_exists_bool(nvl, "header_digests") ||
+ !nvlist_exists_bool(nvl, "data_digests") ||
+ !nvlist_exists_number(nvl, "maxr2t") ||
+ !nvlist_exists_number(nvl, "maxh2cdata") ||
+ !nvlist_exists_number(nvl, "max_icd"))
+ return (NULL);
+
+ error = fget(curthread, nvlist_get_number(nvl, "fd"),
+ cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
if (error != 0)
return (NULL);
if (fp->f_type != DTYPE_SOCKET) {
@@ -1450,26 +1455,28 @@ tcp_allocate_qpair(bool controller,
qp = malloc(sizeof(*qp), M_NVMF_TCP, M_WAITOK | M_ZERO);
qp->so = so;
refcount_init(&qp->refs, 1);
- qp->txpda = params->tcp.txpda;
- qp->rxpda = params->tcp.rxpda;
- qp->header_digests = params->tcp.header_digests;
- qp->data_digests = params->tcp.data_digests;
- qp->maxr2t = params->tcp.maxr2t;
- qp->maxh2cdata = params->tcp.maxh2cdata;
+ qp->txpda = nvlist_get_number(nvl, "txpda");
+ qp->rxpda = nvlist_get_number(nvl, "rxpda");
+ qp->header_digests = nvlist_get_bool(nvl, "header_digests");
+ qp->data_digests = nvlist_get_bool(nvl, "data_digests");
+ qp->maxr2t = nvlist_get_number(nvl, "maxr2t");
+ if (controller)
+ qp->maxh2cdata = nvlist_get_number(nvl, "maxh2cdata");
qp->max_tx_data = tcp_max_transmit_data;
if (!controller) {
- if (qp->max_tx_data > params->tcp.maxh2cdata)
- qp->max_tx_data = params->tcp.maxh2cdata;
+ qp->max_tx_data = min(qp->max_tx_data,
+ nvlist_get_number(nvl, "maxh2cdata"));
+ qp->max_icd = nvlist_get_number(nvl, "max_icd");
}
- qp->max_icd = params->tcp.max_icd;
if (controller) {
/* Use the SUCCESS flag if SQ flow control is disabled. */
- qp->send_success = !params->sq_flow_control;
+ qp->send_success = !nvlist_get_bool(nvl, "sq_flow_control");
/* NB: maxr2t is 0's based. */
qp->num_ttags = MIN((u_int)UINT16_MAX + 1,
- (uint64_t)params->qsize * (uint64_t)qp->maxr2t + 1);
+ nvlist_get_number(nvl, "qsize") *
+ ((uint64_t)qp->maxr2t + 1));
qp->open_ttags = mallocarray(qp->num_ttags,
sizeof(*qp->open_ttags), M_NVMF_TCP, M_WAITOK | M_ZERO);
}
@@ -1558,6 +1565,7 @@ tcp_free_qpair(struct nvmf_qpair *nq)
for (u_int i = 0; i < qp->num_ttags; i++) {
cb = qp->open_ttags[i];
if (cb != NULL) {
+ cb->tc->active_r2ts--;
cb->error = ECONNABORTED;
tcp_release_command_buffer(cb);
}
@@ -1569,6 +1577,10 @@ tcp_free_qpair(struct nvmf_qpair *nq)
TAILQ_FOREACH_SAFE(cb, &qp->rx_buffers.head, link, ncb) {
tcp_remove_command_buffer(&qp->rx_buffers, cb);
mtx_unlock(&qp->rx_buffers.lock);
+#ifdef INVARIANTS
+ if (cb->tc != NULL)
+ cb->tc->pending_r2ts--;
+#endif
cb->error = ECONNABORTED;
tcp_release_command_buffer(cb);
mtx_lock(&qp->rx_buffers.lock);
@@ -1784,7 +1796,6 @@ tcp_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
{
struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
struct nvme_sgl_descriptor *sgl;
- struct mbuf *n, *p;
uint32_t data_len;
bool last_pdu, last_xfer;
@@ -1813,21 +1824,29 @@ tcp_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
/* Queue one or more C2H_DATA PDUs containing the data from 'm'. */
while (m != NULL) {
+ struct mbuf *n;
uint32_t todo;
- todo = m->m_len;
- p = m;
- n = p->m_next;
- while (n != NULL) {
- if (todo + n->m_len > qp->max_tx_data) {
- p->m_next = NULL;
- break;
- }
- todo += n->m_len;
- p = n;
+ if (m->m_len > qp->max_tx_data) {
+ n = m_split(m, qp->max_tx_data, M_WAITOK);
+ todo = m->m_len;
+ } else {
+ struct mbuf *p;
+
+ todo = m->m_len;
+ p = m;
n = p->m_next;
+ while (n != NULL) {
+ if (todo + n->m_len > qp->max_tx_data) {
+ p->m_next = NULL;
+ break;
+ }
+ todo += n->m_len;
+ p = n;
+ n = p->m_next;
+ }
+ MPASS(m_length(m, NULL) == todo);
}
- MPASS(m_length(m, NULL) == todo);
last_pdu = (n == NULL && last_xfer);
tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset, m, todo,
diff --git a/sys/dev/nvmf/nvmf_tcp.h b/sys/dev/nvmf/nvmf_tcp.h
index 00b0917f75a4..03b5d2445928 100644
--- a/sys/dev/nvmf/nvmf_tcp.h
+++ b/sys/dev/nvmf/nvmf_tcp.h
@@ -9,7 +9,6 @@
#define __NVMF_TCP_H__
#ifndef _KERNEL
-#define __assert_unreachable __unreachable
#define MPASS assert
#endif
@@ -41,6 +40,13 @@ nvmf_tcp_validate_pdu_header(const struct nvme_tcp_common_pdu_hdr *ch,
uint8_t digest_flags, valid_flags;
plen = le32toh(ch->plen);
+ full_hlen = ch->hlen;
+ if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0)
+ full_hlen += sizeof(uint32_t);
+ if (plen == full_hlen)
+ data_len = 0;
+ else
+ data_len = plen - ch->pdo;
/*
* Errors must be reported for the lowest incorrect field
@@ -50,7 +56,7 @@ nvmf_tcp_validate_pdu_header(const struct nvme_tcp_common_pdu_hdr *ch,
/* Validate pdu_type. */
/* Controllers only receive PDUs with a PDU direction of 0. */
- if (controller != (ch->pdu_type & 0x01) == 0) {
+ if (controller != ((ch->pdu_type & 0x01) == 0)) {
printf("NVMe/TCP: Invalid PDU type %u\n", ch->pdu_type);
*fes = NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
*fei = offsetof(struct nvme_tcp_common_pdu_hdr, pdu_type);
@@ -125,11 +131,15 @@ nvmf_tcp_validate_pdu_header(const struct nvme_tcp_common_pdu_hdr *ch,
return (EBADMSG);
}
- /* Verify that digests are present iff enabled. */
+ /*
+ * Verify that digests are present iff enabled. Note that the
+ * data digest will not be present if there is no data
+ * payload.
+ */
digest_flags = 0;
if (header_digests)
digest_flags |= NVME_TCP_CH_FLAGS_HDGSTF;
- if (data_digests)
+ if (data_digests && data_len != 0)
digest_flags |= NVME_TCP_CH_FLAGS_DDGSTF;
if ((digest_flags & valid_flags) !=
(ch->flags & (NVME_TCP_CH_FLAGS_HDGSTF |
@@ -184,9 +194,6 @@ nvmf_tcp_validate_pdu_header(const struct nvme_tcp_common_pdu_hdr *ch,
}
/* Validate pdo. */
- full_hlen = ch->hlen;
- if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0)
- full_hlen += sizeof(uint32_t);
switch (ch->pdu_type) {
default:
__assert_unreachable();
@@ -207,7 +214,7 @@ nvmf_tcp_validate_pdu_header(const struct nvme_tcp_common_pdu_hdr *ch,
case NVME_TCP_PDU_TYPE_H2C_DATA:
case NVME_TCP_PDU_TYPE_C2H_DATA:
/* Permit PDO of 0 if there is no data. */
- if (full_hlen == plen && ch->pdo == 0)
+ if (data_len == 0 && ch->pdo == 0)
break;
if (ch->pdo < full_hlen || ch->pdo > plen ||
@@ -229,10 +236,6 @@ nvmf_tcp_validate_pdu_header(const struct nvme_tcp_common_pdu_hdr *ch,
return (EBADMSG);
}
- if (plen == full_hlen)
- data_len = 0;
- else
- data_len = plen - ch->pdo;
switch (ch->pdu_type) {
default:
__assert_unreachable();
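Hoisting the data_len computation lets the digest check distinguish an empty data payload: the full header spans hlen plus four bytes of header digest when HDGSTF is set; if plen equals that, the PDU carries no data, otherwise the payload runs from pdo to plen. Restated as a tiny helper (names are illustrative):

    #include <stdbool.h>
    #include <stdint.h>

    static uint32_t
    pdu_data_len(uint32_t plen, uint8_t hlen, uint8_t pdo, bool hdgst)
    {
            uint32_t full_hlen;

            full_hlen = hlen;
            if (hdgst)
                    full_hlen += sizeof(uint32_t);
            return (plen == full_hlen ? 0 : plen - pdo);
    }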
diff --git a/sys/dev/nvmf/nvmf_transport.c b/sys/dev/nvmf/nvmf_transport.c
index 14d526192270..1d3f5ea4cf69 100644
--- a/sys/dev/nvmf/nvmf_transport.c
+++ b/sys/dev/nvmf/nvmf_transport.c
@@ -12,6 +12,7 @@
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
+#include <sys/nv.h>
#include <sys/refcount.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
@@ -47,8 +48,7 @@ nvmf_supported_trtype(enum nvmf_trtype trtype)
struct nvmf_qpair *
nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller,
- const struct nvmf_handoff_qpair_params *params,
- nvmf_qpair_error_t *error_cb, void *error_cb_arg,
+ const nvlist_t *params, nvmf_qpair_error_t *error_cb, void *error_cb_arg,
nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg)
{
struct nvmf_transport *nt;
@@ -76,7 +76,7 @@ nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller,
qp->nq_error_arg = error_cb_arg;
qp->nq_receive = receive_cb;
qp->nq_receive_arg = receive_cb_arg;
- qp->nq_admin = params->admin;
+ qp->nq_admin = nvlist_get_bool(params, "admin");
return (qp);
}
@@ -180,6 +180,14 @@ nvmf_capsule_cqe(struct nvmf_capsule *nc)
return (&nc->nc_cqe);
}
+bool
+nvmf_sqhd_valid(struct nvmf_capsule *nc)
+{
+ KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion),
+ ("%s: capsule %p is not a response capsule", __func__, nc));
+ return (nc->nc_sqhd_valid);
+}
+
uint8_t
nvmf_validate_command_capsule(struct nvmf_capsule *nc)
{
@@ -223,6 +231,92 @@ nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
}
int
+nvmf_pack_ioc_nvlist(const nvlist_t *nvl, struct nvmf_ioc_nv *nv)
+{
+ void *packed;
+ int error;
+
+ error = nvlist_error(nvl);
+ if (error != 0)
+ return (error);
+
+ if (nv->size == 0) {
+ nv->len = nvlist_size(nvl);
+ } else {
+ packed = nvlist_pack(nvl, &nv->len);
+ if (packed == NULL)
+ error = ENOMEM;
+ else if (nv->len > nv->size)
+ error = EFBIG;
+ else
+ error = copyout(packed, nv->data, nv->len);
+ free(packed, M_NVLIST);
+ }
+ return (error);
+}
+
+int
+nvmf_unpack_ioc_nvlist(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp)
+{
+ void *packed;
+ nvlist_t *nvl;
+ int error;
+
+ packed = malloc(nv->size, M_NVMF_TRANSPORT, M_WAITOK);
+ error = copyin(nv->data, packed, nv->size);
+ if (error != 0) {
+ free(packed, M_NVMF_TRANSPORT);
+ return (error);
+ }
+
+ nvl = nvlist_unpack(packed, nv->size, 0);
+ free(packed, M_NVMF_TRANSPORT);
+ if (nvl == NULL)
+ return (EINVAL);
+
+ *nvlp = nvl;
+ return (0);
+}
+
+bool
+nvmf_validate_qpair_nvlist(const nvlist_t *nvl, bool controller)
+{
+ uint64_t value, qsize;
+ bool admin, valid;
+
+ valid = true;
+ valid &= nvlist_exists_bool(nvl, "admin");
+ valid &= nvlist_exists_bool(nvl, "sq_flow_control");
+ valid &= nvlist_exists_number(nvl, "qsize");
+ valid &= nvlist_exists_number(nvl, "sqhd");
+ if (!controller)
+ valid &= nvlist_exists_number(nvl, "sqtail");
+ if (!valid)
+ return (false);
+
+ admin = nvlist_get_bool(nvl, "admin");
+ qsize = nvlist_get_number(nvl, "qsize");
+ if (admin) {
+ if (qsize < NVME_MIN_ADMIN_ENTRIES ||
+ qsize > NVME_MAX_ADMIN_ENTRIES)
+ return (false);
+ } else {
+ if (qsize < NVME_MIN_IO_ENTRIES || qsize > NVME_MAX_IO_ENTRIES)
+ return (false);
+ }
+ value = nvlist_get_number(nvl, "sqhd");
+ if (value > qsize - 1)
+ return (false);
+ if (!controller) {
+ value = nvlist_get_number(nvl, "sqtail");
+ if (value > qsize - 1)
+ return (false);
+ }
+
+ return (true);
+}
+
+int
nvmf_transport_module_handler(struct module *mod, int what, void *arg)
{
struct nvmf_transport_ops *ops = arg;
@@ -292,8 +386,6 @@ nvmf_transport_module_handler(struct module *mod, int what, void *arg)
prev = nt;
}
if (nt == NULL) {
- KASSERT(nt->nt_active_qpairs == 0,
- ("unregistered transport has connections"));
sx_xunlock(&nvmf_transports_lock);
return (0);
}
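Taken together with the schema comments in nvmf.h, nvmf_validate_qpair_nvlist() defines the minimum contract for a qpair handoff. A kernel-side sketch of an nvlist that passes the transport-independent checks for a host I/O queue (the values are examples):

    static void
    example_build_qpair_nvlist(void)
    {
            nvlist_t *nvl;

            nvl = nvlist_create(0);
            nvlist_add_bool(nvl, "admin", false);
            nvlist_add_bool(nvl, "sq_flow_control", true);
            nvlist_add_number(nvl, "qsize", 128);   /* within I/O limits */
            nvlist_add_number(nvl, "sqhd", 0);
            nvlist_add_number(nvl, "sqtail", 0);    /* host only */
            MPASS(nvmf_validate_qpair_nvlist(nvl, false));
            nvlist_destroy(nvl);
    }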
diff --git a/sys/dev/nvmf/nvmf_transport.h b/sys/dev/nvmf/nvmf_transport.h
index 549170b25940..b192baeaccc1 100644
--- a/sys/dev/nvmf/nvmf_transport.h
+++ b/sys/dev/nvmf/nvmf_transport.h
@@ -13,6 +13,7 @@
* (target) to send and receive capsules and associated data.
*/
+#include <sys/_nv.h>
#include <sys/sysctl.h>
#include <dev/nvmf/nvmf_proto.h>
@@ -20,8 +21,8 @@ struct mbuf;
struct memdesc;
struct nvmf_capsule;
struct nvmf_connection;
+struct nvmf_ioc_nv;
struct nvmf_qpair;
-struct nvmf_handoff_qpair_params;
SYSCTL_DECL(_kern_nvmf);
@@ -54,7 +55,7 @@ typedef void nvmf_io_complete_t(void *, size_t, int);
* independent.
*/
struct nvmf_qpair *nvmf_allocate_qpair(enum nvmf_trtype trtype,
- bool controller, const struct nvmf_handoff_qpair_params *params,
+ bool controller, const nvlist_t *params,
nvmf_qpair_error_t *error_cb, void *error_cb_arg,
nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg);
void nvmf_free_qpair(struct nvmf_qpair *qp);
@@ -78,6 +79,7 @@ int nvmf_transmit_capsule(struct nvmf_capsule *nc);
void nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error);
void *nvmf_capsule_sqe(struct nvmf_capsule *nc);
void *nvmf_capsule_cqe(struct nvmf_capsule *nc);
+bool nvmf_sqhd_valid(struct nvmf_capsule *nc);
/* Controller-specific APIs. */
@@ -137,4 +139,23 @@ u_int nvmf_send_controller_data(struct nvmf_capsule *nc,
#define NVMF_SUCCESS_SENT 0x100
#define NVMF_MORE 0x101
+/* Helper APIs for nvlists used in ioctls. */
+
+/*
+ * Pack the nvlist nvl and copyout to the buffer described by nv.
+ */
+int nvmf_pack_ioc_nvlist(const nvlist_t *nvl, struct nvmf_ioc_nv *nv);
+
+/*
+ * Copyin and unpack an nvlist described by nv. The unpacked nvlist
+ * is returned in *nvlp on success.
+ */
+int nvmf_unpack_ioc_nvlist(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp);
+
+/*
+ * Returns true if a qpair handoff nvlist has all the required
+ * transport-independent values.
+ */
+bool nvmf_validate_qpair_nvlist(const nvlist_t *nvl, bool controller);
+
#endif /* !__NVMF_TRANSPORT_H__ */
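A sketch of how a driver ioctl handler might use the pack helper to answer a query such as NVMF_CONNECTION_STATUS; the handler name and the source of the 'connected' state are hypothetical:

    static int
    example_connection_status(bool connected, struct nvmf_ioc_nv *nv)
    {
            nvlist_t *nvl;
            int error;

            nvl = nvlist_create(0);
            nvlist_add_bool(nvl, "connected", connected);
            /* Pack and copy out, or report the required size if size == 0. */
            error = nvmf_pack_ioc_nvlist(nvl, nv);
            nvlist_destroy(nvl);
            return (error);
    }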
diff --git a/sys/dev/nvmf/nvmf_transport_internal.h b/sys/dev/nvmf/nvmf_transport_internal.h
index 0be427ee0690..eb819a5c83b9 100644
--- a/sys/dev/nvmf/nvmf_transport_internal.h
+++ b/sys/dev/nvmf/nvmf_transport_internal.h
@@ -8,6 +8,7 @@
#ifndef __NVMF_TRANSPORT_INTERNAL_H__
#define __NVMF_TRANSPORT_INTERNAL_H__
+#include <sys/_nv.h>
#include <sys/memdesc.h>
/*
@@ -21,7 +22,7 @@ struct nvmf_io_request;
struct nvmf_transport_ops {
/* Queue pair management. */
struct nvmf_qpair *(*allocate_qpair)(bool controller,
- const struct nvmf_handoff_qpair_params *params);
+ const nvlist_t *nvl);
void (*free_qpair)(struct nvmf_qpair *qp);
/* Capsule operations. */