diff options
Diffstat (limited to 'sys/dev/nvmf/host')
-rw-r--r-- | sys/dev/nvmf/host/nvmf.c | 640 | ||||
-rw-r--r-- | sys/dev/nvmf/host/nvmf_aer.c | 2 | ||||
-rw-r--r-- | sys/dev/nvmf/host/nvmf_ctldev.c | 15 | ||||
-rw-r--r-- | sys/dev/nvmf/host/nvmf_ns.c | 66 | ||||
-rw-r--r-- | sys/dev/nvmf/host/nvmf_qpair.c | 88 | ||||
-rw-r--r-- | sys/dev/nvmf/host/nvmf_sim.c | 29 | ||||
-rw-r--r-- | sys/dev/nvmf/host/nvmf_var.h | 46 |
7 files changed, 693 insertions, 193 deletions
diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c index 0902bc78a7b5..1ac0d142443b 100644 --- a/sys/dev/nvmf/host/nvmf.c +++ b/sys/dev/nvmf/host/nvmf.c @@ -8,13 +8,18 @@ #include <sys/param.h> #include <sys/bus.h> #include <sys/conf.h> +#include <sys/dnv.h> +#include <sys/eventhandler.h> #include <sys/lock.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/memdesc.h> #include <sys/module.h> #include <sys/mutex.h> +#include <sys/nv.h> +#include <sys/reboot.h> #include <sys/sx.h> +#include <sys/sysctl.h> #include <sys/taskqueue.h> #include <dev/nvme/nvme.h> #include <dev/nvmf/nvmf.h> @@ -22,10 +27,20 @@ #include <dev/nvmf/host/nvmf_var.h> static struct cdevsw nvmf_cdevsw; +static struct taskqueue *nvmf_tq; + +bool nvmf_fail_disconnect = false; +SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN, + &nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure"); MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host"); +static void nvmf_controller_loss_task(void *arg, int pending); static void nvmf_disconnect_task(void *arg, int pending); +static void nvmf_request_reconnect(struct nvmf_softc *sc); +static void nvmf_request_reconnect_task(void *arg, int pending); +static void nvmf_shutdown_pre_sync(void *arg, int howto); +static void nvmf_shutdown_post_sync(void *arg, int howto); void nvmf_complete(void *arg, const struct nvme_completion *cqe) @@ -187,104 +202,132 @@ nvmf_send_keep_alive(void *arg) } int -nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh) +nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp) { - size_t len; - u_int i; + const struct nvme_discovery_log_entry *dle; + const struct nvme_controller_data *cdata; + const nvlist_t *const *io; + const nvlist_t *admin, *rparams; + nvlist_t *nvl; + size_t i, num_io_queues; + uint32_t qsize; int error; - memset(ivars, 0, sizeof(*ivars)); - - if (!hh->admin.admin || hh->num_io_queues < 1) - return (EINVAL); - - ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK); - error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata)); + error = nvmf_unpack_ioc_nvlist(nv, &nvl); if (error != 0) - goto out; - nvme_controller_data_swapbytes(ivars->cdata); + return (error); - len = hh->num_io_queues * sizeof(*ivars->io_params); - ivars->io_params = malloc(len, M_NVMF, M_WAITOK); - error = copyin(hh->io, ivars->io_params, len); - if (error != 0) - goto out; - for (i = 0; i < hh->num_io_queues; i++) { - if (ivars->io_params[i].admin) { - error = EINVAL; - goto out; - } + if (!nvlist_exists_number(nvl, "trtype") || + !nvlist_exists_nvlist(nvl, "admin") || + !nvlist_exists_nvlist_array(nvl, "io") || + !nvlist_exists_binary(nvl, "cdata") || + !nvlist_exists_nvlist(nvl, "rparams")) + goto invalid; + + rparams = nvlist_get_nvlist(nvl, "rparams"); + if (!nvlist_exists_binary(rparams, "dle") || + !nvlist_exists_string(rparams, "hostnqn") || + !nvlist_exists_number(rparams, "num_io_queues") || + !nvlist_exists_number(rparams, "io_qsize")) + goto invalid; + + admin = nvlist_get_nvlist(nvl, "admin"); + if (!nvmf_validate_qpair_nvlist(admin, false)) + goto invalid; + if (!nvlist_get_bool(admin, "admin")) + goto invalid; + + io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues); + if (num_io_queues < 1 || + num_io_queues != nvlist_get_number(rparams, "num_io_queues")) + goto invalid; + for (i = 0; i < num_io_queues; i++) { + if (!nvmf_validate_qpair_nvlist(io[i], false)) + goto invalid; + } - /* Require all I/O queues to be the same size. */ - if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) { - error = EINVAL; - goto out; - } + /* Require all I/O queues to be the same size. */ + qsize = nvlist_get_number(rparams, "io_qsize"); + for (i = 0; i < num_io_queues; i++) { + if (nvlist_get_number(io[i], "qsize") != qsize) + goto invalid; } - ivars->hh = hh; - return (0); + cdata = nvlist_get_binary(nvl, "cdata", &i); + if (i != sizeof(*cdata)) + goto invalid; + dle = nvlist_get_binary(rparams, "dle", &i); + if (i != sizeof(*dle)) + goto invalid; -out: - free(ivars->io_params, M_NVMF); - free(ivars->cdata, M_NVMF); - return (error); -} + if (memcmp(dle->subnqn, cdata->subnqn, sizeof(cdata->subnqn)) != 0) + goto invalid; -void -nvmf_free_ivars(struct nvmf_ivars *ivars) -{ - free(ivars->io_params, M_NVMF); - free(ivars->cdata, M_NVMF); + *nvlp = nvl; + return (0); +invalid: + nvlist_destroy(nvl); + return (EINVAL); } static int nvmf_probe(device_t dev) { - struct nvmf_ivars *ivars = device_get_ivars(dev); - char desc[260]; + const nvlist_t *nvl = device_get_ivars(dev); + const struct nvme_controller_data *cdata; - if (ivars == NULL) + if (nvl == NULL) return (ENXIO); - snprintf(desc, sizeof(desc), "Fabrics: %.256s", ivars->cdata->subnqn); - device_set_desc_copy(dev, desc); + cdata = nvlist_get_binary(nvl, "cdata", NULL); + device_set_descf(dev, "Fabrics: %.256s", cdata->subnqn); return (BUS_PROBE_DEFAULT); } static int -nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars) +nvmf_establish_connection(struct nvmf_softc *sc, nvlist_t *nvl) { + const nvlist_t *const *io; + const nvlist_t *admin; + uint64_t kato; + size_t num_io_queues; + enum nvmf_trtype trtype; char name[16]; + trtype = nvlist_get_number(nvl, "trtype"); + admin = nvlist_get_nvlist(nvl, "admin"); + io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues); + kato = dnvlist_get_number(nvl, "kato", 0); + sc->reconnect_delay = dnvlist_get_number(nvl, "reconnect_delay", 0); + sc->controller_loss_timeout = dnvlist_get_number(nvl, + "controller_loss_timeout", 0); + /* Setup the admin queue. */ - sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin, - "admin queue"); + sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0); if (sc->admin == NULL) { device_printf(sc->dev, "Failed to setup admin queue\n"); return (ENXIO); } /* Setup I/O queues. */ - sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF, + sc->io = malloc(num_io_queues * sizeof(*sc->io), M_NVMF, M_WAITOK | M_ZERO); - sc->num_io_queues = ivars->hh->num_io_queues; + sc->num_io_queues = num_io_queues; for (u_int i = 0; i < sc->num_io_queues; i++) { snprintf(name, sizeof(name), "I/O queue %u", i); - sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype, - &ivars->io_params[i], name); + sc->io[i] = nvmf_init_qp(sc, trtype, io[i], name, i); if (sc->io[i] == NULL) { device_printf(sc->dev, "Failed to setup I/O queue %u\n", - i + 1); + i); return (ENXIO); } } /* Start KeepAlive timers. */ - if (ivars->hh->kato != 0) { + if (kato != 0) { sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS, sc->cdata->ctratt) != 0; - sc->ka_rx_sbt = mstosbt(ivars->hh->kato); + sc->ka_rx_sbt = mstosbt(kato); sc->ka_tx_sbt = sc->ka_rx_sbt / 2; callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, nvmf_check_keep_alive, sc, C_HARDCLOCK); @@ -292,12 +335,23 @@ nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars) nvmf_send_keep_alive, sc, C_HARDCLOCK); } + memcpy(sc->cdata, nvlist_get_binary(nvl, "cdata", NULL), + sizeof(*sc->cdata)); + + /* Save reconnect parameters. */ + nvlist_destroy(sc->rparams); + sc->rparams = nvlist_take_nvlist(nvl, "rparams"); + return (0); } +typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t, + const struct nvme_namespace_data *, void *); + static bool -nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist, - struct nvme_namespace_data *data, uint32_t *nsidp) +nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist, + struct nvme_namespace_data *data, uint32_t *nsidp, + nvmf_scan_active_ns_cb *cb, void *cb_arg) { struct nvmf_completion_status status; uint32_t nsid; @@ -333,13 +387,6 @@ nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist, return (true); } - if (sc->ns[nsid - 1] != NULL) { - device_printf(sc->dev, - "duplicate namespace %u in active namespace list\n", - nsid); - return (false); - } - nvmf_status_init(&status); nvmf_status_wait_io(&status); if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete, @@ -365,49 +412,37 @@ nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist, return (false); } - /* - * As in nvme_ns_construct, a size of zero indicates an - * invalid namespace. - */ nvme_namespace_data_swapbytes(data); - if (data->nsze == 0) { - device_printf(sc->dev, - "ignoring active namespace %u with zero size\n", - nsid); - continue; - } - - sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data); - - nvmf_sim_rescan_ns(sc, nsid); + if (!cb(sc, nsid, data, cb_arg)) + return (false); } MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0); - if (nsid >= 0xfffffffd) + if (nsid >= NVME_GLOBAL_NAMESPACE_TAG - 1) *nsidp = 0; else - *nsidp = nsid + 1; + *nsidp = nsid; return (true); } static bool -nvmf_add_namespaces(struct nvmf_softc *sc) +nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb, + void *cb_arg) { struct nvme_namespace_data *data; struct nvme_ns_list *nslist; uint32_t nsid; bool retval; - sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF, - M_WAITOK | M_ZERO); nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK); data = malloc(sizeof(*data), M_NVMF, M_WAITOK); nsid = 0; retval = true; for (;;) { - if (!nvmf_scan_nslist(sc, nslist, data, &nsid)) { + if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb, + cb_arg)) { retval = false; break; } @@ -420,36 +455,77 @@ nvmf_add_namespaces(struct nvmf_softc *sc) return (retval); } +static bool +nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid, + const struct nvme_namespace_data *data, void *arg __unused) +{ + if (sc->ns[nsid - 1] != NULL) { + device_printf(sc->dev, + "duplicate namespace %u in active namespace list\n", + nsid); + return (false); + } + + /* + * As in nvme_ns_construct, a size of zero indicates an + * invalid namespace. + */ + if (data->nsze == 0) { + device_printf(sc->dev, + "ignoring active namespace %u with zero size\n", nsid); + return (true); + } + + sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data); + + nvmf_sim_rescan_ns(sc, nsid); + return (true); +} + +static bool +nvmf_add_namespaces(struct nvmf_softc *sc) +{ + sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF, + M_WAITOK | M_ZERO); + return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL)); +} + static int nvmf_attach(device_t dev) { struct make_dev_args mda; struct nvmf_softc *sc = device_get_softc(dev); - struct nvmf_ivars *ivars = device_get_ivars(dev); + nvlist_t *nvl = device_get_ivars(dev); + const nvlist_t * const *io; + struct sysctl_oid *oid; uint64_t val; u_int i; int error; - if (ivars == NULL) + if (nvl == NULL) return (ENXIO); sc->dev = dev; - sc->trtype = ivars->hh->trtype; + sc->trtype = nvlist_get_number(nvl, "trtype"); callout_init(&sc->ka_rx_timer, 1); callout_init(&sc->ka_tx_timer, 1); sx_init(&sc->connection_lock, "nvmf connection"); TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc); + TIMEOUT_TASK_INIT(nvmf_tq, &sc->controller_loss_task, 0, + nvmf_controller_loss_task, sc); + TIMEOUT_TASK_INIT(nvmf_tq, &sc->request_reconnect_task, 0, + nvmf_request_reconnect_task, sc); - /* Claim the cdata pointer from ivars. */ - sc->cdata = ivars->cdata; - ivars->cdata = NULL; + oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq", + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues"); + sc->ioq_oid_list = SYSCTL_CHILDREN(oid); - nvmf_init_aer(sc); + sc->cdata = malloc(sizeof(*sc->cdata), M_NVMF, M_WAITOK); - /* TODO: Multiqueue support. */ - sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */; + nvmf_init_aer(sc); - error = nvmf_establish_connection(sc, ivars); + error = nvmf_establish_connection(sc, nvl); if (error != 0) goto out; @@ -476,6 +552,10 @@ nvmf_attach(device_t dev) NVME_CAP_HI_MPSMIN(sc->cap >> 32))); } + io = nvlist_get_nvlist_array(nvl, "io", NULL); + sc->max_pending_io = nvlist_get_number(io[0], "qsize") * + sc->num_io_queues; + error = nvmf_init_sim(sc); if (error != 0) goto out; @@ -503,6 +583,11 @@ nvmf_attach(device_t dev) goto out; } + sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync, + nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST); + sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync, + nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_LAST); + return (0); out: if (sc->ns != NULL) { @@ -529,8 +614,11 @@ out: nvmf_destroy_aer(sc); - taskqueue_drain(taskqueue_thread, &sc->disconnect_task); + taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task); + taskqueue_drain_timeout(nvmf_tq, &sc->controller_loss_task); + taskqueue_drain(nvmf_tq, &sc->disconnect_task); sx_destroy(&sc->connection_lock); + nvlist_destroy(sc->rparams); free(sc->cdata, M_NVMF); return (error); } @@ -538,7 +626,7 @@ out: void nvmf_disconnect(struct nvmf_softc *sc) { - taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task); + taskqueue_enqueue(nvmf_tq, &sc->disconnect_task); } static void @@ -579,6 +667,7 @@ nvmf_disconnect_task(void *arg, int pending __unused) return; } + nanotime(&sc->last_disconnect); callout_drain(&sc->ka_tx_timer); callout_drain(&sc->ka_rx_timer); sc->ka_traffic = false; @@ -600,29 +689,98 @@ nvmf_disconnect_task(void *arg, int pending __unused) nvmf_destroy_qp(sc->admin); sc->admin = NULL; + if (sc->reconnect_delay != 0) + nvmf_request_reconnect(sc); + if (sc->controller_loss_timeout != 0) + taskqueue_enqueue_timeout(nvmf_tq, + &sc->controller_loss_task, sc->controller_loss_timeout * + hz); + + sx_xunlock(&sc->connection_lock); +} + +static void +nvmf_controller_loss_task(void *arg, int pending) +{ + struct nvmf_softc *sc = arg; + device_t dev; + int error; + + bus_topo_lock(); + sx_xlock(&sc->connection_lock); + if (sc->admin != NULL || sc->detaching) { + /* Reconnected or already detaching. */ + sx_xunlock(&sc->connection_lock); + bus_topo_unlock(); + return; + } + + sc->controller_timedout = true; + sx_xunlock(&sc->connection_lock); + + /* + * XXX: Doing this from here is a bit ugly. We don't have an + * extra reference on `dev` but bus_topo_lock should block any + * concurrent device_delete_child invocations. + */ + dev = sc->dev; + error = device_delete_child(root_bus, dev); + if (error != 0) + device_printf(dev, + "failed to detach after controller loss: %d\n", error); + bus_topo_unlock(); +} + +static void +nvmf_request_reconnect(struct nvmf_softc *sc) +{ + char buf[64]; + + sx_assert(&sc->connection_lock, SX_LOCKED); + + snprintf(buf, sizeof(buf), "name=\"%s\"", device_get_nameunit(sc->dev)); + devctl_notify("nvme", "controller", "RECONNECT", buf); + taskqueue_enqueue_timeout(nvmf_tq, &sc->request_reconnect_task, + sc->reconnect_delay * hz); +} + +static void +nvmf_request_reconnect_task(void *arg, int pending) +{ + struct nvmf_softc *sc = arg; + + sx_xlock(&sc->connection_lock); + if (sc->admin != NULL || sc->detaching || sc->controller_timedout) { + /* Reconnected or already detaching. */ + sx_xunlock(&sc->connection_lock); + return; + } + + nvmf_request_reconnect(sc); sx_xunlock(&sc->connection_lock); } static int -nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh) +nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv) { - struct nvmf_ivars ivars; + const struct nvme_controller_data *cdata; + nvlist_t *nvl; u_int i; int error; + error = nvmf_copyin_handoff(nv, &nvl); + if (error != 0) + return (error); + /* XXX: Should we permit changing the transport type? */ - if (sc->trtype != hh->trtype) { + if (sc->trtype != nvlist_get_number(nvl, "trtype")) { device_printf(sc->dev, "transport type mismatch on reconnect\n"); return (EINVAL); } - error = nvmf_init_ivars(&ivars, hh); - if (error != 0) - return (error); - sx_xlock(&sc->connection_lock); - if (sc->admin != NULL || sc->detaching) { + if (sc->admin != NULL || sc->detaching || sc->controller_timedout) { error = EBUSY; goto out; } @@ -634,8 +792,9 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh) * ensures the new association is connected to the same NVMe * subsystem. */ - if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn, - sizeof(ivars.cdata->subnqn)) != 0) { + cdata = nvlist_get_binary(nvl, "cdata", NULL); + if (memcmp(sc->cdata->subnqn, cdata->subnqn, + sizeof(cdata->subnqn)) != 0) { device_printf(sc->dev, "controller subsystem NQN mismatch on reconnect\n"); error = EINVAL; @@ -647,7 +806,7 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh) * max_pending_io is still correct? */ - error = nvmf_establish_connection(sc, &ivars); + error = nvmf_establish_connection(sc, nvl); if (error != 0) goto out; @@ -665,12 +824,85 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh) nvmf_reconnect_ns(sc->ns[i]); } nvmf_reconnect_sim(sc); + + nvmf_rescan_all_ns(sc); + + taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL); + taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL); out: sx_xunlock(&sc->connection_lock); - nvmf_free_ivars(&ivars); + nvlist_destroy(nvl); return (error); } +static void +nvmf_shutdown_pre_sync(void *arg, int howto) +{ + struct nvmf_softc *sc = arg; + + if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED()) + return; + + /* + * If this association is disconnected, abort any pending + * requests with an error to permit filesystems to unmount + * without hanging. + */ + sx_xlock(&sc->connection_lock); + if (sc->admin != NULL || sc->detaching) { + sx_xunlock(&sc->connection_lock); + return; + } + + for (u_int i = 0; i < sc->cdata->nn; i++) { + if (sc->ns[i] != NULL) + nvmf_shutdown_ns(sc->ns[i]); + } + nvmf_shutdown_sim(sc); + sx_xunlock(&sc->connection_lock); +} + +static void +nvmf_shutdown_post_sync(void *arg, int howto) +{ + struct nvmf_softc *sc = arg; + + if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED()) + return; + + /* + * If this association is connected, disconnect gracefully. + */ + sx_xlock(&sc->connection_lock); + if (sc->admin == NULL || sc->detaching) { + sx_xunlock(&sc->connection_lock); + return; + } + + callout_drain(&sc->ka_tx_timer); + callout_drain(&sc->ka_rx_timer); + + nvmf_shutdown_controller(sc); + + /* + * Quiesce consumers so that any commands submitted after this + * fail with an error. Notably, nda(4) calls nda_flush() from + * a post_sync handler that might be ordered after this one. + */ + for (u_int i = 0; i < sc->cdata->nn; i++) { + if (sc->ns[i] != NULL) + nvmf_shutdown_ns(sc->ns[i]); + } + nvmf_shutdown_sim(sc); + + for (u_int i = 0; i < sc->num_io_queues; i++) { + nvmf_destroy_qp(sc->io[i]); + } + nvmf_destroy_qp(sc->admin); + sc->admin = NULL; + sx_xunlock(&sc->connection_lock); +} + static int nvmf_detach(device_t dev) { @@ -683,6 +915,9 @@ nvmf_detach(device_t dev) sc->detaching = true; sx_xunlock(&sc->connection_lock); + EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh); + EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh); + nvmf_destroy_sim(sc); for (i = 0; i < sc->cdata->nn; i++) { if (sc->ns[i] != NULL) @@ -701,7 +936,21 @@ nvmf_detach(device_t dev) } free(sc->io, M_NVMF); - taskqueue_drain(taskqueue_thread, &sc->disconnect_task); + taskqueue_drain(nvmf_tq, &sc->disconnect_task); + if (taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, + NULL) != 0) + taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task); + + /* + * Don't cancel/drain the controller loss task if that task + * has fired and is triggering the detach. + */ + if (!sc->controller_timedout) { + if (taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, + NULL) != 0) + taskqueue_drain_timeout(nvmf_tq, + &sc->controller_loss_task); + } if (sc->admin != NULL) nvmf_destroy_qp(sc->admin); @@ -709,16 +958,45 @@ nvmf_detach(device_t dev) nvmf_destroy_aer(sc); sx_destroy(&sc->connection_lock); + nvlist_destroy(sc->rparams); free(sc->cdata, M_NVMF); return (0); } +static void +nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid, + const struct nvme_namespace_data *data) +{ + struct nvmf_namespace *ns; + + /* XXX: Needs locking around sc->ns[]. */ + ns = sc->ns[nsid - 1]; + if (data->nsze == 0) { + /* XXX: Needs locking */ + if (ns != NULL) { + nvmf_destroy_ns(ns); + sc->ns[nsid - 1] = NULL; + } + } else { + /* XXX: Needs locking */ + if (ns == NULL) { + sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data); + } else { + if (!nvmf_update_ns(ns, data)) { + nvmf_destroy_ns(ns); + sc->ns[nsid - 1] = NULL; + } + } + } + + nvmf_sim_rescan_ns(sc, nsid); +} + void nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid) { struct nvmf_completion_status status; struct nvme_namespace_data *data; - struct nvmf_namespace *ns; data = malloc(sizeof(*data), M_NVMF, M_WAITOK); @@ -751,29 +1029,58 @@ nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid) nvme_namespace_data_swapbytes(data); - /* XXX: Needs locking around sc->ns[]. */ - ns = sc->ns[nsid - 1]; - if (data->nsze == 0) { - /* XXX: Needs locking */ + nvmf_rescan_ns_1(sc, nsid, data); + + free(data, M_NVMF); +} + +static void +nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid, + uint32_t next_valid_nsid) +{ + struct nvmf_namespace *ns; + + for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++) + { + /* XXX: Needs locking around sc->ns[]. */ + ns = sc->ns[nsid - 1]; if (ns != NULL) { nvmf_destroy_ns(ns); sc->ns[nsid - 1] = NULL; - } - } else { - /* XXX: Needs locking */ - if (ns == NULL) { - sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data); - } else { - if (!nvmf_update_ns(ns, data)) { - nvmf_destroy_ns(ns); - sc->ns[nsid - 1] = NULL; - } + + nvmf_sim_rescan_ns(sc, nsid); } } +} - free(data, M_NVMF); +static bool +nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid, + const struct nvme_namespace_data *data, void *arg) +{ + uint32_t *last_nsid = arg; - nvmf_sim_rescan_ns(sc, nsid); + /* Check for any gaps prior to this namespace. */ + nvmf_purge_namespaces(sc, *last_nsid + 1, nsid); + *last_nsid = nsid; + + nvmf_rescan_ns_1(sc, nsid, data); + return (true); +} + +void +nvmf_rescan_all_ns(struct nvmf_softc *sc) +{ + uint32_t last_nsid; + + last_nsid = 0; + if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid)) + return; + + /* + * Check for any namespace devices after the last active + * namespace. + */ + nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1); } int @@ -822,12 +1129,21 @@ nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt, cmd.cdw14 = pt->cmd.cdw14; cmd.cdw15 = pt->cmd.cdw15; + sx_slock(&sc->connection_lock); + if (sc->admin == NULL || sc->detaching) { + device_printf(sc->dev, + "failed to send passthrough command\n"); + error = ECONNABORTED; + sx_sunlock(&sc->connection_lock); + goto error; + } if (admin) qp = sc->admin; else qp = nvmf_select_io_queue(sc); nvmf_status_init(&status); req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK); + sx_sunlock(&sc->connection_lock); if (req == NULL) { device_printf(sc->dev, "failed to send passthrough command\n"); error = ECONNABORTED; @@ -857,14 +1173,46 @@ error: } static int +nvmf_reconnect_params(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv) +{ + int error; + + sx_slock(&sc->connection_lock); + error = nvmf_pack_ioc_nvlist(sc->rparams, nv); + sx_sunlock(&sc->connection_lock); + + return (error); +} + +static int +nvmf_connection_status(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv) +{ + nvlist_t *nvl, *nvl_ts; + int error; + + nvl = nvlist_create(0); + nvl_ts = nvlist_create(0); + + sx_slock(&sc->connection_lock); + nvlist_add_bool(nvl, "connected", sc->admin != NULL); + nvlist_add_number(nvl_ts, "tv_sec", sc->last_disconnect.tv_sec); + nvlist_add_number(nvl_ts, "tv_nsec", sc->last_disconnect.tv_nsec); + sx_sunlock(&sc->connection_lock); + nvlist_move_nvlist(nvl, "last_disconnect", nvl_ts); + + error = nvmf_pack_ioc_nvlist(nvl, nv); + nvlist_destroy(nvl); + return (error); +} + +static int nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, struct thread *td) { struct nvmf_softc *sc = cdev->si_drv1; struct nvme_get_nsid *gnsid; struct nvme_pt_command *pt; - struct nvmf_reconnect_params *rp; - struct nvmf_handoff_host *hh; + struct nvmf_ioc_nv *nv; switch (cmd) { case NVME_PASSTHROUGH_CMD: @@ -872,25 +1220,25 @@ nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag, return (nvmf_passthrough_cmd(sc, pt, true)); case NVME_GET_NSID: gnsid = (struct nvme_get_nsid *)arg; - strncpy(gnsid->cdev, device_get_nameunit(sc->dev), + strlcpy(gnsid->cdev, device_get_nameunit(sc->dev), sizeof(gnsid->cdev)); - gnsid->cdev[sizeof(gnsid->cdev) - 1] = '\0'; gnsid->nsid = 0; return (0); case NVME_GET_MAX_XFER_SIZE: *(uint64_t *)arg = sc->max_xfer_size; return (0); - case NVMF_RECONNECT_PARAMS: - rp = (struct nvmf_reconnect_params *)arg; - if ((sc->cdata->fcatt & 1) == 0) - rp->cntlid = NVMF_CNTLID_DYNAMIC; - else - rp->cntlid = sc->cdata->ctrlr_id; - memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn)); + case NVME_GET_CONTROLLER_DATA: + memcpy(arg, sc->cdata, sizeof(*sc->cdata)); return (0); + case NVMF_RECONNECT_PARAMS: + nv = (struct nvmf_ioc_nv *)arg; + return (nvmf_reconnect_params(sc, nv)); case NVMF_RECONNECT_HOST: - hh = (struct nvmf_handoff_host *)arg; - return (nvmf_reconnect_host(sc, hh)); + nv = (struct nvmf_ioc_nv *)arg; + return (nvmf_reconnect_host(sc, nv)); + case NVMF_CONNECTION_STATUS: + nv = (struct nvmf_ioc_nv *)arg; + return (nvmf_connection_status(sc, nv)); default: return (ENOTTY); } @@ -904,14 +1252,25 @@ static struct cdevsw nvmf_cdevsw = { static int nvmf_modevent(module_t mod, int what, void *arg) { + int error; + switch (what) { case MOD_LOAD: - return (nvmf_ctl_load()); + error = nvmf_ctl_load(); + if (error != 0) + return (error); + + nvmf_tq = taskqueue_create("nvmf", M_WAITOK | M_ZERO, + taskqueue_thread_enqueue, &nvmf_tq); + taskqueue_start_threads(&nvmf_tq, 1, PWAIT, "nvmf taskq"); + return (0); case MOD_QUIESCE: return (0); case MOD_UNLOAD: nvmf_ctl_unload(); destroy_dev_drain(&nvmf_cdevsw); + if (nvmf_tq != NULL) + taskqueue_free(nvmf_tq); return (0); default: return (EOPNOTSUPP); @@ -923,9 +1282,6 @@ static device_method_t nvmf_methods[] = { DEVMETHOD(device_probe, nvmf_probe), DEVMETHOD(device_attach, nvmf_attach), DEVMETHOD(device_detach, nvmf_detach), -#if 0 - DEVMETHOD(device_shutdown, nvmf_shutdown), -#endif DEVMETHOD_END }; diff --git a/sys/dev/nvmf/host/nvmf_aer.c b/sys/dev/nvmf/host/nvmf_aer.c index 4c950f1518d0..2f7f177d0421 100644 --- a/sys/dev/nvmf/host/nvmf_aer.c +++ b/sys/dev/nvmf/host/nvmf_aer.c @@ -62,7 +62,7 @@ nvmf_handle_changed_namespaces(struct nvmf_softc *sc, * probably just rescan the entire set of namespaces. */ if (ns_list->ns[0] == 0xffffffff) { - device_printf(sc->dev, "too many changed namespaces\n"); + nvmf_rescan_all_ns(sc); return; } diff --git a/sys/dev/nvmf/host/nvmf_ctldev.c b/sys/dev/nvmf/host/nvmf_ctldev.c index f40005a2a666..275d5e9c932a 100644 --- a/sys/dev/nvmf/host/nvmf_ctldev.c +++ b/sys/dev/nvmf/host/nvmf_ctldev.c @@ -9,6 +9,7 @@ #include <sys/bus.h> #include <sys/conf.h> #include <sys/malloc.h> +#include <sys/nv.h> #include <dev/nvme/nvme.h> #include <dev/nvmf/nvmf.h> #include <dev/nvmf/nvmf_transport.h> @@ -17,25 +18,25 @@ static struct cdev *nvmf_cdev; static int -nvmf_handoff_host(struct nvmf_handoff_host *hh) +nvmf_handoff_host(struct nvmf_ioc_nv *nv) { - struct nvmf_ivars ivars; + nvlist_t *nvl; device_t dev; int error; - error = nvmf_init_ivars(&ivars, hh); + error = nvmf_copyin_handoff(nv, &nvl); if (error != 0) return (error); bus_topo_lock(); - dev = device_add_child(root_bus, "nvme", -1); + dev = device_add_child(root_bus, "nvme", DEVICE_UNIT_ANY); if (dev == NULL) { bus_topo_unlock(); error = ENXIO; goto out; } - device_set_ivars(dev, &ivars); + device_set_ivars(dev, nvl); error = device_probe_and_attach(dev); device_set_ivars(dev, NULL); if (error != 0) @@ -43,7 +44,7 @@ nvmf_handoff_host(struct nvmf_handoff_host *hh) bus_topo_unlock(); out: - nvmf_free_ivars(&ivars); + nvlist_destroy(nvl); return (error); } @@ -117,7 +118,7 @@ nvmf_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, { switch (cmd) { case NVMF_HANDOFF_HOST: - return (nvmf_handoff_host((struct nvmf_handoff_host *)arg)); + return (nvmf_handoff_host((struct nvmf_ioc_nv *)arg)); case NVMF_DISCONNECT_HOST: return (nvmf_disconnect_host((const char **)arg)); case NVMF_DISCONNECT_ALL: diff --git a/sys/dev/nvmf/host/nvmf_ns.c b/sys/dev/nvmf/host/nvmf_ns.c index 3ce434bf7c50..4215c8295d2e 100644 --- a/sys/dev/nvmf/host/nvmf_ns.c +++ b/sys/dev/nvmf/host/nvmf_ns.c @@ -18,7 +18,7 @@ #include <sys/proc.h> #include <sys/refcount.h> #include <sys/sbuf.h> -#include <machine/stdarg.h> +#include <sys/stdarg.h> #include <dev/nvme/nvme.h> #include <dev/nvmf/host/nvmf_var.h> @@ -29,6 +29,7 @@ struct nvmf_namespace { u_int flags; uint32_t lba_size; bool disconnected; + bool shutdown; TAILQ_HEAD(, bio) pending_bios; struct mtx lock; @@ -49,7 +50,7 @@ ns_printf(struct nvmf_namespace *ns, const char *fmt, ...) sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); - sbuf_printf(&sb, "%sns%u: ", device_get_nameunit(ns->sc->dev), + sbuf_printf(&sb, "%sn%u: ", device_get_nameunit(ns->sc->dev), ns->id); va_start(ap, fmt); @@ -84,13 +85,22 @@ nvmf_ns_biodone(struct bio *bio) ns = bio->bio_dev->si_drv1; /* If a request is aborted, resubmit or queue it for resubmission. */ - if (bio->bio_error == ECONNABORTED) { + if (bio->bio_error == ECONNABORTED && !nvmf_fail_disconnect) { bio->bio_error = 0; bio->bio_driver2 = 0; mtx_lock(&ns->lock); if (ns->disconnected) { - TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue); - mtx_unlock(&ns->lock); + if (nvmf_fail_disconnect || ns->shutdown) { + mtx_unlock(&ns->lock); + bio->bio_error = ECONNABORTED; + bio->bio_flags |= BIO_ERROR; + bio->bio_resid = bio->bio_bcount; + biodone(bio); + } else { + TAILQ_INSERT_TAIL(&ns->pending_bios, bio, + bio_queue); + mtx_unlock(&ns->lock); + } } else { mtx_unlock(&ns->lock); nvmf_ns_strategy(bio); @@ -163,6 +173,7 @@ nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio) struct nvme_dsm_range *dsm_range; struct memdesc mem; uint64_t lba, lba_count; + int error; dsm_range = NULL; memset(&cmd, 0, sizeof(cmd)); @@ -201,10 +212,15 @@ nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio) mtx_lock(&ns->lock); if (ns->disconnected) { - TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue); + if (nvmf_fail_disconnect || ns->shutdown) { + error = ECONNABORTED; + } else { + TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue); + error = 0; + } mtx_unlock(&ns->lock); free(dsm_range, M_NVMF); - return (0); + return (error); } req = nvmf_allocate_request(nvmf_select_io_queue(ns->sc), &cmd, @@ -258,9 +274,8 @@ nvmf_ns_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag, return (nvmf_passthrough_cmd(ns->sc, pt, false)); case NVME_GET_NSID: gnsid = (struct nvme_get_nsid *)arg; - strncpy(gnsid->cdev, device_get_nameunit(ns->sc->dev), + strlcpy(gnsid->cdev, device_get_nameunit(ns->sc->dev), sizeof(gnsid->cdev)); - gnsid->cdev[sizeof(gnsid->cdev) - 1] = '\0'; gnsid->nsid = ns->id; return (0); case DIOCGMEDIASIZE: @@ -314,7 +329,7 @@ static struct cdevsw nvmf_ns_cdevsw = { struct nvmf_namespace * nvmf_init_ns(struct nvmf_softc *sc, uint32_t id, - struct nvme_namespace_data *data) + const struct nvme_namespace_data *data) { struct make_dev_args mda; struct nvmf_namespace *ns; @@ -372,10 +387,12 @@ nvmf_init_ns(struct nvmf_softc *sc, uint32_t id, mda.mda_gid = GID_WHEEL; mda.mda_mode = 0600; mda.mda_si_drv1 = ns; - error = make_dev_s(&mda, &ns->cdev, "%sns%u", + error = make_dev_s(&mda, &ns->cdev, "%sn%u", device_get_nameunit(sc->dev), id); if (error != 0) goto fail; + ns->cdev->si_drv2 = make_dev_alias(ns->cdev, "%sns%u", + device_get_nameunit(sc->dev), id); ns->cdev->si_flags |= SI_UNMAPPED; @@ -414,11 +431,35 @@ nvmf_reconnect_ns(struct nvmf_namespace *ns) } void +nvmf_shutdown_ns(struct nvmf_namespace *ns) +{ + TAILQ_HEAD(, bio) bios; + struct bio *bio; + + mtx_lock(&ns->lock); + ns->shutdown = true; + TAILQ_INIT(&bios); + TAILQ_CONCAT(&bios, &ns->pending_bios, bio_queue); + mtx_unlock(&ns->lock); + + while (!TAILQ_EMPTY(&bios)) { + bio = TAILQ_FIRST(&bios); + TAILQ_REMOVE(&bios, bio, bio_queue); + bio->bio_error = ECONNABORTED; + bio->bio_flags |= BIO_ERROR; + bio->bio_resid = bio->bio_bcount; + biodone(bio); + } +} + +void nvmf_destroy_ns(struct nvmf_namespace *ns) { TAILQ_HEAD(, bio) bios; struct bio *bio; + if (ns->cdev->si_drv2 != NULL) + destroy_dev(ns->cdev->si_drv2); destroy_dev(ns->cdev); /* @@ -451,7 +492,8 @@ nvmf_destroy_ns(struct nvmf_namespace *ns) } bool -nvmf_update_ns(struct nvmf_namespace *ns, struct nvme_namespace_data *data) +nvmf_update_ns(struct nvmf_namespace *ns, + const struct nvme_namespace_data *data) { uint8_t lbads, lbaf; diff --git a/sys/dev/nvmf/host/nvmf_qpair.c b/sys/dev/nvmf/host/nvmf_qpair.c index 96cb5a8b0465..2f511cf0406d 100644 --- a/sys/dev/nvmf/host/nvmf_qpair.c +++ b/sys/dev/nvmf/host/nvmf_qpair.c @@ -10,6 +10,8 @@ #include <sys/lock.h> #include <sys/malloc.h> #include <sys/mutex.h> +#include <sys/nv.h> +#include <sys/sysctl.h> #include <dev/nvme/nvme.h> #include <dev/nvmf/nvmf.h> #include <dev/nvmf/nvmf_transport.h> @@ -31,6 +33,7 @@ struct nvmf_host_qpair { u_int num_commands; uint16_t sqhd; uint16_t sqtail; + uint64_t submitted; struct mtx lock; @@ -41,6 +44,7 @@ struct nvmf_host_qpair { struct nvmf_host_command **active_commands; char name[16]; + struct sysctl_ctx_list sysctl_ctx; }; struct nvmf_request * @@ -112,8 +116,23 @@ nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd) struct nvmf_softc *sc = qp->sc; struct nvme_command *sqe; struct nvmf_capsule *nc; + uint16_t new_sqtail; int error; + mtx_assert(&qp->lock, MA_OWNED); + + qp->submitted++; + + /* + * Update flow control tracking. This is just a sanity check. + * Since num_commands == qsize - 1, there can never be too + * many commands in flight. + */ + new_sqtail = (qp->sqtail + 1) % (qp->num_commands + 1); + KASSERT(new_sqtail != qp->sqhd, ("%s: qp %p is full", __func__, qp)); + qp->sqtail = new_sqtail; + mtx_unlock(&qp->lock); + nc = cmd->req->nc; sqe = nvmf_capsule_sqe(nc); @@ -177,11 +196,23 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc) return; } + /* Update flow control tracking. */ + mtx_lock(&qp->lock); + if (qp->sq_flow_control) { + if (nvmf_sqhd_valid(nc)) + qp->sqhd = le16toh(cqe->sqhd); + } else { + /* + * If SQ FC is disabled, just advance the head for + * each response capsule received. + */ + qp->sqhd = (qp->sqhd + 1) % (qp->num_commands + 1); + } + /* * If the queue has been shutdown due to an error, silently * drop the response. */ - mtx_lock(&qp->lock); if (qp->qp == NULL) { device_printf(sc->dev, "received completion for CID %u on shutdown %s\n", cid, @@ -212,7 +243,6 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc) } else { cmd->req = STAILQ_FIRST(&qp->pending_requests); STAILQ_REMOVE_HEAD(&qp->pending_requests, link); - mtx_unlock(&qp->lock); nvmf_dispatch_command(qp, cmd); } @@ -221,28 +251,61 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc) nvmf_free_request(req); } +static void +nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp, + bool admin, u_int qid) +{ + struct sysctl_ctx_list *ctx = &qp->sysctl_ctx; + struct sysctl_oid *oid; + struct sysctl_oid_list *list; + char name[8]; + + if (admin) { + oid = SYSCTL_ADD_NODE(ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, + "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue"); + } else { + snprintf(name, sizeof(name), "%u", qid); + oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name, + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue"); + } + list = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD, + NULL, qp->num_commands + 1, "Number of entries in queue"); + SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd, + 0, "Current head of submission queue (as observed by driver)"); + SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail, + 0, "Current tail of submission queue (as observed by driver)"); + SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD, + &qp->submitted, 0, "Number of commands submitted"); +} + struct nvmf_host_qpair * nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype, - struct nvmf_handoff_qpair_params *handoff, const char *name) + const nvlist_t *nvl, const char *name, u_int qid) { struct nvmf_host_command *cmd, *ncmd; struct nvmf_host_qpair *qp; u_int i; + bool admin; + admin = nvlist_get_bool(nvl, "admin"); qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO); qp->sc = sc; - qp->sq_flow_control = handoff->sq_flow_control; - qp->sqhd = handoff->sqhd; - qp->sqtail = handoff->sqtail; + qp->sq_flow_control = nvlist_get_bool(nvl, "sq_flow_control"); + qp->sqhd = nvlist_get_number(nvl, "sqhd"); + qp->sqtail = nvlist_get_number(nvl, "sqtail"); strlcpy(qp->name, name, sizeof(qp->name)); mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF); + (void)sysctl_ctx_init(&qp->sysctl_ctx); /* * Allocate a spare command slot for each pending AER command * on the admin queue. */ - qp->num_commands = handoff->qsize - 1; - if (handoff->admin) + qp->num_commands = nvlist_get_number(nvl, "qsize") - 1; + if (admin) qp->num_commands += sc->num_aer; qp->active_commands = malloc(sizeof(*qp->active_commands) * @@ -255,9 +318,10 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype, } STAILQ_INIT(&qp->pending_requests); - qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error, - qp, nvmf_receive_capsule, qp); + qp->qp = nvmf_allocate_qpair(trtype, false, nvl, nvmf_qp_error, qp, + nvmf_receive_capsule, qp); if (qp->qp == NULL) { + (void)sysctl_ctx_free(&qp->sysctl_ctx); TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) { TAILQ_REMOVE(&qp->free_commands, cmd, link); free(cmd, M_NVMF); @@ -268,6 +332,8 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype, return (NULL); } + nvmf_sysctls_qp(sc, qp, admin, qid); + return (qp); } @@ -339,6 +405,7 @@ nvmf_destroy_qp(struct nvmf_host_qpair *qp) struct nvmf_host_command *cmd, *ncmd; nvmf_shutdown_qp(qp); + (void)sysctl_ctx_free(&qp->sysctl_ctx); TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) { TAILQ_REMOVE(&qp->free_commands, cmd, link); @@ -381,6 +448,5 @@ nvmf_submit_request(struct nvmf_request *req) ("%s: CID already busy", __func__)); qp->active_commands[cmd->cid] = cmd; cmd->req = req; - mtx_unlock(&qp->lock); nvmf_dispatch_command(qp, cmd); } diff --git a/sys/dev/nvmf/host/nvmf_sim.c b/sys/dev/nvmf/host/nvmf_sim.c index b097b04d64c3..de9e958d8afd 100644 --- a/sys/dev/nvmf/host/nvmf_sim.c +++ b/sys/dev/nvmf/host/nvmf_sim.c @@ -40,7 +40,13 @@ nvmf_ccb_done(union ccb *ccb) return; if (nvmf_cqe_aborted(&ccb->nvmeio.cpl)) { - ccb->ccb_h.status = CAM_REQUEUE_REQ; + struct cam_sim *sim = xpt_path_sim(ccb->ccb_h.path); + struct nvmf_softc *sc = cam_sim_softc(sim); + + if (nvmf_fail_disconnect || sc->sim_shutdown) + ccb->ccb_h.status = CAM_DEV_NOT_THERE; + else + ccb->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); } else if (ccb->nvmeio.cpl.status != 0) { ccb->ccb_h.status = CAM_NVME_STATUS_ERROR; @@ -52,7 +58,7 @@ nvmf_ccb_done(union ccb *ccb) xpt_done(ccb); } else { ccb->ccb_h.status = CAM_REQ_CMP; - xpt_done_direct(ccb); + xpt_done(ccb); } } @@ -106,7 +112,10 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb) mtx_lock(&sc->sim_mtx); if (sc->sim_disconnected) { mtx_unlock(&sc->sim_mtx); - nvmeio->ccb_h.status = CAM_REQUEUE_REQ; + if (nvmf_fail_disconnect || sc->sim_shutdown) + nvmeio->ccb_h.status = CAM_DEV_NOT_THERE; + else + nvmeio->ccb_h.status = CAM_REQUEUE_REQ; xpt_done(ccb); return; } @@ -116,8 +125,8 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb) qp = sc->admin; req = nvmf_allocate_request(qp, &nvmeio->cmd, nvmf_ccb_complete, ccb, M_NOWAIT); + mtx_unlock(&sc->sim_mtx); if (req == NULL) { - mtx_unlock(&sc->sim_mtx); nvmeio->ccb_h.status = CAM_RESRC_UNAVAIL; xpt_done(ccb); return; @@ -141,7 +150,6 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb) ("%s: incoming CCB is not in-progress", __func__)); ccb->ccb_h.status |= CAM_SIM_QUEUED; nvmf_submit_request(req); - mtx_unlock(&sc->sim_mtx); } static void @@ -183,7 +191,7 @@ nvmf_sim_action(struct cam_sim *sim, union ccb *ccb) cpi->xport_specific.nvmf.nsid = xpt_path_lun_id(ccb->ccb_h.path); cpi->xport_specific.nvmf.trtype = sc->trtype; - strncpy(cpi->xport_specific.nvmf.dev_name, + strlcpy(cpi->xport_specific.nvmf.dev_name, device_get_nameunit(sc->dev), sizeof(cpi->xport_specific.nvmf.dev_name)); cpi->maxio = sc->max_xfer_size; @@ -320,6 +328,15 @@ nvmf_reconnect_sim(struct nvmf_softc *sc) } void +nvmf_shutdown_sim(struct nvmf_softc *sc) +{ + mtx_lock(&sc->sim_mtx); + sc->sim_shutdown = true; + mtx_unlock(&sc->sim_mtx); + xpt_release_simq(sc->sim, 1); +} + +void nvmf_destroy_sim(struct nvmf_softc *sc) { xpt_async(AC_LOST_DEVICE, sc->path, NULL); diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h index 64525851631e..606245b3969c 100644 --- a/sys/dev/nvmf/host/nvmf_var.h +++ b/sys/dev/nvmf/host/nvmf_var.h @@ -9,10 +9,13 @@ #define __NVMF_VAR_H__ #include <sys/_callout.h> +#include <sys/_eventhandler.h> #include <sys/_lock.h> #include <sys/_mutex.h> +//#include <sys/_nv.h> #include <sys/_sx.h> #include <sys/_task.h> +#include <sys/smp.h> #include <sys/queue.h> #include <dev/nvme/nvme.h> #include <dev/nvmf/nvmf_transport.h> @@ -21,15 +24,10 @@ struct nvmf_aer; struct nvmf_capsule; struct nvmf_host_qpair; struct nvmf_namespace; +struct sysctl_oid_list; typedef void nvmf_request_complete_t(void *, const struct nvme_completion *); -struct nvmf_ivars { - struct nvmf_handoff_host *hh; - struct nvmf_handoff_qpair_params *io_params; - struct nvme_controller_data *cdata; -}; - struct nvmf_softc { device_t dev; @@ -42,6 +40,7 @@ struct nvmf_softc { struct cam_path *path; struct mtx sim_mtx; bool sim_disconnected; + bool sim_shutdown; struct nvmf_namespace **ns; @@ -76,12 +75,27 @@ struct nvmf_softc { struct callout ka_rx_timer; sbintime_t ka_rx_sbt; + struct timeout_task request_reconnect_task; + struct timeout_task controller_loss_task; + uint32_t reconnect_delay; + uint32_t controller_loss_timeout; + struct sx connection_lock; struct task disconnect_task; bool detaching; + bool controller_timedout; u_int num_aer; struct nvmf_aer *aer; + + struct sysctl_oid_list *ioq_oid_list; + + nvlist_t *rparams; + + struct timespec last_disconnect; + + eventhandler_tag shutdown_pre_sync_eh; + eventhandler_tag shutdown_post_sync_eh; }; struct nvmf_request { @@ -104,8 +118,8 @@ struct nvmf_completion_status { static __inline struct nvmf_host_qpair * nvmf_select_io_queue(struct nvmf_softc *sc) { - /* TODO: Support multiple queues? */ - return (sc->io[0]); + u_int idx = curcpu * sc->num_io_queues / (mp_maxid + 1); + return (sc->io[idx]); } static __inline bool @@ -140,14 +154,17 @@ extern driver_t nvme_nvmf_driver; MALLOC_DECLARE(M_NVMF); #endif +/* If true, I/O requests will fail while the host is disconnected. */ +extern bool nvmf_fail_disconnect; + /* nvmf.c */ void nvmf_complete(void *arg, const struct nvme_completion *cqe); void nvmf_io_complete(void *arg, size_t xfered, int error); void nvmf_wait_for_reply(struct nvmf_completion_status *status); -int nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh); -void nvmf_free_ivars(struct nvmf_ivars *ivars); +int nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp); void nvmf_disconnect(struct nvmf_softc *sc); void nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid); +void nvmf_rescan_all_ns(struct nvmf_softc *sc); int nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt, bool admin); @@ -180,17 +197,17 @@ void nvmf_ctl_unload(void); /* nvmf_ns.c */ struct nvmf_namespace *nvmf_init_ns(struct nvmf_softc *sc, uint32_t id, - struct nvme_namespace_data *data); + const struct nvme_namespace_data *data); void nvmf_disconnect_ns(struct nvmf_namespace *ns); void nvmf_reconnect_ns(struct nvmf_namespace *ns); +void nvmf_shutdown_ns(struct nvmf_namespace *ns); void nvmf_destroy_ns(struct nvmf_namespace *ns); bool nvmf_update_ns(struct nvmf_namespace *ns, - struct nvme_namespace_data *data); + const struct nvme_namespace_data *data); /* nvmf_qpair.c */ struct nvmf_host_qpair *nvmf_init_qp(struct nvmf_softc *sc, - enum nvmf_trtype trtype, struct nvmf_handoff_qpair_params *handoff, - const char *name); + enum nvmf_trtype trtype, const nvlist_t *nvl, const char *name, u_int qid); void nvmf_shutdown_qp(struct nvmf_host_qpair *qp); void nvmf_destroy_qp(struct nvmf_host_qpair *qp); struct nvmf_request *nvmf_allocate_request(struct nvmf_host_qpair *qp, @@ -202,6 +219,7 @@ void nvmf_free_request(struct nvmf_request *req); int nvmf_init_sim(struct nvmf_softc *sc); void nvmf_disconnect_sim(struct nvmf_softc *sc); void nvmf_reconnect_sim(struct nvmf_softc *sc); +void nvmf_shutdown_sim(struct nvmf_softc *sc); void nvmf_destroy_sim(struct nvmf_softc *sc); void nvmf_sim_rescan_ns(struct nvmf_softc *sc, uint32_t id); |