Diffstat (limited to 'sys/dev/nvmf/host')
-rw-r--r--  sys/dev/nvmf/host/nvmf.c        | 640
-rw-r--r--  sys/dev/nvmf/host/nvmf_aer.c    |   2
-rw-r--r--  sys/dev/nvmf/host/nvmf_ctldev.c |  15
-rw-r--r--  sys/dev/nvmf/host/nvmf_ns.c     |  66
-rw-r--r--  sys/dev/nvmf/host/nvmf_qpair.c  |  88
-rw-r--r--  sys/dev/nvmf/host/nvmf_sim.c    |  29
-rw-r--r--  sys/dev/nvmf/host/nvmf_var.h    |  46
7 files changed, 693 insertions, 193 deletions
diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c
index 0902bc78a7b5..1ac0d142443b 100644
--- a/sys/dev/nvmf/host/nvmf.c
+++ b/sys/dev/nvmf/host/nvmf.c
@@ -8,13 +8,18 @@
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
+#include <sys/dnv.h>
+#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
+#include <sys/nv.h>
+#include <sys/reboot.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
@@ -22,10 +27,20 @@
#include <dev/nvmf/host/nvmf_var.h>
static struct cdevsw nvmf_cdevsw;
+static struct taskqueue *nvmf_tq;
+
+bool nvmf_fail_disconnect = false;
+SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
+ &nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure");
MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");
+static void nvmf_controller_loss_task(void *arg, int pending);
static void nvmf_disconnect_task(void *arg, int pending);
+static void nvmf_request_reconnect(struct nvmf_softc *sc);
+static void nvmf_request_reconnect_task(void *arg, int pending);
+static void nvmf_shutdown_pre_sync(void *arg, int howto);
+static void nvmf_shutdown_post_sync(void *arg, int howto);
void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
@@ -187,104 +202,132 @@ nvmf_send_keep_alive(void *arg)
}
int
-nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
+nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp)
{
- size_t len;
- u_int i;
+ const struct nvme_discovery_log_entry *dle;
+ const struct nvme_controller_data *cdata;
+ const nvlist_t *const *io;
+ const nvlist_t *admin, *rparams;
+ nvlist_t *nvl;
+ size_t i, num_io_queues;
+ uint32_t qsize;
int error;
- memset(ivars, 0, sizeof(*ivars));
-
- if (!hh->admin.admin || hh->num_io_queues < 1)
- return (EINVAL);
-
- ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
- error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
+ error = nvmf_unpack_ioc_nvlist(nv, &nvl);
if (error != 0)
- goto out;
- nvme_controller_data_swapbytes(ivars->cdata);
+ return (error);
- len = hh->num_io_queues * sizeof(*ivars->io_params);
- ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
- error = copyin(hh->io, ivars->io_params, len);
- if (error != 0)
- goto out;
- for (i = 0; i < hh->num_io_queues; i++) {
- if (ivars->io_params[i].admin) {
- error = EINVAL;
- goto out;
- }
+ if (!nvlist_exists_number(nvl, "trtype") ||
+ !nvlist_exists_nvlist(nvl, "admin") ||
+ !nvlist_exists_nvlist_array(nvl, "io") ||
+ !nvlist_exists_binary(nvl, "cdata") ||
+ !nvlist_exists_nvlist(nvl, "rparams"))
+ goto invalid;
+
+ rparams = nvlist_get_nvlist(nvl, "rparams");
+ if (!nvlist_exists_binary(rparams, "dle") ||
+ !nvlist_exists_string(rparams, "hostnqn") ||
+ !nvlist_exists_number(rparams, "num_io_queues") ||
+ !nvlist_exists_number(rparams, "io_qsize"))
+ goto invalid;
+
+ admin = nvlist_get_nvlist(nvl, "admin");
+ if (!nvmf_validate_qpair_nvlist(admin, false))
+ goto invalid;
+ if (!nvlist_get_bool(admin, "admin"))
+ goto invalid;
+
+ io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
+ if (num_io_queues < 1 ||
+ num_io_queues != nvlist_get_number(rparams, "num_io_queues"))
+ goto invalid;
+ for (i = 0; i < num_io_queues; i++) {
+ if (!nvmf_validate_qpair_nvlist(io[i], false))
+ goto invalid;
+ }
- /* Require all I/O queues to be the same size. */
- if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
- error = EINVAL;
- goto out;
- }
+ /* Require all I/O queues to be the same size. */
+ qsize = nvlist_get_number(rparams, "io_qsize");
+ for (i = 0; i < num_io_queues; i++) {
+ if (nvlist_get_number(io[i], "qsize") != qsize)
+ goto invalid;
}
- ivars->hh = hh;
- return (0);
+ cdata = nvlist_get_binary(nvl, "cdata", &i);
+ if (i != sizeof(*cdata))
+ goto invalid;
+ dle = nvlist_get_binary(rparams, "dle", &i);
+ if (i != sizeof(*dle))
+ goto invalid;
-out:
- free(ivars->io_params, M_NVMF);
- free(ivars->cdata, M_NVMF);
- return (error);
-}
+ if (memcmp(dle->subnqn, cdata->subnqn, sizeof(cdata->subnqn)) != 0)
+ goto invalid;
-void
-nvmf_free_ivars(struct nvmf_ivars *ivars)
-{
- free(ivars->io_params, M_NVMF);
- free(ivars->cdata, M_NVMF);
+ *nvlp = nvl;
+ return (0);
+invalid:
+ nvlist_destroy(nvl);
+ return (EINVAL);
}
static int
nvmf_probe(device_t dev)
{
- struct nvmf_ivars *ivars = device_get_ivars(dev);
- char desc[260];
+ const nvlist_t *nvl = device_get_ivars(dev);
+ const struct nvme_controller_data *cdata;
- if (ivars == NULL)
+ if (nvl == NULL)
return (ENXIO);
- snprintf(desc, sizeof(desc), "Fabrics: %.256s", ivars->cdata->subnqn);
- device_set_desc_copy(dev, desc);
+ cdata = nvlist_get_binary(nvl, "cdata", NULL);
+ device_set_descf(dev, "Fabrics: %.256s", cdata->subnqn);
return (BUS_PROBE_DEFAULT);
}
static int
-nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
+nvmf_establish_connection(struct nvmf_softc *sc, nvlist_t *nvl)
{
+ const nvlist_t *const *io;
+ const nvlist_t *admin;
+ uint64_t kato;
+ size_t num_io_queues;
+ enum nvmf_trtype trtype;
char name[16];
+ trtype = nvlist_get_number(nvl, "trtype");
+ admin = nvlist_get_nvlist(nvl, "admin");
+ io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
+ kato = dnvlist_get_number(nvl, "kato", 0);
+ sc->reconnect_delay = dnvlist_get_number(nvl, "reconnect_delay", 0);
+ sc->controller_loss_timeout = dnvlist_get_number(nvl,
+ "controller_loss_timeout", 0);
+
/* Setup the admin queue. */
- sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
- "admin queue");
+ sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0);
if (sc->admin == NULL) {
device_printf(sc->dev, "Failed to setup admin queue\n");
return (ENXIO);
}
/* Setup I/O queues. */
- sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
+ sc->io = malloc(num_io_queues * sizeof(*sc->io), M_NVMF,
M_WAITOK | M_ZERO);
- sc->num_io_queues = ivars->hh->num_io_queues;
+ sc->num_io_queues = num_io_queues;
for (u_int i = 0; i < sc->num_io_queues; i++) {
snprintf(name, sizeof(name), "I/O queue %u", i);
- sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
- &ivars->io_params[i], name);
+ sc->io[i] = nvmf_init_qp(sc, trtype, io[i], name, i);
if (sc->io[i] == NULL) {
device_printf(sc->dev, "Failed to setup I/O queue %u\n",
- i + 1);
+ i);
return (ENXIO);
}
}
/* Start KeepAlive timers. */
- if (ivars->hh->kato != 0) {
+ if (kato != 0) {
sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
sc->cdata->ctratt) != 0;
- sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
+ sc->ka_rx_sbt = mstosbt(kato);
sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
nvmf_check_keep_alive, sc, C_HARDCLOCK);
@@ -292,12 +335,23 @@ nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
nvmf_send_keep_alive, sc, C_HARDCLOCK);
}
+ memcpy(sc->cdata, nvlist_get_binary(nvl, "cdata", NULL),
+ sizeof(*sc->cdata));
+
+ /* Save reconnect parameters. */
+ nvlist_destroy(sc->rparams);
+ sc->rparams = nvlist_take_nvlist(nvl, "rparams");
+
return (0);
}
+typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t,
+ const struct nvme_namespace_data *, void *);
+
static bool
-nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
- struct nvme_namespace_data *data, uint32_t *nsidp)
+nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
+ struct nvme_namespace_data *data, uint32_t *nsidp,
+ nvmf_scan_active_ns_cb *cb, void *cb_arg)
{
struct nvmf_completion_status status;
uint32_t nsid;
@@ -333,13 +387,6 @@ nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
return (true);
}
- if (sc->ns[nsid - 1] != NULL) {
- device_printf(sc->dev,
- "duplicate namespace %u in active namespace list\n",
- nsid);
- return (false);
- }
-
nvmf_status_init(&status);
nvmf_status_wait_io(&status);
if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
@@ -365,49 +412,37 @@ nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
return (false);
}
- /*
- * As in nvme_ns_construct, a size of zero indicates an
- * invalid namespace.
- */
nvme_namespace_data_swapbytes(data);
- if (data->nsze == 0) {
- device_printf(sc->dev,
- "ignoring active namespace %u with zero size\n",
- nsid);
- continue;
- }
-
- sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
-
- nvmf_sim_rescan_ns(sc, nsid);
+ if (!cb(sc, nsid, data, cb_arg))
+ return (false);
}
MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);
- if (nsid >= 0xfffffffd)
+ if (nsid >= NVME_GLOBAL_NAMESPACE_TAG - 1)
*nsidp = 0;
else
- *nsidp = nsid + 1;
+ *nsidp = nsid;
return (true);
}
static bool
-nvmf_add_namespaces(struct nvmf_softc *sc)
+nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb,
+ void *cb_arg)
{
struct nvme_namespace_data *data;
struct nvme_ns_list *nslist;
uint32_t nsid;
bool retval;
- sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
- M_WAITOK | M_ZERO);
nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
data = malloc(sizeof(*data), M_NVMF, M_WAITOK);
nsid = 0;
retval = true;
for (;;) {
- if (!nvmf_scan_nslist(sc, nslist, data, &nsid)) {
+ if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb,
+ cb_arg)) {
retval = false;
break;
}
@@ -420,36 +455,77 @@ nvmf_add_namespaces(struct nvmf_softc *sc)
return (retval);
}
+static bool
+nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid,
+ const struct nvme_namespace_data *data, void *arg __unused)
+{
+ if (sc->ns[nsid - 1] != NULL) {
+ device_printf(sc->dev,
+ "duplicate namespace %u in active namespace list\n",
+ nsid);
+ return (false);
+ }
+
+ /*
+ * As in nvme_ns_construct, a size of zero indicates an
+ * invalid namespace.
+ */
+ if (data->nsze == 0) {
+ device_printf(sc->dev,
+ "ignoring active namespace %u with zero size\n", nsid);
+ return (true);
+ }
+
+ sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
+
+ nvmf_sim_rescan_ns(sc, nsid);
+ return (true);
+}
+
+static bool
+nvmf_add_namespaces(struct nvmf_softc *sc)
+{
+ sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
+ M_WAITOK | M_ZERO);
+ return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL));
+}
+
static int
nvmf_attach(device_t dev)
{
struct make_dev_args mda;
struct nvmf_softc *sc = device_get_softc(dev);
- struct nvmf_ivars *ivars = device_get_ivars(dev);
+ nvlist_t *nvl = device_get_ivars(dev);
+ const nvlist_t * const *io;
+ struct sysctl_oid *oid;
uint64_t val;
u_int i;
int error;
- if (ivars == NULL)
+ if (nvl == NULL)
return (ENXIO);
sc->dev = dev;
- sc->trtype = ivars->hh->trtype;
+ sc->trtype = nvlist_get_number(nvl, "trtype");
callout_init(&sc->ka_rx_timer, 1);
callout_init(&sc->ka_tx_timer, 1);
sx_init(&sc->connection_lock, "nvmf connection");
TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);
+ TIMEOUT_TASK_INIT(nvmf_tq, &sc->controller_loss_task, 0,
+ nvmf_controller_loss_task, sc);
+ TIMEOUT_TASK_INIT(nvmf_tq, &sc->request_reconnect_task, 0,
+ nvmf_request_reconnect_task, sc);
- /* Claim the cdata pointer from ivars. */
- sc->cdata = ivars->cdata;
- ivars->cdata = NULL;
+ oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
+ SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues");
+ sc->ioq_oid_list = SYSCTL_CHILDREN(oid);
- nvmf_init_aer(sc);
+ sc->cdata = malloc(sizeof(*sc->cdata), M_NVMF, M_WAITOK);
- /* TODO: Multiqueue support. */
- sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;
+ nvmf_init_aer(sc);
- error = nvmf_establish_connection(sc, ivars);
+ error = nvmf_establish_connection(sc, nvl);
if (error != 0)
goto out;
@@ -476,6 +552,10 @@ nvmf_attach(device_t dev)
NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
}
+ io = nvlist_get_nvlist_array(nvl, "io", NULL);
+ sc->max_pending_io = nvlist_get_number(io[0], "qsize") *
+ sc->num_io_queues;
+
error = nvmf_init_sim(sc);
if (error != 0)
goto out;
@@ -503,6 +583,11 @@ nvmf_attach(device_t dev)
goto out;
}
+ sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
+ nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
+ sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
+ nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_LAST);
+
return (0);
out:
if (sc->ns != NULL) {
@@ -529,8 +614,11 @@ out:
nvmf_destroy_aer(sc);
- taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+ taskqueue_drain_timeout(nvmf_tq, &sc->controller_loss_task);
+ taskqueue_drain(nvmf_tq, &sc->disconnect_task);
sx_destroy(&sc->connection_lock);
+ nvlist_destroy(sc->rparams);
free(sc->cdata, M_NVMF);
return (error);
}
@@ -538,7 +626,7 @@ out:
void
nvmf_disconnect(struct nvmf_softc *sc)
{
- taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_enqueue(nvmf_tq, &sc->disconnect_task);
}
static void
@@ -579,6 +667,7 @@ nvmf_disconnect_task(void *arg, int pending __unused)
return;
}
+ nanotime(&sc->last_disconnect);
callout_drain(&sc->ka_tx_timer);
callout_drain(&sc->ka_rx_timer);
sc->ka_traffic = false;
@@ -600,29 +689,98 @@ nvmf_disconnect_task(void *arg, int pending __unused)
nvmf_destroy_qp(sc->admin);
sc->admin = NULL;
+ if (sc->reconnect_delay != 0)
+ nvmf_request_reconnect(sc);
+ if (sc->controller_loss_timeout != 0)
+ taskqueue_enqueue_timeout(nvmf_tq,
+ &sc->controller_loss_task, sc->controller_loss_timeout *
+ hz);
+
+ sx_xunlock(&sc->connection_lock);
+}
+
+static void
+nvmf_controller_loss_task(void *arg, int pending)
+{
+ struct nvmf_softc *sc = arg;
+ device_t dev;
+ int error;
+
+ bus_topo_lock();
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin != NULL || sc->detaching) {
+ /* Reconnected or already detaching. */
+ sx_xunlock(&sc->connection_lock);
+ bus_topo_unlock();
+ return;
+ }
+
+ sc->controller_timedout = true;
+ sx_xunlock(&sc->connection_lock);
+
+ /*
+ * XXX: Doing this from here is a bit ugly. We don't have an
+ * extra reference on `dev` but bus_topo_lock should block any
+ * concurrent device_delete_child invocations.
+ */
+ dev = sc->dev;
+ error = device_delete_child(root_bus, dev);
+ if (error != 0)
+ device_printf(dev,
+ "failed to detach after controller loss: %d\n", error);
+ bus_topo_unlock();
+}
+
+static void
+nvmf_request_reconnect(struct nvmf_softc *sc)
+{
+ char buf[64];
+
+ sx_assert(&sc->connection_lock, SX_LOCKED);
+
+ snprintf(buf, sizeof(buf), "name=\"%s\"", device_get_nameunit(sc->dev));
+ devctl_notify("nvme", "controller", "RECONNECT", buf);
+ taskqueue_enqueue_timeout(nvmf_tq, &sc->request_reconnect_task,
+ sc->reconnect_delay * hz);
+}
+
+static void
+nvmf_request_reconnect_task(void *arg, int pending)
+{
+ struct nvmf_softc *sc = arg;
+
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
+ /* Reconnected or already detaching. */
+ sx_xunlock(&sc->connection_lock);
+ return;
+ }
+
+ nvmf_request_reconnect(sc);
sx_xunlock(&sc->connection_lock);
}
static int
-nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
+nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
{
- struct nvmf_ivars ivars;
+ const struct nvme_controller_data *cdata;
+ nvlist_t *nvl;
u_int i;
int error;
+ error = nvmf_copyin_handoff(nv, &nvl);
+ if (error != 0)
+ return (error);
+
/* XXX: Should we permit changing the transport type? */
- if (sc->trtype != hh->trtype) {
+ if (sc->trtype != nvlist_get_number(nvl, "trtype")) {
device_printf(sc->dev,
"transport type mismatch on reconnect\n");
return (EINVAL);
}
- error = nvmf_init_ivars(&ivars, hh);
- if (error != 0)
- return (error);
-
sx_xlock(&sc->connection_lock);
- if (sc->admin != NULL || sc->detaching) {
+ if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
error = EBUSY;
goto out;
}
@@ -634,8 +792,9 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
* ensures the new association is connected to the same NVMe
* subsystem.
*/
- if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
- sizeof(ivars.cdata->subnqn)) != 0) {
+ cdata = nvlist_get_binary(nvl, "cdata", NULL);
+ if (memcmp(sc->cdata->subnqn, cdata->subnqn,
+ sizeof(cdata->subnqn)) != 0) {
device_printf(sc->dev,
"controller subsystem NQN mismatch on reconnect\n");
error = EINVAL;
@@ -647,7 +806,7 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
* max_pending_io is still correct?
*/
- error = nvmf_establish_connection(sc, &ivars);
+ error = nvmf_establish_connection(sc, nvl);
if (error != 0)
goto out;
@@ -665,12 +824,85 @@ nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
nvmf_reconnect_ns(sc->ns[i]);
}
nvmf_reconnect_sim(sc);
+
+ nvmf_rescan_all_ns(sc);
+
+ taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL);
+ taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL);
out:
sx_xunlock(&sc->connection_lock);
- nvmf_free_ivars(&ivars);
+ nvlist_destroy(nvl);
return (error);
}
+static void
+nvmf_shutdown_pre_sync(void *arg, int howto)
+{
+ struct nvmf_softc *sc = arg;
+
+ if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
+ return;
+
+ /*
+ * If this association is disconnected, abort any pending
+ * requests with an error to permit filesystems to unmount
+ * without hanging.
+ */
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin != NULL || sc->detaching) {
+ sx_xunlock(&sc->connection_lock);
+ return;
+ }
+
+ for (u_int i = 0; i < sc->cdata->nn; i++) {
+ if (sc->ns[i] != NULL)
+ nvmf_shutdown_ns(sc->ns[i]);
+ }
+ nvmf_shutdown_sim(sc);
+ sx_xunlock(&sc->connection_lock);
+}
+
+static void
+nvmf_shutdown_post_sync(void *arg, int howto)
+{
+ struct nvmf_softc *sc = arg;
+
+ if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
+ return;
+
+ /*
+ * If this association is connected, disconnect gracefully.
+ */
+ sx_xlock(&sc->connection_lock);
+ if (sc->admin == NULL || sc->detaching) {
+ sx_xunlock(&sc->connection_lock);
+ return;
+ }
+
+ callout_drain(&sc->ka_tx_timer);
+ callout_drain(&sc->ka_rx_timer);
+
+ nvmf_shutdown_controller(sc);
+
+ /*
+ * Quiesce consumers so that any commands submitted after this
+ * fail with an error. Notably, nda(4) calls nda_flush() from
+ * a post_sync handler that might be ordered after this one.
+ */
+ for (u_int i = 0; i < sc->cdata->nn; i++) {
+ if (sc->ns[i] != NULL)
+ nvmf_shutdown_ns(sc->ns[i]);
+ }
+ nvmf_shutdown_sim(sc);
+
+ for (u_int i = 0; i < sc->num_io_queues; i++) {
+ nvmf_destroy_qp(sc->io[i]);
+ }
+ nvmf_destroy_qp(sc->admin);
+ sc->admin = NULL;
+ sx_xunlock(&sc->connection_lock);
+}
+
static int
nvmf_detach(device_t dev)
{
@@ -683,6 +915,9 @@ nvmf_detach(device_t dev)
sc->detaching = true;
sx_xunlock(&sc->connection_lock);
+ EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
+ EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh);
+
nvmf_destroy_sim(sc);
for (i = 0; i < sc->cdata->nn; i++) {
if (sc->ns[i] != NULL)
@@ -701,7 +936,21 @@ nvmf_detach(device_t dev)
}
free(sc->io, M_NVMF);
- taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
+ taskqueue_drain(nvmf_tq, &sc->disconnect_task);
+ if (taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task,
+ NULL) != 0)
+ taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
+
+ /*
+ * Don't cancel/drain the controller loss task if that task
+ * has fired and is triggering the detach.
+ */
+ if (!sc->controller_timedout) {
+ if (taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task,
+ NULL) != 0)
+ taskqueue_drain_timeout(nvmf_tq,
+ &sc->controller_loss_task);
+ }
if (sc->admin != NULL)
nvmf_destroy_qp(sc->admin);
@@ -709,16 +958,45 @@ nvmf_detach(device_t dev)
nvmf_destroy_aer(sc);
sx_destroy(&sc->connection_lock);
+ nvlist_destroy(sc->rparams);
free(sc->cdata, M_NVMF);
return (0);
}
+static void
+nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid,
+ const struct nvme_namespace_data *data)
+{
+ struct nvmf_namespace *ns;
+
+ /* XXX: Needs locking around sc->ns[]. */
+ ns = sc->ns[nsid - 1];
+ if (data->nsze == 0) {
+ /* XXX: Needs locking */
+ if (ns != NULL) {
+ nvmf_destroy_ns(ns);
+ sc->ns[nsid - 1] = NULL;
+ }
+ } else {
+ /* XXX: Needs locking */
+ if (ns == NULL) {
+ sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
+ } else {
+ if (!nvmf_update_ns(ns, data)) {
+ nvmf_destroy_ns(ns);
+ sc->ns[nsid - 1] = NULL;
+ }
+ }
+ }
+
+ nvmf_sim_rescan_ns(sc, nsid);
+}
+
void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
struct nvmf_completion_status status;
struct nvme_namespace_data *data;
- struct nvmf_namespace *ns;
data = malloc(sizeof(*data), M_NVMF, M_WAITOK);
@@ -751,29 +1029,58 @@ nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
nvme_namespace_data_swapbytes(data);
- /* XXX: Needs locking around sc->ns[]. */
- ns = sc->ns[nsid - 1];
- if (data->nsze == 0) {
- /* XXX: Needs locking */
+ nvmf_rescan_ns_1(sc, nsid, data);
+
+ free(data, M_NVMF);
+}
+
+static void
+nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid,
+ uint32_t next_valid_nsid)
+{
+ struct nvmf_namespace *ns;
+
+ for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++) {
+ /* XXX: Needs locking around sc->ns[]. */
+ ns = sc->ns[nsid - 1];
if (ns != NULL) {
nvmf_destroy_ns(ns);
sc->ns[nsid - 1] = NULL;
- }
- } else {
- /* XXX: Needs locking */
- if (ns == NULL) {
- sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
- } else {
- if (!nvmf_update_ns(ns, data)) {
- nvmf_destroy_ns(ns);
- sc->ns[nsid - 1] = NULL;
- }
+
+ nvmf_sim_rescan_ns(sc, nsid);
}
}
+}
- free(data, M_NVMF);
+static bool
+nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid,
+ const struct nvme_namespace_data *data, void *arg)
+{
+ uint32_t *last_nsid = arg;
- nvmf_sim_rescan_ns(sc, nsid);
+ /* Check for any gaps prior to this namespace. */
+ nvmf_purge_namespaces(sc, *last_nsid + 1, nsid);
+ *last_nsid = nsid;
+
+ nvmf_rescan_ns_1(sc, nsid, data);
+ return (true);
+}
+
+void
+nvmf_rescan_all_ns(struct nvmf_softc *sc)
+{
+ uint32_t last_nsid;
+
+ last_nsid = 0;
+ if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid))
+ return;
+
+ /*
+ * Check for any namespace devices after the last active
+ * namespace.
+ */
+ nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1);
}
int
@@ -822,12 +1129,21 @@ nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
cmd.cdw14 = pt->cmd.cdw14;
cmd.cdw15 = pt->cmd.cdw15;
+ sx_slock(&sc->connection_lock);
+ if (sc->admin == NULL || sc->detaching) {
+ device_printf(sc->dev,
+ "failed to send passthrough command\n");
+ error = ECONNABORTED;
+ sx_sunlock(&sc->connection_lock);
+ goto error;
+ }
if (admin)
qp = sc->admin;
else
qp = nvmf_select_io_queue(sc);
nvmf_status_init(&status);
req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
+ sx_sunlock(&sc->connection_lock);
if (req == NULL) {
device_printf(sc->dev, "failed to send passthrough command\n");
error = ECONNABORTED;
@@ -857,14 +1173,46 @@ error:
}
static int
+nvmf_reconnect_params(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
+{
+ int error;
+
+ sx_slock(&sc->connection_lock);
+ error = nvmf_pack_ioc_nvlist(sc->rparams, nv);
+ sx_sunlock(&sc->connection_lock);
+
+ return (error);
+}
+
+static int
+nvmf_connection_status(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
+{
+ nvlist_t *nvl, *nvl_ts;
+ int error;
+
+ nvl = nvlist_create(0);
+ nvl_ts = nvlist_create(0);
+
+ sx_slock(&sc->connection_lock);
+ nvlist_add_bool(nvl, "connected", sc->admin != NULL);
+ nvlist_add_number(nvl_ts, "tv_sec", sc->last_disconnect.tv_sec);
+ nvlist_add_number(nvl_ts, "tv_nsec", sc->last_disconnect.tv_nsec);
+ sx_sunlock(&sc->connection_lock);
+ nvlist_move_nvlist(nvl, "last_disconnect", nvl_ts);
+
+ error = nvmf_pack_ioc_nvlist(nvl, nv);
+ nvlist_destroy(nvl);
+ return (error);
+}
+
+static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
struct thread *td)
{
struct nvmf_softc *sc = cdev->si_drv1;
struct nvme_get_nsid *gnsid;
struct nvme_pt_command *pt;
- struct nvmf_reconnect_params *rp;
- struct nvmf_handoff_host *hh;
+ struct nvmf_ioc_nv *nv;
switch (cmd) {
case NVME_PASSTHROUGH_CMD:
@@ -872,25 +1220,25 @@ nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
return (nvmf_passthrough_cmd(sc, pt, true));
case NVME_GET_NSID:
gnsid = (struct nvme_get_nsid *)arg;
- strncpy(gnsid->cdev, device_get_nameunit(sc->dev),
+ strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
sizeof(gnsid->cdev));
- gnsid->cdev[sizeof(gnsid->cdev) - 1] = '\0';
gnsid->nsid = 0;
return (0);
case NVME_GET_MAX_XFER_SIZE:
*(uint64_t *)arg = sc->max_xfer_size;
return (0);
- case NVMF_RECONNECT_PARAMS:
- rp = (struct nvmf_reconnect_params *)arg;
- if ((sc->cdata->fcatt & 1) == 0)
- rp->cntlid = NVMF_CNTLID_DYNAMIC;
- else
- rp->cntlid = sc->cdata->ctrlr_id;
- memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
+ case NVME_GET_CONTROLLER_DATA:
+ memcpy(arg, sc->cdata, sizeof(*sc->cdata));
return (0);
+ case NVMF_RECONNECT_PARAMS:
+ nv = (struct nvmf_ioc_nv *)arg;
+ return (nvmf_reconnect_params(sc, nv));
case NVMF_RECONNECT_HOST:
- hh = (struct nvmf_handoff_host *)arg;
- return (nvmf_reconnect_host(sc, hh));
+ nv = (struct nvmf_ioc_nv *)arg;
+ return (nvmf_reconnect_host(sc, nv));
+ case NVMF_CONNECTION_STATUS:
+ nv = (struct nvmf_ioc_nv *)arg;
+ return (nvmf_connection_status(sc, nv));
default:
return (ENOTTY);
}
@@ -904,14 +1252,25 @@ static struct cdevsw nvmf_cdevsw = {
static int
nvmf_modevent(module_t mod, int what, void *arg)
{
+ int error;
+
switch (what) {
case MOD_LOAD:
- return (nvmf_ctl_load());
+ error = nvmf_ctl_load();
+ if (error != 0)
+ return (error);
+
+ nvmf_tq = taskqueue_create("nvmf", M_WAITOK | M_ZERO,
+ taskqueue_thread_enqueue, &nvmf_tq);
+ taskqueue_start_threads(&nvmf_tq, 1, PWAIT, "nvmf taskq");
+ return (0);
case MOD_QUIESCE:
return (0);
case MOD_UNLOAD:
nvmf_ctl_unload();
destroy_dev_drain(&nvmf_cdevsw);
+ if (nvmf_tq != NULL)
+ taskqueue_free(nvmf_tq);
return (0);
default:
return (EOPNOTSUPP);
@@ -923,9 +1282,6 @@ static device_method_t nvmf_methods[] = {
DEVMETHOD(device_probe, nvmf_probe),
DEVMETHOD(device_attach, nvmf_attach),
DEVMETHOD(device_detach, nvmf_detach),
-#if 0
- DEVMETHOD(device_shutdown, nvmf_shutdown),
-#endif
DEVMETHOD_END
};
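
[Note: the handoff path above replaces the fixed struct nvmf_handoff_host with a packed nvlist that is validated key by key before any value is extracted. A minimal userland sketch of that validate-then-extract libnv pattern follows; it is illustrative only, not the kernel code — the keys mirror the diff, the values are made up, and it builds on FreeBSD with cc -lnv.]

#include <sys/nv.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	nvlist_t *nvl;

	/* Build an nvlist resembling a handoff; keys mirror the diff. */
	nvl = nvlist_create(0);
	nvlist_add_number(nvl, "trtype", 3);
	nvlist_add_number(nvl, "num_io_queues", 4);

	/*
	 * Validate before extracting, as nvmf_copyin_handoff() does:
	 * nvlist_get_*() asserts that the key exists, so the
	 * nvlist_exists_*() checks must come first.
	 */
	if (!nvlist_exists_number(nvl, "trtype") ||
	    !nvlist_exists_number(nvl, "num_io_queues")) {
		nvlist_destroy(nvl);
		errx(1, "invalid handoff nvlist");
	}

	printf("trtype %ju, %ju I/O queues\n",
	    (uintmax_t)nvlist_get_number(nvl, "trtype"),
	    (uintmax_t)nvlist_get_number(nvl, "num_io_queues"));

	nvlist_destroy(nvl);
	return (0);
}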
diff --git a/sys/dev/nvmf/host/nvmf_aer.c b/sys/dev/nvmf/host/nvmf_aer.c
index 4c950f1518d0..2f7f177d0421 100644
--- a/sys/dev/nvmf/host/nvmf_aer.c
+++ b/sys/dev/nvmf/host/nvmf_aer.c
@@ -62,7 +62,7 @@ nvmf_handle_changed_namespaces(struct nvmf_softc *sc,
* probably just rescan the entire set of namespaces.
*/
if (ns_list->ns[0] == 0xffffffff) {
- device_printf(sc->dev, "too many changed namespaces\n");
+ nvmf_rescan_all_ns(sc);
return;
}
diff --git a/sys/dev/nvmf/host/nvmf_ctldev.c b/sys/dev/nvmf/host/nvmf_ctldev.c
index f40005a2a666..275d5e9c932a 100644
--- a/sys/dev/nvmf/host/nvmf_ctldev.c
+++ b/sys/dev/nvmf/host/nvmf_ctldev.c
@@ -9,6 +9,7 @@
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/malloc.h>
+#include <sys/nv.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
@@ -17,25 +18,25 @@
static struct cdev *nvmf_cdev;
static int
-nvmf_handoff_host(struct nvmf_handoff_host *hh)
+nvmf_handoff_host(struct nvmf_ioc_nv *nv)
{
- struct nvmf_ivars ivars;
+ nvlist_t *nvl;
device_t dev;
int error;
- error = nvmf_init_ivars(&ivars, hh);
+ error = nvmf_copyin_handoff(nv, &nvl);
if (error != 0)
return (error);
bus_topo_lock();
- dev = device_add_child(root_bus, "nvme", -1);
+ dev = device_add_child(root_bus, "nvme", DEVICE_UNIT_ANY);
if (dev == NULL) {
bus_topo_unlock();
error = ENXIO;
goto out;
}
- device_set_ivars(dev, &ivars);
+ device_set_ivars(dev, nvl);
error = device_probe_and_attach(dev);
device_set_ivars(dev, NULL);
if (error != 0)
@@ -43,7 +44,7 @@ nvmf_handoff_host(struct nvmf_handoff_host *hh)
bus_topo_unlock();
out:
- nvmf_free_ivars(&ivars);
+ nvlist_destroy(nvl);
return (error);
}
@@ -117,7 +118,7 @@ nvmf_ctl_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag,
{
switch (cmd) {
case NVMF_HANDOFF_HOST:
- return (nvmf_handoff_host((struct nvmf_handoff_host *)arg));
+ return (nvmf_handoff_host((struct nvmf_ioc_nv *)arg));
case NVMF_DISCONNECT_HOST:
return (nvmf_disconnect_host((const char **)arg));
case NVMF_DISCONNECT_ALL:
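
[Note: NVMF_HANDOFF_HOST now carries a packed nvlist across the ioctl boundary, which the kernel unpacks in nvmf_copyin_handoff(). A sketch of the same serialization round trip in userland, assuming only the documented nvlist_pack(3)/nvlist_unpack(3) API; the key and value are invented for illustration.]

#include <sys/nv.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	nvlist_t *nvl, *copy;
	void *buf;
	size_t len;

	nvl = nvlist_create(0);
	nvlist_add_string(nvl, "hostnqn", "nqn.2014-08.org.example:host");

	/* Serialize, as a userland handoff would before the ioctl. */
	buf = nvlist_pack(nvl, &len);
	if (buf == NULL)
		err(1, "nvlist_pack");

	/* Deserialize, as the kernel side does after copyin. */
	copy = nvlist_unpack(buf, len, 0);
	if (copy == NULL)
		err(1, "nvlist_unpack");

	printf("hostnqn: %s\n", nvlist_get_string(copy, "hostnqn"));

	free(buf);
	nvlist_destroy(copy);
	nvlist_destroy(nvl);
	return (0);
}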
diff --git a/sys/dev/nvmf/host/nvmf_ns.c b/sys/dev/nvmf/host/nvmf_ns.c
index 3ce434bf7c50..4215c8295d2e 100644
--- a/sys/dev/nvmf/host/nvmf_ns.c
+++ b/sys/dev/nvmf/host/nvmf_ns.c
@@ -18,7 +18,7 @@
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
-#include <machine/stdarg.h>
+#include <sys/stdarg.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/host/nvmf_var.h>
@@ -29,6 +29,7 @@ struct nvmf_namespace {
u_int flags;
uint32_t lba_size;
bool disconnected;
+ bool shutdown;
TAILQ_HEAD(, bio) pending_bios;
struct mtx lock;
@@ -49,7 +50,7 @@ ns_printf(struct nvmf_namespace *ns, const char *fmt, ...)
sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
- sbuf_printf(&sb, "%sns%u: ", device_get_nameunit(ns->sc->dev),
+ sbuf_printf(&sb, "%sn%u: ", device_get_nameunit(ns->sc->dev),
ns->id);
va_start(ap, fmt);
@@ -84,13 +85,22 @@ nvmf_ns_biodone(struct bio *bio)
ns = bio->bio_dev->si_drv1;
/* If a request is aborted, resubmit or queue it for resubmission. */
- if (bio->bio_error == ECONNABORTED) {
+ if (bio->bio_error == ECONNABORTED && !nvmf_fail_disconnect) {
bio->bio_error = 0;
bio->bio_driver2 = 0;
mtx_lock(&ns->lock);
if (ns->disconnected) {
- TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
- mtx_unlock(&ns->lock);
+ if (nvmf_fail_disconnect || ns->shutdown) {
+ mtx_unlock(&ns->lock);
+ bio->bio_error = ECONNABORTED;
+ bio->bio_flags |= BIO_ERROR;
+ bio->bio_resid = bio->bio_bcount;
+ biodone(bio);
+ } else {
+ TAILQ_INSERT_TAIL(&ns->pending_bios, bio,
+ bio_queue);
+ mtx_unlock(&ns->lock);
+ }
} else {
mtx_unlock(&ns->lock);
nvmf_ns_strategy(bio);
@@ -163,6 +173,7 @@ nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio)
struct nvme_dsm_range *dsm_range;
struct memdesc mem;
uint64_t lba, lba_count;
+ int error;
dsm_range = NULL;
memset(&cmd, 0, sizeof(cmd));
@@ -201,10 +212,15 @@ nvmf_ns_submit_bio(struct nvmf_namespace *ns, struct bio *bio)
mtx_lock(&ns->lock);
if (ns->disconnected) {
- TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
+ if (nvmf_fail_disconnect || ns->shutdown) {
+ error = ECONNABORTED;
+ } else {
+ TAILQ_INSERT_TAIL(&ns->pending_bios, bio, bio_queue);
+ error = 0;
+ }
mtx_unlock(&ns->lock);
free(dsm_range, M_NVMF);
- return (0);
+ return (error);
}
req = nvmf_allocate_request(nvmf_select_io_queue(ns->sc), &cmd,
@@ -258,9 +274,8 @@ nvmf_ns_ioctl(struct cdev *dev, u_long cmd, caddr_t arg, int flag,
return (nvmf_passthrough_cmd(ns->sc, pt, false));
case NVME_GET_NSID:
gnsid = (struct nvme_get_nsid *)arg;
- strncpy(gnsid->cdev, device_get_nameunit(ns->sc->dev),
+ strlcpy(gnsid->cdev, device_get_nameunit(ns->sc->dev),
sizeof(gnsid->cdev));
- gnsid->cdev[sizeof(gnsid->cdev) - 1] = '\0';
gnsid->nsid = ns->id;
return (0);
case DIOCGMEDIASIZE:
@@ -314,7 +329,7 @@ static struct cdevsw nvmf_ns_cdevsw = {
struct nvmf_namespace *
nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
- struct nvme_namespace_data *data)
+ const struct nvme_namespace_data *data)
{
struct make_dev_args mda;
struct nvmf_namespace *ns;
@@ -372,10 +387,12 @@ nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
mda.mda_gid = GID_WHEEL;
mda.mda_mode = 0600;
mda.mda_si_drv1 = ns;
- error = make_dev_s(&mda, &ns->cdev, "%sns%u",
+ error = make_dev_s(&mda, &ns->cdev, "%sn%u",
device_get_nameunit(sc->dev), id);
if (error != 0)
goto fail;
+ ns->cdev->si_drv2 = make_dev_alias(ns->cdev, "%sns%u",
+ device_get_nameunit(sc->dev), id);
ns->cdev->si_flags |= SI_UNMAPPED;
@@ -414,11 +431,35 @@ nvmf_reconnect_ns(struct nvmf_namespace *ns)
}
void
+nvmf_shutdown_ns(struct nvmf_namespace *ns)
+{
+ TAILQ_HEAD(, bio) bios;
+ struct bio *bio;
+
+ mtx_lock(&ns->lock);
+ ns->shutdown = true;
+ TAILQ_INIT(&bios);
+ TAILQ_CONCAT(&bios, &ns->pending_bios, bio_queue);
+ mtx_unlock(&ns->lock);
+
+ while (!TAILQ_EMPTY(&bios)) {
+ bio = TAILQ_FIRST(&bios);
+ TAILQ_REMOVE(&bios, bio, bio_queue);
+ bio->bio_error = ECONNABORTED;
+ bio->bio_flags |= BIO_ERROR;
+ bio->bio_resid = bio->bio_bcount;
+ biodone(bio);
+ }
+}
+
+void
nvmf_destroy_ns(struct nvmf_namespace *ns)
{
TAILQ_HEAD(, bio) bios;
struct bio *bio;
+ if (ns->cdev->si_drv2 != NULL)
+ destroy_dev(ns->cdev->si_drv2);
destroy_dev(ns->cdev);
/*
@@ -451,7 +492,8 @@ nvmf_destroy_ns(struct nvmf_namespace *ns)
}
bool
-nvmf_update_ns(struct nvmf_namespace *ns, struct nvme_namespace_data *data)
+nvmf_update_ns(struct nvmf_namespace *ns,
+ const struct nvme_namespace_data *data)
{
uint8_t lbads, lbaf;
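
[Note: nvmf_shutdown_ns() above drains pending bios by splicing the whole queue onto a local list while holding the lock and then completing each entry unlocked. A standalone sketch of that splice-then-drain pattern with sys/queue.h and pthreads; the item type and names are invented for illustration.]

#include <sys/queue.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	int id;
	TAILQ_ENTRY(item) link;
};
TAILQ_HEAD(itemq, item);

static struct itemq pending = TAILQ_HEAD_INITIALIZER(pending);
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void
drain_pending(void)
{
	struct itemq local;
	struct item *it;

	/* Take everything in one O(1) splice while holding the lock. */
	TAILQ_INIT(&local);
	pthread_mutex_lock(&lock);
	TAILQ_CONCAT(&local, &pending, link);
	pthread_mutex_unlock(&lock);

	/* Complete each entry without the lock held. */
	while (!TAILQ_EMPTY(&local)) {
		it = TAILQ_FIRST(&local);
		TAILQ_REMOVE(&local, it, link);
		printf("failing item %d\n", it->id);
		free(it);
	}
}

int
main(void)
{
	for (int i = 0; i < 3; i++) {
		struct item *it = malloc(sizeof(*it));
		it->id = i;
		TAILQ_INSERT_TAIL(&pending, it, link);
	}
	drain_pending();
	return (0);
}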
diff --git a/sys/dev/nvmf/host/nvmf_qpair.c b/sys/dev/nvmf/host/nvmf_qpair.c
index 96cb5a8b0465..2f511cf0406d 100644
--- a/sys/dev/nvmf/host/nvmf_qpair.c
+++ b/sys/dev/nvmf/host/nvmf_qpair.c
@@ -10,6 +10,8 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/nv.h>
+#include <sys/sysctl.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
@@ -31,6 +33,7 @@ struct nvmf_host_qpair {
u_int num_commands;
uint16_t sqhd;
uint16_t sqtail;
+ uint64_t submitted;
struct mtx lock;
@@ -41,6 +44,7 @@ struct nvmf_host_qpair {
struct nvmf_host_command **active_commands;
char name[16];
+ struct sysctl_ctx_list sysctl_ctx;
};
struct nvmf_request *
@@ -112,8 +116,23 @@ nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
struct nvmf_softc *sc = qp->sc;
struct nvme_command *sqe;
struct nvmf_capsule *nc;
+ uint16_t new_sqtail;
int error;
+ mtx_assert(&qp->lock, MA_OWNED);
+
+ qp->submitted++;
+
+ /*
+ * Update flow control tracking. This is just a sanity check.
+ * Since num_commands == qsize - 1, there can never be too
+ * many commands in flight.
+ */
+ new_sqtail = (qp->sqtail + 1) % (qp->num_commands + 1);
+ KASSERT(new_sqtail != qp->sqhd, ("%s: qp %p is full", __func__, qp));
+ qp->sqtail = new_sqtail;
+ mtx_unlock(&qp->lock);
+
nc = cmd->req->nc;
sqe = nvmf_capsule_sqe(nc);
@@ -177,11 +196,23 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
return;
}
+ /* Update flow control tracking. */
+ mtx_lock(&qp->lock);
+ if (qp->sq_flow_control) {
+ if (nvmf_sqhd_valid(nc))
+ qp->sqhd = le16toh(cqe->sqhd);
+ } else {
+ /*
+ * If SQ FC is disabled, just advance the head for
+ * each response capsule received.
+ */
+ qp->sqhd = (qp->sqhd + 1) % (qp->num_commands + 1);
+ }
+
/*
* If the queue has been shutdown due to an error, silently
* drop the response.
*/
- mtx_lock(&qp->lock);
if (qp->qp == NULL) {
device_printf(sc->dev,
"received completion for CID %u on shutdown %s\n", cid,
@@ -212,7 +243,6 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
} else {
cmd->req = STAILQ_FIRST(&qp->pending_requests);
STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
- mtx_unlock(&qp->lock);
nvmf_dispatch_command(qp, cmd);
}
@@ -221,28 +251,61 @@ nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
nvmf_free_request(req);
}
+static void
+nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp,
+ bool admin, u_int qid)
+{
+ struct sysctl_ctx_list *ctx = &qp->sysctl_ctx;
+ struct sysctl_oid *oid;
+ struct sysctl_oid_list *list;
+ char name[8];
+
+ if (admin) {
+ oid = SYSCTL_ADD_NODE(ctx,
+ SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO,
+ "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
+ } else {
+ snprintf(name, sizeof(name), "%u", qid);
+ oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name,
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue");
+ }
+ list = SYSCTL_CHILDREN(oid);
+
+ SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD,
+ NULL, qp->num_commands + 1, "Number of entries in queue");
+ SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd,
+ 0, "Current head of submission queue (as observed by driver)");
+ SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail,
+ 0, "Current tail of submission queue (as observed by driver)");
+ SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD,
+ &qp->submitted, 0, "Number of commands submitted");
+}
+
struct nvmf_host_qpair *
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
- struct nvmf_handoff_qpair_params *handoff, const char *name)
+ const nvlist_t *nvl, const char *name, u_int qid)
{
struct nvmf_host_command *cmd, *ncmd;
struct nvmf_host_qpair *qp;
u_int i;
+ bool admin;
+ admin = nvlist_get_bool(nvl, "admin");
qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
qp->sc = sc;
- qp->sq_flow_control = handoff->sq_flow_control;
- qp->sqhd = handoff->sqhd;
- qp->sqtail = handoff->sqtail;
+ qp->sq_flow_control = nvlist_get_bool(nvl, "sq_flow_control");
+ qp->sqhd = nvlist_get_number(nvl, "sqhd");
+ qp->sqtail = nvlist_get_number(nvl, "sqtail");
strlcpy(qp->name, name, sizeof(qp->name));
mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);
+ (void)sysctl_ctx_init(&qp->sysctl_ctx);
/*
* Allocate a spare command slot for each pending AER command
* on the admin queue.
*/
- qp->num_commands = handoff->qsize - 1;
- if (handoff->admin)
+ qp->num_commands = nvlist_get_number(nvl, "qsize") - 1;
+ if (admin)
qp->num_commands += sc->num_aer;
qp->active_commands = malloc(sizeof(*qp->active_commands) *
@@ -255,9 +318,10 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
}
STAILQ_INIT(&qp->pending_requests);
- qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error,
- qp, nvmf_receive_capsule, qp);
+ qp->qp = nvmf_allocate_qpair(trtype, false, nvl, nvmf_qp_error, qp,
+ nvmf_receive_capsule, qp);
if (qp->qp == NULL) {
+ (void)sysctl_ctx_free(&qp->sysctl_ctx);
TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
TAILQ_REMOVE(&qp->free_commands, cmd, link);
free(cmd, M_NVMF);
@@ -268,6 +332,8 @@ nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
return (NULL);
}
+ nvmf_sysctls_qp(sc, qp, admin, qid);
+
return (qp);
}
@@ -339,6 +405,7 @@ nvmf_destroy_qp(struct nvmf_host_qpair *qp)
struct nvmf_host_command *cmd, *ncmd;
nvmf_shutdown_qp(qp);
+ (void)sysctl_ctx_free(&qp->sysctl_ctx);
TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
TAILQ_REMOVE(&qp->free_commands, cmd, link);
@@ -381,6 +448,5 @@ nvmf_submit_request(struct nvmf_request *req)
("%s: CID already busy", __func__));
qp->active_commands[cmd->cid] = cmd;
cmd->req = req;
- mtx_unlock(&qp->lock);
nvmf_dispatch_command(qp, cmd);
}
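
[Note: the flow-control tracking added above advances sqtail on submission and sqhd on completion, both modulo the queue size (num_commands + 1). A small sketch of the ring arithmetic, assuming the NVMe convention that a queue of qsize slots holds at most qsize - 1 in-flight commands; the constants are illustrative.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define	QSIZE	8	/* slots; at most QSIZE - 1 commands in flight */

int
main(void)
{
	uint16_t sqhd = 0, sqtail = 0;
	uint16_t new_sqtail;

	/* Submit QSIZE - 1 commands: the queue can never fill up. */
	for (int i = 0; i < QSIZE - 1; i++) {
		new_sqtail = (sqtail + 1) % QSIZE;
		assert(new_sqtail != sqhd);	/* would mean overflow */
		sqtail = new_sqtail;
	}
	printf("after submits: sqhd %u sqtail %u\n", sqhd, sqtail);

	/* Without SQ flow control, advance the head per completion. */
	for (int i = 0; i < QSIZE - 1; i++)
		sqhd = (sqhd + 1) % QSIZE;
	printf("after completions: sqhd %u sqtail %u\n", sqhd, sqtail);
	return (0);
}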
diff --git a/sys/dev/nvmf/host/nvmf_sim.c b/sys/dev/nvmf/host/nvmf_sim.c
index b097b04d64c3..de9e958d8afd 100644
--- a/sys/dev/nvmf/host/nvmf_sim.c
+++ b/sys/dev/nvmf/host/nvmf_sim.c
@@ -40,7 +40,13 @@ nvmf_ccb_done(union ccb *ccb)
return;
if (nvmf_cqe_aborted(&ccb->nvmeio.cpl)) {
- ccb->ccb_h.status = CAM_REQUEUE_REQ;
+ struct cam_sim *sim = xpt_path_sim(ccb->ccb_h.path);
+ struct nvmf_softc *sc = cam_sim_softc(sim);
+
+ if (nvmf_fail_disconnect || sc->sim_shutdown)
+ ccb->ccb_h.status = CAM_DEV_NOT_THERE;
+ else
+ ccb->ccb_h.status = CAM_REQUEUE_REQ;
xpt_done(ccb);
} else if (ccb->nvmeio.cpl.status != 0) {
ccb->ccb_h.status = CAM_NVME_STATUS_ERROR;
@@ -52,7 +58,7 @@ nvmf_ccb_done(union ccb *ccb)
xpt_done(ccb);
} else {
ccb->ccb_h.status = CAM_REQ_CMP;
- xpt_done_direct(ccb);
+ xpt_done(ccb);
}
}
@@ -106,7 +112,10 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
mtx_lock(&sc->sim_mtx);
if (sc->sim_disconnected) {
mtx_unlock(&sc->sim_mtx);
- nvmeio->ccb_h.status = CAM_REQUEUE_REQ;
+ if (nvmf_fail_disconnect || sc->sim_shutdown)
+ nvmeio->ccb_h.status = CAM_DEV_NOT_THERE;
+ else
+ nvmeio->ccb_h.status = CAM_REQUEUE_REQ;
xpt_done(ccb);
return;
}
@@ -116,8 +125,8 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
qp = sc->admin;
req = nvmf_allocate_request(qp, &nvmeio->cmd, nvmf_ccb_complete,
ccb, M_NOWAIT);
+ mtx_unlock(&sc->sim_mtx);
if (req == NULL) {
- mtx_unlock(&sc->sim_mtx);
nvmeio->ccb_h.status = CAM_RESRC_UNAVAIL;
xpt_done(ccb);
return;
@@ -141,7 +150,6 @@ nvmf_sim_io(struct nvmf_softc *sc, union ccb *ccb)
("%s: incoming CCB is not in-progress", __func__));
ccb->ccb_h.status |= CAM_SIM_QUEUED;
nvmf_submit_request(req);
- mtx_unlock(&sc->sim_mtx);
}
static void
@@ -183,7 +191,7 @@ nvmf_sim_action(struct cam_sim *sim, union ccb *ccb)
cpi->xport_specific.nvmf.nsid =
xpt_path_lun_id(ccb->ccb_h.path);
cpi->xport_specific.nvmf.trtype = sc->trtype;
- strncpy(cpi->xport_specific.nvmf.dev_name,
+ strlcpy(cpi->xport_specific.nvmf.dev_name,
device_get_nameunit(sc->dev),
sizeof(cpi->xport_specific.nvmf.dev_name));
cpi->maxio = sc->max_xfer_size;
@@ -320,6 +328,15 @@ nvmf_reconnect_sim(struct nvmf_softc *sc)
}
void
+nvmf_shutdown_sim(struct nvmf_softc *sc)
+{
+ mtx_lock(&sc->sim_mtx);
+ sc->sim_shutdown = true;
+ mtx_unlock(&sc->sim_mtx);
+ xpt_release_simq(sc->sim, 1);
+}
+
+void
nvmf_destroy_sim(struct nvmf_softc *sc)
{
xpt_async(AC_LOST_DEVICE, sc->path, NULL);
diff --git a/sys/dev/nvmf/host/nvmf_var.h b/sys/dev/nvmf/host/nvmf_var.h
index 64525851631e..606245b3969c 100644
--- a/sys/dev/nvmf/host/nvmf_var.h
+++ b/sys/dev/nvmf/host/nvmf_var.h
@@ -9,10 +9,13 @@
#define __NVMF_VAR_H__
#include <sys/_callout.h>
+#include <sys/_eventhandler.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+//#include <sys/_nv.h>
#include <sys/_sx.h>
#include <sys/_task.h>
+#include <sys/smp.h>
#include <sys/queue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf_transport.h>
@@ -21,15 +24,10 @@ struct nvmf_aer;
struct nvmf_capsule;
struct nvmf_host_qpair;
struct nvmf_namespace;
+struct sysctl_oid_list;
typedef void nvmf_request_complete_t(void *, const struct nvme_completion *);
-struct nvmf_ivars {
- struct nvmf_handoff_host *hh;
- struct nvmf_handoff_qpair_params *io_params;
- struct nvme_controller_data *cdata;
-};
-
struct nvmf_softc {
device_t dev;
@@ -42,6 +40,7 @@ struct nvmf_softc {
struct cam_path *path;
struct mtx sim_mtx;
bool sim_disconnected;
+ bool sim_shutdown;
struct nvmf_namespace **ns;
@@ -76,12 +75,27 @@ struct nvmf_softc {
struct callout ka_rx_timer;
sbintime_t ka_rx_sbt;
+ struct timeout_task request_reconnect_task;
+ struct timeout_task controller_loss_task;
+ uint32_t reconnect_delay;
+ uint32_t controller_loss_timeout;
+
struct sx connection_lock;
struct task disconnect_task;
bool detaching;
+ bool controller_timedout;
u_int num_aer;
struct nvmf_aer *aer;
+
+ struct sysctl_oid_list *ioq_oid_list;
+
+ nvlist_t *rparams;
+
+ struct timespec last_disconnect;
+
+ eventhandler_tag shutdown_pre_sync_eh;
+ eventhandler_tag shutdown_post_sync_eh;
};
struct nvmf_request {
@@ -104,8 +118,8 @@ struct nvmf_completion_status {
static __inline struct nvmf_host_qpair *
nvmf_select_io_queue(struct nvmf_softc *sc)
{
- /* TODO: Support multiple queues? */
- return (sc->io[0]);
+ u_int idx = curcpu * sc->num_io_queues / (mp_maxid + 1);
+ return (sc->io[idx]);
}
static __inline bool
@@ -140,14 +154,17 @@ extern driver_t nvme_nvmf_driver;
MALLOC_DECLARE(M_NVMF);
#endif
+/* If true, I/O requests will fail while the host is disconnected. */
+extern bool nvmf_fail_disconnect;
+
/* nvmf.c */
void nvmf_complete(void *arg, const struct nvme_completion *cqe);
void nvmf_io_complete(void *arg, size_t xfered, int error);
void nvmf_wait_for_reply(struct nvmf_completion_status *status);
-int nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh);
-void nvmf_free_ivars(struct nvmf_ivars *ivars);
+int nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp);
void nvmf_disconnect(struct nvmf_softc *sc);
void nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid);
+void nvmf_rescan_all_ns(struct nvmf_softc *sc);
int nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
bool admin);
@@ -180,17 +197,17 @@ void nvmf_ctl_unload(void);
/* nvmf_ns.c */
struct nvmf_namespace *nvmf_init_ns(struct nvmf_softc *sc, uint32_t id,
- struct nvme_namespace_data *data);
+ const struct nvme_namespace_data *data);
void nvmf_disconnect_ns(struct nvmf_namespace *ns);
void nvmf_reconnect_ns(struct nvmf_namespace *ns);
+void nvmf_shutdown_ns(struct nvmf_namespace *ns);
void nvmf_destroy_ns(struct nvmf_namespace *ns);
bool nvmf_update_ns(struct nvmf_namespace *ns,
- struct nvme_namespace_data *data);
+ const struct nvme_namespace_data *data);
/* nvmf_qpair.c */
struct nvmf_host_qpair *nvmf_init_qp(struct nvmf_softc *sc,
- enum nvmf_trtype trtype, struct nvmf_handoff_qpair_params *handoff,
- const char *name);
+ enum nvmf_trtype trtype, const nvlist_t *nvl, const char *name, u_int qid);
void nvmf_shutdown_qp(struct nvmf_host_qpair *qp);
void nvmf_destroy_qp(struct nvmf_host_qpair *qp);
struct nvmf_request *nvmf_allocate_request(struct nvmf_host_qpair *qp,
@@ -202,6 +219,7 @@ void nvmf_free_request(struct nvmf_request *req);
int nvmf_init_sim(struct nvmf_softc *sc);
void nvmf_disconnect_sim(struct nvmf_softc *sc);
void nvmf_reconnect_sim(struct nvmf_softc *sc);
+void nvmf_shutdown_sim(struct nvmf_softc *sc);
void nvmf_destroy_sim(struct nvmf_softc *sc);
void nvmf_sim_rescan_ns(struct nvmf_softc *sc, uint32_t id);
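
[Note: nvmf_select_io_queue() above replaces the single-queue TODO with a mapping that scales curcpu into a queue index. A sketch of that mapping with mp_maxid + 1 standing in for the CPU count; the counts are invented for illustration.]

#include <stdio.h>

int
main(void)
{
	const unsigned ncpus = 8;		/* mp_maxid + 1 */
	const unsigned num_io_queues = 3;	/* sc->num_io_queues */

	/* Each CPU maps to a fixed queue; queues get contiguous CPU ranges. */
	for (unsigned cpu = 0; cpu < ncpus; cpu++)
		printf("cpu %u -> I/O queue %u\n", cpu,
		    cpu * num_io_queues / ncpus);
	return (0);
}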