aboutsummaryrefslogtreecommitdiff
path: root/usr.sbin
diff options
context:
space:
mode:
Diffstat (limited to 'usr.sbin')
-rw-r--r--usr.sbin/Makefile1
-rw-r--r--usr.sbin/nvmfd/Makefile14
-rw-r--r--usr.sbin/nvmfd/controller.c244
-rw-r--r--usr.sbin/nvmfd/ctl.c139
-rw-r--r--usr.sbin/nvmfd/devices.c386
-rw-r--r--usr.sbin/nvmfd/discovery.c343
-rw-r--r--usr.sbin/nvmfd/internal.h65
-rw-r--r--usr.sbin/nvmfd/io.c677
-rw-r--r--usr.sbin/nvmfd/nvmfd.8126
-rw-r--r--usr.sbin/nvmfd/nvmfd.c260
10 files changed, 2255 insertions, 0 deletions
diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile
index c3a4cc42f721..0aac7062146d 100644
--- a/usr.sbin/Makefile
+++ b/usr.sbin/Makefile
@@ -56,6 +56,7 @@ SUBDIR= adduser \
nfsuserd \
nmtree \
nologin \
+ nvmfd \
pciconf \
periodic \
pnfsdscopymr \
diff --git a/usr.sbin/nvmfd/Makefile b/usr.sbin/nvmfd/Makefile
new file mode 100644
index 000000000000..dc3dcc5e3a5c
--- /dev/null
+++ b/usr.sbin/nvmfd/Makefile
@@ -0,0 +1,14 @@
+# Build description for nvmfd(8).
+.include <src.opts.mk>
+# gsb_crc32.c (listed in SRCS below) is not added to this directory by the
+# diffstat; presumably it is located through this extra search path.
+.PATH: ${SRCTOP}/sys/libkern
+
+PACKAGE=nvme-tools
+PROG= nvmfd
+SRCS= nvmfd.c controller.c ctl.c devices.c discovery.c gsb_crc32.c io.c
+# libnvmf.h is included by the sources as <libnvmf.h>.
+CFLAGS+= -I${SRCTOP}/lib/libnvmf
+MAN= nvmfd.8
+LIBADD+= nvmf pthread util nv
+
+.include <bsd.prog.mk>
+
+# Per-file flags: ctl.c includes <cam/ctl/...> headers, hence the extra
+# include path; gsb_crc32.c is built with the cast-align warning disabled.
+CFLAGS.ctl.c= -I${SRCTOP}/sys
+CWARNFLAGS.gsb_crc32.c= -Wno-cast-align
diff --git a/usr.sbin/nvmfd/controller.c b/usr.sbin/nvmfd/controller.c
new file mode 100644
index 000000000000..09baaea74ab4
--- /dev/null
+++ b/usr.sbin/nvmfd/controller.c
@@ -0,0 +1,244 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <libnvmf.h>
+#include <stdlib.h>
+
+#include "internal.h"
+
+/*
+ * State of one emulated NVMe controller as seen through its admin queue.
+ */
+struct controller {
+ /* Admin queue pair that command capsules are received from. */
+ struct nvmf_qpair *qp;
+
+ /* Register values returned by PROPERTY_GET. */
+ uint64_t cap;
+ uint32_t vs;
+ /* CC is only changed through update_cc(). */
+ uint32_t cc;
+ uint32_t csts;
+
+ /* Set by a shutdown request or a reset; blocks further CC updates. */
+ bool shutdown;
+
+ /* Copy of the data returned for IDENTIFY with CNS 1. */
+ struct nvme_controller_data cdata;
+};
+
+/*
+ * Apply a host write of 'new_cc' to the controller's CC property.
+ *
+ * Returns false (leaving the controller untouched) if the controller has
+ * already been shut down or reset, or if nvmf_validate_cc() rejects the
+ * new value.  Otherwise CC is stored, CSTS is updated to reflect any
+ * shutdown notification or enable transition, and true is returned.
+ */
+static bool
+update_cc(struct controller *c, uint32_t new_cc)
+{
+	uint32_t toggled;
+
+	if (c->shutdown || !nvmf_validate_cc(c->qp, c->cap, c->cc, new_cc))
+		return (false);
+
+	/* Bits that differ between the old and the new CC value. */
+	toggled = c->cc ^ new_cc;
+	c->cc = new_cc;
+
+	/* A newly written, non-zero SHN field is a shutdown request. */
+	if (NVMEV(NVME_CC_REG_SHN, toggled) != 0 &&
+	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
+		c->csts = (c->csts & ~NVMEM(NVME_CSTS_REG_SHST)) |
+		    NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
+		c->shutdown = true;
+	}
+
+	/* Nothing more to do unless the enable bit flipped. */
+	if (NVMEV(NVME_CC_REG_EN, toggled) == 0)
+		return (true);
+
+	if (NVMEV(NVME_CC_REG_EN, new_cc) != 0) {
+		c->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
+	} else {
+		/* EN going from 1 to 0 is a controller reset. */
+		c->csts = 0;
+		c->shutdown = true;
+	}
+	return (true);
+}
+
+/*
+ * Answer a Fabrics PROPERTY_GET command.
+ *
+ * CAP must be read as an 8-byte property; VS, CC and CSTS as 4-byte
+ * properties.  Any other offset, or a size mismatch, is answered with
+ * NVME_SC_INVALID_FIELD.
+ */
+static void
+handle_property_get(const struct controller *c, const struct nvmf_capsule *nc,
+    const struct nvmf_fabric_prop_get_cmd *pget)
+{
+	struct nvmf_fabric_prop_get_rsp rsp;
+	const uint32_t ofst = le32toh(pget->ofst);
+	const bool size4 = (pget->attrib.size == NVMF_PROP_SIZE_4);
+	const bool size8 = (pget->attrib.size == NVMF_PROP_SIZE_8);
+
+	nvmf_init_cqe(&rsp, nc, 0);
+
+	if (ofst == NVMF_PROP_CAP && size8) {
+		rsp.value.u64 = htole64(c->cap);
+	} else if (ofst == NVMF_PROP_VS && size4) {
+		rsp.value.u32.low = htole32(c->vs);
+	} else if (ofst == NVMF_PROP_CC && size4) {
+		rsp.value.u32.low = htole32(c->cc);
+	} else if (ofst == NVMF_PROP_CSTS && size4) {
+		rsp.value.u32.low = htole32(c->csts);
+	} else {
+		/* Unknown property or wrong access size. */
+		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+		return;
+	}
+
+	nvmf_send_response(nc, &rsp);
+}
+
+/*
+ * Answer a Fabrics PROPERTY_SET command.
+ *
+ * Only a 4-byte write to CC that update_cc() accepts succeeds; everything
+ * else is answered with NVME_SC_INVALID_FIELD.
+ */
+static void
+handle_property_set(struct controller *c, const struct nvmf_capsule *nc,
+    const struct nvmf_fabric_prop_set_cmd *pset)
+{
+	/* The checks short-circuit in the same order as a per-offset switch. */
+	if (le32toh(pset->ofst) != NVMF_PROP_CC ||
+	    pset->attrib.size != NVMF_PROP_SIZE_4 ||
+	    !update_cc(c, le32toh(pset->value.u32.low))) {
+		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+		return;
+	}
+
+	nvmf_send_success(nc);
+}
+
+/*
+ * Dispatch a Fabrics command received on an already-connected admin queue.
+ *
+ * PROPERTY_GET/PROPERTY_SET are serviced; CONNECT and DISCONNECT are
+ * rejected with a warning, as is any other Fabrics command type.
+ */
+static void
+handle_fabrics_command(struct controller *c,
+    const struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
+{
+	if (fc->fctype == NVMF_FABRIC_COMMAND_PROPERTY_GET) {
+		handle_property_get(c, nc,
+		    (const struct nvmf_fabric_prop_get_cmd *)fc);
+	} else if (fc->fctype == NVMF_FABRIC_COMMAND_PROPERTY_SET) {
+		handle_property_set(c, nc,
+		    (const struct nvmf_fabric_prop_set_cmd *)fc);
+	} else if (fc->fctype == NVMF_FABRIC_COMMAND_CONNECT) {
+		warnx("CONNECT command on connected queue");
+		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
+	} else if (fc->fctype == NVMF_FABRIC_COMMAND_DISCONNECT) {
+		warnx("DISCONNECT command on admin queue");
+		nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
+		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
+	} else {
+		warnx("Unsupported fabrics command %#x", fc->fctype);
+		nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
+	}
+}
+
+/*
+ * Answer an IDENTIFY command on the admin queue.
+ *
+ * Only CNS 1 is supported, which returns the controller's stored
+ * controller data; any other CNS is rejected with NVME_SC_INVALID_FIELD.
+ */
+static void
+handle_identify_command(const struct controller *c,
+    const struct nvmf_capsule *nc, const struct nvme_command *cmd)
+{
+	/* CNS is the low byte of CDW10. */
+	const uint8_t cns = le32toh(cmd->cdw10) & 0xFF;
+
+	if (cns != 1) {
+		warnx("Unsupported CNS %#x for IDENTIFY", cns);
+		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+		return;
+	}
+
+	nvmf_send_controller_data(nc, &c->cdata, sizeof(c->cdata));
+}
+
+/*
+ * Service admin-queue command capsules for controller 'c' until receiving
+ * a capsule fails (ECONNRESET is treated as a quiet disconnect).
+ *
+ * For each capsule:
+ *  - while the controller is disabled (CC.EN == 0) only Fabrics commands
+ *    are accepted; anything else fails with a command sequence error;
+ *  - 'cb' is offered the command first (with 'cb_arg'); if it returns true
+ *    the command is considered handled;
+ *  - otherwise Fabrics and IDENTIFY commands are handled here and all other
+ *    opcodes are rejected as invalid.
+ */
+void
+controller_handle_admin_commands(struct controller *c, handle_command *cb,
+    void *cb_arg)
+{
+	struct nvmf_qpair *qp = c->qp;
+	const struct nvme_command *cmd;
+	struct nvmf_capsule *nc;
+	int error;
+
+	for (;;) {
+		error = nvmf_controller_receive_capsule(qp, &nc);
+		if (error != 0) {
+			if (error != ECONNRESET)
+				warnc(error, "Failed to read command capsule");
+			break;
+		}
+
+		cmd = nvmf_capsule_sqe(nc);
+
+		/*
+		 * Only permit Fabrics commands while a controller is
+		 * disabled.
+		 */
+		if (NVMEV(NVME_CC_REG_EN, c->cc) == 0 &&
+		    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
+			/* warnx() appends the newline itself. */
+			warnx("Unsupported admin opcode %#x while disabled",
+			    cmd->opc);
+			nvmf_send_generic_error(nc,
+			    NVME_SC_COMMAND_SEQUENCE_ERROR);
+			nvmf_free_capsule(nc);
+			continue;
+		}
+
+		/* Give the caller-specific handler the first chance. */
+		if (cb(nc, cmd, cb_arg)) {
+			nvmf_free_capsule(nc);
+			continue;
+		}
+
+		switch (cmd->opc) {
+		case NVME_OPC_FABRICS_COMMANDS:
+			handle_fabrics_command(c, nc,
+			    (const struct nvmf_fabric_cmd *)cmd);
+			break;
+		case NVME_OPC_IDENTIFY:
+			handle_identify_command(c, nc, cmd);
+			break;
+		default:
+			warnx("Unsupported admin opcode %#x", cmd->opc);
+			nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
+			break;
+		}
+		nvmf_free_capsule(nc);
+	}
+}
+
+/*
+ * Allocate the state for a controller served over admin queue 'qp'.
+ *
+ * CAP is obtained from nvmf_controller_cap(), VS is taken from
+ * cdata->ver, and a private copy of '*cdata' is kept for IDENTIFY.
+ * The result is released with free_controller().  Exits the process
+ * if memory cannot be allocated.
+ */
+struct controller *
+init_controller(struct nvmf_qpair *qp,
+    const struct nvme_controller_data *cdata)
+{
+	struct controller *c;
+
+	c = calloc(1, sizeof(*c));
+	if (c == NULL)
+		err(1, "Failed to allocate controller");
+	c->qp = qp;
+	c->cap = nvmf_controller_cap(c->qp);
+	c->vs = cdata->ver;
+	c->cdata = *cdata;
+
+	return (c);
+}
+
+/*
+ * Release a controller allocated by init_controller().  The queue pair
+ * it refers to is not touched.
+ */
+void
+free_controller(struct controller *c)
+{
+	free(c);
+}
diff --git a/usr.sbin/nvmfd/ctl.c b/usr.sbin/nvmfd/ctl.c
new file mode 100644
index 000000000000..5f01ec8e5bc8
--- /dev/null
+++ b/usr.sbin/nvmfd/ctl.c
@@ -0,0 +1,139 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Chelsio Communications, Inc.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ */
+
+#include <sys/param.h>
+#include <sys/linker.h>
+#include <sys/nv.h>
+#include <sys/time.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libnvmf.h>
+#include <string.h>
+
+#include <cam/ctl/ctl.h>
+#include <cam/ctl/ctl_io.h>
+#include <cam/ctl/ctl_ioctl.h>
+
+#include "internal.h"
+
+static int ctl_fd = -1;
+static int ctl_port;
+
+/*
+ * Open the CTL control device once and cache the descriptor in ctl_fd.
+ *
+ * If the device node does not exist, the ctl kernel module is loaded and
+ * the open is retried.  Exits the process on failure.
+ */
+static void
+open_ctl(void)
+{
+	/* -1 means "not opened yet"; 0 is a valid descriptor. */
+	if (ctl_fd >= 0)
+		return;
+
+	ctl_fd = open(CTL_DEFAULT_DEV, O_RDWR);
+	if (ctl_fd == -1 && errno == ENOENT) {
+		if (kldload("ctl") == -1)
+			err(1, "Failed to load ctl.ko");
+		ctl_fd = open(CTL_DEFAULT_DEV, O_RDWR);
+	}
+	if (ctl_fd == -1)
+		err(1, "Failed to open %s", CTL_DEFAULT_DEV);
+}
+
+/*
+ * Create and enable a CTL "nvmf" port for subsystem 'subnqn'.
+ *
+ * The request carries the subsystem NQN, a fixed port ID of 1 and the
+ * maximum I/O queue size from 'params'.  The port ID returned by CTL is
+ * remembered in ctl_port and the port is then enabled.  Any failure
+ * terminates the process.
+ */
+void
+init_ctl_port(const char *subnqn, const struct nvmf_association_params *params)
+{
+	char result_buf[256];
+	struct ctl_port_entry entry;
+	struct ctl_req req;
+	nvlist_t *nvl;
+
+	open_ctl();
+
+	nvl = nvlist_create(0);
+
+	nvlist_add_string(nvl, "subnqn", subnqn);
+
+	/* XXX: Hardcoded in discovery.c */
+	nvlist_add_stringf(nvl, "portid", "%u", 1);
+
+	nvlist_add_stringf(nvl, "max_io_qsize", "%u", params->max_io_qsize);
+
+	memset(&req, 0, sizeof(req));
+	strlcpy(req.driver, "nvmf", sizeof(req.driver));
+	req.reqtype = CTL_REQ_CREATE;
+	req.args = nvlist_pack(nvl, &req.args_len);
+	if (req.args == NULL)
+		errx(1, "Failed to pack nvlist for CTL_PORT/CTL_REQ_CREATE");
+	req.result = result_buf;
+	req.result_len = sizeof(result_buf);
+	if (ioctl(ctl_fd, CTL_PORT_REQ, &req) != 0)
+		err(1, "ioctl(CTL_PORT/CTL_REQ_CREATE)");
+	if (req.status == CTL_LUN_ERROR)
+		errx(1, "Failed to create CTL port: %s", req.error_str);
+	if (req.status != CTL_LUN_OK)
+		errx(1, "Failed to create CTL port: %d", req.status);
+
+	/* The reply is a packed nvlist holding the new port's ID. */
+	nvlist_destroy(nvl);
+	nvl = nvlist_unpack(result_buf, req.result_len, 0);
+	if (nvl == NULL)
+		errx(1, "Failed to unpack nvlist from CTL_PORT/CTL_REQ_CREATE");
+
+	ctl_port = nvlist_get_number(nvl, "port_id");
+	nvlist_destroy(nvl);
+
+	memset(&entry, 0, sizeof(entry));
+	entry.targ_port = ctl_port;
+	/* A failed ioctl sets errno, so report it with err(), not errx(). */
+	if (ioctl(ctl_fd, CTL_ENABLE_PORT, &entry) != 0)
+		err(1, "ioctl(CTL_ENABLE_PORT)");
+}
+
+/*
+ * Ask CTL to remove the "nvmf" port that was created for 'subnqn'.
+ * Any failure terminates the process.
+ */
+void
+shutdown_ctl_port(const char *subnqn)
+{
+	nvlist_t *args;
+	struct ctl_req req;
+
+	open_ctl();
+
+	/* The only argument of the request is the subsystem NQN. */
+	args = nvlist_create(0);
+	nvlist_add_string(args, "subnqn", subnqn);
+
+	memset(&req, 0, sizeof(req));
+	req.reqtype = CTL_REQ_REMOVE;
+	strlcpy(req.driver, "nvmf", sizeof(req.driver));
+	req.args = nvlist_pack(args, &req.args_len);
+	if (req.args == NULL)
+		errx(1, "Failed to pack nvlist for CTL_PORT/CTL_REQ_REMOVE");
+
+	if (ioctl(ctl_fd, CTL_PORT_REQ, &req) != 0)
+		err(1, "ioctl(CTL_PORT/CTL_REQ_REMOVE)");
+
+	/* Distinguish an error with a message from any other non-OK status. */
+	if (req.status == CTL_LUN_ERROR)
+		errx(1, "Failed to remove CTL port: %s", req.error_str);
+	if (req.status != CTL_LUN_OK)
+		errx(1, "Failed to remove CTL port: %d", req.status);
+
+	nvlist_destroy(args);
+}
+
+/*
+ * Hand queue pair 'qp' over to CTL together with the CONNECT command and
+ * data that established it.  Failures are logged and otherwise ignored.
+ */
+void
+ctl_handoff_qpair(struct nvmf_qpair *qp,
+    const struct nvmf_fabric_connect_cmd *cmd,
+    const struct nvmf_fabric_connect_data *data)
+{
+	struct ctl_nvmf req;
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	req.type = CTL_NVMF_HANDOFF;
+
+	rc = nvmf_handoff_controller_qpair(qp, &req.data.handoff);
+	if (rc == 0) {
+		req.data.handoff.cmd = cmd;
+		req.data.handoff.data = data;
+		if (ioctl(ctl_fd, CTL_NVMF, &req) != 0)
+			warn("ioctl(CTL_NVMF/CTL_NVMF_HANDOFF)");
+	} else {
+		warnc(rc, "Failed to prepare qpair for handoff");
+	}
+}
diff --git a/usr.sbin/nvmfd/devices.c b/usr.sbin/nvmfd/devices.c
new file mode 100644
index 000000000000..fafc1077f207
--- /dev/null
+++ b/usr.sbin/nvmfd/devices.c
@@ -0,0 +1,386 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ */
+
+#include <sys/disk.h>
+#include <sys/gsb_crc32.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <net/ieee_oui.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libnvmf.h>
+#include <libutil.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "internal.h"
+
+#define RAMDISK_PREFIX "ramdisk:"
+
+/*
+ * One namespace's backing store.  Namespace ID N maps to devices[N - 1]
+ * (see lookup_device()).
+ */
+struct backing_device {
+ /* RAMDISK: calloc()ed memory; FILE: regular file; CDEV: character device. */
+ enum { RAMDISK, FILE, CDEV } type;
+ union {
+ int fd; /* FILE, CDEV */
+ void *mem; /* RAMDISK */
+ };
+ /* Bytes per logical block. */
+ u_int sector_size;
+ /* Number of logical blocks of sector_size bytes. */
+ uint64_t nlbas;
+ /* Identifier built by generate_eui64(); reported big-endian. */
+ uint64_t eui64;
+};
+
+static struct backing_device *devices;
+static u_int ndevices;
+
+/*
+ * Build an EUI-64 value: OUI_FREEBSD_NVME_LOW shifted left by 16 bits,
+ * OR'ed with the caller-supplied 32-bit 'low' part.
+ */
+static uint64_t
+generate_eui64(uint32_t low)
+{
+	uint64_t eui;
+
+	eui = OUI_FREEBSD_NVME_LOW << 16;
+	eui |= low;
+	return (eui);
+}
+
+/*
+ * Checksum 'len' bytes at 'buf' with calculate_crc32c(), using an
+ * all-ones seed and inverting the result.
+ */
+static uint32_t
+crc32(const void *buf, size_t len)
+{
+	uint32_t sum;
+
+	sum = calculate_crc32c(0xffffffff, buf, len);
+	return (sum ^ 0xffffffff);
+}
+
+/*
+ * Set up a RAM-backed device.
+ *
+ * 'config' is the size specification that follows the "ramdisk:" prefix,
+ * parsed with expand_number(); it must be a multiple of the fixed 512-byte
+ * sector size.  Each RAM disk gets a distinct EUI-64 derived from a running
+ * index.  Exits the process on an invalid size or allocation failure.
+ */
+static void
+init_ramdisk(const char *config, struct backing_device *dev)
+{
+	static uint32_t ramdisk_idx = 1;
+	uint64_t num;
+
+	dev->type = RAMDISK;
+	dev->sector_size = 512;
+	if (expand_number(config, &num))
+		errx(1, "Invalid ramdisk specification: %s", config);
+	if ((num % dev->sector_size) != 0)
+		errx(1, "Invalid ramdisk size %ju", (uintmax_t)num);
+	dev->mem = calloc(num, 1);
+	/* calloc(0, ...) may legitimately return NULL. */
+	if (dev->mem == NULL && num != 0)
+		err(1, "Failed to allocate %ju byte ramdisk", (uintmax_t)num);
+	dev->nlbas = num / dev->sector_size;
+	dev->eui64 = generate_eui64('M' << 24 | ramdisk_idx++);
+}
+
+/*
+ * Set up a device backed by the regular file open on 'fd'.
+ *
+ * The file size from 'sb' must be a multiple of the fixed 512-byte sector
+ * size.  The EUI-64 combines the tag 'F' with the low 24 bits of a CRC of
+ * the path string 'config'.
+ */
+static void
+init_filedevice(const char *config, int fd, struct stat *sb,
+    struct backing_device *dev)
+{
+	uint32_t path_hash;
+
+	dev->type = FILE;
+	dev->fd = fd;
+	dev->sector_size = 512;
+
+	if ((sb->st_size % dev->sector_size) != 0)
+		errx(1, "File size is not a multiple of 512: %s", config);
+	dev->nlbas = sb->st_size / dev->sector_size;
+
+	path_hash = crc32(config, strlen(config)) & 0xffffff;
+	dev->eui64 = generate_eui64('F' << 24 | path_hash);
+}
+
+/*
+ * Set up a device backed by the character (disk) device open on 'fd'.
+ *
+ * The sector size and media size are queried with DIOCGSECTORSIZE and
+ * DIOCGMEDIASIZE.  The EUI-64 combines the tag 'C' with the low 24 bits of
+ * a CRC of the path string 'config'.  Exits the process if either query
+ * fails or the device reports a zero sector size.
+ */
+static void
+init_chardevice(const char *config, int fd, struct backing_device *dev)
+{
+	off_t len;
+
+	dev->type = CDEV;
+	dev->fd = fd;
+	if (ioctl(fd, DIOCGSECTORSIZE, &dev->sector_size) != 0)
+		err(1, "Failed to fetch sector size for %s", config);
+	/* Guard the division below. */
+	if (dev->sector_size == 0)
+		errx(1, "Invalid sector size for %s", config);
+	if (ioctl(fd, DIOCGMEDIASIZE, &len) != 0)
+		err(1, "Failed to fetch media size for %s", config);
+	dev->nlbas = len / dev->sector_size;
+	dev->eui64 = generate_eui64('C' << 24 |
+	    (crc32(config, strlen(config)) & 0xffffff));
+}
+
+/*
+ * Initialize '*dev' from one command-line device specification.
+ *
+ * A specification starting with "ramdisk:" creates a RAM disk; otherwise
+ * it is a path that is opened read/write and must name a character device
+ * or a regular file.  Exits the process on any failure.
+ */
+static void
+init_device(const char *config, struct backing_device *dev)
+{
+	const size_t prefix_len = strlen(RAMDISK_PREFIX);
+	struct stat sb;
+	int fd;
+
+	/* Check for a RAM disk. */
+	if (strncmp(RAMDISK_PREFIX, config, prefix_len) == 0) {
+		init_ramdisk(config + prefix_len, dev);
+		return;
+	}
+
+	fd = open(config, O_RDWR);
+	if (fd == -1)
+		err(1, "Failed to open %s", config);
+	if (fstat(fd, &sb) == -1)
+		err(1, "fstat");
+
+	if (S_ISCHR(sb.st_mode))
+		init_chardevice(config, fd, dev);
+	else if (S_ISREG(sb.st_mode))
+		init_filedevice(config, fd, &sb, dev);
+	else
+		errx(1, "Invalid file type for %s", config);
+}
+
+/*
+ * Create one backing device per argument in av[0..ac-1].  The device
+ * created from av[i] becomes namespace ID i + 1.  Exits the process if
+ * the device table cannot be allocated or a device fails to initialize.
+ */
+void
+register_devices(int ac, char **av)
+{
+	ndevices = ac;
+	devices = calloc(ndevices, sizeof(*devices));
+	/* calloc(0, ...) may legitimately return NULL. */
+	if (devices == NULL && ndevices != 0)
+		err(1, "Failed to allocate device table");
+
+	for (int i = 0; i < ac; i++)
+		init_device(av[i], &devices[i]);
+}
+
+/*
+ * Number of backing devices set up by register_devices().
+ */
+u_int
+device_count(void)
+{
+	return (ndevices);
+}
+
+/*
+ * Map a namespace ID to its backing device.  Valid IDs are
+ * 1..ndevices; anything else yields NULL.
+ */
+static struct backing_device *
+lookup_device(uint32_t nsid)
+{
+	if (nsid >= 1 && nsid <= ndevices)
+		return (&devices[nsid - 1]);
+	return (NULL);
+}
+
+/*
+ * Fill '*nslist' with the active namespace IDs greater than 'nsid', in
+ * ascending order and little-endian encoded, until the list is full.
+ * Unused slots are left zero.
+ */
+void
+device_active_nslist(uint32_t nsid, struct nvme_ns_list *nslist)
+{
+	u_int count;
+
+	memset(nslist, 0, sizeof(*nslist));
+
+	/*
+	 * No namespace follows 'nsid'.  Checking before the increment also
+	 * keeps nsid + 1 from wrapping to 0 for an NSID of 0xffffffff.
+	 */
+	if (nsid >= ndevices)
+		return;
+
+	count = 0;
+	for (nsid++; nsid <= ndevices && count < nitems(nslist->ns); nsid++)
+		nslist->ns[count++] = htole32(nsid);
+}
+
+/*
+ * Write the identification descriptor list for namespace 'nsid' into the
+ * 4096-byte buffer 'buf'.  The list holds a single descriptor of type 1
+ * and length 8 carrying the device's EUI-64 in big-endian form.
+ *
+ * Returns false, leaving 'buf' untouched, if 'nsid' is not a valid
+ * namespace.
+ */
+bool
+device_identification_descriptor(uint32_t nsid, void *buf)
+{
+	struct backing_device *dev = lookup_device(nsid);
+	char *desc = buf;
+
+	if (dev == NULL)
+		return (false);
+
+	memset(buf, 0, 4096);
+
+	/* EUI64: type, length, two bytes left zero, then the value. */
+	desc[0] = 1;
+	desc[1] = 8;
+	be64enc(desc + 4, dev->eui64);
+	return (true);
+}
+
+/*
+ * Fill '*nsdata' with the namespace data for namespace 'nsid'.
+ *
+ * Size, capacity and utilization all equal the device's LBA count.  A
+ * single LBA format (index 0) is reported whose data size is log2 of the
+ * sector size.  Returns false, leaving '*nsdata' untouched, if 'nsid' is
+ * not a valid namespace.
+ */
+bool
+device_namespace_data(uint32_t nsid, struct nvme_namespace_data *nsdata)
+{
+	struct backing_device *dev = lookup_device(nsid);
+	int lbads;
+
+	if (dev == NULL)
+		return (false);
+
+	memset(nsdata, 0, sizeof(*nsdata));
+
+	nsdata->nsze = htole64(dev->nlbas);
+	nsdata->ncap = nsdata->nsze;
+	nsdata->nuse = nsdata->ncap;
+
+	/* One LBA format; the field is stored as the count minus one. */
+	nsdata->nlbaf = 1 - 1;
+	nsdata->flbas = NVMEF(NVME_NS_DATA_FLBAS_FORMAT, 0);
+	lbads = ffs(dev->sector_size) - 1;
+	nsdata->lbaf[0] = NVMEF(NVME_NS_DATA_LBAF_LBADS, lbads);
+
+	be64enc(nsdata->eui64, dev->eui64);
+	return (true);
+}
+
+/*
+ * Read exactly 'len' bytes from 'fd' at 'offset' into 'buf'.
+ *
+ * Short reads are continued and EINTR is retried.  Returns false on any
+ * other error or if end-of-file is hit before 'len' bytes were read.
+ */
+static bool
+read_buffer(int fd, void *buf, size_t len, off_t offset)
+{
+	char *cursor = buf;
+	size_t remaining = len;
+	ssize_t n;
+
+	while (remaining != 0) {
+		n = pread(fd, cursor, remaining, offset);
+		if (n > 0) {
+			cursor += n;
+			remaining -= n;
+			offset += n;
+			continue;
+		}
+		/* Interrupted before any data was transferred: retry. */
+		if (n == -1 && errno == EINTR)
+			continue;
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * Service a read of 'nlb' logical blocks starting at 'lba' from namespace
+ * 'nsid', sending the data (or an error status) in response to 'nc'.
+ *
+ * Errors reported to the host:
+ *  - invalid namespace;
+ *  - LBA range that wraps or extends past the end of the device;
+ *  - capsule data length that does not match the requested size;
+ *  - internal device error if the buffer cannot be allocated or the
+ *    backing file/device cannot be read.
+ */
+void
+device_read(uint32_t nsid, uint64_t lba, u_int nlb,
+    const struct nvmf_capsule *nc)
+{
+	struct backing_device *dev;
+	char *p, *src;
+	off_t off;
+	size_t len;
+
+	dev = lookup_device(nsid);
+	if (dev == NULL) {
+		nvmf_send_generic_error(nc,
+		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+		return;
+	}
+
+	if (lba + nlb < lba || lba + nlb > dev->nlbas) {
+		nvmf_send_generic_error(nc, NVME_SC_LBA_OUT_OF_RANGE);
+		return;
+	}
+
+	off = lba * dev->sector_size;
+	/* Widen before multiplying so the byte count cannot wrap in 32 bits. */
+	len = (size_t)nlb * dev->sector_size;
+	if (nvmf_capsule_data_len(nc) != len) {
+		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+		return;
+	}
+
+	if (dev->type == RAMDISK) {
+		/* Send straight from the RAM disk; no bounce buffer. */
+		p = NULL;
+		src = (char *)dev->mem + off;
+	} else {
+		p = malloc(len);
+		/* malloc(0) may legitimately return NULL. */
+		if ((p == NULL && len != 0) ||
+		    !read_buffer(dev->fd, p, len, off)) {
+			free(p);
+			nvmf_send_generic_error(nc,
+			    NVME_SC_INTERNAL_DEVICE_ERROR);
+			return;
+		}
+		src = p;
+	}
+
+	nvmf_send_controller_data(nc, src, len);
+	free(p);
+}
+
+/*
+ * Write exactly 'len' bytes from 'buf' to 'fd' at 'offset'.
+ *
+ * Short writes are continued and EINTR is retried.  Returns false on any
+ * other error or if pwrite() reports that nothing was written.
+ */
+static bool
+write_buffer(int fd, const void *buf, size_t len, off_t offset)
+{
+	const char *cursor = buf;
+	size_t remaining = len;
+	ssize_t n;
+
+	while (remaining != 0) {
+		n = pwrite(fd, cursor, remaining, offset);
+		if (n > 0) {
+			cursor += n;
+			remaining -= n;
+			offset += n;
+			continue;
+		}
+		/* Interrupted before any data was transferred: retry. */
+		if (n == -1 && errno == EINTR)
+			continue;
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * Service a write of 'nlb' logical blocks starting at 'lba' to namespace
+ * 'nsid', receiving the data for capsule 'nc' and sending a completion.
+ *
+ * Errors reported to the host:
+ *  - invalid namespace;
+ *  - LBA range that wraps or extends past the end of the device;
+ *  - capsule data length that does not match the requested size;
+ *  - transient transport error if the data cannot be received;
+ *  - internal device error if the buffer cannot be allocated or the
+ *    backing file/device cannot be written.
+ */
+void
+device_write(uint32_t nsid, uint64_t lba, u_int nlb,
+    const struct nvmf_capsule *nc)
+{
+	struct backing_device *dev;
+	char *p, *dst;
+	off_t off;
+	size_t len;
+	int error;
+
+	dev = lookup_device(nsid);
+	if (dev == NULL) {
+		nvmf_send_generic_error(nc,
+		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+		return;
+	}
+
+	if (lba + nlb < lba || lba + nlb > dev->nlbas) {
+		nvmf_send_generic_error(nc, NVME_SC_LBA_OUT_OF_RANGE);
+		return;
+	}
+
+	off = lba * dev->sector_size;
+	/* Widen before multiplying so the byte count cannot wrap in 32 bits. */
+	len = (size_t)nlb * dev->sector_size;
+	if (nvmf_capsule_data_len(nc) != len) {
+		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+		return;
+	}
+
+	if (dev->type == RAMDISK) {
+		/* Receive straight into the RAM disk; no bounce buffer. */
+		p = NULL;
+		dst = (char *)dev->mem + off;
+	} else {
+		p = malloc(len);
+		/* malloc(0) may legitimately return NULL. */
+		if (p == NULL && len != 0) {
+			nvmf_send_generic_error(nc,
+			    NVME_SC_INTERNAL_DEVICE_ERROR);
+			return;
+		}
+		dst = p;
+	}
+
+	error = nvmf_receive_controller_data(nc, 0, dst, len);
+	if (error != 0) {
+		nvmf_send_generic_error(nc, NVME_SC_TRANSIENT_TRANSPORT_ERROR);
+		free(p);
+		return;
+	}
+
+	if (dev->type != RAMDISK) {
+		if (!write_buffer(dev->fd, p, len, off)) {
+			free(p);
+			nvmf_send_generic_error(nc,
+			    NVME_SC_INTERNAL_DEVICE_ERROR);
+			return;
+		}
+	}
+	free(p);
+	nvmf_send_success(nc);
+}
+
+/*
+ * Service a flush for namespace 'nsid' and complete capsule 'nc'.
+ *
+ * RAM disks need no work.  Files are flushed with fdatasync() and
+ * character devices with the DIOCGFLUSH ioctl; a failure of either is
+ * reported to the host as a media write fault.
+ */
+void
+device_flush(uint32_t nsid, const struct nvmf_capsule *nc)
+{
+	struct backing_device *dev = lookup_device(nsid);
+	bool flushed = true;
+
+	if (dev == NULL) {
+		nvmf_send_generic_error(nc,
+		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+		return;
+	}
+
+	if (dev->type == FILE)
+		flushed = (fdatasync(dev->fd) != -1);
+	else if (dev->type == CDEV)
+		flushed = (ioctl(dev->fd, DIOCGFLUSH) != -1);
+
+	if (!flushed) {
+		nvmf_send_error(nc, NVME_SCT_MEDIA_ERROR,
+		    NVME_SC_WRITE_FAULTS);
+		return;
+	}
+
+	nvmf_send_success(nc);
+}
diff --git a/usr.sbin/nvmfd/discovery.c b/usr.sbin/nvmfd/discovery.c
new file mode 100644
index 000000000000..985c77620a62
--- /dev/null
+++ b/usr.sbin/nvmfd/discovery.c
@@ -0,0 +1,343 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ */
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <assert.h>
+#include <err.h>
+#include <libnvmf.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "internal.h"
+
+/*
+ * One advertised I/O controller: its pre-built discovery log entry and
+ * whether its listening socket is bound to a wildcard address.
+ */
+struct io_controller_data {
+ struct nvme_discovery_log_entry entry;
+ /* True if the socket was bound to INADDR_ANY / in6addr_any. */
+ bool wildcard;
+};
+
+/*
+ * Per-connection discovery controller state.
+ */
+struct discovery_controller {
+ /* Built on first use by build_discovery_log_page(); NULL until then. */
+ struct nvme_discovery_log *discovery_log;
+ /* Size of *discovery_log in bytes, including its entries. */
+ size_t discovery_log_len;
+ /* Connected socket; its local address replaces wildcard addresses. */
+ int s;
+};
+
+/*
+ * Arguments handed to discovery_thread(), which frees them on exit.
+ */
+struct discovery_thread_arg {
+ struct controller *c;
+ struct nvmf_qpair *qp;
+ int s;
+};
+
+static struct io_controller_data *io_controllers;
+static struct nvmf_association *discovery_na;
+static u_int num_io_controllers;
+
+/*
+ * Fill '*entry' with a discovery log entry describing the I/O controller
+ * listening on socket 's' for subsystem 'subnqn'.
+ *
+ * The transport address and service ID are taken from the socket's local
+ * address (IPv4 or IPv6 only).  Returns true if that address is the
+ * wildcard address, in which case the caller must substitute a concrete
+ * address before handing the entry to a host.  Exits the process on
+ * failure.
+ */
+static bool
+init_discovery_log_entry(struct nvme_discovery_log_entry *entry, int s,
+    const char *subnqn)
+{
+	struct sockaddr_storage ss;
+	socklen_t len;
+	bool wildcard;
+
+	len = sizeof(ss);
+	if (getsockname(s, (struct sockaddr *)&ss, &len) == -1)
+		err(1, "getsockname");
+
+	memset(entry, 0, sizeof(*entry));
+	entry->trtype = NVMF_TRTYPE_TCP;
+	switch (ss.ss_family) {
+	case AF_INET:
+	{
+		struct sockaddr_in *sin;
+
+		sin = (struct sockaddr_in *)&ss;
+		entry->adrfam = NVMF_ADRFAM_IPV4;
+		/* The port is stored in network byte order. */
+		snprintf(entry->trsvcid, sizeof(entry->trsvcid), "%u",
+		    ntohs(sin->sin_port));
+		if (inet_ntop(AF_INET, &sin->sin_addr, entry->traddr,
+		    sizeof(entry->traddr)) == NULL)
+			err(1, "inet_ntop");
+		wildcard = (sin->sin_addr.s_addr == htonl(INADDR_ANY));
+		break;
+	}
+	case AF_INET6:
+	{
+		struct sockaddr_in6 *sin6;
+
+		sin6 = (struct sockaddr_in6 *)&ss;
+		entry->adrfam = NVMF_ADRFAM_IPV6;
+		snprintf(entry->trsvcid, sizeof(entry->trsvcid), "%u",
+		    ntohs(sin6->sin6_port));
+		if (inet_ntop(AF_INET6, &sin6->sin6_addr, entry->traddr,
+		    sizeof(entry->traddr)) == NULL)
+			err(1, "inet_ntop");
+		wildcard = (memcmp(&sin6->sin6_addr, &in6addr_any,
+		    sizeof(in6addr_any)) == 0);
+		break;
+	}
+	default:
+		errx(1, "Unsupported address family %u", ss.ss_family);
+	}
+	entry->subtype = NVMF_SUBTYPE_NVME;
+	if (flow_control_disable)
+		entry->treq |= (1 << 2);
+	/* Multi-byte fields of the log entry are little-endian. */
+	entry->portid = htole16(1);
+	entry->cntlid = htole16(NVMF_CNTLID_DYNAMIC);
+	entry->aqsz = htole16(NVME_MAX_ADMIN_ENTRIES);
+	strlcpy(entry->subnqn, subnqn, sizeof(entry->subnqn));
+	return (wildcard);
+}
+
+/*
+ * Create the controller-side TCP association (discovery_na) that
+ * handle_discovery_socket() accepts discovery connections on.
+ * Exits the process if the association cannot be allocated.
+ */
+void
+init_discovery(void)
+{
+	struct nvmf_association_params params;
+
+	memset(&params, 0, sizeof(params));
+
+	/* Generic association parameters. */
+	params.sq_flow_control = false;
+	params.dynamic_controller_model = true;
+	params.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES;
+
+	/* TCP transport parameters; digests follow the global settings. */
+	params.tcp.pda = 0;
+	params.tcp.header_digests = header_digests;
+	params.tcp.data_digests = data_digests;
+	params.tcp.maxr2t = 1;
+	params.tcp.maxh2cdata = 256 * 1024;
+
+	discovery_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true,
+	    &params);
+	if (discovery_na == NULL)
+		err(1, "Failed to create discovery association");
+}
+
+/*
+ * Record an I/O controller listening on socket 's' for subsystem 'subnqn'
+ * so that it is advertised in discovery log pages.  Exits the process if
+ * the table of controllers cannot be grown.
+ */
+void
+discovery_add_io_controller(int s, const char *subnqn)
+{
+	struct io_controller_data *icd;
+
+	io_controllers = reallocf(io_controllers, (num_io_controllers + 1) *
+	    sizeof(*io_controllers));
+	/* reallocf() has already freed the old table on failure. */
+	if (io_controllers == NULL)
+		err(1, "Failed to allocate I/O controller entry");
+
+	icd = &io_controllers[num_io_controllers];
+	num_io_controllers++;
+
+	icd->wildcard = init_discovery_log_entry(&icd->entry, s, subnqn);
+}
+
+/*
+ * Build the discovery log page for connection 'dc' on first use.
+ *
+ * The page lists every registered I/O controller, except that
+ * controllers bound to a wildcard address are only listed when their
+ * address family matches that of the connection; for those, the
+ * wildcard address is replaced by the local address of the connection.
+ *
+ * On failure a warning is logged and dc->discovery_log stays NULL with
+ * dc->discovery_log_len unchanged.
+ */
+static void
+build_discovery_log_page(struct discovery_controller *dc)
+{
+	struct sockaddr_storage ss;
+	struct nvme_discovery_log *log;
+	socklen_t len;
+	size_t log_len;
+	char traddr[256];
+	u_int i, nentries;
+	uint8_t adrfam;
+
+	if (dc->discovery_log != NULL)
+		return;
+
+	len = sizeof(ss);
+	if (getsockname(dc->s, (struct sockaddr *)&ss, &len) == -1) {
+		warn("build_discovery_log_page: getsockname");
+		return;
+	}
+
+	memset(traddr, 0, sizeof(traddr));
+	switch (ss.ss_family) {
+	case AF_INET:
+	{
+		struct sockaddr_in *sin;
+
+		sin = (struct sockaddr_in *)&ss;
+		adrfam = NVMF_ADRFAM_IPV4;
+		if (inet_ntop(AF_INET, &sin->sin_addr, traddr,
+		    sizeof(traddr)) == NULL) {
+			warn("build_discovery_log_page: inet_ntop");
+			return;
+		}
+		break;
+	}
+	case AF_INET6:
+	{
+		struct sockaddr_in6 *sin6;
+
+		sin6 = (struct sockaddr_in6 *)&ss;
+		adrfam = NVMF_ADRFAM_IPV6;
+		if (inet_ntop(AF_INET6, &sin6->sin6_addr, traddr,
+		    sizeof(traddr)) == NULL) {
+			warn("build_discovery_log_page: inet_ntop");
+			return;
+		}
+		break;
+	}
+	default:
+		/* Do not continue with 'adrfam' uninitialized. */
+		warnx("build_discovery_log_page: unsupported address family %u",
+		    ss.ss_family);
+		return;
+	}
+
+	/* First pass: count the entries visible to this connection. */
+	nentries = 0;
+	for (i = 0; i < num_io_controllers; i++) {
+		if (io_controllers[i].wildcard &&
+		    io_controllers[i].entry.adrfam != adrfam)
+			continue;
+		nentries++;
+	}
+
+	log_len = sizeof(*log) +
+	    nentries * sizeof(struct nvme_discovery_log_entry);
+	log = calloc(log_len, 1);
+	if (log == NULL) {
+		warn("build_discovery_log_page: calloc");
+		return;
+	}
+	/* Multi-byte header fields are little-endian. */
+	log->numrec = htole64(nentries);
+	log->recfmt = 0;
+
+	/* Second pass: copy the entries, advancing the output slot. */
+	nentries = 0;
+	for (i = 0; i < num_io_controllers; i++) {
+		if (io_controllers[i].wildcard &&
+		    io_controllers[i].entry.adrfam != adrfam)
+			continue;
+
+		log->entries[nentries] = io_controllers[i].entry;
+		if (io_controllers[i].wildcard)
+			memcpy(log->entries[nentries].traddr,
+			    traddr, sizeof(traddr));
+		nentries++;
+	}
+
+	/* Publish the page and its length together, only on success. */
+	dc->discovery_log = log;
+	dc->discovery_log_len = log_len;
+}
+
+/*
+ * Answer a GET LOG PAGE command on a discovery controller.
+ *
+ * Only the discovery log is supported.  The requested window is clamped
+ * to the end of the log; an offset at or past the end, or any other log
+ * page ID, is answered with NVME_SC_INVALID_FIELD.
+ */
+static void
+handle_get_log_page_command(const struct nvmf_capsule *nc,
+    const struct nvme_command *cmd, struct discovery_controller *dc)
+{
+	uint64_t start;
+	uint32_t nbytes;
+
+	if (nvmf_get_log_page_id(cmd) != NVME_LOG_DISCOVERY) {
+		warnx("Unsupported log page %u for discovery controller",
+		    nvmf_get_log_page_id(cmd));
+		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+		return;
+	}
+
+	build_discovery_log_page(dc);
+
+	start = nvmf_get_log_page_offset(cmd);
+	if (start >= dc->discovery_log_len) {
+		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+		return;
+	}
+
+	/* Never send more than what remains after 'start'. */
+	nbytes = nvmf_get_log_page_length(cmd);
+	if (nbytes > dc->discovery_log_len - start)
+		nbytes = dc->discovery_log_len - start;
+
+	nvmf_send_controller_data(nc, (char *)dc->discovery_log + start,
+	    nbytes);
+}
+
+/*
+ * Admin command callback for discovery controllers ('arg' is the
+ * struct discovery_controller).  Handles GET LOG PAGE and returns true;
+ * returns false for every other opcode so the generic admin handling
+ * deals with it.
+ */
+static bool
+discovery_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd,
+    void *arg)
+{
+	if (cmd->opc != NVME_OPC_GET_LOG_PAGE)
+		return (false);
+
+	handle_get_log_page_command(nc, cmd, arg);
+	return (true);
+}
+
+/*
+ * Thread body for one discovery connection ('arg' is a
+ * struct discovery_thread_arg owned by this thread).
+ *
+ * Runs detached, services admin commands until the connection ends and
+ * then releases the log page, controller, queue pair, socket and the
+ * argument structure itself.
+ */
+static void *
+discovery_thread(void *arg)
+{
+	struct discovery_thread_arg *dta = arg;
+	struct discovery_controller dc = {
+		.discovery_log = NULL,
+		.discovery_log_len = 0,
+		.s = dta->s,
+	};
+
+	pthread_detach(pthread_self());
+
+	controller_handle_admin_commands(dta->c, discovery_command, &dc);
+
+	/* Tear down in the order: log page, controller, qpair, socket. */
+	free(dc.discovery_log);
+	free_controller(dta->c);
+	nvmf_free_qpair(dta->qp);
+	close(dta->s);
+	free(dta);
+	return (NULL);
+}
+
+/*
+ * Accept a discovery connection on the connected socket 's'.
+ *
+ * The CONNECT exchange is completed on the discovery association, the
+ * SubNQN is checked against the well-known discovery NQN, and a detached
+ * thread is started to serve the admin queue.  On success that thread
+ * owns the queue pair, the controller and the socket.  On any failure a
+ * warning is logged and everything, including 's', is released here.
+ */
+void
+handle_discovery_socket(int s)
+{
+	struct nvmf_fabric_connect_data data;
+	struct nvme_controller_data cdata;
+	struct nvmf_qpair_params qparams;
+	struct discovery_thread_arg *dta;
+	struct nvmf_capsule *nc;
+	struct nvmf_qpair *qp;
+	pthread_t thr;
+	int error;
+
+	memset(&qparams, 0, sizeof(qparams));
+	qparams.tcp.fd = s;
+
+	nc = NULL;
+	qp = nvmf_accept(discovery_na, &qparams, &nc, &data);
+	if (qp == NULL) {
+		warnx("Failed to create discovery qpair: %s",
+		    nvmf_association_error(discovery_na));
+		goto error;
+	}
+
+	if (strcmp(data.subnqn, NVMF_DISCOVERY_NQN) != 0) {
+		/* Not an errno failure, so use warnx() rather than warn(). */
+		warnx("Discovery qpair with invalid SubNQN: %.*s",
+		    (int)sizeof(data.subnqn), data.subnqn);
+		nvmf_connect_invalid_parameters(nc, true,
+		    offsetof(struct nvmf_fabric_connect_data, subnqn));
+		goto error;
+	}
+
+	/* Just use a controller ID of 1 for all discovery controllers. */
+	error = nvmf_finish_accept(nc, 1);
+	if (error != 0) {
+		warnc(error, "Failed to send CONNECT response");
+		goto error;
+	}
+
+	nvmf_init_discovery_controller_data(qp, &cdata);
+
+	dta = malloc(sizeof(*dta));
+	if (dta == NULL) {
+		warn("Failed to allocate discovery thread state");
+		goto error;
+	}
+	dta->qp = qp;
+	dta->s = s;
+	dta->c = init_controller(qp, &cdata);
+
+	error = pthread_create(&thr, NULL, discovery_thread, dta);
+	if (error != 0) {
+		warnc(error, "Failed to create discovery thread");
+		free_controller(dta->c);
+		free(dta);
+		goto error;
+	}
+
+	/* The thread now owns qp, the controller and the socket. */
+	nvmf_free_capsule(nc);
+	return;
+
+error:
+	if (nc != NULL)
+		nvmf_free_capsule(nc);
+	if (qp != NULL)
+		nvmf_free_qpair(qp);
+	close(s);
+}
diff --git a/usr.sbin/nvmfd/internal.h b/usr.sbin/nvmfd/internal.h
new file mode 100644
index 000000000000..5ddbc1cf89f0
--- /dev/null
+++ b/usr.sbin/nvmfd/internal.h
@@ -0,0 +1,65 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ */
+
+#ifndef __INTERNAL_H__
+#define __INTERNAL_H__
+
+/*
+ * Declarations shared between the nvmfd source files.
+ */
+
+#include <sys/types.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/*
+ * Forward declarations for every structure that is only used through a
+ * pointer below, so the prototypes do not depend on include order.
+ */
+struct controller;
+struct nvme_command;
+struct nvme_controller_data;
+struct nvme_namespace_data;
+struct nvme_ns_list;
+struct nvmf_association_params;
+struct nvmf_capsule;
+struct nvmf_fabric_connect_cmd;
+struct nvmf_fabric_connect_data;
+struct nvmf_qpair;
+
+/*
+ * Callback given the first chance to handle an admin command.  Returns
+ * true if it handled the command, false to fall back to the generic
+ * handling in controller_handle_admin_commands().
+ */
+typedef bool handle_command(const struct nvmf_capsule *,
+    const struct nvme_command *, void *);
+
+/* Global settings defined outside this header. */
+extern bool data_digests;
+extern bool header_digests;
+extern bool flow_control_disable;
+extern bool kernel_io;
+
+/* controller.c */
+void	controller_handle_admin_commands(struct controller *c,
+    handle_command *cb, void *cb_arg);
+struct controller *init_controller(struct nvmf_qpair *qp,
+    const struct nvme_controller_data *cdata);
+void	free_controller(struct controller *c);
+
+/* discovery.c */
+void	init_discovery(void);
+void	handle_discovery_socket(int s);
+void	discovery_add_io_controller(int s, const char *subnqn);
+
+/* io.c */
+void	init_io(const char *subnqn);
+void	handle_io_socket(int s);
+void	shutdown_io(void);
+
+/* devices.c */
+void	register_devices(int ac, char **av);
+u_int	device_count(void);
+void	device_active_nslist(uint32_t nsid, struct nvme_ns_list *nslist);
+bool	device_identification_descriptor(uint32_t nsid, void *buf);
+bool	device_namespace_data(uint32_t nsid, struct nvme_namespace_data *nsdata);
+void	device_read(uint32_t nsid, uint64_t lba, u_int nlb,
+    const struct nvmf_capsule *nc);
+void	device_write(uint32_t nsid, uint64_t lba, u_int nlb,
+    const struct nvmf_capsule *nc);
+void	device_flush(uint32_t nsid, const struct nvmf_capsule *nc);
+
+/* ctl.c */
+void	init_ctl_port(const char *subnqn,
+    const struct nvmf_association_params *params);
+void	ctl_handoff_qpair(struct nvmf_qpair *qp,
+    const struct nvmf_fabric_connect_cmd *cmd,
+    const struct nvmf_fabric_connect_data *data);
+void	shutdown_ctl_port(const char *subnqn);
+
+#endif /* !__INTERNAL_H__ */
diff --git a/usr.sbin/nvmfd/io.c b/usr.sbin/nvmfd/io.c
new file mode 100644
index 000000000000..be845a8ed784
--- /dev/null
+++ b/usr.sbin/nvmfd/io.c
@@ -0,0 +1,677 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ */
+
+#include <sys/sysctl.h>
+#include <err.h>
+#include <errno.h>
+#include <libnvmf.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "internal.h"
+
+/*
+ * State of the single user mode I/O controller.  It is created when a host
+ * connects the admin queue and released when the admin queue handler
+ * returns.
+ */
+struct io_controller {
+ /* Generic controller state returned by init_controller(). */
+ struct controller *c;
+
+ /* Number of I/O queues granted via SET_FEATURES; 0 until then. */
+ u_int num_io_queues;
+ /* Number of I/O queue threads currently servicing a queue pair. */
+ u_int active_io_queues;
+ /* Per-queue state, num_io_queues entries indexed by qid - 1. */
+ struct nvmf_qpair **io_qpairs;
+ int *io_sockets;
+
+ /* Log pages returned for GET_LOG_PAGE. */
+ struct nvme_firmware_page fp;
+ struct nvme_health_information_page hip;
+ /*
+  * 512-byte units read/written that have not yet been folded into the
+  * health page's data unit counters (which count in steps of 1000).
+  */
+ uint16_t partial_dur;
+ uint16_t partial_duw;
+
+ /* Identity recorded from the admin CONNECT; checked on I/O CONNECT. */
+ uint16_t cntlid;
+ char hostid[16];
+ char hostnqn[NVME_NQN_FIELD_SIZE];
+};
+
+/* Association created by init_io() and used to accept I/O connections. */
+static struct nvmf_association *io_na;
+/* Broadcast when a controller's active_io_queues count drops to zero. */
+static pthread_cond_t io_cond;
+/* Held while examining or changing io_controller and its queue tables. */
+static pthread_mutex_t io_na_mutex;
+/* The one active I/O controller, or NULL when no host is connected. */
+static struct io_controller *io_controller;
+/* Subsystem NQN handed to init_io(). */
+static const char *nqn;
+/* Serial number generated from kern.hostid in init_io(). */
+static char serial[NVME_SERIAL_NUMBER_LENGTH];
+
+/*
+ * Prepare the shared state used by I/O controller connections: allocate the
+ * controller-side association, remember the subsystem NQN, derive a serial
+ * number from kern.hostid and initialise the synchronisation objects.  In
+ * kernel mode the same association parameters are also passed to
+ * init_ctl_port().  Any failure is fatal.
+ */
+void
+init_io(const char *subnqn)
+{
+	struct nvmf_association_params params;
+	u_long id;
+	size_t idlen;
+
+	nqn = subnqn;
+
+	memset(&params, 0, sizeof(params));
+	params.dynamic_controller_model = true;
+	params.sq_flow_control = !flow_control_disable;
+	params.max_io_qsize = NVMF_MAX_IO_ENTRIES;
+	params.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES;
+	params.tcp.maxh2cdata = 256 * 1024;
+	params.tcp.maxr2t = 1;
+	params.tcp.data_digests = data_digests;
+	params.tcp.header_digests = header_digests;
+	params.tcp.pda = 0;
+
+	io_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true, &params);
+	if (io_na == NULL)
+		err(1, "Failed to create I/O controller association");
+
+	/* Generate a serial number from the kern.hostid node. */
+	idlen = sizeof(id);
+	if (sysctlbyname("kern.hostid", &id, &idlen, NULL, 0) == -1)
+		err(1, "sysctl: kern.hostid");
+	nvmf_controller_serial(serial, sizeof(serial), id);
+
+	pthread_cond_init(&io_cond, NULL);
+	pthread_mutex_init(&io_na_mutex, NULL);
+
+	if (!kernel_io)
+		return;
+	init_ctl_port(subnqn, &params);
+}
+
+/*
+ * Undo the kernel mode part of init_io().  Nothing needs to be done when
+ * the I/O controller runs in user mode.
+ */
+void
+shutdown_io(void)
+{
+	if (!kernel_io)
+		return;
+
+	shutdown_ctl_port(nqn);
+}
+
+/*
+ * Handle a GET_LOG_PAGE admin command.
+ *
+ * The error log is reported as all zeroes; the health and firmware slot
+ * pages are served from the copies kept in the controller state.  Any
+ * other page, a misaligned offset or a transfer length that does not match
+ * the page is rejected with INVALID_FIELD.  A response is always sent on
+ * 'nc'.
+ */
+static void
+handle_get_log_page(struct io_controller *ioc, const struct nvmf_capsule *nc,
+    const struct nvme_command *cmd)
+{
+	uint64_t nbytes, offset;
+	uint32_t numd;
+	size_t len;
+	uint8_t lid;
+
+	lid = le32toh(cmd->cdw10) & 0xff;
+	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
+	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;
+
+	/* The log page offset must be dword aligned (bits 1:0 clear). */
+	if ((offset & 3) != 0)
+		goto error;
+
+	/*
+	 * NUMD is a 0's based dword count.  Do the arithmetic in 64 bits so
+	 * that a count of 0xffffffff cannot wrap around, and reject lengths
+	 * that do not fit in a size_t.
+	 */
+	nbytes = ((uint64_t)numd + 1) * 4;
+	len = (size_t)nbytes;
+	if (len != nbytes)
+		goto error;
+
+	switch (lid) {
+	case NVME_LOG_ERROR:
+	{
+		void *buf;
+
+		if (len % sizeof(struct nvme_error_information_entry) != 0)
+			goto error;
+
+		/* No errors are ever logged, so return empty entries. */
+		buf = calloc(1, len);
+		if (buf == NULL) {
+			warn("Failed to allocate error log page");
+			nvmf_send_generic_error(nc,
+			    NVME_SC_INTERNAL_DEVICE_ERROR);
+			return;
+		}
+		nvmf_send_controller_data(nc, buf, len);
+		free(buf);
+		return;
+	}
+	case NVME_LOG_HEALTH_INFORMATION:
+		if (len != sizeof(ioc->hip))
+			goto error;
+
+		nvmf_send_controller_data(nc, &ioc->hip, sizeof(ioc->hip));
+		return;
+	case NVME_LOG_FIRMWARE_SLOT:
+		if (len != sizeof(ioc->fp))
+			goto error;
+
+		nvmf_send_controller_data(nc, &ioc->fp, sizeof(ioc->fp));
+		return;
+	default:
+		/* warnx() supplies the trailing newline itself. */
+		warnx("Unsupported page %#x for GET_LOG_PAGE", lid);
+		goto error;
+	}
+
+error:
+	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+}
+
+/*
+ * Handle the IDENTIFY variants that are specific to an I/O controller.
+ * Returns true when the CNS value was recognised (a response, success or
+ * error, has then been sent) and false for every other CNS value.
+ */
+static bool
+handle_io_identify_command(const struct nvmf_capsule *nc,
+    const struct nvme_command *cmd)
+{
+	struct nvme_namespace_data nsdata;
+	struct nvme_ns_list nslist;
+	const uint32_t nsid = le32toh(cmd->nsid);
+	const uint8_t cns = le32toh(cmd->cdw10) & 0xFF;
+
+	if (cns == 0) {
+		/* Namespace data. */
+		if (device_namespace_data(nsid, &nsdata))
+			nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
+		else
+			nvmf_send_generic_error(nc,
+			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+		return (true);
+	}
+
+	if (cns == 2) {
+		/* Active namespace list. */
+		if (nsid >= 0xfffffffe) {
+			nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+		} else {
+			device_active_nslist(nsid, &nslist);
+			nvmf_send_controller_data(nc, &nslist, sizeof(nslist));
+		}
+		return (true);
+	}
+
+	if (cns == 3) {
+		/*
+		 * Namespace Identification Descriptor list.  nsdata is only
+		 * used as a scratch buffer for the descriptor here.
+		 */
+		if (device_identification_descriptor(nsid, &nsdata))
+			nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
+		else
+			nvmf_send_generic_error(nc,
+			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+		return (true);
+	}
+
+	return (false);
+}
+
+/*
+ * Handle a SET_FEATURES admin command.
+ *
+ * Only "Number of Queues" and "Asynchronous Event Configuration" are
+ * supported; every other feature identifier is rejected with
+ * INVALID_FIELD.  A response is always sent on 'nc'.
+ */
+static void
+handle_set_features(struct io_controller *ioc, const struct nvmf_capsule *nc,
+    const struct nvme_command *cmd)
+{
+	struct nvme_completion cqe;
+	uint8_t fid;
+
+	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
+	switch (fid) {
+	case NVME_FEAT_NUMBER_OF_QUEUES:
+	{
+		struct nvmf_qpair **qpairs;
+		uint32_t num_queues;
+		int *sockets;
+
+		/* The queue count may only be set once per controller. */
+		if (ioc->num_io_queues != 0) {
+			nvmf_send_generic_error(nc,
+			    NVME_SC_COMMAND_SEQUENCE_ERROR);
+			return;
+		}
+
+		num_queues = le32toh(cmd->cdw11) & 0xffff;
+
+		/* 5.12.1.7: 65535 is invalid. */
+		if (num_queues == 65535)
+			goto error;
+
+		/* Fabrics requires the same number of SQs and CQs. */
+		if (le32toh(cmd->cdw11) >> 16 != num_queues)
+			goto error;
+
+		/* Convert to 1's based */
+		num_queues++;
+
+		/*
+		 * Allocate both tables before publishing the queue count so
+		 * that a failed allocation cannot leave the controller with
+		 * a non-zero count and missing tables.
+		 */
+		qpairs = calloc(num_queues, sizeof(*qpairs));
+		sockets = calloc(num_queues, sizeof(*sockets));
+		if (qpairs == NULL || sockets == NULL) {
+			warn("Failed to allocate I/O queue tables");
+			free(qpairs);
+			free(sockets);
+			nvmf_send_generic_error(nc,
+			    NVME_SC_INTERNAL_DEVICE_ERROR);
+			return;
+		}
+
+		/* Lock to synchronize with handle_io_qpair. */
+		pthread_mutex_lock(&io_na_mutex);
+		ioc->num_io_queues = num_queues;
+		ioc->io_qpairs = qpairs;
+		ioc->io_sockets = sockets;
+		pthread_mutex_unlock(&io_na_mutex);
+
+		/* Echo the granted counts back in the completion. */
+		nvmf_init_cqe(&cqe, nc, 0);
+		cqe.cdw0 = cmd->cdw11;
+		nvmf_send_response(nc, &cqe);
+		return;
+	}
+	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
+	{
+		uint32_t aer_mask;
+
+		aer_mask = le32toh(cmd->cdw11);
+
+		/* Check for any reserved or unimplemented feature bits. */
+		if ((aer_mask & 0xffffc000) != 0)
+			goto error;
+
+		/* No AERs are generated by this daemon. */
+		nvmf_send_success(nc);
+		return;
+	}
+	default:
+		warnx("Unsupported feature ID %u for SET_FEATURES", fid);
+		goto error;
+	}
+
+error:
+	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
+}
+
+/*
+ * Admin command callback for the I/O controller ('arg' is the
+ * struct io_controller).  Returns true when the opcode was dealt with
+ * here and false when it was not recognised.
+ */
+static bool
+admin_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd,
+    void *arg)
+{
+	struct io_controller *ioc = arg;
+	bool handled = true;
+
+	switch (cmd->opc) {
+	case NVME_OPC_IDENTIFY:
+		handled = handle_io_identify_command(nc, cmd);
+		break;
+	case NVME_OPC_GET_LOG_PAGE:
+		handle_get_log_page(ioc, nc, cmd);
+		break;
+	case NVME_OPC_SET_FEATURES:
+		handle_set_features(ioc, nc, cmd);
+		break;
+	case NVME_OPC_KEEP_ALIVE:
+		nvmf_send_success(nc);
+		break;
+	case NVME_OPC_ASYNC_EVENT_REQUEST:
+		/* Ignore and never complete. */
+		break;
+	default:
+		handled = false;
+		break;
+	}
+
+	return (handled);
+}
+
+/*
+ * Service the admin queue of 'ioc' until the admin command loop returns,
+ * then tear the controller down: close the sockets of any connected I/O
+ * queues, wait for their threads to finish and release all controller
+ * state, including 'ioc' itself.
+ */
+static void
+handle_admin_qpair(struct io_controller *ioc)
+{
+	pthread_setname_np(pthread_self(), "admin queue");
+
+	controller_handle_admin_commands(ioc->c, admin_command, ioc);
+
+	pthread_mutex_lock(&io_na_mutex);
+	for (u_int i = 0; i < ioc->num_io_queues; i++) {
+		if (ioc->io_qpairs[i] == NULL || ioc->io_sockets[i] == -1)
+			continue;
+		close(ioc->io_sockets[i]);
+		ioc->io_sockets[i] = -1;
+	}
+
+	/* Wait for I/O threads to notice. */
+	while (ioc->active_io_queues > 0)
+		pthread_cond_wait(&io_cond, &io_na_mutex);
+
+	io_controller = NULL;
+	pthread_mutex_unlock(&io_na_mutex);
+
+	free_controller(ioc->c);
+
+	/*
+	 * The queue tables are allocated by SET_FEATURES and are still NULL
+	 * if the host never set the queue count; free(NULL) is a no-op.
+	 */
+	free(ioc->io_qpairs);
+	free(ioc->io_sockets);
+	free(ioc);
+}
+
+/*
+ * Handle a Fabrics command received on an established I/O queue.  Returns
+ * true only for a well-formed DISCONNECT, which tells the caller to stop
+ * servicing the queue; every other case sends an error and returns false.
+ */
+static bool
+handle_io_fabrics_command(const struct nvmf_capsule *nc,
+    const struct nvmf_fabric_cmd *fc)
+{
+	const struct nvmf_fabric_disconnect_cmd *dis;
+
+	if (fc->fctype == NVMF_FABRIC_COMMAND_DISCONNECT) {
+		dis = (const struct nvmf_fabric_disconnect_cmd *)fc;
+		if (dis->recfmt == htole16(0)) {
+			nvmf_send_success(nc);
+			return (true);
+		}
+		nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
+		    NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT);
+		return (false);
+	}
+
+	if (fc->fctype == NVMF_FABRIC_COMMAND_CONNECT) {
+		warnx("CONNECT command on connected queue");
+		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
+		return (false);
+	}
+
+	warnx("Unsupported fabrics command %#x", fc->fctype);
+	nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
+	return (false);
+}
+
+/*
+ * Add 'addend' to a 128-bit little-endian counter stored as two 64-bit
+ * words (pair[0] holds the low half, pair[1] the high half).
+ *
+ * Both halves are converted to host byte order before the arithmetic and
+ * back afterwards, so the carry into the high half is also correct on
+ * big-endian hosts.
+ */
+static void
+hip_add(uint64_t pair[2], uint64_t addend)
+{
+	uint64_t old, new;
+
+	old = le64toh(pair[0]);
+	new = old + addend;
+	pair[0] = htole64(new);
+
+	/* Unsigned wrap-around of the low half means a carry occurred. */
+	if (new < old)
+		pair[1] = htole64(le64toh(pair[1]) + 1);
+}
+
+/* Assemble the 64-bit starting LBA of an I/O command from CDW10/CDW11. */
+static uint64_t
+cmd_lba(const struct nvme_command *cmd)
+{
+	const uint64_t low = le32toh(cmd->cdw10);
+	const uint64_t high = le32toh(cmd->cdw11);
+
+	return (high << 32 | low);
+}
+
+/*
+ * Return the number of logical blocks addressed by an I/O command.  The
+ * low 16 bits of CDW12 hold a 0's based count, hence the + 1.
+ */
+static u_int
+cmd_nlb(const struct nvme_command *cmd)
+{
+	const uint32_t zero_based = le32toh(cmd->cdw12) & 0xffff;
+
+	return (zero_based + 1);
+}
+
+/*
+ * Execute a READ command and account for it in the health log page.
+ *
+ * The data unit counter advances once per 1000 units of 512 bytes; the
+ * remainder is carried over in partial_dur until enough has accumulated.
+ *
+ * NOTE(review): the health counters are updated here without holding
+ * io_na_mutex; if several I/O queue threads can run at once these updates
+ * look racy -- confirm.
+ */
+static void
+handle_read(struct io_controller *ioc, const struct nvmf_capsule *nc,
+    const struct nvme_command *cmd)
+{
+	size_t len;
+
+	len = nvmf_capsule_data_len(nc);
+	device_read(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
+	hip_add(ioc->hip.host_read_commands, 1);
+
+	len /= 512;
+	len += ioc->partial_dur;
+	/* >= so that exactly 1000 accumulated units are not dropped. */
+	if (len >= 1000)
+		hip_add(ioc->hip.data_units_read, len / 1000);
+	ioc->partial_dur = len % 1000;
+}
+
+/*
+ * Execute a WRITE command and account for it in the health log page.
+ *
+ * The data unit counter advances once per 1000 units of 512 bytes; the
+ * remainder is carried over in partial_duw until enough has accumulated.
+ *
+ * NOTE(review): the health counters are updated here without holding
+ * io_na_mutex; if several I/O queue threads can run at once these updates
+ * look racy -- confirm.
+ */
+static void
+handle_write(struct io_controller *ioc, const struct nvmf_capsule *nc,
+    const struct nvme_command *cmd)
+{
+	size_t len;
+
+	len = nvmf_capsule_data_len(nc);
+	device_write(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
+	hip_add(ioc->hip.host_write_commands, 1);
+
+	len /= 512;
+	len += ioc->partial_duw;
+	/* >= so that exactly 1000 accumulated units are not dropped. */
+	if (len >= 1000)
+		hip_add(ioc->hip.data_units_written, len / 1000);
+	ioc->partial_duw = len % 1000;
+}
+
+/* Pass a FLUSH command for the addressed namespace to the device layer. */
+static void
+handle_flush(const struct nvmf_capsule *nc, const struct nvme_command *cmd)
+{
+	const uint32_t nsid = le32toh(cmd->nsid);
+
+	device_flush(nsid, nc);
+}
+
+/*
+ * Receive and dispatch command capsules on an I/O queue pair until the
+ * host disconnects or receiving fails.  Returns true if the loop ended
+ * because of a valid DISCONNECT command and false if it ended because a
+ * capsule could not be received.
+ */
+static bool
+handle_io_commands(struct io_controller *ioc, struct nvmf_qpair *qp)
+{
+	const struct nvme_command *sqe;
+	struct nvmf_capsule *capsule;
+	bool done;
+	int rc;
+
+	for (;;) {
+		rc = nvmf_controller_receive_capsule(qp, &capsule);
+		if (rc != 0) {
+			/* A reset connection is not worth a warning. */
+			if (rc != ECONNRESET)
+				warnc(rc, "Failed to read command capsule");
+			return (false);
+		}
+
+		sqe = nvmf_capsule_sqe(capsule);
+		done = false;
+
+		switch (sqe->opc) {
+		case NVME_OPC_READ:
+			handle_read(ioc, capsule, sqe);
+			break;
+		case NVME_OPC_WRITE:
+			handle_write(ioc, capsule, sqe);
+			break;
+		case NVME_OPC_FLUSH:
+			/* The broadcast namespace ID is rejected. */
+			if (sqe->nsid != htole32(0xffffffff))
+				handle_flush(capsule, sqe);
+			else
+				nvmf_send_generic_error(capsule,
+				    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
+			break;
+		case NVME_OPC_FABRICS_COMMANDS:
+			done = handle_io_fabrics_command(capsule,
+			    (const struct nvmf_fabric_cmd *)sqe);
+			break;
+		default:
+			warnx("Unsupported NVM opcode %#x", sqe->opc);
+			nvmf_send_generic_error(capsule,
+			    NVME_SC_INVALID_OPCODE);
+			break;
+		}
+
+		nvmf_free_capsule(capsule);
+		if (done)
+			return (true);
+	}
+}
+
+/*
+ * Thread body for one I/O queue: name the thread, service commands until
+ * the queue stops, then update the controller's bookkeeping under
+ * io_na_mutex.  The queue slot is only cleared after a clean DISCONNECT;
+ * the socket is closed unless it has already been closed elsewhere.
+ */
+static void
+handle_io_qpair(struct io_controller *ioc, struct nvmf_qpair *qp, int qid)
+{
+	const int slot = qid - 1;
+	char thread_name[64];
+	bool clean;
+
+	snprintf(thread_name, sizeof(thread_name), "I/O queue %d", qid);
+	pthread_setname_np(pthread_self(), thread_name);
+
+	clean = handle_io_commands(ioc, qp);
+
+	pthread_mutex_lock(&io_na_mutex);
+	if (clean)
+		ioc->io_qpairs[slot] = NULL;
+	if (ioc->io_sockets[slot] != -1) {
+		close(ioc->io_sockets[slot]);
+		ioc->io_sockets[slot] = -1;
+	}
+	/* Wake the admin thread once the last I/O queue has gone away. */
+	if (--ioc->active_io_queues == 0)
+		pthread_cond_broadcast(&io_cond);
+	pthread_mutex_unlock(&io_na_mutex);
+}
+
+/*
+ * Complete a CONNECT for an admin queue and run the resulting controller.
+ *
+ * Only one I/O controller may exist at a time; further attempts are
+ * refused with CONTROLLER_BUSY.  On success this function does not return
+ * until the admin queue handler has finished.  The capsule 'nc' is freed
+ * and the socket 's' is closed on every path; the queue pair 'qp' is left
+ * to the caller.
+ */
+static void
+connect_admin_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
+    const struct nvmf_fabric_connect_data *data)
+{
+	struct nvme_controller_data cdata;
+	struct io_controller *ioc;
+	int error;
+
+	/* Can only have one active I/O controller at a time. */
+	pthread_mutex_lock(&io_na_mutex);
+	if (io_controller != NULL) {
+		pthread_mutex_unlock(&io_na_mutex);
+		nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
+		    NVMF_FABRIC_SC_CONTROLLER_BUSY);
+		goto error;
+	}
+
+	/*
+	 * Allocate the controller before accepting the connection so that
+	 * an allocation failure can still be reported to the host.
+	 */
+	ioc = calloc(1, sizeof(*ioc));
+	if (ioc == NULL) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warn("Failed to allocate I/O controller");
+		nvmf_send_generic_error(nc, NVME_SC_INTERNAL_DEVICE_ERROR);
+		goto error;
+	}
+	ioc->cntlid = 2;
+
+	error = nvmf_finish_accept(nc, ioc->cntlid);
+	if (error != 0) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnc(error, "Failed to send CONNECT response");
+		free(ioc);
+		goto error;
+	}
+
+	/* Remember who connected so I/O queue CONNECTs can be checked. */
+	memcpy(ioc->hostid, data->hostid, sizeof(ioc->hostid));
+	memcpy(ioc->hostnqn, data->hostnqn, sizeof(ioc->hostnqn));
+
+	nvmf_init_io_controller_data(qp, serial, nqn, device_count(),
+	    NVMF_IOCCSZ, &cdata);
+
+	/* Firmware slot 1 is active and carries the reported revision. */
+	ioc->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
+	memcpy(ioc->fp.revision[0], cdata.fr, sizeof(cdata.fr));
+
+	ioc->hip.power_cycles[0] = 1;
+
+	ioc->c = init_controller(qp, &cdata);
+
+	io_controller = ioc;
+	pthread_mutex_unlock(&io_na_mutex);
+
+	nvmf_free_capsule(nc);
+
+	/* Runs until the admin queue shuts down; frees ioc. */
+	handle_admin_qpair(ioc);
+	close(s);
+	return;
+
+error:
+	nvmf_free_capsule(nc);
+	close(s);
+}
+
+/*
+ * Complete a CONNECT for I/O queue 'qid' and service the queue.
+ *
+ * The request is refused unless an admin queue is connected, the host ID,
+ * controller ID and complete host NQN match those of the admin
+ * connection, the host has set the number of queues, and 'qid' names a
+ * slot that is in range and not in use.  On success this function does
+ * not return until the I/O queue has stopped (its handler closes the
+ * socket); on failure the socket 's' is closed here.  The capsule 'nc' is
+ * freed on every path and the queue pair 'qp' is left to the caller.
+ */
+static void
+connect_io_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
+    const struct nvmf_fabric_connect_data *data, uint16_t qid)
+{
+	struct io_controller *ioc;
+	int error;
+
+	pthread_mutex_lock(&io_na_mutex);
+	if (io_controller == NULL) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnx("Attempt to create I/O qpair without admin qpair");
+		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
+		goto error;
+	}
+
+	if (memcmp(io_controller->hostid, data->hostid,
+	    sizeof(data->hostid)) != 0) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnx("hostid mismatch for I/O qpair CONNECT");
+		nvmf_connect_invalid_parameters(nc, true,
+		    offsetof(struct nvmf_fabric_connect_data, hostid));
+		goto error;
+	}
+	if (le16toh(data->cntlid) != io_controller->cntlid) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnx("cntlid mismatch for I/O qpair CONNECT");
+		nvmf_connect_invalid_parameters(nc, true,
+		    offsetof(struct nvmf_fabric_connect_data, cntlid));
+		goto error;
+	}
+	/*
+	 * Compare the whole stored NQN field (the same length that was
+	 * copied at admin CONNECT time), not just a hostid-sized prefix.
+	 */
+	if (memcmp(io_controller->hostnqn, data->hostnqn,
+	    sizeof(io_controller->hostnqn)) != 0) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnx("host NQN mismatch for I/O qpair CONNECT");
+		nvmf_connect_invalid_parameters(nc, true,
+		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
+		goto error;
+	}
+
+	if (io_controller->num_io_queues == 0) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnx("Attempt to create I/O qpair without enabled queues");
+		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
+		goto error;
+	}
+	if (qid > io_controller->num_io_queues) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnx("Attempt to create invalid I/O qpair %u", qid);
+		nvmf_connect_invalid_parameters(nc, false,
+		    offsetof(struct nvmf_fabric_connect_cmd, qid));
+		goto error;
+	}
+	if (io_controller->io_qpairs[qid - 1] != NULL) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnx("Attempt to re-create I/O qpair %u", qid);
+		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
+		goto error;
+	}
+
+	error = nvmf_finish_accept(nc, io_controller->cntlid);
+	if (error != 0) {
+		pthread_mutex_unlock(&io_na_mutex);
+		warnc(error, "Failed to send CONNECT response");
+		goto error;
+	}
+
+	/* Claim the slot before dropping the lock. */
+	ioc = io_controller;
+	ioc->active_io_queues++;
+	ioc->io_qpairs[qid - 1] = qp;
+	ioc->io_sockets[qid - 1] = s;
+	pthread_mutex_unlock(&io_na_mutex);
+
+	nvmf_free_capsule(nc);
+
+	handle_io_qpair(ioc, qp, qid);
+	return;
+
+error:
+	nvmf_free_capsule(nc);
+	close(s);
+}
+
+/*
+ * Thread entry point for a newly accepted I/O controller connection.
+ * 'arg' carries the socket descriptor.  The thread detaches itself,
+ * performs the CONNECT exchange and then either hands the queue pair to
+ * the kernel (kernel mode) or services it as an admin or I/O queue.
+ * Always returns NULL.
+ */
+static void *
+io_socket_thread(void *arg)
+{
+	struct nvmf_fabric_connect_data data;
+	struct nvmf_qpair_params qparams;
+	const struct nvmf_fabric_connect_cmd *cmd;
+	struct nvmf_capsule *nc;
+	struct nvmf_qpair *qp;
+	int s;
+
+	pthread_detach(pthread_self());
+
+	s = (intptr_t)arg;
+	memset(&qparams, 0, sizeof(qparams));
+	qparams.tcp.fd = s;
+
+	nc = NULL;
+	qp = nvmf_accept(io_na, &qparams, &nc, &data);
+	if (qp == NULL) {
+		warnx("Failed to create I/O qpair: %s",
+		    nvmf_association_error(io_na));
+		goto error;
+	}
+
+	if (kernel_io) {
+		/*
+		 * After the handoff the local capsule, queue pair and
+		 * socket are released on the common cleanup path below.
+		 */
+		ctl_handoff_qpair(qp, nvmf_capsule_sqe(nc), &data);
+		goto error;
+	}
+
+	if (strcmp(data.subnqn, nqn) != 0) {
+		/*
+		 * This is a protocol validation failure, not a failed
+		 * system call, so errno must not be appended: warnx().
+		 */
+		warnx("I/O qpair with invalid SubNQN: %.*s",
+		    (int)sizeof(data.subnqn), data.subnqn);
+		nvmf_connect_invalid_parameters(nc, true,
+		    offsetof(struct nvmf_fabric_connect_data, subnqn));
+		goto error;
+	}
+
+	/* Is this an admin or I/O queue pair? */
+	cmd = nvmf_capsule_sqe(nc);
+	if (cmd->qid == 0)
+		connect_admin_qpair(s, qp, nc, &data);
+	else
+		connect_io_qpair(s, qp, nc, &data, le16toh(cmd->qid));
+	/* The connect helpers free nc and deal with the socket. */
+	nvmf_free_qpair(qp);
+	return (NULL);
+
+error:
+	if (nc != NULL)
+		nvmf_free_capsule(nc);
+	if (qp != NULL)
+		nvmf_free_qpair(qp);
+	close(s);
+	return (NULL);
+}
+
+/*
+ * Start a thread to service the accepted I/O controller socket 's'.  If
+ * the thread cannot be created the socket is closed.
+ */
+void
+handle_io_socket(int s)
+{
+	pthread_t tid;
+	int rc;
+
+	rc = pthread_create(&tid, NULL, io_socket_thread,
+	    (void *)(uintptr_t)s);
+	if (rc == 0)
+		return;
+
+	warnc(rc, "Failed to create I/O qpair thread");
+	close(s);
+}
diff --git a/usr.sbin/nvmfd/nvmfd.8 b/usr.sbin/nvmfd/nvmfd.8
new file mode 100644
index 000000000000..689ac6d4dda1
--- /dev/null
+++ b/usr.sbin/nvmfd/nvmfd.8
@@ -0,0 +1,126 @@
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2024 Chelsio Communications, Inc.
+.\"
+.Dd May 2, 2024
+.Dt NVMFD 8
+.Os
+.Sh NAME
+.Nm nvmfd
+.Nd "NVMeoF controller daemon"
+.Sh SYNOPSIS
+.Nm
+.Fl K
+.Op Fl dFGg
+.Op Fl P Ar port
+.Op Fl p Ar port
+.Op Fl t Ar transport
+.Op Fl n Ar subnqn
+.Nm
+.Op Fl dFGg
+.Op Fl P Ar port
+.Op Fl p Ar port
+.Op Fl t Ar transport
+.Op Fl n Ar subnqn
+.Ar device
+.Op Ar device ...
+.Sh DESCRIPTION
+.Nm
+accepts incoming NVMeoF connections for both I/O and discovery controllers.
+.Nm
+can either implement a single dynamic I/O controller in user mode or hand
+off incoming I/O controller connections to
+.Xr nvmft 4 .
+A dynamic discovery controller service is always provided in user mode.
+.Pp
+The following options are available:
+.Bl -tag -width "-t transport"
+.It Fl F
+Permit remote hosts to disable SQ flow control.
+.It Fl G
+Permit remote hosts to enable PDU data digests for the TCP transport.
+.It Fl g
+Permit remote hosts to enable PDU header digests for the TCP transport.
+.It Fl K
+Enable kernel mode which hands off incoming I/O controller connections to
+.Xr nvmft 4 .
+.It Fl P Ar port
+Use
+.Ar port
+as the listen TCP port for the discovery controller service.
+The default value is 8009.
+.It Fl d
+Enable debug mode.
+The daemon sends any errors to standard error and does not place
+itself in the background.
+.It Fl p Ar port
+Use
+.Ar port
+as the listen TCP port for the I/O controller service.
+By default an unused ephemeral port will be chosen.
+.It Fl n Ar subnqn
+The Subsystem NVMe Qualified Name for the I/O controller.
+If an explicit NQN is not given, a default value is generated from the
+current host's UUID obtained from the
+.Vt kern.hostuuid
+sysctl.
+.It Fl t Ar transport
+The transport type to use.
+The default transport is
+.Dq tcp .
+.It Ar device
+When implementing a user mode I/O controller,
+one or more
+.Ar device
+arguments must be specified.
+Each
+.Ar device
+describes the backing store for a namespace exported to remote hosts.
+Devices can be specified using one of the following syntaxes:
+.Bl -tag -width "ramdisk:size"
+.It Pa pathname
+File or disk device.
+.It ramdisk : Ns Ar size
+Allocate a memory disk with the given
+.Ar size .
+.Ar size
+may use any of the suffixes supported by
+.Xr expand_number 3 .
+.El
+.El
+.Sh FILES
+.Bl -tag -width "/var/run/nvmfd.pid" -compact
+.It Pa /var/run/nvmfd.pid
+The default location of the
+.Nm
+PID file.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh SEE ALSO
+.Xr ctl 4 ,
+.Xr nvmft 4 ,
+.Xr ctladm 8 ,
+.Xr ctld 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Fx 15.0 .
+.Sh AUTHORS
+The
+.Nm
+utility was developed by
+.An John Baldwin Aq Mt jhb@FreeBSD.org
+under sponsorship from Chelsio Communications, Inc.
+.Sh BUGS
+The discovery controller and kernel mode functionality of
+.Nm
+should be merged into
+.Xr ctld 8 .
+.Pp
+Additional parameters such as
+.Va MAXR2T ,
+.Va MAXH2CDATA ,
+and queue sizes should be configurable.
diff --git a/usr.sbin/nvmfd/nvmfd.c b/usr.sbin/nvmfd/nvmfd.c
new file mode 100644
index 000000000000..6fce21b07b74
--- /dev/null
+++ b/usr.sbin/nvmfd/nvmfd.c
@@ -0,0 +1,260 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023-2024 Chelsio Communications, Inc.
+ * Written by: John Baldwin <jhb@FreeBSD.org>
+ */
+
+#include <sys/param.h>
+#include <sys/event.h>
+#include <sys/linker.h>
+#include <sys/module.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <libnvmf.h>
+#include <libutil.h>
+#include <netdb.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "internal.h"
+
+/* Command line settings shared with the other source files. */
+bool data_digests = false;		/* -G */
+bool header_digests = false;		/* -g */
+bool flow_control_disable = false;	/* -F */
+bool kernel_io = false;			/* -K */
+
+/* Subsystem NQN for the I/O controller (-n, or generated in main()). */
+static const char *subnqn;
+
+/*
+ * Set from the signal handler to stop the connection loop.  An object
+ * written by a signal handler and read by the main flow should have type
+ * volatile sig_atomic_t.
+ */
+static volatile sig_atomic_t quit = 0;
+
+/*
+ * Print a usage summary to standard error and exit with status 1.  The
+ * option letters listed match the getopt() string used by main().
+ */
+static void
+usage(void)
+{
+	fprintf(stderr, "nvmfd -K [-dFGg] [-P port] [-p port] [-t transport] [-n subnqn]\n"
+	    "nvmfd [-dFGg] [-P port] [-p port] [-t transport] [-n subnqn]\n"
+	    "\tdevice [device [...]]\n"
+	    "\n"
+	    "Devices use one of the following syntaxes:\n"
+	    "\tpathname - file or disk device\n"
+	    "\tramdisk:size - memory disk of given size\n");
+	exit(1);
+}
+
+/* Signal handler: ask the connection loop to stop. */
+static void
+handle_sig(int sig)
+{
+	(void)sig;
+
+	quit = true;
+}
+
+/*
+ * Put socket 's' into the listening state and register it for read events
+ * on the kqueue 'kqfd'.  'udata' is stored with the event so the caller
+ * can later tell which service the socket belongs to.  Failures are fatal.
+ */
+static void
+register_listen_socket(int kqfd, int s, void *udata)
+{
+	struct kevent change;
+
+	if (listen(s, -1) != 0)
+		err(1, "listen");
+
+	EV_SET(&change, s, EVFILT_READ, EV_ADD, 0, 0, udata);
+	if (kevent(kqfd, &change, 1, NULL, 0, NULL) == -1)
+		err(1, "kevent: failed to add listen socket");
+}
+
+/*
+ * Create a listening TCP socket for each wildcard address returned for
+ * 'port' and register it with the kqueue.  Discovery sockets are tagged
+ * with 1 and I/O sockets with 2; I/O sockets are additionally announced to
+ * the discovery service.  Addresses for which socket() or bind() fails are
+ * skipped, but it is fatal if no socket could be created at all.
+ */
+static void
+create_passive_sockets(int kqfd, const char *port, bool discovery)
+{
+	struct addrinfo hints, *res, *cur;
+	void *tag;
+	bool any;
+	int fd, rc;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_protocol = IPPROTO_TCP;
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_flags = AI_PASSIVE;
+	rc = getaddrinfo(NULL, port, &hints, &res);
+	if (rc != 0)
+		errx(1, "%s", gai_strerror(rc));
+
+	tag = discovery ? (void *)1 : (void *)2;
+	any = false;
+	for (cur = res; cur != NULL; cur = cur->ai_next) {
+		fd = socket(cur->ai_family, cur->ai_socktype,
+		    cur->ai_protocol);
+		if (fd == -1)
+			continue;
+
+		if (bind(fd, cur->ai_addr, cur->ai_addrlen) != 0) {
+			close(fd);
+			continue;
+		}
+
+		register_listen_socket(kqfd, fd, tag);
+		if (!discovery)
+			discovery_add_io_controller(fd, subnqn);
+		any = true;
+	}
+
+	freeaddrinfo(res);
+	if (!any)
+		err(1, "Failed to create any listen sockets");
+}
+
+/*
+ * Main accept loop.  Installs the termination signal handlers, then waits
+ * on the kqueue for a listening socket to become readable, accepts the
+ * connection and passes it to the discovery or I/O service according to
+ * the tag stored with the event.  Returns once a signal has set 'quit'.
+ */
+static void
+handle_connections(int kqfd)
+{
+	static const int stop_signals[] = {
+		SIGHUP, SIGINT, SIGQUIT, SIGTERM
+	};
+	struct kevent ev;
+	uintptr_t tag;
+	size_t i;
+	int conn;
+
+	for (i = 0; i < sizeof(stop_signals) / sizeof(stop_signals[0]); i++)
+		signal(stop_signals[i], handle_sig);
+
+	while (!quit) {
+		if (kevent(kqfd, NULL, 0, &ev, 1, NULL) == -1) {
+			/* Interrupted by a signal: re-check 'quit'. */
+			if (errno != EINTR)
+				err(1, "kevent");
+			continue;
+		}
+
+		assert(ev.filter == EVFILT_READ);
+
+		conn = accept(ev.ident, NULL, NULL);
+		if (conn == -1) {
+			warn("accept");
+			continue;
+		}
+
+		tag = (uintptr_t)ev.udata;
+		if (tag == 1)
+			handle_discovery_socket(conn);
+		else if (tag == 2)
+			handle_io_socket(conn);
+		else
+			__builtin_unreachable();
+	}
+}
+
+/*
+ * Program entry point: parse the command line, set up the discovery and
+ * I/O services, optionally detach into the background, create the
+ * listening sockets and serve connections until a termination signal
+ * arrives.
+ */
+int
+main(int ac, char **av)
+{
+	static char nqn[NVMF_NQN_MAX_LEN];
+	struct pidfh *pfh = NULL;
+	/* 7.4.9.3 Default port for discovery */
+	const char *dport = "8009";
+	const char *ioport = "0";
+	const char *transport = "tcp";
+	bool daemonize = true;
+	pid_t pid;
+	int ch, error, kqfd;
+
+	subnqn = NULL;
+	while ((ch = getopt(ac, av, "dFgGKn:P:p:t:")) != -1) {
+		switch (ch) {
+		case 'd':
+			daemonize = false;
+			break;
+		case 'F':
+			flow_control_disable = true;
+			break;
+		case 'g':
+			header_digests = true;
+			break;
+		case 'G':
+			data_digests = true;
+			break;
+		case 'K':
+			kernel_io = true;
+			break;
+		case 'n':
+			subnqn = optarg;
+			break;
+		case 'p':
+			ioport = optarg;
+			break;
+		case 'P':
+			dport = optarg;
+			break;
+		case 't':
+			transport = optarg;
+			break;
+		default:
+			usage();
+		}
+	}
+
+	ac -= optind;
+	av += optind;
+
+	/* Kernel mode takes no devices; user mode needs at least one. */
+	if (kernel_io) {
+		if (ac > 0)
+			usage();
+		if (modfind("nvmft") == -1 && kldload("nvmft") == -1)
+			warn("couldn't load nvmft");
+	} else if (ac < 1) {
+		usage();
+	}
+
+	/* TCP is the only transport accepted here. */
+	if (strcasecmp(transport, "tcp") != 0)
+		errx(1, "Invalid transport %s", transport);
+
+	if (subnqn == NULL) {
+		error = nvmf_nqn_from_hostuuid(nqn);
+		if (error != 0)
+			errc(1, error, "Failed to generate NQN");
+		subnqn = nqn;
+	}
+
+	if (!kernel_io)
+		register_devices(ac, av);
+
+	init_discovery();
+	init_io(subnqn);
+
+	if (daemonize) {
+		pfh = pidfile_open(NULL, 0600, &pid);
+		if (pfh == NULL) {
+			if (errno == EEXIST)
+				errx(1, "Daemon already running, pid: %jd",
+				    (intmax_t)pid);
+			/* Any other failure is reported; we carry on. */
+			warn("Cannot open or create pidfile");
+		}
+
+		if (daemon(0, 0) != 0) {
+			pidfile_remove(pfh);
+			err(1, "Failed to fork into the background");
+		}
+
+		pidfile_write(pfh);
+	}
+
+	/* The kqueue is created only after any daemon() call above. */
+	kqfd = kqueue();
+	if (kqfd == -1) {
+		pidfile_remove(pfh);
+		err(1, "kqueue");
+	}
+
+	create_passive_sockets(kqfd, dport, true);
+	create_passive_sockets(kqfd, ioport, false);
+
+	handle_connections(kqfd);
+	shutdown_io();
+	if (pfh != NULL)
+		pidfile_remove(pfh);
+	return (0);
+}