summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alexander Motin <mav@FreeBSD.org> 2020-12-13 02:19:18 +0000
committer Alexander Motin <mav@FreeBSD.org> 2020-12-13 02:19:18 +0000
commit b52bc40cc2c24c5a72438af95ef6c89168ed3504 (patch)
tree 31d89f3e669d792e38dca8de94f108d9a427ee46
parent b95930d369e6f0dd7d9e1d983e5d94290f14f109 (diff)
download src-test2-b52bc40cc2c24c5a72438af95ef6c89168ed3504.tar.gz
src-test2-b52bc40cc2c24c5a72438af95ef6c89168ed3504.zip
MFC r368132: Increase nvme(4) maximum transfer size from 1MB to 2MB.
With a 4KB page size, 2MB is the maximum we can address with a single page-sized PRP list. Going further would require chaining, which would add some more complexity. On the other hand, to reduce memory consumption, allocate the PRP memory respecting the maximum transfer size reported in the controller identify data. Many NVMe devices support much smaller values, starting from 128KB. To do that we have to change the initialization sequence to pull the data earlier, before setting up the I/O queue pairs. The admin queue pair is still allocated for the full MIN(maxphys, 2MB) size, but it is not a big deal, since there is only one such queue with only 16 trackers.
Notes
Notes: svn path=/stable/12/; revision=368602
-rw-r--r-- sys/dev/nvme/nvme.h 4
-rw-r--r-- sys/dev/nvme/nvme_ctrlr.c 11
-rw-r--r-- sys/dev/nvme/nvme_private.h 9
-rw-r--r-- sys/dev/nvme/nvme_qpair.c 11
4 files changed, 16 insertions, 19 deletions
diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h
index 13adf9d002fc..6d577c2f96f9 100644
--- a/sys/dev/nvme/nvme.h
+++ b/sys/dev/nvme/nvme.h
@@ -59,8 +59,8 @@
*/
#define NVME_GLOBAL_NAMESPACE_TAG ((uint32_t)0xFFFFFFFF)
-/* Cap nvme to 1MB transfers driver explodes with larger sizes */
-#define NVME_MAX_XFER_SIZE (MAXPHYS < (1<<20) ? MAXPHYS : (1<<20))
+/* Cap transfers by the maximum addressable by page-sized PRP (4KB -> 2MB). */
+#define NVME_MAX_XFER_SIZE MIN(MAXPHYS, (PAGE_SIZE/8*PAGE_SIZE))
/* Register field definitions */
#define NVME_CAP_LO_REG_MQES_SHIFT (0)
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 5bd6906ade1a..9e4268d9f4b5 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -1017,16 +1017,16 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
* the number of I/O queues supported, so cannot reset
* the adminq again here.
*/
- if (resetting)
+ if (resetting) {
nvme_qpair_reset(&ctrlr->adminq);
+ nvme_admin_qpair_enable(&ctrlr->adminq);
+ }
if (ctrlr->ioq != NULL) {
for (i = 0; i < ctrlr->num_io_queues; i++)
nvme_qpair_reset(&ctrlr->ioq[i]);
}
- nvme_admin_qpair_enable(&ctrlr->adminq);
-
/*
* If it was a reset on initialization command timeout, just
* return here, letting initialization code fail gracefully.
@@ -1034,7 +1034,7 @@ nvme_ctrlr_start(void *ctrlr_arg, bool resetting)
if (resetting && !ctrlr->is_initialized)
return;
- if (nvme_ctrlr_identify(ctrlr) != 0) {
+ if (resetting && nvme_ctrlr_identify(ctrlr) != 0) {
nvme_ctrlr_fail(ctrlr);
return;
}
@@ -1109,7 +1109,8 @@ fail:
nvme_qpair_reset(&ctrlr->adminq);
nvme_admin_qpair_enable(&ctrlr->adminq);
- if (nvme_ctrlr_set_num_qpairs(ctrlr) == 0 &&
+ if (nvme_ctrlr_identify(ctrlr) == 0 &&
+ nvme_ctrlr_set_num_qpairs(ctrlr) == 0 &&
nvme_ctrlr_construct_io_qpairs(ctrlr) == 0)
nvme_ctrlr_start(ctrlr, false);
else
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 747dfab925fd..85c1e857a1c7 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -56,15 +56,6 @@ MALLOC_DECLARE(M_NVME);
#define IDT32_PCI_ID 0x80d0111d /* 32 channel board */
#define IDT8_PCI_ID 0x80d2111d /* 8 channel board */
-/*
- * For commands requiring more than 2 PRP entries, one PRP will be
- * embedded in the command (prp1), and the rest of the PRP entries
- * will be in a list pointed to by the command (prp2). This means
- * that real max number of PRP entries we support is 32+1, which
- * results in a max xfer size of 32*PAGE_SIZE.
- */
-#define NVME_MAX_PRP_LIST_ENTRIES (NVME_MAX_XFER_SIZE / PAGE_SIZE)
-
#define NVME_ADMIN_TRACKERS (16)
#define NVME_ADMIN_ENTRIES (128)
/* min and max are defined in admin queue attributes section of spec */
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index a8f89733cf43..2bbada25042d 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -691,8 +691,8 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
/* Note: NVMe PRP format is restricted to 4-byte alignment. */
err = bus_dma_tag_create(bus_get_dma_tag(ctrlr->dev),
4, PAGE_SIZE, BUS_SPACE_MAXADDR,
- BUS_SPACE_MAXADDR, NULL, NULL, NVME_MAX_XFER_SIZE,
- (NVME_MAX_XFER_SIZE/PAGE_SIZE)+1, PAGE_SIZE, 0,
+ BUS_SPACE_MAXADDR, NULL, NULL, ctrlr->max_xfer_size,
+ btoc(ctrlr->max_xfer_size) + 1, PAGE_SIZE, 0,
NULL, NULL, &qpair->dma_tag_payload);
if (err != 0) {
nvme_printf(ctrlr, "payload tag create failed %d\n", err);
@@ -707,7 +707,12 @@ nvme_qpair_construct(struct nvme_qpair *qpair,
cmdsz = roundup2(cmdsz, PAGE_SIZE);
cplsz = qpair->num_entries * sizeof(struct nvme_completion);
cplsz = roundup2(cplsz, PAGE_SIZE);
- prpsz = sizeof(uint64_t) * NVME_MAX_PRP_LIST_ENTRIES;;
+ /*
+ * For commands requiring more than 2 PRP entries, one PRP will be
+ * embedded in the command (prp1), and the rest of the PRP entries
+ * will be in a list pointed to by the command (prp2).
+ */
+ prpsz = sizeof(uint64_t) * btoc(ctrlr->max_xfer_size);
prpmemsz = qpair->num_trackers * prpsz;
allocsz = cmdsz + cplsz + prpmemsz;