aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincenzo Maffione <vmaffione@FreeBSD.org>2020-09-01 20:41:47 +0000
committerVincenzo Maffione <vmaffione@FreeBSD.org>2020-09-01 20:41:47 +0000
commit35d8a463e8097cffb0e4942c0e3f3b5412ea7953 (patch)
treead5869c5d1db073e296c4c630d2d777a9a591abb
parentf7eec6b204f10f9f62f78dfe3e9e0ba42a3649fa (diff)
Notes
-rw-r--r--sys/dev/bnxt/bnxt_txrx.c5
-rw-r--r--sys/dev/mgb/if_mgb.c7
-rw-r--r--sys/dev/mgb/if_mgb.h3
-rw-r--r--sys/dev/vmware/vmxnet3/if_vmx.c7
-rw-r--r--sys/net/iflib.c37
5 files changed, 34 insertions, 25 deletions
diff --git a/sys/dev/bnxt/bnxt_txrx.c b/sys/dev/bnxt/bnxt_txrx.c
index 7c56c95eed27..a9bc6a4df740 100644
--- a/sys/dev/bnxt/bnxt_txrx.c
+++ b/sys/dev/bnxt/bnxt_txrx.c
@@ -316,10 +316,9 @@ bnxt_isc_rxd_flush(void *sc, uint16_t rxqid, uint8_t flid,
if (softc->rx_cp_rings[rxqid].cons != UINT32_MAX)
BNXT_CP_IDX_DISABLE_DB(&softc->rx_cp_rings[rxqid].ring,
softc->rx_cp_rings[rxqid].cons);
- /* We're given the last filled RX buffer here, not the next empty one */
- BNXT_RX_DB(rx_ring, RING_NEXT(rx_ring, pidx));
+ BNXT_RX_DB(rx_ring, pidx);
/* TODO: Cumulus+ doesn't need the double doorbell */
- BNXT_RX_DB(rx_ring, RING_NEXT(rx_ring, pidx));
+ BNXT_RX_DB(rx_ring, pidx);
return;
}
diff --git a/sys/dev/mgb/if_mgb.c b/sys/dev/mgb/if_mgb.c
index 071bfd81fd71..29e234191589 100644
--- a/sys/dev/mgb/if_mgb.c
+++ b/sys/dev/mgb/if_mgb.c
@@ -1191,7 +1191,12 @@ mgb_isc_rxd_flush(void *xsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
sc = xsc;
KASSERT(rxqid == 0, ("tried to flush RX Channel %d.\n", rxqid));
- sc->rx_ring_data.last_tail = pidx;
+ /*
+ * According to the programming guide, last_tail must be set to
+ * the last valid RX descriptor, rather than to the one past that.
+ * Note that this is not true for the TX ring!
+ */
+ sc->rx_ring_data.last_tail = MGB_PREV_RING_IDX(pidx);
CSR_WRITE_REG(sc, MGB_DMA_RX_TAIL(rxqid), sc->rx_ring_data.last_tail);
return;
}
diff --git a/sys/dev/mgb/if_mgb.h b/sys/dev/mgb/if_mgb.h
index 84845655bf66..0db5d5e01f85 100644
--- a/sys/dev/mgb/if_mgb.h
+++ b/sys/dev/mgb/if_mgb.h
@@ -178,7 +178,8 @@
#define MGB_DESC_GET_FRAME_LEN(_desc) \
(((_desc)->ctl & MGB_DESC_FRAME_LEN_MASK) >> 16)
-#define MGB_NEXT_RING_IDX(_idx) (((_idx) + 1) % MGB_DMA_RING_SIZE)
+#define MGB_NEXT_RING_IDX(_idx) (((_idx) == MGB_DMA_RING_SIZE - 1) ? 0 : ((_idx_) + 1))
+#define MGB_PREV_RING_IDX(_idx) (((_idx) == 0) ? (MGB_DMA_RING_SIZE - 1) : ((_idx_) - 1))
#define MGB_RING_SPACE(_sc) \
((((_sc)->tx_ring_data.last_head - (_sc)->tx_ring_data.last_tail - 1) \
+ MGB_DMA_RING_SIZE ) % MGB_DMA_RING_SIZE )
diff --git a/sys/dev/vmware/vmxnet3/if_vmx.c b/sys/dev/vmware/vmxnet3/if_vmx.c
index ee1d76388577..5d9872d01737 100644
--- a/sys/dev/vmware/vmxnet3/if_vmx.c
+++ b/sys/dev/vmware/vmxnet3/if_vmx.c
@@ -1744,13 +1744,6 @@ vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
else
r = VMXNET3_BAR0_RXH2(rxqid);
- /*
- * pidx is the index of the last descriptor with a buffer the device
- * can use, and the device needs to be told which index is one past
- * that.
- */
- if (++pidx == rxr->vxrxr_ndesc)
- pidx = 0;
vmxnet3_write_bar0(sc, r, pidx);
}
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
index 8a1254f930d9..2901ad94a844 100644
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -1959,7 +1959,8 @@ _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
* @count: the number of new buffers to allocate
*
* (Re)populate an rxq free-buffer list with up to @count new packet buffers.
- * The caller must assure that @count does not exceed the queue's capacity.
+ * The caller must assure that @count does not exceed the queue's capacity
+ * minus one (since we always leave a descriptor unavailable).
*/
static uint8_t
iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
@@ -1974,6 +1975,8 @@ iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
int err, frag_idx, i, idx, n, pidx;
qidx_t credits;
+ MPASS(count <= fl->ifl_size - fl->ifl_credits - 1);
+
sd_m = fl->ifl_sds.ifsd_m;
sd_map = fl->ifl_sds.ifsd_map;
sd_cl = fl->ifl_sds.ifsd_cl;
@@ -2081,15 +2084,10 @@ iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
fl->ifl_credits = credits;
}
DBG_COUNTER_INC(rxd_flush);
- if (fl->ifl_pidx == 0)
- pidx = fl->ifl_size - 1;
- else
- pidx = fl->ifl_pidx - 1;
-
bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id,
- fl->ifl_id, pidx);
+ fl->ifl_id, fl->ifl_pidx);
if (__predict_true(bit_test(fl->ifl_rx_bitmap, frag_idx))) {
fl->ifl_fragidx = frag_idx + 1;
if (fl->ifl_fragidx == fl->ifl_size)
@@ -2105,7 +2103,17 @@ iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
static inline uint8_t
iflib_fl_refill_all(if_ctx_t ctx, iflib_fl_t fl)
{
- /* we avoid allowing pidx to catch up with cidx as it confuses ixl */
+ /*
+ * We leave an unused descriptor to avoid pidx to catch up with cidx.
+ * This is important as it confuses most NICs. For instance,
+ * Intel NICs have (per receive ring) RDH and RDT registers, where
+ * RDH points to the next receive descriptor to be used by the NIC,
+ * and RDT for the next receive descriptor to be published by the
+ * driver to the NIC (RDT - 1 is thus the last valid one).
+ * The condition RDH == RDT means no descriptors are available to
+ * the NIC, and thus it would be ambiguous if it also meant that
+ * all the descriptors are available to the NIC.
+ */
int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1;
#ifdef INVARIANTS
int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1;
@@ -2210,12 +2218,15 @@ iflib_fl_setup(iflib_fl_t fl)
fl->ifl_zone = m_getzone(fl->ifl_buf_size);
- /* avoid pre-allocating zillions of clusters to an idle card
- * potentially speeding up attach
+ /*
+ * Avoid pre-allocating zillions of clusters to an idle card
+ * potentially speeding up attach. In any case make sure
+ * to leave a descriptor unavailable. See the comment in
+ * iflib_fl_refill_all().
*/
- (void)iflib_fl_refill(ctx, fl, min(128, fl->ifl_size));
- MPASS(min(128, fl->ifl_size) == fl->ifl_credits);
- if (min(128, fl->ifl_size) != fl->ifl_credits)
+ MPASS(fl->ifl_size > 0);
+ (void)iflib_fl_refill(ctx, fl, min(128, fl->ifl_size - 1));
+ if (min(128, fl->ifl_size - 1) != fl->ifl_credits)
return (ENOBUFS);
/*
* handle failure