Diffstat (limited to 'sys/dev/gve/gve_tx.c')
-rw-r--r-- | sys/dev/gve/gve_tx.c | 269
1 file changed, 211 insertions, 58 deletions
diff --git a/sys/dev/gve/gve_tx.c b/sys/dev/gve/gve_tx.c
index 1e62e1226be1..84e3a4c4eb9f 100644
--- a/sys/dev/gve/gve_tx.c
+++ b/sys/dev/gve/gve_tx.c
@@ -1,7 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
- * Copyright (c) 2023 Google LLC
+ * Copyright (c) 2023-2024 Google LLC
  *
  * Redistribution and use in source and binary forms, with or without modification,
  * are permitted provided that the following conditions are met:
@@ -30,6 +30,7 @@
  */
 #include "gve.h"
 #include "gve_adminq.h"
+#include "gve_dqo.h"
 
 #define GVE_GQ_TX_MIN_PKT_DESC_BYTES 182
 
@@ -48,61 +49,112 @@ gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_ring *tx)
 }
 
 static void
-gve_tx_free_ring(struct gve_priv *priv, int i)
+gve_tx_free_ring_gqi(struct gve_priv *priv, int i)
 {
         struct gve_tx_ring *tx = &priv->tx[i];
         struct gve_ring_com *com = &tx->com;
 
-        /* Safe to call even if never alloced */
-        gve_free_counters((counter_u64_t *)&tx->stats, NUM_TX_STATS);
-
-        if (tx->br != NULL) {
-                buf_ring_free(tx->br, M_DEVBUF);
-                tx->br = NULL;
+        if (tx->desc_ring != NULL) {
+                gve_dma_free_coherent(&tx->desc_ring_mem);
+                tx->desc_ring = NULL;
         }
 
-        if (mtx_initialized(&tx->ring_mtx))
-                mtx_destroy(&tx->ring_mtx);
-
         if (tx->info != NULL) {
                 free(tx->info, M_GVE);
                 tx->info = NULL;
         }
 
-        if (tx->desc_ring != NULL) {
-                gve_dma_free_coherent(&tx->desc_ring_mem);
-                tx->desc_ring = NULL;
+        if (com->qpl != NULL) {
+                gve_free_qpl(priv, com->qpl);
+                com->qpl = NULL;
         }
+}
+
+static void
+gve_tx_free_ring(struct gve_priv *priv, int i)
+{
+        struct gve_tx_ring *tx = &priv->tx[i];
+        struct gve_ring_com *com = &tx->com;
+
+        /* Safe to call even if never alloced */
+        gve_free_counters((counter_u64_t *)&tx->stats, NUM_TX_STATS);
+
+        if (mtx_initialized(&tx->ring_mtx))
+                mtx_destroy(&tx->ring_mtx);
 
         if (com->q_resources != NULL) {
                 gve_dma_free_coherent(&com->q_resources_mem);
                 com->q_resources = NULL;
         }
+
+        if (tx->br != NULL) {
+                buf_ring_free(tx->br, M_DEVBUF);
+                tx->br = NULL;
+        }
+
+        if (gve_is_gqi(priv))
+                gve_tx_free_ring_gqi(priv, i);
+        else
+                gve_tx_free_ring_dqo(priv, i);
 }
 
 static int
-gve_tx_alloc_ring(struct gve_priv *priv, int i)
+gve_tx_alloc_ring_gqi(struct gve_priv *priv, int i)
 {
         struct gve_tx_ring *tx = &priv->tx[i];
         struct gve_ring_com *com = &tx->com;
-        char mtx_name[16];
         int err;
 
-        com->priv = priv;
-        com->id = i;
+        err = gve_dma_alloc_coherent(priv,
+            sizeof(union gve_tx_desc) * priv->tx_desc_cnt,
+            CACHE_LINE_SIZE, &tx->desc_ring_mem);
+        if (err != 0) {
+                device_printf(priv->dev,
+                    "Failed to alloc desc ring for tx ring %d", i);
+                goto abort;
+        }
+        tx->desc_ring = tx->desc_ring_mem.cpu_addr;
 
-        com->qpl = &priv->qpls[i];
+        com->qpl = gve_alloc_qpl(priv, i, priv->tx_desc_cnt / GVE_QPL_DIVISOR,
+            /*single_kva=*/true);
         if (com->qpl == NULL) {
-                device_printf(priv->dev, "No QPL left for tx ring %d\n", i);
-                return (ENOMEM);
+                device_printf(priv->dev,
+                    "Failed to alloc QPL for tx ring %d\n", i);
+                err = ENOMEM;
+                goto abort;
         }
 
         err = gve_tx_fifo_init(priv, tx);
         if (err != 0)
                 goto abort;
 
-        tx->info = malloc(sizeof(struct gve_tx_buffer_state) * priv->tx_desc_cnt,
+        tx->info = malloc(
+            sizeof(struct gve_tx_buffer_state) * priv->tx_desc_cnt,
             M_GVE, M_WAITOK | M_ZERO);
+        return (0);
+
+abort:
+        gve_tx_free_ring_gqi(priv, i);
+        return (err);
+}
+
+static int
+gve_tx_alloc_ring(struct gve_priv *priv, int i)
+{
+        struct gve_tx_ring *tx = &priv->tx[i];
+        struct gve_ring_com *com = &tx->com;
+        char mtx_name[16];
+        int err;
+
+        com->priv = priv;
+        com->id = i;
+
+        if (gve_is_gqi(priv))
+                err = gve_tx_alloc_ring_gqi(priv, i);
+        else
+                err = gve_tx_alloc_ring_dqo(priv, i);
+        if (err != 0)
+                goto abort;
 
         sprintf(mtx_name, "gvetx%d", i);
         mtx_init(&tx->ring_mtx, mtx_name, NULL, MTX_DEF);
@@ -115,19 +167,13 @@ gve_tx_alloc_ring(struct gve_priv *priv, int i)
         err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
             PAGE_SIZE, &com->q_resources_mem);
         if (err != 0) {
-                device_printf(priv->dev, "Failed to alloc queue resources for tx ring %d", i);
+                device_printf(priv->dev,
+                    "Failed to alloc queue resources for tx ring %d", i);
                 goto abort;
         }
         com->q_resources = com->q_resources_mem.cpu_addr;
 
-        err = gve_dma_alloc_coherent(priv,
-            sizeof(union gve_tx_desc) * priv->tx_desc_cnt,
-            CACHE_LINE_SIZE, &tx->desc_ring_mem);
-        if (err != 0) {
-                device_printf(priv->dev, "Failed to alloc desc ring for tx ring %d", i);
-                goto abort;
-        }
-        tx->desc_ring = tx->desc_ring_mem.cpu_addr;
+        tx->last_kicked = 0;
 
         return (0);
 
@@ -137,39 +183,32 @@ abort:
 }
 
 int
-gve_alloc_tx_rings(struct gve_priv *priv)
+gve_alloc_tx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx)
 {
-        int err = 0;
         int i;
+        int err;
 
-        priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.num_queues,
-            M_GVE, M_WAITOK | M_ZERO);
+        KASSERT(priv->tx != NULL, ("priv->tx is NULL!"));
 
-        for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+        for (i = start_idx; i < stop_idx; i++) {
                 err = gve_tx_alloc_ring(priv, i);
                 if (err != 0)
                         goto free_rings;
-
         }
 
         return (0);
-
 free_rings:
-        while (i--)
-                gve_tx_free_ring(priv, i);
-        free(priv->tx, M_GVE);
+        gve_free_tx_rings(priv, start_idx, i);
         return (err);
 }
 
 void
-gve_free_tx_rings(struct gve_priv *priv)
+gve_free_tx_rings(struct gve_priv *priv, uint16_t start_idx, uint16_t stop_idx)
 {
         int i;
 
-        for (i = 0; i < priv->tx_cfg.num_queues; i++)
+        for (i = start_idx; i < stop_idx; i++)
                 gve_tx_free_ring(priv, i);
-
-        free(priv->tx, M_GVE);
 }
 
 static void
@@ -181,6 +220,7 @@ gve_tx_clear_desc_ring(struct gve_tx_ring *tx)
         for (i = 0; i < com->priv->tx_desc_cnt; i++) {
                 tx->desc_ring[i] = (union gve_tx_desc){};
                 tx->info[i] = (struct gve_tx_buffer_state){};
+                gve_invalidate_timestamp(&tx->info[i].enqueue_time_sec);
         }
 
         bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
@@ -209,7 +249,11 @@ gve_start_tx_ring(struct gve_priv *priv, int i)
         struct gve_tx_ring *tx = &priv->tx[i];
         struct gve_ring_com *com = &tx->com;
 
-        NET_TASK_INIT(&com->cleanup_task, 0, gve_tx_cleanup_tq, tx);
+        atomic_store_bool(&tx->stopped, false);
+        if (gve_is_gqi(priv))
+                NET_TASK_INIT(&com->cleanup_task, 0, gve_tx_cleanup_tq, tx);
+        else
+                NET_TASK_INIT(&com->cleanup_task, 0, gve_tx_cleanup_tq_dqo, tx);
         com->cleanup_tq = taskqueue_create_fast("gve tx", M_WAITOK,
             taskqueue_thread_enqueue, &com->cleanup_tq);
         taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET, "%s txq %d",
@@ -233,8 +277,12 @@ gve_create_tx_rings(struct gve_priv *priv)
         if (gve_get_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK))
                 return (0);
 
-        for (i = 0; i < priv->tx_cfg.num_queues; i++)
-                gve_clear_tx_ring(priv, i);
+        for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+                if (gve_is_gqi(priv))
+                        gve_clear_tx_ring(priv, i);
+                else
+                        gve_clear_tx_ring_dqo(priv, i);
+        }
 
         err = gve_adminq_create_tx_queues(priv, priv->tx_cfg.num_queues);
         if (err != 0)
@@ -300,6 +348,30 @@ gve_destroy_tx_rings(struct gve_priv *priv)
 }
 
 int
+gve_check_tx_timeout_gqi(struct gve_priv *priv, struct gve_tx_ring *tx)
+{
+        struct gve_tx_buffer_state *info;
+        uint32_t pkt_idx;
+        int num_timeouts;
+
+        num_timeouts = 0;
+
+        for (pkt_idx = 0; pkt_idx < priv->tx_desc_cnt; pkt_idx++) {
+                info = &tx->info[pkt_idx];
+
+                if (!gve_timestamp_valid(&info->enqueue_time_sec))
+                        continue;
+
+                if (__predict_false(
+                    gve_seconds_since(&info->enqueue_time_sec) >
+                    GVE_TX_TIMEOUT_PKT_SEC))
+                        num_timeouts += 1;
+        }
+
+        return (num_timeouts);
+}
+
+int
 gve_tx_intr(void *arg)
 {
         struct gve_tx_ring *tx = arg;
@@ -351,7 +423,10 @@ gve_tx_cleanup_tq(void *arg, int pending)
                 if (mbuf == NULL)
                         continue;
 
+                gve_invalidate_timestamp(&info->enqueue_time_sec);
+
                 info->mbuf = NULL;
+
                 counter_enter();
                 counter_u64_add_protected(tx->stats.tbytes, mbuf->m_pkthdr.len);
                 counter_u64_add_protected(tx->stats.tpackets, 1);
@@ -375,7 +450,7 @@ gve_tx_cleanup_tq(void *arg, int pending)
          * interrupt but they will still be handled by the enqueue below.
          * Completions born after the barrier WILL trigger an interrupt.
          */
-        mb();
+        atomic_thread_fence_seq_cst();
 
         nic_done = gve_tx_load_event_counter(priv, tx);
         todo = nic_done - tx->done;
@@ -383,6 +458,11 @@ gve_tx_cleanup_tq(void *arg, int pending)
                 gve_db_bar_write_4(priv, tx->com.irq_db_offset, GVE_IRQ_MASK);
                 taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
         }
+
+        if (atomic_load_bool(&tx->stopped) && space_freed) {
+                atomic_store_bool(&tx->stopped, false);
+                taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
+        }
 }
 
 static void
@@ -627,8 +707,7 @@ gve_xmit(struct gve_tx_ring *tx, struct mbuf *mbuf)
         bytes_required = gve_fifo_bytes_required(tx, first_seg_len, pkt_len);
         if (__predict_false(!gve_can_tx(tx, bytes_required))) {
                 counter_enter();
-                counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_device, 1);
-                counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
+                counter_u64_add_protected(tx->stats.tx_delayed_pkt_nospace_device, 1);
                 counter_exit();
                 return (ENOBUFS);
         }
@@ -636,6 +715,8 @@ gve_xmit(struct gve_tx_ring *tx, struct mbuf *mbuf)
         /* So that the cleanup taskqueue can free the mbuf eventually. */
         info->mbuf = mbuf;
 
+        gve_set_timestamp(&info->enqueue_time_sec);
+
         /*
          * We don't want to split the header, so if necessary, pad to the end
          * of the fifo and then put the header at the beginning of the fifo.
@@ -689,19 +770,86 @@ gve_xmit(struct gve_tx_ring *tx, struct mbuf *mbuf)
         return (0);
 }
 
+static int
+gve_xmit_mbuf(struct gve_tx_ring *tx,
+    struct mbuf **mbuf)
+{
+        if (gve_is_gqi(tx->com.priv))
+                return (gve_xmit(tx, *mbuf));
+
+        if (gve_is_qpl(tx->com.priv))
+                return (gve_xmit_dqo_qpl(tx, *mbuf));
+
+        /*
+         * gve_xmit_dqo might attempt to defrag the mbuf chain.
+         * The reference is passed in so that in the case of
+         * errors, the new mbuf chain is what's put back on the br.
+         */
+        return (gve_xmit_dqo(tx, mbuf));
+}
+
+/*
+ * Has the side-effect of stopping the xmit queue by setting tx->stopped
+ */
+static int
+gve_xmit_retry_enobuf_mbuf(struct gve_tx_ring *tx,
+    struct mbuf **mbuf)
+{
+        int err;
+
+        atomic_store_bool(&tx->stopped, true);
+
+        /*
+         * Room made in the queue BEFORE the barrier will be seen by the
+         * gve_xmit_mbuf retry below.
+         *
+         * If room is made in the queue AFTER the barrier, the cleanup tq
+         * iteration creating the room will either see a tx->stopped value
+         * of 0 or the 1 we just wrote:
+         *
+         * If it sees a 1, then it would enqueue the xmit tq. Enqueue
+         * implies a retry on the waiting pkt.
+         *
+         * If it sees a 0, then that implies a previous iteration overwrote
+         * our 1, and that iteration would enqueue the xmit tq. Enqueue
+         * implies a retry on the waiting pkt.
+         */
+        atomic_thread_fence_seq_cst();
+
+        err = gve_xmit_mbuf(tx, mbuf);
+        if (err == 0)
+                atomic_store_bool(&tx->stopped, false);
+
+        return (err);
+}
+
 static void
 gve_xmit_br(struct gve_tx_ring *tx)
 {
         struct gve_priv *priv = tx->com.priv;
         struct ifnet *ifp = priv->ifp;
         struct mbuf *mbuf;
+        int err;
 
         while ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0 &&
             (mbuf = drbr_peek(ifp, tx->br)) != NULL) {
+                err = gve_xmit_mbuf(tx, &mbuf);
 
-                if (__predict_false(gve_xmit(tx, mbuf) != 0)) {
-                        drbr_putback(ifp, tx->br, mbuf);
-                        taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
+                /*
+                 * We need to stop this taskqueue when we can't xmit the pkt due
+                 * to lack of space in the NIC ring (ENOBUFS). The retry exists
+                 * to guard against a TOCTTOU bug that could end up freezing the
+                 * queue forever.
+                 */
+                if (__predict_false(mbuf != NULL && err == ENOBUFS))
+                        err = gve_xmit_retry_enobuf_mbuf(tx, &mbuf);
+
+                if (__predict_false(err != 0 && mbuf != NULL)) {
+                        if (err == EINVAL) {
+                                drbr_advance(ifp, tx->br);
+                                m_freem(mbuf);
+                        } else
+                                drbr_putback(ifp, tx->br, mbuf);
                         break;
                 }
 
@@ -710,7 +858,12 @@ gve_xmit_br(struct gve_tx_ring *tx)
 
                 bus_dmamap_sync(tx->desc_ring_mem.tag, tx->desc_ring_mem.map,
                     BUS_DMASYNC_PREWRITE);
-                gve_db_bar_write_4(priv, tx->com.db_offset, tx->req);
+
+                if (gve_is_gqi(priv))
+                        gve_db_bar_write_4(priv, tx->com.db_offset, tx->req);
+                else
+                        gve_db_bar_dqo_write_4(priv, tx->com.db_offset,
+                            tx->dqo.desc_tail);
         }
 }
 
@@ -763,7 +916,8 @@ gve_xmit_ifp(if_t ifp, struct mbuf *mbuf)
         is_br_empty = drbr_empty(ifp, tx->br);
         err = drbr_enqueue(ifp, tx->br, mbuf);
         if (__predict_false(err != 0)) {
-                taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
+                if (!atomic_load_bool(&tx->stopped))
+                        taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
                 counter_enter();
                 counter_u64_add_protected(tx->stats.tx_dropped_pkt_nospace_bufring, 1);
                 counter_u64_add_protected(tx->stats.tx_dropped_pkt, 1);
@@ -778,9 +932,8 @@ gve_xmit_ifp(if_t ifp, struct mbuf *mbuf)
         if (is_br_empty && (GVE_RING_TRYLOCK(tx) != 0)) {
                 gve_xmit_br(tx);
                 GVE_RING_UNLOCK(tx);
-        } else {
+        } else if (!atomic_load_bool(&tx->stopped))
                 taskqueue_enqueue(tx->xmit_tq, &tx->xmit_task);
-        }
 
         return (0);
 }
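
Editor's note: the subtlest part of this diff is the lost-wakeup avoidance between gve_xmit_retry_enobuf_mbuf() and gve_tx_cleanup_tq(): the transmit side sets tx->stopped, issues a full fence, then retries once; the completion side frees ring space, issues the matching fence, then checks the flag and re-enqueues the transmit task. The following standalone userspace sketch models only that flag-and-fence ordering with C11 atomics. All names here (stopped, ring_space, retry_after_enobuf, cleanup_freed_space) are hypothetical stand-ins, not driver API; the C11 calls merely approximate the kernel's atomic_*_bool() and atomic_thread_fence_seq_cst().

/*
 * Minimal sketch (not driver code) of the stopped-flag handshake the
 * diff adds to the gve transmit path.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool stopped;        /* models tx->stopped */
static atomic_int ring_space;      /* models free descriptor slots */

/* Transmit side: called after an ENOBUFS-style failure. */
static bool
retry_after_enobuf(void)
{
        atomic_store(&stopped, true);

        /*
         * Space freed before this fence is visible to the retry below;
         * space freed after it is guaranteed to observe stopped == true
         * on the completion side, so the wakeup cannot be lost.
         */
        atomic_thread_fence(memory_order_seq_cst);

        if (atomic_fetch_sub(&ring_space, 1) > 0) {
                atomic_store(&stopped, false);   /* retry succeeded */
                return (true);
        }
        atomic_fetch_add(&ring_space, 1);        /* undo claim; stay stopped */
        return (false);
}

/* Completion side: called when descriptors are reclaimed. */
static void
cleanup_freed_space(int freed)
{
        atomic_fetch_add(&ring_space, freed);

        atomic_thread_fence(memory_order_seq_cst);

        if (atomic_load(&stopped)) {
                atomic_store(&stopped, false);
                printf("wake transmit taskqueue\n"); /* models taskqueue_enqueue() */
        }
}

int
main(void)
{
        if (!retry_after_enobuf())       /* ring starts full: stay stopped */
                cleanup_freed_space(4);  /* later completion wakes the queue */
        return (0);
}

Because both sides order their write before their read with a seq_cst fence, at least one of them must see the other's write: either the retry sees the freed space, or the cleanup sees the stopped flag and issues the wakeup, which is the same argument the in-tree comment makes for the real tx->stopped handshake.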