Diffstat (limited to 'sys/dev/gve/gve_main.c')
-rw-r--r--  sys/dev/gve/gve_main.c  381
1 file changed, 341 insertions(+), 40 deletions(-)
diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c
index cd7849778bce..10197a8e15f8 100644
--- a/sys/dev/gve/gve_main.c
+++ b/sys/dev/gve/gve_main.c
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
- * Copyright (c) 2023 Google LLC
+ * Copyright (c) 2023-2024 Google LLC
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
@@ -30,11 +30,12 @@
*/
#include "gve.h"
#include "gve_adminq.h"
+#include "gve_dqo.h"
-#define GVE_DRIVER_VERSION "GVE-FBSD-1.0.1\n"
+#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.5\n"
#define GVE_VERSION_MAJOR 1
-#define GVE_VERSION_MINOR 0
-#define GVE_VERSION_SUB 1
+#define GVE_VERSION_MINOR 3
+#define GVE_VERSION_SUB 5
#define GVE_DEFAULT_RX_COPYBREAK 256
@@ -49,6 +50,9 @@ static struct gve_dev {
struct sx gve_global_lock;
+static void gve_start_tx_timeout_service(struct gve_priv *priv);
+static void gve_stop_tx_timeout_service(struct gve_priv *priv);
+
static int
gve_verify_driver_compatibility(struct gve_priv *priv)
{
@@ -98,6 +102,72 @@ gve_verify_driver_compatibility(struct gve_priv *priv)
return (err);
}
+static void
+gve_handle_tx_timeout(struct gve_priv *priv, struct gve_tx_ring *tx,
+ int num_timeout_pkts)
+{
+ int64_t time_since_last_kick;
+
+ counter_u64_add_protected(tx->stats.tx_timeout, 1);
+
+ /* last_kicked is never GVE_TIMESTAMP_INVALID so we can skip checking */
+ time_since_last_kick = gve_seconds_since(&tx->last_kicked);
+
+ /* Try kicking first in case the timeout is due to a missed interrupt */
+ if (time_since_last_kick > GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC) {
+ device_printf(priv->dev,
+ "Found %d timed out packet(s) on txq%d, kicking it for completions\n",
+ num_timeout_pkts, tx->com.id);
+ gve_set_timestamp(&tx->last_kicked);
+ taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
+ } else {
+ device_printf(priv->dev,
+ "Found %d timed out packet(s) on txq%d with its last kick %jd sec ago which is less than the cooldown period %d. Resetting device\n",
+ num_timeout_pkts, tx->com.id,
+ (intmax_t)time_since_last_kick,
+ GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC);
+ gve_schedule_reset(priv);
+ }
+}
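
The timestamp helpers called above (gve_set_timestamp() and gve_seconds_since()) are not part of this diff. A minimal sketch of what they plausibly look like, assuming they track whole seconds of system uptime; the real helpers live elsewhere in the driver and may differ:

    #include <sys/types.h>
    #include <sys/time.h>		/* time_uptime: seconds since boot */

    /* Sketch only: assumes seconds granularity is sufficient. */
    static inline void
    gve_set_timestamp(int64_t *ts)
    {
    	*ts = (int64_t)time_uptime;
    }

    static inline int64_t
    gve_seconds_since(int64_t *ts)
    {
    	return ((int64_t)time_uptime - *ts);
    }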
+
+static void
+gve_tx_timeout_service_callback(void *data)
+{
+ struct gve_priv *priv = (struct gve_priv *)data;
+ struct gve_tx_ring *tx;
+ uint16_t num_timeout_pkts;
+
+ tx = &priv->tx[priv->check_tx_queue_idx];
+
+ num_timeout_pkts = gve_is_gqi(priv) ?
+ gve_check_tx_timeout_gqi(priv, tx) :
+ gve_check_tx_timeout_dqo(priv, tx);
+ if (num_timeout_pkts)
+ gve_handle_tx_timeout(priv, tx, num_timeout_pkts);
+
+ priv->check_tx_queue_idx = (priv->check_tx_queue_idx + 1) %
+ priv->tx_cfg.num_queues;
+ callout_reset_sbt(&priv->tx_timeout_service,
+ SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
+ gve_tx_timeout_service_callback, (void *)priv, 0);
+}
+
+static void
+gve_start_tx_timeout_service(struct gve_priv *priv)
+{
+ priv->check_tx_queue_idx = 0;
+ callout_init(&priv->tx_timeout_service, true);
+ callout_reset_sbt(&priv->tx_timeout_service,
+ SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
+ gve_tx_timeout_service_callback, (void *)priv, 0);
+}
+
+static void
+gve_stop_tx_timeout_service(struct gve_priv *priv)
+{
+ callout_drain(&priv->tx_timeout_service);
+}
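
Taken together, the three functions above implement a standard self-rearming callout: the callback re-queues itself at the end of each tick, and callout_drain() in the stop path waits for any in-flight callback to finish before returning. A self-contained sketch of the same pattern, with illustrative names and an assumed 5-second cadence:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/time.h>		/* SBT_1S */
    #include <sys/callout.h>

    struct periodic_svc {
    	struct callout co;
    };

    static void
    svc_tick(void *arg)
    {
    	struct periodic_svc *svc = arg;

    	/* ... do one unit of work per tick ... */

    	/* Re-arm: a callout fires only once per callout_reset_sbt(). */
    	callout_reset_sbt(&svc->co, SBT_1S * 5, 0, svc_tick, svc, 0);
    }

    static void
    svc_start(struct periodic_svc *svc)
    {
    	callout_init(&svc->co, /*mpsafe=*/true);
    	callout_reset_sbt(&svc->co, SBT_1S * 5, 0, svc_tick, svc, 0);
    }

    static void
    svc_stop(struct periodic_svc *svc)
    {
    	/* Waits for a running callback to complete before returning. */
    	callout_drain(&svc->co);
    }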
+
static int
gve_up(struct gve_priv *priv)
{
@@ -124,9 +194,11 @@ gve_up(struct gve_priv *priv)
if (if_getcapenable(ifp) & IFCAP_TSO6)
if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
- err = gve_register_qpls(priv);
- if (err != 0)
- goto reset;
+ if (gve_is_qpl(priv)) {
+ err = gve_register_qpls(priv);
+ if (err != 0)
+ goto reset;
+ }
err = gve_create_rx_rings(priv);
if (err != 0)
@@ -146,6 +218,9 @@ gve_up(struct gve_priv *priv)
gve_unmask_all_queue_irqs(priv);
gve_set_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
priv->interface_up_cnt++;
+
+ gve_start_tx_timeout_service(priv);
+
return (0);
reset:
@@ -161,6 +236,8 @@ gve_down(struct gve_priv *priv)
if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
return;
+ gve_stop_tx_timeout_service(priv);
+
if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
if_link_state_change(priv->ifp, LINK_STATE_DOWN);
gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
@@ -174,10 +251,13 @@ gve_down(struct gve_priv *priv)
if (gve_destroy_tx_rings(priv) != 0)
goto reset;
- if (gve_unregister_qpls(priv) != 0)
- goto reset;
+ if (gve_is_qpl(priv)) {
+ if (gve_unregister_qpls(priv) != 0)
+ goto reset;
+ }
- gve_mask_all_queue_irqs(priv);
+ if (gve_is_gqi(priv))
+ gve_mask_all_queue_irqs(priv);
gve_clear_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
priv->interface_down_cnt++;
return;
@@ -186,10 +266,143 @@ reset:
gve_schedule_reset(priv);
}
+int
+gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
+{
+ int err;
+
+ GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+ gve_down(priv);
+
+ if (new_queue_cnt < priv->rx_cfg.num_queues) {
+ /*
+ * Freeing a ring still preserves its ntfy_id,
+ * which is needed if we create the ring again.
+ */
+ gve_free_rx_rings(priv, new_queue_cnt, priv->rx_cfg.num_queues);
+ } else {
+ err = gve_alloc_rx_rings(priv, priv->rx_cfg.num_queues, new_queue_cnt);
+ if (err != 0) {
+ device_printf(priv->dev, "Failed to allocate new queues");
+ /* Failed to allocate rings, start back up with old ones */
+ gve_up(priv);
+ return (err);
+
+ }
+ }
+ priv->rx_cfg.num_queues = new_queue_cnt;
+
+ err = gve_up(priv);
+ if (err != 0)
+ gve_schedule_reset(priv);
+
+ return (err);
+}
+
+int
+gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
+{
+ int err;
+
+ GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+ gve_down(priv);
+
+ if (new_queue_cnt < priv->tx_cfg.num_queues) {
+ /*
+ * Freeing a ring still preserves its ntfy_id,
+ * which is needed if we create the ring again.
+ */
+ gve_free_tx_rings(priv, new_queue_cnt, priv->tx_cfg.num_queues);
+ } else {
+ err = gve_alloc_tx_rings(priv, priv->tx_cfg.num_queues, new_queue_cnt);
+ if (err != 0) {
+ device_printf(priv->dev, "Failed to allocate new queues");
+ /* Failed to allocate rings, start back up with old ones */
+ gve_up(priv);
+ return (err);
+
+ }
+ }
+ priv->tx_cfg.num_queues = new_queue_cnt;
+
+ err = gve_up(priv);
+ if (err != 0)
+ gve_schedule_reset(priv);
+
+ return (err);
+}
+
+int
+gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
+{
+ int err;
+ uint16_t prev_desc_cnt;
+
+ GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+ gve_down(priv);
+
+ if (is_rx) {
+ gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+ prev_desc_cnt = priv->rx_desc_cnt;
+ priv->rx_desc_cnt = new_desc_cnt;
+ err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+ if (err != 0) {
+ device_printf(priv->dev,
+ "Failed to allocate rings. Trying to start back up with previous ring size.");
+ priv->rx_desc_cnt = prev_desc_cnt;
+ err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+ }
+ } else {
+ gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+ prev_desc_cnt = priv->tx_desc_cnt;
+ priv->tx_desc_cnt = new_desc_cnt;
+ err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+ if (err != 0) {
+ device_printf(priv->dev,
+ "Failed to allocate rings. Trying to start back up with previous ring size.");
+ priv->tx_desc_cnt = prev_desc_cnt;
+ err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+ }
+ }
+
+ if (err != 0) {
+ device_printf(priv->dev, "Failed to allocate rings! Cannot start device back up!");
+ return (err);
+ }
+
+ err = gve_up(priv);
+ if (err != 0) {
+ gve_schedule_reset(priv);
+ return (err);
+ }
+
+ return (0);
+}
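
All three adjust routines assert that the caller already holds the interface lock. A hypothetical caller sketch, e.g. from a sysctl handler; gve_example_set_rx_queues is illustrative, and GVE_IFACE_LOCK_LOCK is assumed to exist alongside the GVE_IFACE_LOCK_UNLOCK/GVE_IFACE_LOCK_ASSERT macros used elsewhere in this file:

    static int
    gve_example_set_rx_queues(struct gve_priv *priv, uint16_t new_cnt)
    {
    	int err;

    	/* Reject counts outside what the rings were allocated for. */
    	if (new_cnt == 0 || new_cnt > priv->rx_cfg.max_queues)
    		return (EINVAL);

    	GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock);
    	err = gve_adjust_rx_queues(priv, new_cnt);
    	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);

    	return (err);
    }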
+
+static int
+gve_get_dqo_rx_buf_size(struct gve_priv *priv, uint16_t mtu)
+{
+ /*
+ * Use 4k buffers only if mode is DQ, 4k buffers flag is on,
+ * and either hw LRO is enabled or mtu is greater than 2048
+ */
+ if (!gve_is_gqi(priv) && gve_allow_4k_rx_buffers &&
+ (!gve_disable_hw_lro || mtu > GVE_DEFAULT_RX_BUFFER_SIZE))
+ return (GVE_4K_RX_BUFFER_SIZE_DQO);
+
+ return (GVE_DEFAULT_RX_BUFFER_SIZE);
+}
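
To make the selection logic concrete, a few worked examples following directly from the condition above (a DQ-mode device with gve_allow_4k_rx_buffers set, GVE_DEFAULT_RX_BUFFER_SIZE == 2048):

    /*
     *   HW LRO enabled,  mtu = 1500 -> GVE_4K_RX_BUFFER_SIZE_DQO
     *   HW LRO disabled, mtu = 1500 -> GVE_DEFAULT_RX_BUFFER_SIZE
     *   HW LRO disabled, mtu = 4096 -> GVE_4K_RX_BUFFER_SIZE_DQO
     */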
+
static int
gve_set_mtu(if_t ifp, uint32_t new_mtu)
{
struct gve_priv *priv = if_getsoftc(ifp);
+ const uint32_t max_problem_range = 8227;
+ const uint32_t min_problem_range = 7822;
+ uint16_t new_rx_buf_size = gve_get_dqo_rx_buf_size(priv, new_mtu);
int err;
if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
@@ -198,11 +411,32 @@ gve_set_mtu(if_t ifp, uint32_t new_mtu)
return (EINVAL);
}
+ /*
+ * When hardware LRO is enabled in DQ mode, MTUs within the range
+ * [7822, 8227] trigger hardware issues which cause a drastic drop
+ * in throughput.
+ */
+ if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
+ new_mtu >= min_problem_range && new_mtu <= max_problem_range &&
+ new_rx_buf_size != GVE_4K_RX_BUFFER_SIZE_DQO) {
+ device_printf(priv->dev,
+ "Cannot set to MTU to %d within the range [%d, %d] while HW LRO is enabled and not using 4k RX Buffers\n",
+ new_mtu, min_problem_range, max_problem_range);
+ return (EINVAL);
+ }
+
err = gve_adminq_set_mtu(priv, new_mtu);
if (err == 0) {
if (bootverbose)
device_printf(priv->dev, "MTU set to %d\n", new_mtu);
if_setmtu(ifp, new_mtu);
+ /* Need to re-alloc RX queues if RX buffer size changed */
+ if (!gve_is_gqi(priv) &&
+ new_rx_buf_size != priv->rx_buf_size_dqo) {
+ gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+ priv->rx_buf_size_dqo = new_rx_buf_size;
+ gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+ }
} else {
device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
}
@@ -352,18 +586,13 @@ gve_get_counter(if_t ifp, ift_counter cnt)
}
}
-static int
+static void
gve_setup_ifnet(device_t dev, struct gve_priv *priv)
{
int caps = 0;
if_t ifp;
ifp = priv->ifp = if_alloc(IFT_ETHER);
- if (ifp == NULL) {
- device_printf(priv->dev, "Failed to allocate ifnet struct\n");
- return (ENXIO);
- }
-
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
if_setsoftc(ifp, priv);
if_setdev(ifp, dev);
@@ -372,6 +601,18 @@ gve_setup_ifnet(device_t dev, struct gve_priv *priv)
if_settransmitfn(ifp, gve_xmit_ifp);
if_setqflushfn(ifp, gve_qflush);
+ /*
+ * Set TSO limits, must match the arguments to bus_dma_tag_create
+ * when creating tx->dqo.buf_dmatag. Only applies to the RDA mode
+ * because in QPL we copy the entire packet into the bounce buffer
+ * and thus it does not matter how fragmented the mbuf is.
+ */
+ if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) {
+ if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO);
+ if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO);
+ }
+ if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);
+
#if __FreeBSD_version >= 1400086
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
#else
@@ -401,8 +642,6 @@ gve_setup_ifnet(device_t dev, struct gve_priv *priv)
ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
-
- return (0);
}
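
The TSO limits set in the hunk above are documented as having to match the bus_dma_tag_create() arguments used when creating tx->dqo.buf_dmatag. A hedged sketch of what that matching tag creation plausibly looks like; the real call lives in the TX ring setup path, not in this diff, and the alignment/boundary values here are assumptions (err, priv, and tx are assumed to be in scope):

    /* nsegments and maxsegsz must agree with the if_sethwtsomaxsegcount()
     * and if_sethwtsomaxsegsize() values set on the ifnet. */
    err = bus_dma_tag_create(
        bus_get_dma_tag(priv->dev),	/* parent */
        1, 0,			/* alignment, boundary (assumed) */
        BUS_SPACE_MAXADDR,		/* lowaddr */
        BUS_SPACE_MAXADDR,		/* highaddr */
        NULL, NULL,			/* filter, filterarg */
        GVE_TSO_MAXSIZE_DQO,	/* maxsize */
        GVE_TX_MAX_DATA_DESCS_DQO,	/* nsegments */
        GVE_TX_MAX_BUF_SIZE_DQO,	/* maxsegsz */
        0,				/* flags */
        NULL, NULL,			/* lockfunc, lockarg */
        &tx->dqo.buf_dmatag);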
static int
@@ -454,9 +693,14 @@ static void
gve_free_rings(struct gve_priv *priv)
{
gve_free_irqs(priv);
- gve_free_tx_rings(priv);
- gve_free_rx_rings(priv);
- gve_free_qpls(priv);
+
+ gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+ free(priv->tx, M_GVE);
+ priv->tx = NULL;
+
+ gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+ free(priv->rx, M_GVE);
+ priv->rx = NULL;
}
static int
@@ -464,15 +708,15 @@ gve_alloc_rings(struct gve_priv *priv)
{
int err;
- err = gve_alloc_qpls(priv);
- if (err != 0)
- goto abort;
-
- err = gve_alloc_rx_rings(priv);
+ priv->rx = malloc(sizeof(struct gve_rx_ring) * priv->rx_cfg.max_queues,
+ M_GVE, M_WAITOK | M_ZERO);
+ err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
if (err != 0)
goto abort;
- err = gve_alloc_tx_rings(priv);
+ priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.max_queues,
+ M_GVE, M_WAITOK | M_ZERO);
+ err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
if (err != 0)
goto abort;
@@ -488,7 +732,7 @@ abort:
}
static void
-gve_deconfigure_resources(struct gve_priv *priv)
+gve_deconfigure_and_free_device_resources(struct gve_priv *priv)
{
int err;
@@ -506,10 +750,15 @@ gve_deconfigure_resources(struct gve_priv *priv)
gve_free_irq_db_array(priv);
gve_free_counter_array(priv);
+
+ if (priv->ptype_lut_dqo) {
+ free(priv->ptype_lut_dqo, M_GVE);
+ priv->ptype_lut_dqo = NULL;
+ }
}
static int
-gve_configure_resources(struct gve_priv *priv)
+gve_alloc_and_configure_device_resources(struct gve_priv *priv)
{
int err;
@@ -532,13 +781,25 @@ gve_configure_resources(struct gve_priv *priv)
goto abort;
}
+ if (!gve_is_gqi(priv)) {
+ priv->ptype_lut_dqo = malloc(sizeof(*priv->ptype_lut_dqo), M_GVE,
+ M_WAITOK | M_ZERO);
+
+ err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
+ if (err != 0) {
+ device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
+ err);
+ goto abort;
+ }
+ }
+
gve_set_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
if (bootverbose)
device_printf(priv->dev, "Configured device resources\n");
return (0);
abort:
- gve_deconfigure_resources(priv);
+ gve_deconfigure_and_free_device_resources(priv);
return (err);
}
@@ -557,7 +818,7 @@ gve_set_queue_cnts(struct gve_priv *priv)
priv->rx_cfg.num_queues);
}
- priv->num_queues = priv->tx_cfg.num_queues + priv->rx_cfg.num_queues;
+ priv->num_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
priv->mgmt_msix_idx = priv->num_queues;
}
@@ -603,7 +864,7 @@ static void
gve_destroy(struct gve_priv *priv)
{
gve_down(priv);
- gve_deconfigure_resources(priv);
+ gve_deconfigure_and_free_device_resources(priv);
gve_release_adminq(priv);
}
@@ -616,9 +877,21 @@ gve_restore(struct gve_priv *priv)
if (err != 0)
goto abort;
- err = gve_configure_resources(priv);
- if (err != 0)
+ err = gve_adminq_configure_device_resources(priv);
+ if (err != 0) {
+ device_printf(priv->dev, "Failed to configure device resources: err=%d\n",
+ err);
+ err = (ENXIO);
goto abort;
+ }
+ if (!gve_is_gqi(priv)) {
+ err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
+ if (err != 0) {
+ device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
+ err);
+ goto abort;
+ }
+ }
err = gve_up(priv);
if (err != 0)
@@ -632,6 +905,25 @@ abort:
}
static void
+gve_clear_device_resources(struct gve_priv *priv)
+{
+ int i;
+
+ for (i = 0; i < priv->num_event_counters; i++)
+ priv->counters[i] = 0;
+ bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map,
+ BUS_DMASYNC_PREWRITE);
+
+ for (i = 0; i < priv->num_queues; i++)
+ priv->irq_db_indices[i] = (struct gve_irq_db){};
+ bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
+ BUS_DMASYNC_PREWRITE);
+
+ if (priv->ptype_lut_dqo)
+ *priv->ptype_lut_dqo = (struct gve_ptype_lut){0};
+}
+
+static void
gve_handle_reset(struct gve_priv *priv)
{
if (!gve_get_state_flag(priv, GVE_STATE_FLAG_DO_RESET))
@@ -662,6 +954,8 @@ gve_handle_reset(struct gve_priv *priv)
gve_clear_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);
gve_down(priv);
+ gve_clear_device_resources(priv);
+
gve_restore(priv);
GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
@@ -749,6 +1043,9 @@ gve_attach(device_t dev)
int rid;
int err;
+ snprintf(gve_version, sizeof(gve_version), "%d.%d.%d",
+ GVE_VERSION_MAJOR, GVE_VERSION_MINOR, GVE_VERSION_SUB);
+
priv = device_get_softc(dev);
priv->dev = dev;
GVE_IFACE_LOCK_INIT(priv->gve_iface_lock);
@@ -786,17 +1083,16 @@ gve_attach(device_t dev)
if (err != 0)
goto abort;
- err = gve_configure_resources(priv);
+ err = gve_alloc_and_configure_device_resources(priv);
if (err != 0)
goto abort;
+ priv->rx_buf_size_dqo = gve_get_dqo_rx_buf_size(priv, priv->max_mtu);
err = gve_alloc_rings(priv);
if (err != 0)
goto abort;
- err = gve_setup_ifnet(dev, priv);
- if (err != 0)
- goto abort;
+ gve_setup_ifnet(dev, priv);
priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
@@ -817,7 +1113,7 @@ gve_attach(device_t dev)
abort:
gve_free_rings(priv);
- gve_deconfigure_resources(priv);
+ gve_deconfigure_and_free_device_resources(priv);
gve_release_adminq(priv);
gve_free_sys_res_mem(priv);
GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock);
@@ -829,6 +1125,11 @@ gve_detach(device_t dev)
{
struct gve_priv *priv = device_get_softc(dev);
if_t ifp = priv->ifp;
+ int error;
+
+ error = bus_generic_detach(dev);
+ if (error != 0)
+ return (error);
ether_ifdetach(ifp);
@@ -845,7 +1146,7 @@ gve_detach(device_t dev)
taskqueue_free(priv->service_tq);
if_free(ifp);
- return (bus_generic_detach(dev));
+ return (0);
}
static device_method_t gve_methods[] = {