Diffstat (limited to 'sys/dev/gve/gve_main.c')
-rw-r--r-- | sys/dev/gve/gve_main.c | 381
1 file changed, 341 insertions, 40 deletions
diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c
index cd7849778bce..10197a8e15f8 100644
--- a/sys/dev/gve/gve_main.c
+++ b/sys/dev/gve/gve_main.c
@@ -1,7 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
- * Copyright (c) 2023 Google LLC
+ * Copyright (c) 2023-2024 Google LLC
  *
  * Redistribution and use in source and binary forms, with or without modification,
  * are permitted provided that the following conditions are met:
@@ -30,11 +30,12 @@
  */
 #include "gve.h"
 #include "gve_adminq.h"
+#include "gve_dqo.h"
 
-#define GVE_DRIVER_VERSION "GVE-FBSD-1.0.1\n"
+#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.4\n"
 #define GVE_VERSION_MAJOR 1
-#define GVE_VERSION_MINOR 0
-#define GVE_VERSION_SUB 1
+#define GVE_VERSION_MINOR 3
+#define GVE_VERSION_SUB 5
 
 #define GVE_DEFAULT_RX_COPYBREAK 256
 
@@ -49,6 +50,9 @@ static struct gve_dev {
 
 struct sx gve_global_lock;
 
+static void gve_start_tx_timeout_service(struct gve_priv *priv);
+static void gve_stop_tx_timeout_service(struct gve_priv *priv);
+
 static int
 gve_verify_driver_compatibility(struct gve_priv *priv)
 {
@@ -98,6 +102,72 @@ gve_verify_driver_compatibility(struct gve_priv *priv)
 	return (err);
 }
 
+static void
+gve_handle_tx_timeout(struct gve_priv *priv, struct gve_tx_ring *tx,
+    int num_timeout_pkts)
+{
+	int64_t time_since_last_kick;
+
+	counter_u64_add_protected(tx->stats.tx_timeout, 1);
+
+	/* last_kicked is never GVE_TIMESTAMP_INVALID so we can skip checking */
+	time_since_last_kick = gve_seconds_since(&tx->last_kicked);
+
+	/* Try kicking first in case the timeout is due to a missed interrupt */
+	if (time_since_last_kick > GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC) {
+		device_printf(priv->dev,
+		    "Found %d timed out packet(s) on txq%d, kicking it for completions\n",
+		    num_timeout_pkts, tx->com.id);
+		gve_set_timestamp(&tx->last_kicked);
+		taskqueue_enqueue(tx->com.cleanup_tq, &tx->com.cleanup_task);
+	} else {
+		device_printf(priv->dev,
+		    "Found %d timed out packet(s) on txq%d with its last kick %jd sec ago which is less than the cooldown period %d. Resetting device\n",
+		    num_timeout_pkts, tx->com.id,
+		    (intmax_t)time_since_last_kick,
+		    GVE_TX_TIMEOUT_KICK_COOLDOWN_SEC);
+		gve_schedule_reset(priv);
+	}
+}
+
+static void
+gve_tx_timeout_service_callback(void *data)
+{
+	struct gve_priv *priv = (struct gve_priv *)data;
+	struct gve_tx_ring *tx;
+	uint16_t num_timeout_pkts;
+
+	tx = &priv->tx[priv->check_tx_queue_idx];
+
+	num_timeout_pkts = gve_is_gqi(priv) ?
+	    gve_check_tx_timeout_gqi(priv, tx) :
+	    gve_check_tx_timeout_dqo(priv, tx);
+	if (num_timeout_pkts)
+		gve_handle_tx_timeout(priv, tx, num_timeout_pkts);
+
+	priv->check_tx_queue_idx = (priv->check_tx_queue_idx + 1) %
+	    priv->tx_cfg.num_queues;
+	callout_reset_sbt(&priv->tx_timeout_service,
+	    SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
+	    gve_tx_timeout_service_callback, (void *)priv, 0);
+}
+
+static void
+gve_start_tx_timeout_service(struct gve_priv *priv)
+{
+	priv->check_tx_queue_idx = 0;
+	callout_init(&priv->tx_timeout_service, true);
+	callout_reset_sbt(&priv->tx_timeout_service,
+	    SBT_1S * GVE_TX_TIMEOUT_CHECK_CADENCE_SEC, 0,
+	    gve_tx_timeout_service_callback, (void *)priv, 0);
+}
+
+static void
+gve_stop_tx_timeout_service(struct gve_priv *priv)
+{
+	callout_drain(&priv->tx_timeout_service);
+}
+
 static int
 gve_up(struct gve_priv *priv)
 {
@@ -124,9 +194,11 @@ gve_up(struct gve_priv *priv)
 	if (if_getcapenable(ifp) & IFCAP_TSO6)
 		if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
 
-	err = gve_register_qpls(priv);
-	if (err != 0)
-		goto reset;
+	if (gve_is_qpl(priv)) {
+		err = gve_register_qpls(priv);
+		if (err != 0)
+			goto reset;
+	}
 
 	err = gve_create_rx_rings(priv);
 	if (err != 0)
@@ -146,6 +218,9 @@ gve_up(struct gve_priv *priv)
 	gve_unmask_all_queue_irqs(priv);
 	gve_set_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
 	priv->interface_up_cnt++;
+
+	gve_start_tx_timeout_service(priv);
+
 	return (0);
 
 reset:
@@ -161,6 +236,8 @@ gve_down(struct gve_priv *priv)
 	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP))
 		return;
 
+	gve_stop_tx_timeout_service(priv);
+
 	if (gve_get_state_flag(priv, GVE_STATE_FLAG_LINK_UP)) {
 		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
 		gve_clear_state_flag(priv, GVE_STATE_FLAG_LINK_UP);
@@ -174,10 +251,13 @@ gve_down(struct gve_priv *priv)
 	if (gve_destroy_tx_rings(priv) != 0)
 		goto reset;
 
-	if (gve_unregister_qpls(priv) != 0)
-		goto reset;
+	if (gve_is_qpl(priv)) {
+		if (gve_unregister_qpls(priv) != 0)
+			goto reset;
+	}
 
-	gve_mask_all_queue_irqs(priv);
+	if (gve_is_gqi(priv))
+		gve_mask_all_queue_irqs(priv);
 	gve_clear_state_flag(priv, GVE_STATE_FLAG_QUEUES_UP);
 	priv->interface_down_cnt++;
 	return;
@@ -186,10 +266,143 @@ reset:
 	gve_schedule_reset(priv);
 }
 
+int
+gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
+{
+	int err;
+
+	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+	gve_down(priv);
+
+	if (new_queue_cnt < priv->rx_cfg.num_queues) {
+		/*
+		 * Freeing a ring still preserves its ntfy_id,
+		 * which is needed if we create the ring again.
+		 */
+		gve_free_rx_rings(priv, new_queue_cnt, priv->rx_cfg.num_queues);
+	} else {
+		err = gve_alloc_rx_rings(priv, priv->rx_cfg.num_queues, new_queue_cnt);
+		if (err != 0) {
+			device_printf(priv->dev, "Failed to allocate new queues");
+			/* Failed to allocate rings, start back up with old ones */
+			gve_up(priv);
+			return (err);
+
+		}
+	}
+	priv->rx_cfg.num_queues = new_queue_cnt;
+
+	err = gve_up(priv);
+	if (err != 0)
+		gve_schedule_reset(priv);
+
+	return (err);
+}
+
+int
+gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt)
+{
+	int err;
+
+	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+	gve_down(priv);
+
+	if (new_queue_cnt < priv->tx_cfg.num_queues) {
+		/*
+		 * Freeing a ring still preserves its ntfy_id,
+		 * which is needed if we create the ring again.
+		 */
+		gve_free_tx_rings(priv, new_queue_cnt, priv->tx_cfg.num_queues);
+	} else {
+		err = gve_alloc_tx_rings(priv, priv->tx_cfg.num_queues, new_queue_cnt);
+		if (err != 0) {
+			device_printf(priv->dev, "Failed to allocate new queues");
+			/* Failed to allocate rings, start back up with old ones */
+			gve_up(priv);
+			return (err);
+
+		}
+	}
+	priv->tx_cfg.num_queues = new_queue_cnt;
+
+	err = gve_up(priv);
+	if (err != 0)
+		gve_schedule_reset(priv);
+
+	return (err);
+}
+
+int
+gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx)
+{
+	int err;
+	uint16_t prev_desc_cnt;
+
+	GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock);
+
+	gve_down(priv);
+
+	if (is_rx) {
+		gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		prev_desc_cnt = priv->rx_desc_cnt;
+		priv->rx_desc_cnt = new_desc_cnt;
+		err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		if (err != 0) {
+			device_printf(priv->dev,
+			    "Failed to allocate rings. Trying to start back up with previous ring size.");
+			priv->rx_desc_cnt = prev_desc_cnt;
+			err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		}
+	} else {
+		gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		prev_desc_cnt = priv->tx_desc_cnt;
+		priv->tx_desc_cnt = new_desc_cnt;
+		err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		if (err != 0) {
+			device_printf(priv->dev,
+			    "Failed to allocate rings. Trying to start back up with previous ring size.");
+			priv->tx_desc_cnt = prev_desc_cnt;
+			err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+		}
+	}
+
+	if (err != 0) {
+		device_printf(priv->dev, "Failed to allocate rings! Cannot start device back up!");
+		return (err);
+	}
+
+	err = gve_up(priv);
+	if (err != 0) {
+		gve_schedule_reset(priv);
+		return (err);
+	}
+
+	return (0);
+}
+
+static int
+gve_get_dqo_rx_buf_size(struct gve_priv *priv, uint16_t mtu)
+{
+	/*
+	 * Use 4k buffers only if mode is DQ, 4k buffers flag is on,
+	 * and either hw LRO is enabled or mtu is greater than 2048
+	 */
+	if (!gve_is_gqi(priv) && gve_allow_4k_rx_buffers &&
+	    (!gve_disable_hw_lro || mtu > GVE_DEFAULT_RX_BUFFER_SIZE))
+		return (GVE_4K_RX_BUFFER_SIZE_DQO);
+
+	return (GVE_DEFAULT_RX_BUFFER_SIZE);
+}
+
 static int
 gve_set_mtu(if_t ifp, uint32_t new_mtu)
 {
 	struct gve_priv *priv = if_getsoftc(ifp);
+	const uint32_t max_problem_range = 8227;
+	const uint32_t min_problem_range = 7822;
+	uint16_t new_rx_buf_size = gve_get_dqo_rx_buf_size(priv, new_mtu);
 	int err;
 
 	if ((new_mtu > priv->max_mtu) || (new_mtu < ETHERMIN)) {
@@ -198,11 +411,32 @@ gve_set_mtu(if_t ifp, uint32_t new_mtu)
 		return (EINVAL);
 	}
 
+	/*
+	 * When hardware LRO is enabled in DQ mode, MTUs within the range
+	 * [7822, 8227] trigger hardware issues which cause a drastic drop
+	 * in throughput.
+	 */
+	if (!gve_is_gqi(priv) && !gve_disable_hw_lro &&
+	    new_mtu >= min_problem_range && new_mtu <= max_problem_range &&
+	    new_rx_buf_size != GVE_4K_RX_BUFFER_SIZE_DQO) {
+		device_printf(priv->dev,
+		    "Cannot set to MTU to %d within the range [%d, %d] while HW LRO is enabled and not using 4k RX Buffers\n",
+		    new_mtu, min_problem_range, max_problem_range);
+		return (EINVAL);
+	}
+
 	err = gve_adminq_set_mtu(priv, new_mtu);
 	if (err == 0) {
 		if (bootverbose)
 			device_printf(priv->dev, "MTU set to %d\n", new_mtu);
 		if_setmtu(ifp, new_mtu);
+		/* Need to re-alloc RX queues if RX buffer size changed */
+		if (!gve_is_gqi(priv) &&
+		    new_rx_buf_size != priv->rx_buf_size_dqo) {
+			gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+			priv->rx_buf_size_dqo = new_rx_buf_size;
+			gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+		}
 	} else {
 		device_printf(priv->dev, "Failed to set MTU to %d\n", new_mtu);
 	}
@@ -352,18 +586,13 @@ gve_get_counter(if_t ifp, ift_counter cnt)
 	}
 }
 
-static int
+static void
 gve_setup_ifnet(device_t dev, struct gve_priv *priv)
 {
 	int caps = 0;
 	if_t ifp;
 
 	ifp = priv->ifp = if_alloc(IFT_ETHER);
-	if (ifp == NULL) {
-		device_printf(priv->dev, "Failed to allocate ifnet struct\n");
-		return (ENXIO);
-	}
-
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
 	if_setsoftc(ifp, priv);
 	if_setdev(ifp, dev);
@@ -372,6 +601,18 @@ gve_setup_ifnet(device_t dev, struct gve_priv *priv)
 	if_settransmitfn(ifp, gve_xmit_ifp);
 	if_setqflushfn(ifp, gve_qflush);
 
+	/*
+	 * Set TSO limits, must match the arguments to bus_dma_tag_create
+	 * when creating tx->dqo.buf_dmatag. Only applies to the RDA mode
+	 * because in QPL we copy the entire packet into the bounce buffer
+	 * and thus it does not matter how fragmented the mbuf is.
+	 */
+	if (!gve_is_gqi(priv) && !gve_is_qpl(priv)) {
+		if_sethwtsomaxsegcount(ifp, GVE_TX_MAX_DATA_DESCS_DQO);
+		if_sethwtsomaxsegsize(ifp, GVE_TX_MAX_BUF_SIZE_DQO);
+	}
+	if_sethwtsomax(ifp, GVE_TSO_MAXSIZE_DQO);
+
 #if __FreeBSD_version >= 1400086
 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
 #else
@@ -401,8 +642,6 @@ gve_setup_ifnet(device_t dev, struct gve_priv *priv)
 
 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
-
-	return (0);
 }
 
 static int
@@ -454,9 +693,14 @@ static void
 gve_free_rings(struct gve_priv *priv)
 {
 	gve_free_irqs(priv);
-	gve_free_tx_rings(priv);
-	gve_free_rx_rings(priv);
-	gve_free_qpls(priv);
+
+	gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues);
+	free(priv->tx, M_GVE);
+	priv->tx = NULL;
+
+	gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues);
+	free(priv->rx, M_GVE);
+	priv->rx = NULL;
 }
 
 static int
@@ -464,15 +708,15 @@ gve_alloc_rings(struct gve_priv *priv)
 {
 	int err;
 
-	err = gve_alloc_qpls(priv);
-	if (err != 0)
-		goto abort;
-
-	err = gve_alloc_rx_rings(priv);
+	priv->rx = malloc(sizeof(struct gve_rx_ring) * priv->rx_cfg.max_queues,
+	    M_GVE, M_WAITOK | M_ZERO);
+	err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues);
 	if (err != 0)
 		goto abort;
 
-	err = gve_alloc_tx_rings(priv);
+	priv->tx = malloc(sizeof(struct gve_tx_ring) * priv->tx_cfg.max_queues,
+	    M_GVE, M_WAITOK | M_ZERO);
+	err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues);
 	if (err != 0)
 		goto abort;
 
@@ -488,7 +732,7 @@ abort:
 }
 
 static void
-gve_deconfigure_resources(struct gve_priv *priv)
+gve_deconfigure_and_free_device_resources(struct gve_priv *priv)
 {
 	int err;
 
@@ -506,10 +750,15 @@ gve_deconfigure_resources(struct gve_priv *priv)
 
 	gve_free_irq_db_array(priv);
 	gve_free_counter_array(priv);
+
+	if (priv->ptype_lut_dqo) {
+		free(priv->ptype_lut_dqo, M_GVE);
+		priv->ptype_lut_dqo = NULL;
+	}
 }
 
 static int
-gve_configure_resources(struct gve_priv *priv)
+gve_alloc_and_configure_device_resources(struct gve_priv *priv)
 {
 	int err;
 
@@ -532,13 +781,25 @@
 		goto abort;
 	}
 
+	if (!gve_is_gqi(priv)) {
+		priv->ptype_lut_dqo = malloc(sizeof(*priv->ptype_lut_dqo), M_GVE,
+		    M_WAITOK | M_ZERO);
+
+		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
+		if (err != 0) {
+			device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
+			    err);
+			goto abort;
+		}
+	}
+
 	gve_set_state_flag(priv, GVE_STATE_FLAG_RESOURCES_OK);
 	if (bootverbose)
 		device_printf(priv->dev, "Configured device resources\n");
 	return (0);
 
 abort:
-	gve_deconfigure_resources(priv);
+	gve_deconfigure_and_free_device_resources(priv);
 	return (err);
 }
 
@@ -557,7 +818,7 @@ gve_set_queue_cnts(struct gve_priv *priv)
 		    priv->rx_cfg.num_queues);
 	}
 
-	priv->num_queues = priv->tx_cfg.num_queues + priv->rx_cfg.num_queues;
+	priv->num_queues = priv->tx_cfg.max_queues + priv->rx_cfg.max_queues;
 	priv->mgmt_msix_idx = priv->num_queues;
 }
 
@@ -603,7 +864,7 @@ static void
 gve_destroy(struct gve_priv *priv)
 {
 	gve_down(priv);
-	gve_deconfigure_resources(priv);
+	gve_deconfigure_and_free_device_resources(priv);
 
 	gve_release_adminq(priv);
 }
@@ -616,9 +877,21 @@ gve_restore(struct gve_priv *priv)
 	if (err != 0)
 		goto abort;
 
-	err = gve_configure_resources(priv);
-	if (err != 0)
+	err = gve_adminq_configure_device_resources(priv);
+	if (err != 0) {
+		device_printf(priv->dev, "Failed to configure device resources: err=%d\n",
+		    err);
+		err = (ENXIO);
 		goto abort;
+	}
+	if (!gve_is_gqi(priv)) {
+		err = gve_adminq_get_ptype_map_dqo(priv, priv->ptype_lut_dqo);
+		if (err != 0) {
+			device_printf(priv->dev, "Failed to configure ptype lut: err=%d\n",
+			    err);
+			goto abort;
+		}
+	}
 
 	err = gve_up(priv);
 	if (err != 0)
@@ -632,6 +905,25 @@ abort:
 }
 
 static void
+gve_clear_device_resources(struct gve_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->num_event_counters; i++)
+		priv->counters[i] = 0;
+	bus_dmamap_sync(priv->counter_array_mem.tag, priv->counter_array_mem.map,
+	    BUS_DMASYNC_PREWRITE);
+
+	for (i = 0; i < priv->num_queues; i++)
+		priv->irq_db_indices[i] = (struct gve_irq_db){};
+	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
+	    BUS_DMASYNC_PREWRITE);
+
+	if (priv->ptype_lut_dqo)
+		*priv->ptype_lut_dqo = (struct gve_ptype_lut){0};
+}
+
+static void
 gve_handle_reset(struct gve_priv *priv)
 {
 	if (!gve_get_state_flag(priv, GVE_STATE_FLAG_DO_RESET))
@@ -662,6 +954,8 @@ gve_handle_reset(struct gve_priv *priv)
 	gve_clear_state_flag(priv, GVE_STATE_FLAG_TX_RINGS_OK);
 
 	gve_down(priv);
+	gve_clear_device_resources(priv);
+
 	gve_restore(priv);
 
 	GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock);
@@ -749,6 +1043,9 @@ gve_attach(device_t dev)
 	int rid;
 	int err;
 
+	snprintf(gve_version, sizeof(gve_version), "%d.%d.%d",
+	    GVE_VERSION_MAJOR, GVE_VERSION_MINOR, GVE_VERSION_SUB);
+
 	priv = device_get_softc(dev);
 	priv->dev = dev;
 	GVE_IFACE_LOCK_INIT(priv->gve_iface_lock);
@@ -786,17 +1083,16 @@ gve_attach(device_t dev)
 	if (err != 0)
 		goto abort;
 
-	err = gve_configure_resources(priv);
+	err = gve_alloc_and_configure_device_resources(priv);
 	if (err != 0)
 		goto abort;
 
+	priv->rx_buf_size_dqo = gve_get_dqo_rx_buf_size(priv, priv->max_mtu);
 	err = gve_alloc_rings(priv);
 	if (err != 0)
 		goto abort;
 
-	err = gve_setup_ifnet(dev, priv);
-	if (err != 0)
-		goto abort;
+	gve_setup_ifnet(dev, priv);
 
 	priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
 
@@ -817,7 +1113,7 @@ gve_attach(device_t dev)
 
 abort:
 	gve_free_rings(priv);
-	gve_deconfigure_resources(priv);
+	gve_deconfigure_and_free_device_resources(priv);
 	gve_release_adminq(priv);
 	gve_free_sys_res_mem(priv);
 	GVE_IFACE_LOCK_DESTROY(priv->gve_iface_lock);
@@ -829,6 +1125,11 @@ gve_detach(device_t dev)
 {
 	struct gve_priv *priv = device_get_softc(dev);
 	if_t ifp = priv->ifp;
+	int error;
+
+	error = bus_generic_detach(dev);
+	if (error != 0)
+		return (error);
 
 	ether_ifdetach(ifp);
 
@@ -845,7 +1146,7 @@ gve_detach(device_t dev)
 	taskqueue_free(priv->service_tq);
 	if_free(ifp);
 
-	return (bus_generic_detach(dev));
+	return (0);
 }
 
 static device_method_t gve_methods[] = {