diff options
Diffstat (limited to 'sys')
28 files changed, 601 insertions, 174 deletions
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 2c3b8f0b4dd8..bf0719f501ef 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -460,9 +460,9 @@ trap(struct trapframe *frame) if ((print_efirt_faults == 1 && cnt == 0) || print_efirt_faults == 2) { - trap_diag(frame, 0); printf("EFI RT fault %s\n", traptype_to_msg(type)); + trap_diag(frame, 0); } frame->tf_rip = (long)curpcb->pcb_onfault; return; @@ -899,8 +899,8 @@ after_vmfault: if ((print_efirt_faults == 1 && cnt == 0) || print_efirt_faults == 2) { - trap_diag(frame, eva); printf("EFI RT page fault\n"); + trap_diag(frame, eva); } } frame->tf_rip = (long)curpcb->pcb_onfault; diff --git a/sys/arm/include/ieeefp.h b/sys/arm/include/ieeefp.h index 57dd058b8a95..57719b883d58 100644 --- a/sys/arm/include/ieeefp.h +++ b/sys/arm/include/ieeefp.h @@ -49,4 +49,14 @@ typedef enum { #define fp_except_t int +/* Augment the userland declarations. */ +__BEGIN_DECLS +extern fp_rnd_t fpgetround(void); +extern fp_rnd_t fpsetround(fp_rnd_t); +extern fp_except_t fpgetmask(void); +extern fp_except_t fpsetmask(fp_except_t); +extern fp_except_t fpgetsticky(void); +extern fp_except_t fpsetsticky(fp_except_t); +__END_DECLS + #endif /* _MACHINE_IEEEFP_H_ */ diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c index 5f35d01f9ac0..285be3d89cbb 100644 --- a/sys/arm64/arm64/elf32_machdep.c +++ b/sys/arm64/arm64/elf32_machdep.c @@ -208,7 +208,7 @@ freebsd32_fetch_syscall_args(struct thread *td) sa->code = *ap++; nap--; } else if (sa->code == SYS___syscall) { - sa->code = ap[1]; + sa->code = ap[_QUAD_LOWWORD]; nap -= 2; ap += 2; } diff --git a/sys/dev/e1000/e1000_osdep.h b/sys/dev/e1000/e1000_osdep.h index 893979025f01..ba1c8a16fad1 100644 --- a/sys/dev/e1000/e1000_osdep.h +++ b/sys/dev/e1000/e1000_osdep.h @@ -152,6 +152,9 @@ struct e1000_osdep { bus_space_tag_t mem_bus_space_tag; bus_space_handle_t mem_bus_space_handle; +#ifdef INVARIANTS + bus_size_t mem_bus_space_size; +#endif bus_space_tag_t io_bus_space_tag; bus_space_handle_t io_bus_space_handle; bus_space_tag_t flash_bus_space_tag; @@ -175,27 +178,44 @@ struct e1000_osdep bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, offset, value) +static __inline uint32_t +e1000_rd32(struct e1000_osdep *osdep, uint32_t reg) +{ + + KASSERT(reg < osdep->mem_bus_space_size, + ("e1000: register offset %#jx too large (max is %#jx)", + (uintmax_t)reg, (uintmax_t)osdep->mem_bus_space_size)); + + return (bus_space_read_4(osdep->mem_bus_space_tag, + osdep->mem_bus_space_handle, reg)); +} + + +static __inline void +e1000_wr32(struct e1000_osdep *osdep, uint32_t reg, uint32_t value) +{ + + KASSERT(reg < osdep->mem_bus_space_size, + ("e1000: register offset %#jx too large (max is %#jx)", + (uintmax_t)reg, (uintmax_t)osdep->mem_bus_space_size)); + + bus_space_write_4(osdep->mem_bus_space_tag, + osdep->mem_bus_space_handle, reg, value); +} + /* Register READ/WRITE macros */ -#define E1000_READ_REG(hw, reg) \ - bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ - ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ - E1000_REGISTER(hw, reg)) +#define E1000_READ_REG(hw, reg) \ + e1000_rd32((hw)->back, E1000_REGISTER(hw, reg)) #define E1000_WRITE_REG(hw, reg, value) \ - bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ - ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ - E1000_REGISTER(hw, reg), value) + e1000_wr32((hw)->back, E1000_REGISTER(hw, reg), value) #define E1000_READ_REG_ARRAY(hw, reg, index) \ - bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ - ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ - E1000_REGISTER(hw, reg) + ((index)<< 2)) + e1000_rd32((hw)->back, E1000_REGISTER(hw, reg) + ((index) << 2)) #define E1000_WRITE_REG_ARRAY(hw, reg, index, value) \ - bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \ - ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \ - E1000_REGISTER(hw, reg) + ((index)<< 2), value) + e1000_wr32((hw)->back, E1000_REGISTER(hw, reg) + ((index) << 2), value) #define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY #define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index f0ef6051fab1..9040949b36c7 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -1575,7 +1575,7 @@ em_if_init(if_ctx_t ctx) E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN); /* Clear bad data from Rx FIFOs */ - if (sc->hw.mac.type >= igb_mac_min) + if (sc->hw.mac.type >= igb_mac_min && !sc->vf_ifp) e1000_rx_fifo_flush_base(&sc->hw); /* Configure for OS presence */ @@ -1595,7 +1595,9 @@ em_if_init(if_ctx_t ctx) /* Don't lose promiscuous settings */ em_if_set_promisc(ctx, if_getflags(ifp)); - e1000_clear_hw_cntrs_base_generic(&sc->hw); + + if (sc->hw.mac.ops.clear_hw_cntrs != NULL) + sc->hw.mac.ops.clear_hw_cntrs(&sc->hw); /* MSI-X configuration for 82574 */ if (sc->hw.mac.type == e1000_82574) { @@ -2374,7 +2376,7 @@ em_if_stop(if_ctx_t ctx) em_flush_desc_rings(sc); e1000_reset_hw(&sc->hw); - if (sc->hw.mac.type >= e1000_82544) + if (sc->hw.mac.type >= e1000_82544 && !sc->vf_ifp) E1000_WRITE_REG(&sc->hw, E1000_WUFC, 0); e1000_led_off(&sc->hw); @@ -2433,6 +2435,9 @@ em_allocate_pci_resources(if_ctx_t ctx) } sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->memory); sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->memory); +#ifdef INVARIANTS + sc->osdep.mem_bus_space_size = rman_get_size(sc->memory); +#endif sc->hw.hw_addr = (u8 *)&sc->osdep.mem_bus_space_handle; /* Only older adapters use IO mapping */ @@ -3284,11 +3289,13 @@ em_reset(if_ctx_t ctx) /* Issue a global reset */ e1000_reset_hw(hw); - if (hw->mac.type >= igb_mac_min) { - E1000_WRITE_REG(hw, E1000_WUC, 0); - } else { - E1000_WRITE_REG(hw, E1000_WUFC, 0); - em_disable_aspm(sc); + if (!sc->vf_ifp) { + if (hw->mac.type >= igb_mac_min) { + E1000_WRITE_REG(hw, E1000_WUC, 0); + } else { + E1000_WRITE_REG(hw, E1000_WUFC, 0); + em_disable_aspm(sc); + } } if (sc->flags & IGB_MEDIA_RESET) { e1000_setup_init_funcs(hw, true); @@ -3838,7 +3845,7 @@ em_initialize_receive_unit(if_ctx_t ctx) sc->rx_int_delay.value); } - if (hw->mac.type >= em_mac_min) { + if (hw->mac.type >= em_mac_min && !sc->vf_ifp) { uint32_t rfctl; /* Use extended rx descriptor formats */ rfctl = E1000_READ_REG(hw, E1000_RFCTL); @@ -3858,33 +3865,38 @@ em_initialize_receive_unit(if_ctx_t ctx) E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); } - /* Set up L3 and L4 csum Rx descriptor offloads */ - rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); - if (if_getcapenable(ifp) & IFCAP_RXCSUM) { - rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; - if (hw->mac.type > e1000_82575) - rxcsum |= E1000_RXCSUM_CRCOFL; - else if (hw->mac.type < em_mac_min && - if_getcapenable(ifp) & IFCAP_HWCSUM_IPV6) - rxcsum |= E1000_RXCSUM_IPV6OFL; - } else { - rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); - if (hw->mac.type > e1000_82575) - rxcsum &= ~E1000_RXCSUM_CRCOFL; - else if (hw->mac.type < em_mac_min) - rxcsum &= ~E1000_RXCSUM_IPV6OFL; - } + /* + * Set up L3 and L4 csum Rx descriptor offloads only on Physical + * Functions. Virtual Functions have no access to this register. + */ + if (!sc->vf_ifp) { + rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); + if (if_getcapenable(ifp) & IFCAP_RXCSUM) { + rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL; + if (hw->mac.type > e1000_82575) + rxcsum |= E1000_RXCSUM_CRCOFL; + else if (hw->mac.type < em_mac_min && + if_getcapenable(ifp) & IFCAP_HWCSUM_IPV6) + rxcsum |= E1000_RXCSUM_IPV6OFL; + } else { + rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL); + if (hw->mac.type > e1000_82575) + rxcsum &= ~E1000_RXCSUM_CRCOFL; + else if (hw->mac.type < em_mac_min) + rxcsum &= ~E1000_RXCSUM_IPV6OFL; + } - if (sc->rx_num_queues > 1) { - /* RSS hash needed in the Rx descriptor */ - rxcsum |= E1000_RXCSUM_PCSD; + if (sc->rx_num_queues > 1) { + /* RSS hash needed in the Rx descriptor */ + rxcsum |= E1000_RXCSUM_PCSD; - if (hw->mac.type >= igb_mac_min) - igb_initialize_rss_mapping(sc); - else - em_initialize_rss_mapping(sc); + if (hw->mac.type >= igb_mac_min) + igb_initialize_rss_mapping(sc); + else + em_initialize_rss_mapping(sc); + } + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); } - E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); for (i = 0, que = sc->rx_queues; i < sc->rx_num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; @@ -4392,6 +4404,8 @@ em_get_wakeup(if_ctx_t ctx) switch (sc->hw.mac.type) { case e1000_82542: case e1000_82543: + case e1000_vfadapt: + case e1000_vfadapt_i350: break; case e1000_82544: e1000_read_nvm(&sc->hw, @@ -4437,8 +4451,6 @@ em_get_wakeup(if_ctx_t ctx) case e1000_i354: case e1000_i210: case e1000_i211: - case e1000_vfadapt: - case e1000_vfadapt_i350: apme_mask = E1000_WUC_APME; sc->has_amt = true; eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC); @@ -4494,7 +4506,6 @@ em_get_wakeup(if_ctx_t ctx) global_quad_port_a = 0; break; } - return; } diff --git a/sys/dev/ice/ice_common.c b/sys/dev/ice/ice_common.c index ad4ea4c8e7a1..b895f661bc46 100644 --- a/sys/dev/ice/ice_common.c +++ b/sys/dev/ice/ice_common.c @@ -213,6 +213,15 @@ int ice_set_mac_type(struct ice_hw *hw) case ICE_DEV_ID_E830_L_QSFP: case ICE_DEV_ID_E830C_SFP: case ICE_DEV_ID_E830_L_SFP: + case ICE_DEV_ID_E835CC_BACKPLANE: + case ICE_DEV_ID_E835CC_QSFP56: + case ICE_DEV_ID_E835CC_SFP: + case ICE_DEV_ID_E835C_BACKPLANE: + case ICE_DEV_ID_E835C_QSFP: + case ICE_DEV_ID_E835C_SFP: + case ICE_DEV_ID_E835_L_BACKPLANE: + case ICE_DEV_ID_E835_L_QSFP: + case ICE_DEV_ID_E835_L_SFP: hw->mac_type = ICE_MAC_E830; break; default: diff --git a/sys/dev/ice/ice_devids.h b/sys/dev/ice/ice_devids.h index 3f91e9dfbcaf..74712c61ae8e 100644 --- a/sys/dev/ice/ice_devids.h +++ b/sys/dev/ice/ice_devids.h @@ -62,6 +62,24 @@ #define ICE_DEV_ID_E830C_SFP 0x12DA /* Intel(R) Ethernet Controller E830-L for SFP */ #define ICE_DEV_ID_E830_L_SFP 0x12DE +/* Intel(R) Ethernet Controller E835-CC for backplane */ +#define ICE_DEV_ID_E835CC_BACKPLANE 0x1248 +/* Intel(R) Ethernet Controller E835-CC for QSFP */ +#define ICE_DEV_ID_E835CC_QSFP56 0x1249 +/* Intel(R) Ethernet Controller E835-CC for SFP */ +#define ICE_DEV_ID_E835CC_SFP 0x124A +/* Intel(R) Ethernet Controller E835-C for backplane */ +#define ICE_DEV_ID_E835C_BACKPLANE 0x1261 +/* Intel(R) Ethernet Controller E835-C for QSFP */ +#define ICE_DEV_ID_E835C_QSFP 0x1262 +/* Intel(R) Ethernet Controller E835-C for SFP */ +#define ICE_DEV_ID_E835C_SFP 0x1263 +/* Intel(R) Ethernet Controller E835-L for backplane */ +#define ICE_DEV_ID_E835_L_BACKPLANE 0x1265 +/* Intel(R) Ethernet Controller E835-L for QSFP */ +#define ICE_DEV_ID_E835_L_QSFP 0x1266 +/* Intel(R) Ethernet Controller E835-L for SFP */ +#define ICE_DEV_ID_E835_L_SFP 0x1267 /* Intel(R) Ethernet Controller E810-C for backplane */ #define ICE_DEV_ID_E810C_BACKPLANE 0x1591 /* Intel(R) Ethernet Controller E810-C for QSFP */ diff --git a/sys/dev/ice/ice_drv_info.h b/sys/dev/ice/ice_drv_info.h index 2a51a7394424..abb11bdb5fd9 100644 --- a/sys/dev/ice/ice_drv_info.h +++ b/sys/dev/ice/ice_drv_info.h @@ -218,6 +218,48 @@ static const pci_vendor_info_t ice_vendor_info_array[] = { "Intel(R) Ethernet Network Adapter E830-XXV-2"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_L_SFP, "Intel(R) Ethernet Connection E830-L for SFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_BACKPLANE, + "Intel(R) Ethernet Connection E835-CC for backplane"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + ICE_INTEL_VENDOR_ID, 0x0001, 0, + "Intel(R) Ethernet Network Adapter E835-C-Q2"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + ICE_INTEL_VENDOR_ID, 0x0002, 0, + "Intel(R) Ethernet Network Adapter E835-C-Q2 for OCP 3.0"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + ICE_INTEL_VENDOR_ID, 0x0003, 0, + "Intel(R) Ethernet Network Adapter E835-CC-Q1"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + ICE_INTEL_VENDOR_ID, 0x0004, 0, + "Intel(R) Ethernet Network Adapter E835-CC-Q1 for OCP 3.0"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56, + "Intel(R) Ethernet Connection E835-CC for QSFP56"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + ICE_INTEL_VENDOR_ID, 0x0001, 0, + "Intel(R) Ethernet Network Adapter E835-XXV-2 for OCP 3.0"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + ICE_INTEL_VENDOR_ID, 0x0002, 0, + "Intel(R) Ethernet Network Adapter E835-XXV-4"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + ICE_INTEL_VENDOR_ID, 0x0003, 0, + "Intel(R) Ethernet Network Adapter E835-XXV-2"), + PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + ICE_INTEL_VENDOR_ID, 0x0004, 0, + "Intel(R) Ethernet Network Adapter E835-XXV-4 for OCP 3.0"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP, + "Intel(R) Ethernet Connection E835-CC for SFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_BACKPLANE, + "Intel(R) Ethernet Connection E835-C for backplane"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_QSFP, + "Intel(R) Ethernet Connection E835-C for QSFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_SFP, + "Intel(R) Ethernet Connection E835-C for SFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_BACKPLANE, + "Intel(R) Ethernet Connection E835-L for backplane"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_QSFP, + "Intel(R) Ethernet Connection E835-L for QSFP"), + PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_SFP, + "Intel(R) Ethernet Connection E835-L for SFP"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_BACKPLANE, "Intel(R) Ethernet Connection E825-C for backplane"), PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_QSFP, diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c index 43c3af056b67..aa2e69ea0b5a 100644 --- a/sys/dev/ixl/if_ixl.c +++ b/sys/dev/ixl/if_ixl.c @@ -1480,17 +1480,33 @@ ixl_if_multi_set(if_ctx_t ctx) struct ixl_pf *pf = iflib_get_softc(ctx); struct ixl_vsi *vsi = &pf->vsi; struct i40e_hw *hw = vsi->hw; + enum i40e_status_code status; int mcnt; + if_t ifp = iflib_get_ifp(ctx); IOCTL_DEBUGOUT("ixl_if_multi_set: begin"); /* Delete filters for removed multicast addresses */ ixl_del_multi(vsi, false); - mcnt = min(if_llmaddr_count(iflib_get_ifp(ctx)), MAX_MULTICAST_ADDR); + mcnt = min(if_llmaddr_count(ifp), MAX_MULTICAST_ADDR); if (__predict_false(mcnt == MAX_MULTICAST_ADDR)) { - i40e_aq_set_vsi_multicast_promiscuous(hw, + /* Check if promisc mode is already enabled, if yes return */ + if (vsi->flags & IXL_FLAGS_MC_PROMISC) + return; + + status = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); + if (status != I40E_SUCCESS) + if_printf(ifp, "Failed to enable multicast promiscuous " + "mode, status: %s\n", i40e_stat_str(hw, status)); + else { + if_printf(ifp, "Enabled multicast promiscuous mode\n"); + + /* Set the flag to track promiscuous mode */ + vsi->flags |= IXL_FLAGS_MC_PROMISC; + } + /* Delete all existing MC filters */ ixl_del_multi(vsi, true); return; } @@ -1693,6 +1709,13 @@ ixl_if_promisc_set(if_ctx_t ctx, int flags) return (err); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); + + /* Update the multicast promiscuous flag based on the new state */ + if (multi) + vsi->flags |= IXL_FLAGS_MC_PROMISC; + else + vsi->flags &= ~IXL_FLAGS_MC_PROMISC; + return (err); } diff --git a/sys/dev/ixl/ixl.h b/sys/dev/ixl/ixl.h index f45354d29300..69925a131b35 100644 --- a/sys/dev/ixl/ixl.h +++ b/sys/dev/ixl/ixl.h @@ -202,6 +202,7 @@ #define IXL_FLAGS_KEEP_TSO6 (1 << 1) #define IXL_FLAGS_USES_MSIX (1 << 2) #define IXL_FLAGS_IS_VF (1 << 3) +#define IXL_FLAGS_MC_PROMISC (1 << 4) #define IXL_VSI_IS_PF(v) ((v->flags & IXL_FLAGS_IS_VF) == 0) #define IXL_VSI_IS_VF(v) ((v->flags & IXL_FLAGS_IS_VF) != 0) diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c index 4f384e7191af..99851af61cfe 100644 --- a/sys/dev/ixl/ixl_pf_main.c +++ b/sys/dev/ixl/ixl_pf_main.c @@ -592,24 +592,29 @@ ixl_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) * Routines for multicast and vlan filter management. * *********************************************************************/ + +/** + * ixl_add_multi - Add multicast filters to the hardware + * @vsi: The VSI structure + * + * In case number of multicast filters in the IFP exceeds 127 entries, + * multicast promiscuous mode will be enabled and the filters will be removed + * from the hardware + */ void ixl_add_multi(struct ixl_vsi *vsi) { if_t ifp = vsi->ifp; - struct i40e_hw *hw = vsi->hw; int mcnt = 0; struct ixl_add_maddr_arg cb_arg; IOCTL_DEBUGOUT("ixl_add_multi: begin"); - mcnt = if_llmaddr_count(ifp); - if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { - i40e_aq_set_vsi_multicast_promiscuous(hw, - vsi->seid, TRUE, NULL); - /* delete all existing MC filters */ - ixl_del_multi(vsi, true); - return; - } + /* + * There is no need to check if the number of multicast addresses + * exceeds the MAX_MULTICAST_ADDR threshold and set promiscuous mode + * here, as all callers already handle this case. + */ cb_arg.vsi = vsi; LIST_INIT(&cb_arg.to_add); @@ -632,30 +637,103 @@ ixl_match_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) return (0); } +/** + * ixl_dis_multi_promisc - Disable multicast promiscuous mode + * @vsi: The VSI structure + * @vsi_mcnt: Number of multicast filters in the VSI + * + * Disable multicast promiscuous mode based on number of entries in the IFP + * and the VSI, then re-add multicast filters. + * + */ +static void +ixl_dis_multi_promisc(struct ixl_vsi *vsi, int vsi_mcnt) +{ + struct ifnet *ifp = vsi->ifp; + struct i40e_hw *hw = vsi->hw; + int ifp_mcnt = 0; + enum i40e_status_code status; + + /* + * Check if multicast promiscuous mode was actually enabled. + * If promiscuous mode was not enabled, don't attempt to disable it. + * Also, don't disable if IFF_PROMISC or IFF_ALLMULTI is set. + */ + if (!(vsi->flags & IXL_FLAGS_MC_PROMISC) || + (if_getflags(ifp) & (IFF_PROMISC | IFF_ALLMULTI))) + return; + + ifp_mcnt = if_llmaddr_count(ifp); + /* + * Equal lists or empty ifp list mean the list has not been changed + * and in such case avoid disabling multicast promiscuous mode as it + * was not previously enabled. Case where multicast promiscuous mode has + * been enabled is when vsi_mcnt == 0 && ifp_mcnt > 0. + */ + if (ifp_mcnt == vsi_mcnt || ifp_mcnt == 0 || + ifp_mcnt >= MAX_MULTICAST_ADDR) + return; + + status = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, + FALSE, NULL); + if (status != I40E_SUCCESS) { + if_printf(ifp, "Failed to disable multicast promiscuous " + "mode, status: %s\n", i40e_stat_str(hw, status)); + + return; + } + + /* Clear the flag since promiscuous mode is now disabled */ + vsi->flags &= ~IXL_FLAGS_MC_PROMISC; + if_printf(ifp, "Disabled multicast promiscuous mode\n"); + + ixl_add_multi(vsi); +} + +/** + * ixl_del_multi - Delete multicast filters from the hardware + * @vsi: The VSI structure + * @all: Bool to determine if all the multicast filters should be removed + * + * In case number of multicast filters in the IFP drops to 127 entries, + * multicast promiscuous mode will be disabled and the filters will be reapplied + * to the hardware. + */ void ixl_del_multi(struct ixl_vsi *vsi, bool all) { - struct ixl_ftl_head to_del; + int to_del_cnt = 0, vsi_mcnt = 0; if_t ifp = vsi->ifp; struct ixl_mac_filter *f, *fn; - int mcnt = 0; + struct ixl_ftl_head to_del; IOCTL_DEBUGOUT("ixl_del_multi: begin"); LIST_INIT(&to_del); /* Search for removed multicast addresses */ LIST_FOREACH_SAFE(f, &vsi->ftl, ftle, fn) { - if ((f->flags & IXL_FILTER_MC) == 0 || - (!all && (if_foreach_llmaddr(ifp, ixl_match_maddr, f) == 0))) + if ((f->flags & IXL_FILTER_MC) == 0) + continue; + + /* Count all the multicast filters in the VSI for comparison */ + vsi_mcnt++; + + if (!all && if_foreach_llmaddr(ifp, ixl_match_maddr, f) != 0) continue; LIST_REMOVE(f, ftle); LIST_INSERT_HEAD(&to_del, f, ftle); - mcnt++; + to_del_cnt++; } - if (mcnt > 0) - ixl_del_hw_filters(vsi, &to_del, mcnt); + if (to_del_cnt > 0) { + ixl_del_hw_filters(vsi, &to_del, to_del_cnt); + return; + } + + ixl_dis_multi_promisc(vsi, vsi_mcnt); + + IOCTL_DEBUGOUT("ixl_del_multi: end"); } void diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c index 0f850cede292..75a1398ad6aa 100644 --- a/sys/fs/deadfs/dead_vnops.c +++ b/sys/fs/deadfs/dead_vnops.c @@ -124,18 +124,18 @@ dead_read(struct vop_read_args *ap) { /* - * Return EOF for tty devices, EIO for others + * Return EOF for tty devices, ENXIO for others */ - if ((ap->a_vp->v_vflag & VV_ISTTY) == 0) - return (EIO); - return (0); + if (ap->a_vp->v_vflag & VV_ISTTY) + return (0); + return (ENXIO); } int dead_write(struct vop_write_args *ap) { - return (EIO); + return (ENXIO); } int diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index bde9be0885e0..6a9a73b40fb0 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -4033,10 +4033,15 @@ nfssvc_idname(struct nfsd_idargs *nidp) nidp->nid_namelen); if (error == 0 && nidp->nid_ngroup > 0 && (nidp->nid_flag & NFSID_ADDUID) != 0) { - grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP, - M_WAITOK); - error = copyin(nidp->nid_grps, grps, - sizeof(gid_t) * nidp->nid_ngroup); + grps = NULL; + if (nidp->nid_ngroup > NGROUPS_MAX) + error = EINVAL; + if (error == 0) { + grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP, + M_WAITOK); + error = copyin(nidp->nid_grps, grps, + sizeof(gid_t) * nidp->nid_ngroup); + } if (error == 0) { /* * Create a credential just like svc_getcred(), diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 3c580b90e6b9..387c5465618a 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -2208,7 +2208,7 @@ nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode, NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); - if (rlen == 0) { + if (rlen <= 0 || rlen > len) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { @@ -5157,7 +5157,7 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; u_char *cp, *cp2, *fhp; - int error, cnt, len, setnil; + int error, cnt, i, len, setnil; u_int32_t *opcntp; nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0, @@ -5198,8 +5198,12 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred, if (error) return (error); if (nd->nd_repstat == 0) { - NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED); - tl += (2 + 2 * cnt); + NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED); + tl += 2; + for (i = 0; i < cnt; i++) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); + tl++; + } if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) { nd->nd_repstat = NFSERR_BADXDR; @@ -5470,7 +5474,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, } *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ - *tl++ = txdr_unsigned(64); /* Max slots */ + *tl++ = txdr_unsigned(NFSV4_SLOTS); /* Max slots */ *tl = 0; /* No rdma ird */ /* Fill in back channel attributes. */ @@ -5539,6 +5543,11 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); + if (sep->nfsess_foreslots == 0) { + error = NFSERR_BADXDR; + goto nfsmout; + } else if (sep->nfsess_foreslots > NFSV4_SLOTS) + sep->nfsess_foreslots = NFSV4_SLOTS; NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots); irdcnt = fxdr_unsigned(int, *tl); if (irdcnt < 0 || irdcnt > 1) { @@ -5552,6 +5561,8 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep, NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); tl += 5; sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl); + if (sep->nfsess_backslots > NFSV4_CBSLOTS) + sep->nfsess_backslots = NFSV4_CBSLOTS; NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots); } error = nd->nd_repstat; @@ -5671,7 +5682,8 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype, NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); stripecnt = fxdr_unsigned(int, *tl); NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt); - if (stripecnt < 1 || stripecnt > 4096) { + if (stripecnt >= MHLEN / NFSX_UNSIGNED || + stripecnt < 1) { printf("pNFS File layout devinfo stripecnt %d:" " out of range\n", stripecnt); error = NFSERR_BADXDR; @@ -7120,7 +7132,7 @@ nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit, NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF); rlen = fxdr_unsigned(int, *tl++); NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen); - if (rlen == 0) { + if (rlen <= 0 || rlen > len) { error = NFSERR_IO; goto nfsmout; } else if (rlen < len) { @@ -8117,7 +8129,7 @@ nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp, NFSPROC_T *p) { uint32_t *tl; - char *cp, *str, str0[NFSV4_SMALLSTR + 1]; + char *str, str0[NFSV4_SMALLSTR + 1]; uint32_t len = 0; int error = 0; @@ -8140,9 +8152,9 @@ nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp, str = malloc(len + 1, M_TEMP, M_WAITOK); else str = str0; - NFSM_DISSECT(cp, char *, NFSM_RNDUP(len)); - NFSBCOPY(cp, str, len); - str[len] = '\0'; + error = nfsrv_mtostr(nd, str, len); + if (error != 0) + goto nfsmout; NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str); if (dogrp != 0) error = nfsv4_strtogid(nd, str, len, gidp); diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index 88ae643d193e..6047e6f2970e 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -4950,6 +4950,11 @@ nfsrvd_layoutcommit(struct nfsrv_descript *nd, __unused int isdgram, NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); layouttype = fxdr_unsigned(int, *tl++); maxcnt = fxdr_unsigned(int, *tl); + /* There is no limit in the RFC, so use 1000 as a sanity limit. */ + if (maxcnt < 0 || maxcnt > 1000) { + error = NFSERR_BADXDR; + goto nfsmout; + } if (maxcnt > 0) { layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK); error = nfsrv_mtostr(nd, layp, maxcnt); diff --git a/sys/geom/zero/g_zero.c b/sys/geom/zero/g_zero.c index 66cc884fab56..be31cc794cb5 100644 --- a/sys/geom/zero/g_zero.c +++ b/sys/geom/zero/g_zero.c @@ -47,11 +47,11 @@ static SYSCTL_NODE(_kern_geom, OID_AUTO, zero, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "GEOM_ZERO stuff"); static int g_zero_clear = 1; SYSCTL_PROC(_kern_geom_zero, OID_AUTO, clear, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &g_zero_clear, 0, + CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &g_zero_clear, 0, g_zero_clear_sysctl, "I", "Clear read data buffer"); static int g_zero_byte = 0; -SYSCTL_INT(_kern_geom_zero, OID_AUTO, byte, CTLFLAG_RW, &g_zero_byte, 0, +SYSCTL_INT(_kern_geom_zero, OID_AUTO, byte, CTLFLAG_RWTUN, &g_zero_byte, 0, "Byte (octet) value to clear the buffers with"); static struct g_provider *gpp; diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c index 86f392485a4b..13ab25ce2c06 100644 --- a/sys/kern/subr_kdb.c +++ b/sys/kern/subr_kdb.c @@ -330,7 +330,7 @@ kdb_reboot(void) #define KEY_CRTLP 16 /* ^P */ #define KEY_CRTLR 18 /* ^R */ -/* States of th KDB "alternate break sequence" detecting state machine. */ +/* States of the KDB "alternate break sequence" detecting state machine. */ enum { KDB_ALT_BREAK_SEEN_NONE, KDB_ALT_BREAK_SEEN_CR, diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 9c5c5b9dfa80..99f9e129f4cd 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1074,7 +1074,7 @@ flags_to_rights(int flags, cap_rights_t *rightsp) if (flags & O_TRUNC) cap_rights_set_one(rightsp, CAP_FTRUNCATE); - if (flags & (O_SYNC | O_FSYNC)) + if (flags & (O_SYNC | O_FSYNC | O_DSYNC)) cap_rights_set_one(rightsp, CAP_FSYNC); if (flags & (O_EXLOCK | O_SHLOCK)) diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c index e534fdd77635..129dc1cfe892 100644 --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -110,6 +110,8 @@ SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "ICMP"); SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "UDP"); +SYSCTL_NODE(_net_inet, IPPROTO_UDPLITE, udplite, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "UDP-Lite"); SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP"); #if defined(SCTP) || defined(SCTP_SUPPORT) diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 6829d2a743f2..81b378f496c9 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -654,7 +654,7 @@ tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp, } } m->m_pkthdr.tcp_tun_port = port = uh->uh_sport; - bcopy(th, uh, m->m_len - off); + bcopy(th, uh, m->m_len - off - sizeof(struct udphdr)); m->m_len -= sizeof(struct udphdr); m->m_pkthdr.len -= sizeof(struct udphdr); /* diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 077c3a5f6ef3..713f6a35ad45 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -1175,7 +1175,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, /* * If listening socket requested TCP digests, check that * received ACK has signature and it is correct. - * If not, drop the ACK and leave sc entry in th cache, + * If not, drop the ACK and leave sc entry in the cache, * because SYN was received with correct signature. */ if (sc->sc_flags & SCF_SIGNATURE) { @@ -1387,6 +1387,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct tcpcb *tp; struct socket *rv = NULL; struct syncache *sc = NULL; + struct ucred *cred; struct syncache_head *sch; struct mbuf *ipopts = NULL; u_int ltflags; @@ -1415,6 +1416,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, */ KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so)); tp = sototcpcb(so); + cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred); #ifdef INET6 if (inc->inc_flags & INC_ISIPV6) { @@ -1643,16 +1645,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, /* * sc_cred is only used in syncache_pcblist() to list TCP endpoints in * TCPS_SYN_RECEIVED state when V_tcp_syncache.see_other is false. - * Therefore, store the credentials and take a reference count only - * when needed: + * Therefore, store the credentials only when needed: * - sc is allocated from the zone and not using the on stack instance. * - the sysctl variable net.inet.tcp.syncache.see_other is false. * The reference count is decremented when a zone allocated sc is * freed in syncache_free(). */ - if (sc != &scs && !V_tcp_syncache.see_other) - sc->sc_cred = crhold(so->so_cred); - else + if (sc != &scs && !V_tcp_syncache.see_other) { + sc->sc_cred = cred; + cred = NULL; + } else sc->sc_cred = NULL; sc->sc_port = port; sc->sc_ipopts = ipopts; @@ -1790,6 +1792,8 @@ donenoprobe: tcp_fastopen_decrement_counter(tfo_pending); tfo_expanded: + if (cred != NULL) + crfree(cred); if (sc == NULL || sc == &scs) { #ifdef MAC mac_syncache_destroy(&maclabel); diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 65a9fbc84ff7..1c687e94bb4a 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -515,9 +515,12 @@ tcp_timer_persist(struct tcpcb *tp) if (progdrop || (tp->t_rxtshift >= V_tcp_retries && (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) { - if (!progdrop) + if (progdrop) { + tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS); + } else { TCPSTAT_INC(tcps_persistdrop); - tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); + tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); + } goto dropit; } /* diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 88df8f8d38fc..a1000dadf583 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -852,6 +852,11 @@ SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist, udp_pcblist, "S,xinpcb", "List of active UDP sockets"); +SYSCTL_PROC(_net_inet_udplite, OID_AUTO, pcblist, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDPLITE, + udp_pcblist, "S,xinpcb", + "List of active UDP-Lite sockets"); + #ifdef INET static int udp_getcred(SYSCTL_HANDLER_ARGS) @@ -1141,7 +1146,19 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, else INP_RLOCK(inp); NET_EPOCH_ENTER(et); +#ifdef INET6 + if ((flags & PRUS_IPV6) != 0) { + if ((inp->in6p_outputopts != NULL) && + (inp->in6p_outputopts->ip6po_tclass != -1)) + tos = (u_char)inp->in6p_outputopts->ip6po_tclass; + else + tos = 0; + } else { + tos = inp->inp_ip_tos; + } +#else tos = inp->inp_ip_tos; +#endif if (control != NULL) { /* * XXX: Currently, we assume all the optional information is @@ -1165,6 +1182,23 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, error = udp_v4mapped_pktinfo(cm, &src, inp, flags); if (error != 0) break; + if (((flags & PRUS_IPV6) != 0) && + (cm->cmsg_level == IPPROTO_IPV6) && + (cm->cmsg_type == IPV6_TCLASS)) { + int tclass; + + if (cm->cmsg_len != CMSG_LEN(sizeof(int))) { + error = EINVAL; + break; + } + tclass = *(int *)CMSG_DATA(cm); + if (tclass < -1 || tclass > 255) { + error = EINVAL; + break; + } + if (tclass != -1) + tos = (u_char)tclass; + } #endif if (cm->cmsg_level != IPPROTO_IP) continue; diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index 51272e7c9349..569b7b8d5240 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -141,6 +141,7 @@ void kmod_udpstat_inc(int statnum); kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(uint64_t)) SYSCTL_DECL(_net_inet_udp); +SYSCTL_DECL(_net_inet_udplite); VNET_DECLARE(struct inpcbinfo, udbinfo); VNET_DECLARE(struct inpcbinfo, ulitecbinfo); diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.c b/sys/netpfil/ipfilter/netinet/ip_htable.c index 1907aa0f9f5f..be5b71d9f6d4 100644 --- a/sys/netpfil/ipfilter/netinet/ip_htable.c +++ b/sys/netpfil/ipfilter/netinet/ip_htable.c @@ -233,6 +233,8 @@ ipf_htable_stats_get(ipf_main_softc_t *softc, void *arg, iplookupop_t *op) return (EINVAL); } + bzero(&stats, sizeof(stats)); + stats.iphs_tables = softh->ipf_htables[op->iplo_unit + 1]; stats.iphs_numtables = softh->ipf_nhtables[op->iplo_unit + 1]; stats.iphs_numnodes = softh->ipf_nhtnodes[op->iplo_unit + 1]; diff --git a/sys/netpfil/ipfilter/netinet/ip_nat.c b/sys/netpfil/ipfilter/netinet/ip_nat.c index 290af20e4765..3f8f3c2a342c 100644 --- a/sys/netpfil/ipfilter/netinet/ip_nat.c +++ b/sys/netpfil/ipfilter/netinet/ip_nat.c @@ -1775,6 +1775,7 @@ ipf_nat_getent(ipf_main_softc_t *softc, caddr_t data, int getlock) IPFERROR(60029); return (ENOMEM); } + bzero(ipn, ipns.ipn_dsize); if (getlock) { READ_ENTER(&softc->ipf_nat); diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 6a913883cc5c..bf05855439bc 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -371,9 +371,12 @@ lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value) case APIC_LVT_DM_SMI: case APIC_LVT_DM_INIT: case APIC_LVT_DM_EXTINT: - if (!lvt->lvt_edgetrigger && bootverbose) { - printf("lapic%u: Forcing LINT%u to edge trigger\n", - la->la_id, pin); + if (!lvt->lvt_edgetrigger) { + if (bootverbose) { + printf( + "lapic%u: Forcing LINT%u to edge trigger\n", + la->la_id, pin); + } value &= ~APIC_LVT_TM; } /* Use a vector of 0. */ diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c index e43c88b3a27b..735efe307215 100644 --- a/sys/x86/x86/mca.c +++ b/sys/x86/x86/mca.c @@ -46,9 +46,11 @@ #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/sbuf.h> #include <sys/sched.h> #include <sys/smp.h> #include <sys/sysctl.h> +#include <sys/syslog.h> #include <sys/systm.h> #include <sys/taskqueue.h> #include <machine/intr_machdep.h> @@ -124,6 +126,22 @@ SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTUN, &workaround_erratum383, 0, "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?"); +#ifdef DIAGNOSTIC +static uint64_t fake_status; +SYSCTL_U64(_hw_mca, OID_AUTO, fake_status, CTLFLAG_RW, + &fake_status, 0, + "Insert artificial MCA with given status (testing purpose only)"); +static int fake_bank; +SYSCTL_INT(_hw_mca, OID_AUTO, fake_bank, CTLFLAG_RW, + &fake_bank, 0, + "Bank to use for artificial MCAs (testing purpose only)"); +#endif + +static bool mca_uselog = false; +SYSCTL_BOOL(_hw_mca, OID_AUTO, uselog, CTLFLAG_RWTUN, &mca_uselog, 0, + "Should the system send non-fatal machine check errors to the log " + "(instead of the console)?"); + static STAILQ_HEAD(, mca_internal) mca_freelist; static int mca_freecount; static STAILQ_HEAD(, mca_internal) mca_records; @@ -136,12 +154,40 @@ static struct timeout_task mca_scan_task; static struct mtx mca_lock; static bool mca_startup_done = false; -/* Statistics on number of MCA events by type, updated atomically. */ +/* Static buffer to compose messages while in an interrupt context. */ +static char mca_msg_buf[1024]; +static struct mtx mca_msg_buf_lock; + +/* Statistics on number of MCA events by type, updated with the mca_lock. */ static uint64_t mca_stats[MCA_T_COUNT]; SYSCTL_OPAQUE(_hw_mca, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_SKIP, mca_stats, MCA_T_COUNT * sizeof(mca_stats[0]), "S", "Array of MCA events by type"); +/* Variables to track and control message rate limiting. */ +static struct timeval mca_last_log_time; +static struct timeval mca_log_interval; +static int mca_log_skipped; + +static int +sysctl_mca_log_interval(SYSCTL_HANDLER_ARGS) +{ + int error; + u_int val; + + val = mca_log_interval.tv_sec; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + mca_log_interval.tv_sec = val; + return (0); +} +SYSCTL_PROC(_hw_mca, OID_AUTO, log_interval, + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &mca_log_interval, 0, + sysctl_mca_log_interval, "IU", + "Minimum number of seconds between logging correctable MCAs" + " (0 = no limit)"); + static unsigned int mca_ia32_ctl_reg(int bank) { @@ -437,98 +483,111 @@ mca_mute(const struct mca_record *rec) /* Dump details about a single machine check. */ static void -mca_log(const struct mca_record *rec) +mca_log(enum scan_mode mode, const struct mca_record *rec, bool fatal) { + int error, numskipped; uint16_t mca_error; enum mca_stat_types event_type; + struct sbuf sb; + bool uncor, using_shared_buf; if (mca_mute(rec)) return; - if (!log_corrected && (rec->mr_status & MC_STATUS_UC) == 0 && - (!tes_supported(rec->mr_mcg_cap) || + uncor = (rec->mr_status & MC_STATUS_UC) != 0; + + if (!log_corrected && !uncor && (!tes_supported(rec->mr_mcg_cap) || ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) != 0x2)) return; - printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, + /* Try to use an allocated buffer when not in an interrupt context. */ + if (mode == POLLED && sbuf_new(&sb, NULL, 512, SBUF_AUTOEXTEND) != NULL) + using_shared_buf = false; + else { + using_shared_buf = true; + mtx_lock_spin(&mca_msg_buf_lock); + sbuf_new(&sb, mca_msg_buf, sizeof(mca_msg_buf), SBUF_FIXEDLEN); + } + + sbuf_printf(&sb, "MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, (long long)rec->mr_status); - printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", + sbuf_printf(&sb, "MCA: Global Cap 0x%016llx, Status 0x%016llx\n", (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); - printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, - rec->mr_cpu_id, rec->mr_apic_id); - printf("MCA: CPU %d ", rec->mr_cpu); + sbuf_printf(&sb, "MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", + cpu_vendor, rec->mr_cpu_id, rec->mr_apic_id); + sbuf_printf(&sb, "MCA: CPU %d ", rec->mr_cpu); if (rec->mr_status & MC_STATUS_UC) - printf("UNCOR "); + sbuf_printf(&sb, "UNCOR "); else { - printf("COR "); + sbuf_printf(&sb, "COR "); if (cmci_supported(rec->mr_mcg_cap)) - printf("(%lld) ", ((long long)rec->mr_status & + sbuf_printf(&sb, "(%lld) ", ((long long)rec->mr_status & MC_STATUS_COR_COUNT) >> 38); if (tes_supported(rec->mr_mcg_cap)) { switch ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) { case 0x1: - printf("(Green) "); + sbuf_printf(&sb, "(Green) "); break; case 0x2: - printf("(Yellow) "); + sbuf_printf(&sb, "(Yellow) "); break; } } } if (rec->mr_status & MC_STATUS_EN) - printf("EN "); + sbuf_printf(&sb, "EN "); if (rec->mr_status & MC_STATUS_PCC) - printf("PCC "); + sbuf_printf(&sb, "PCC "); if (ser_supported(rec->mr_mcg_cap)) { if (rec->mr_status & MC_STATUS_S) - printf("S "); + sbuf_printf(&sb, "S "); if (rec->mr_status & MC_STATUS_AR) - printf("AR "); + sbuf_printf(&sb, "AR "); } if (rec->mr_status & MC_STATUS_OVER) - printf("OVER "); + sbuf_printf(&sb, "OVER "); mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; event_type = MCA_T_COUNT; switch (mca_error) { /* Simple error codes. */ case 0x0000: - printf("no error"); + sbuf_printf(&sb, "no error"); event_type = MCA_T_NONE; break; case 0x0001: - printf("unclassified error"); + sbuf_printf(&sb, "unclassified error"); event_type = MCA_T_UNCLASSIFIED; break; case 0x0002: - printf("ucode ROM parity error"); + sbuf_printf(&sb, "ucode ROM parity error"); event_type = MCA_T_UCODE_ROM_PARITY; break; case 0x0003: - printf("external error"); + sbuf_printf(&sb, "external error"); event_type = MCA_T_EXTERNAL; break; case 0x0004: - printf("FRC error"); + sbuf_printf(&sb, "FRC error"); event_type = MCA_T_FRC; break; case 0x0005: - printf("internal parity error"); + sbuf_printf(&sb, "internal parity error"); event_type = MCA_T_INTERNAL_PARITY; break; case 0x0006: - printf("SMM handler code access violation"); + sbuf_printf(&sb, "SMM handler code access violation"); event_type = MCA_T_SMM_HANDLER; break; case 0x0400: - printf("internal timer error"); + sbuf_printf(&sb, "internal timer error"); event_type = MCA_T_INTERNAL_TIMER; break; case 0x0e0b: - printf("generic I/O error"); + sbuf_printf(&sb, "generic I/O error"); event_type = MCA_T_GENERIC_IO; if (rec->mr_cpu_vendor_id == CPU_VENDOR_INTEL && (rec->mr_status & MC_STATUS_MISCV)) { - printf(" (pci%d:%d:%d:%d)", + sbuf_printf(&sb, " (pci%d:%d:%d:%d)", (int)((rec->mr_misc & MC_MISC_PCIE_SEG) >> 32), (int)((rec->mr_misc & MC_MISC_PCIE_BUS) >> 24), (int)((rec->mr_misc & MC_MISC_PCIE_SLOT) >> 19), @@ -537,7 +596,8 @@ mca_log(const struct mca_record *rec) break; default: if ((mca_error & 0xfc00) == 0x0400) { - printf("internal error %x", mca_error & 0x03ff); + sbuf_printf(&sb, "internal error %x", + mca_error & 0x03ff); event_type = MCA_T_INTERNAL; break; } @@ -546,14 +606,16 @@ mca_log(const struct mca_record *rec) /* Memory hierarchy error. */ if ((mca_error & 0xeffc) == 0x000c) { - printf("%s memory error", mca_error_level(mca_error)); + sbuf_printf(&sb, "%s memory error", + mca_error_level(mca_error)); event_type = MCA_T_MEMORY; break; } /* TLB error. */ if ((mca_error & 0xeff0) == 0x0010) { - printf("%sTLB %s error", mca_error_ttype(mca_error), + sbuf_printf(&sb, "%sTLB %s error", + mca_error_ttype(mca_error), mca_error_level(mca_error)); event_type = MCA_T_TLB; break; @@ -561,19 +623,19 @@ mca_log(const struct mca_record *rec) /* Memory controller error. */ if ((mca_error & 0xef80) == 0x0080) { - printf("%s channel ", mca_error_mmtype(mca_error, - &event_type)); + sbuf_printf(&sb, "%s channel ", + mca_error_mmtype(mca_error, &event_type)); if ((mca_error & 0x000f) != 0x000f) - printf("%d", mca_error & 0x000f); + sbuf_printf(&sb, "%d", mca_error & 0x000f); else - printf("??"); - printf(" memory error"); + sbuf_printf(&sb, "??"); + sbuf_printf(&sb, " memory error"); break; } /* Cache error. */ if ((mca_error & 0xef00) == 0x0100) { - printf("%sCACHE %s %s error", + sbuf_printf(&sb, "%sCACHE %s %s error", mca_error_ttype(mca_error), mca_error_level(mca_error), mca_error_request(mca_error)); @@ -583,77 +645,129 @@ mca_log(const struct mca_record *rec) /* Extended memory error. */ if ((mca_error & 0xef80) == 0x0280) { - printf("%s channel ", mca_error_mmtype(mca_error, - &event_type)); + sbuf_printf(&sb, "%s channel ", + mca_error_mmtype(mca_error, &event_type)); if ((mca_error & 0x000f) != 0x000f) - printf("%d", mca_error & 0x000f); + sbuf_printf(&sb, "%d", mca_error & 0x000f); else - printf("??"); - printf(" extended memory error"); + sbuf_printf(&sb, "??"); + sbuf_printf(&sb, " extended memory error"); break; } /* Bus and/or Interconnect error. */ if ((mca_error & 0xe800) == 0x0800) { - printf("BUS%s ", mca_error_level(mca_error)); + sbuf_printf(&sb, "BUS%s ", mca_error_level(mca_error)); event_type = MCA_T_BUS; switch ((mca_error & 0x0600) >> 9) { case 0: - printf("Source"); + sbuf_printf(&sb, "Source"); break; case 1: - printf("Responder"); + sbuf_printf(&sb, "Responder"); break; case 2: - printf("Observer"); + sbuf_printf(&sb, "Observer"); break; default: - printf("???"); + sbuf_printf(&sb, "???"); break; } - printf(" %s ", mca_error_request(mca_error)); + sbuf_printf(&sb, " %s ", mca_error_request(mca_error)); switch ((mca_error & 0x000c) >> 2) { case 0: - printf("Memory"); + sbuf_printf(&sb, "Memory"); break; case 2: - printf("I/O"); + sbuf_printf(&sb, "I/O"); break; case 3: - printf("Other"); + sbuf_printf(&sb, "Other"); break; default: - printf("???"); + sbuf_printf(&sb, "???"); break; } if (mca_error & 0x0100) - printf(" timed out"); + sbuf_printf(&sb, " timed out"); break; } - printf("unknown error %x", mca_error); + sbuf_printf(&sb, "unknown error %x", mca_error); event_type = MCA_T_UNKNOWN; break; } - printf("\n"); + sbuf_printf(&sb, "\n"); if (rec->mr_status & MC_STATUS_ADDRV) { - printf("MCA: Address 0x%llx", (long long)rec->mr_addr); + sbuf_printf(&sb, "MCA: Address 0x%llx", + (long long)rec->mr_addr); if (ser_supported(rec->mr_mcg_cap) && (rec->mr_status & MC_STATUS_MISCV)) { - printf(" (Mode: %s, LSB: %d)", + sbuf_printf(&sb, " (Mode: %s, LSB: %d)", mca_addres_mode(rec->mr_misc), (int)(rec->mr_misc & MC_MISC_RA_LSB)); } - printf("\n"); + sbuf_printf(&sb, "\n"); } if (rec->mr_status & MC_STATUS_MISCV) - printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc); + sbuf_printf(&sb, "MCA: Misc 0x%llx\n", (long long)rec->mr_misc); + if (event_type < 0 || event_type >= MCA_T_COUNT) { KASSERT(0, ("%s: invalid event type (%d)", __func__, event_type)); event_type = MCA_T_UNKNOWN; } - atomic_add_64(&mca_stats[event_type], 1); + numskipped = 0; + if (!fatal && !uncor) { + /* + * Update statistics and check the rate limit for + * correctable errors. The rate limit is only applied + * after the system records a reasonable number of errors + * of the same type. The goal is to reduce the impact of + * the system seeing and attempting to log a burst of + * similar errors, which (especially when printed to the + * console) can be expensive. + */ + mtx_lock_spin(&mca_lock); + mca_stats[event_type]++; + if (mca_log_interval.tv_sec > 0 && mca_stats[event_type] > 50 && + ratecheck(&mca_last_log_time, &mca_log_interval) == 0) { + mca_log_skipped++; + mtx_unlock_spin(&mca_lock); + goto done; + } + numskipped = mca_log_skipped; + mca_log_skipped = 0; + mtx_unlock_spin(&mca_lock); + } + + error = sbuf_finish(&sb); + if (fatal || !mca_uselog) { + if (numskipped > 0) + printf("MCA: %d events skipped due to rate limit\n", + numskipped); + if (error) + printf("MCA: error logging message (sbuf error %d)\n", + error); + else + sbuf_putbuf(&sb); + } else { + if (numskipped > 0) + log(LOG_ERR, + "MCA: %d events skipped due to rate limit\n", + numskipped); + if (error) + log(LOG_ERR, + "MCA: error logging message (sbuf error %d)\n", + error); + else + log(uncor ? LOG_CRIT : LOG_ERR, "%s", sbuf_data(&sb)); + } + +done: + sbuf_delete(&sb); + if (using_shared_buf) + mtx_unlock_spin(&mca_msg_buf_lock); } static bool @@ -701,8 +815,24 @@ mca_check_status(enum scan_mode mode, uint64_t mcg_cap, int bank, bool mce, recover; status = rdmsr(mca_msr_ops.status(bank)); - if (!(status & MC_STATUS_VAL)) + if (!(status & MC_STATUS_VAL)) { +#ifdef DIAGNOSTIC + /* + * Check if we have a pending artificial event to generate. + * Note that this is potentially racy with the sysctl. The + * tradeoff is deemed acceptable given the test nature + * of the code. + */ + if (fake_status && bank == fake_bank) { + status = fake_status; + fake_status = 0; + } + if (!(status & MC_STATUS_VAL)) + return (0); +#else return (0); +#endif + } recover = *recoverablep; mce = mca_is_mce(mcg_cap, status, &recover); @@ -796,9 +926,9 @@ mca_record_entry(enum scan_mode mode, const struct mca_record *record) mtx_lock_spin(&mca_lock); rec = STAILQ_FIRST(&mca_freelist); if (rec == NULL) { - printf("MCA: Unable to allocate space for an event.\n"); - mca_log(record); mtx_unlock_spin(&mca_lock); + printf("MCA: Unable to allocate space for an event.\n"); + mca_log(mode, record, false); return; } STAILQ_REMOVE_HEAD(&mca_freelist, link); @@ -955,7 +1085,7 @@ mca_scan(enum scan_mode mode, bool *recoverablep) if (*recoverablep) mca_record_entry(mode, &rec); else - mca_log(&rec); + mca_log(mode, &rec, true); } #ifdef DEV_APIC @@ -1017,6 +1147,7 @@ static void mca_process_records(enum scan_mode mode) { struct mca_internal *mca; + STAILQ_HEAD(, mca_internal) tmplist; /* * If in an interrupt context, defer the post-scan activities to a @@ -1028,10 +1159,21 @@ mca_process_records(enum scan_mode mode) return; } + /* + * Copy the pending list to the stack so we can drop the spin lock + * while we are emitting logs. + */ + STAILQ_INIT(&tmplist); + mtx_lock_spin(&mca_lock); + STAILQ_SWAP(&mca_pending, &tmplist, mca_internal); + mtx_unlock_spin(&mca_lock); + + STAILQ_FOREACH(mca, &tmplist, link) + mca_log(mode, &mca->rec, false); + mtx_lock_spin(&mca_lock); - while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) { - STAILQ_REMOVE_HEAD(&mca_pending, link); - mca_log(&mca->rec); + while ((mca = STAILQ_FIRST(&tmplist)) != NULL) { + STAILQ_REMOVE_HEAD(&tmplist, link); mca_store_record(mca); } mtx_unlock_spin(&mca_lock); @@ -1192,6 +1334,7 @@ mca_setup(uint64_t mcg_cap) mca_banks = mcg_cap & MCG_CAP_COUNT; mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); + mtx_init(&mca_msg_buf_lock, "mca_msg_buf", NULL, MTX_SPIN); STAILQ_INIT(&mca_records); STAILQ_INIT(&mca_pending); mca_tq = taskqueue_create_fast("mca", M_WAITOK, |
