aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/trap.c4
-rw-r--r--sys/arm/include/ieeefp.h10
-rw-r--r--sys/arm64/arm64/elf32_machdep.c2
-rw-r--r--sys/contrib/libnv/bsd_nvpair.c8
-rw-r--r--sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c41
-rw-r--r--sys/dev/e1000/e1000_osdep.h46
-rw-r--r--sys/dev/e1000/if_em.c83
-rw-r--r--sys/dev/ice/ice_common.c9
-rw-r--r--sys/dev/ice/ice_devids.h18
-rw-r--r--sys/dev/ice/ice_drv_info.h42
-rw-r--r--sys/dev/ixl/if_ixl.c27
-rw-r--r--sys/dev/ixl/ixl.h1
-rw-r--r--sys/dev/ixl/ixl_pf_main.c110
-rw-r--r--sys/fs/deadfs/dead_vnops.c10
-rw-r--r--sys/fs/nfs/nfs_commonsubs.c13
-rw-r--r--sys/fs/nfsclient/nfs_clrpcops.c34
-rw-r--r--sys/fs/nfsserver/nfs_nfsdserv.c5
-rw-r--r--sys/fs/nullfs/null.h13
-rw-r--r--sys/fs/nullfs/null_subr.c4
-rw-r--r--sys/fs/nullfs/null_vfsops.c20
-rw-r--r--sys/fs/nullfs/null_vnops.c10
-rw-r--r--sys/geom/zero/g_zero.c4
-rw-r--r--sys/kern/imgact_elf.c8
-rw-r--r--sys/kern/kern_exec.c2
-rw-r--r--sys/kern/subr_kdb.c2
-rw-r--r--sys/kern/vfs_syscalls.c2
-rw-r--r--sys/netinet/in_pcb.c13
-rw-r--r--sys/netinet/in_proto.c2
-rw-r--r--sys/netinet/ip_carp.c27
-rw-r--r--sys/netinet/tcp_lro.c9
-rw-r--r--sys/netinet/tcp_subr.c2
-rw-r--r--sys/netinet/tcp_syncache.c49
-rw-r--r--sys/netinet/tcp_timer.c7
-rw-r--r--sys/netinet/udp_usrreq.c49
-rw-r--r--sys/netinet/udp_var.h1
-rw-r--r--sys/netinet6/in6_src.c54
-rw-r--r--sys/netinet6/ip6_var.h2
-rw-r--r--sys/netinet6/nd6.h4
-rw-r--r--sys/netinet6/nd6_nbr.c121
-rw-r--r--sys/netpfil/ipfilter/netinet/ip_htable.c2
-rw-r--r--sys/netpfil/ipfilter/netinet/ip_nat.c1
-rw-r--r--sys/netpfil/ipfw/ip_fw_nat.c12
-rw-r--r--sys/x86/x86/local_apic.c9
-rw-r--r--sys/x86/x86/mca.c279
44 files changed, 847 insertions, 324 deletions
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 2c3b8f0b4dd8..bf0719f501ef 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -460,9 +460,9 @@ trap(struct trapframe *frame)
if ((print_efirt_faults == 1 && cnt == 0) ||
print_efirt_faults == 2) {
- trap_diag(frame, 0);
printf("EFI RT fault %s\n",
traptype_to_msg(type));
+ trap_diag(frame, 0);
}
frame->tf_rip = (long)curpcb->pcb_onfault;
return;
@@ -899,8 +899,8 @@ after_vmfault:
if ((print_efirt_faults == 1 && cnt == 0) ||
print_efirt_faults == 2) {
- trap_diag(frame, eva);
printf("EFI RT page fault\n");
+ trap_diag(frame, eva);
}
}
frame->tf_rip = (long)curpcb->pcb_onfault;
diff --git a/sys/arm/include/ieeefp.h b/sys/arm/include/ieeefp.h
index 57dd058b8a95..57719b883d58 100644
--- a/sys/arm/include/ieeefp.h
+++ b/sys/arm/include/ieeefp.h
@@ -49,4 +49,14 @@ typedef enum {
#define fp_except_t int
+/* Augment the userland declarations. */
+__BEGIN_DECLS
+extern fp_rnd_t fpgetround(void);
+extern fp_rnd_t fpsetround(fp_rnd_t);
+extern fp_except_t fpgetmask(void);
+extern fp_except_t fpsetmask(fp_except_t);
+extern fp_except_t fpgetsticky(void);
+extern fp_except_t fpsetsticky(fp_except_t);
+__END_DECLS
+
#endif /* _MACHINE_IEEEFP_H_ */
diff --git a/sys/arm64/arm64/elf32_machdep.c b/sys/arm64/arm64/elf32_machdep.c
index 5f35d01f9ac0..285be3d89cbb 100644
--- a/sys/arm64/arm64/elf32_machdep.c
+++ b/sys/arm64/arm64/elf32_machdep.c
@@ -208,7 +208,7 @@ freebsd32_fetch_syscall_args(struct thread *td)
sa->code = *ap++;
nap--;
} else if (sa->code == SYS___syscall) {
- sa->code = ap[1];
+ sa->code = ap[_QUAD_LOWWORD];
nap -= 2;
ap += 2;
}
diff --git a/sys/contrib/libnv/bsd_nvpair.c b/sys/contrib/libnv/bsd_nvpair.c
index a977d7941aa3..f6686a66affa 100644
--- a/sys/contrib/libnv/bsd_nvpair.c
+++ b/sys/contrib/libnv/bsd_nvpair.c
@@ -986,13 +986,13 @@ nvpair_unpack_string_array(bool isbe __unused, nvpair_t *nvp,
size = nvp->nvp_datasize;
tmp = (const char *)ptr;
for (ii = 0; ii < nvp->nvp_nitems; ii++) {
- len = strnlen(tmp, size - 1) + 1;
- size -= len;
- if (tmp[len - 1] != '\0') {
+ if (size <= 0) {
ERRNO_SET(EINVAL);
return (NULL);
}
- if (size < 0) {
+ len = strnlen(tmp, size - 1) + 1;
+ size -= len;
+ if (tmp[len - 1] != '\0') {
ERRNO_SET(EINVAL);
return (NULL);
}
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index 55e133c096c1..04c4bac2201c 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -163,6 +163,13 @@ param_set_arc_int(SYSCTL_HANDLER_ARGS)
return (0);
}
+static void
+warn_deprecated_sysctl(const char *old, const char *new)
+{
+ printf("WARNING: sysctl vfs.zfs.%s is deprecated. Use vfs.zfs.%s instead.\n",
+ old, new);
+}
+
int
param_set_arc_max(SYSCTL_HANDLER_ARGS)
{
@@ -185,13 +192,16 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS)
if (val != 0)
zfs_arc_max = arc_c_max;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("arc_max", "arc.max");
+
return (0);
}
/* BEGIN CSTYLED */
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_max, "LU",
+ NULL, 1, param_set_arc_max, "LU",
"Maximum ARC size in bytes (LEGACY)");
/* END CSTYLED */
@@ -216,13 +226,16 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS)
if (val != 0)
zfs_arc_min = arc_c_min;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("arc_min", "arc.min");
+
return (0);
}
/* BEGIN CSTYLED */
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_min, "LU",
+ NULL, 1, param_set_arc_min, "LU",
"Minimum ARC size in bytes (LEGACY)");
/* END CSTYLED */
@@ -246,6 +259,9 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
zfs_arc_free_target = val;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("arc_free_target", "arc.free_target");
+
return (0);
}
@@ -256,7 +272,7 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
/* BEGIN CSTYLED */
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_free_target, "IU",
+ NULL, 1, param_set_arc_free_target, "IU",
"Desired number of free pages below which ARC triggers reclaim"
" (LEGACY)");
/* END CSTYLED */
@@ -276,13 +292,16 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
arc_no_grow_shift = val;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("arc_no_grow_shift", "arc.no_grow_shift");
+
return (0);
}
/* BEGIN CSTYLED */
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_no_grow_shift, "I",
+ NULL, 1, param_set_arc_no_grow_shift, "I",
"log2(fraction of ARC which must be free to allow growing) (LEGACY)");
/* END CSTYLED */
@@ -775,13 +794,16 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
zfs_vdev_min_auto_ashift = val;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("min_auto_ashift",
+ "vdev.min_auto_ashift");
+
return (0);
}
/* BEGIN CSTYLED */
SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
- CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
+ CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 1,
param_set_min_auto_ashift, "IU",
"Min ashift used when creating new top-level vdev. (LEGACY)");
/* END CSTYLED */
@@ -802,13 +824,16 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
zfs_vdev_max_auto_ashift = val;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("max_auto_ashift",
+ "vdev.max_auto_ashift");
+
return (0);
}
/* BEGIN CSTYLED */
SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
- CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
+ CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 1,
param_set_max_auto_ashift, "IU",
"Max ashift used when optimizing for logical -> physical sector size on"
" new top-level vdevs. (LEGACY)");
diff --git a/sys/dev/e1000/e1000_osdep.h b/sys/dev/e1000/e1000_osdep.h
index 893979025f01..ba1c8a16fad1 100644
--- a/sys/dev/e1000/e1000_osdep.h
+++ b/sys/dev/e1000/e1000_osdep.h
@@ -152,6 +152,9 @@ struct e1000_osdep
{
bus_space_tag_t mem_bus_space_tag;
bus_space_handle_t mem_bus_space_handle;
+#ifdef INVARIANTS
+ bus_size_t mem_bus_space_size;
+#endif
bus_space_tag_t io_bus_space_tag;
bus_space_handle_t io_bus_space_handle;
bus_space_tag_t flash_bus_space_tag;
@@ -175,27 +178,44 @@ struct e1000_osdep
bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \
((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, offset, value)
+static __inline uint32_t
+e1000_rd32(struct e1000_osdep *osdep, uint32_t reg)
+{
+
+ KASSERT(reg < osdep->mem_bus_space_size,
+ ("e1000: register offset %#jx too large (max is %#jx)",
+ (uintmax_t)reg, (uintmax_t)osdep->mem_bus_space_size));
+
+ return (bus_space_read_4(osdep->mem_bus_space_tag,
+ osdep->mem_bus_space_handle, reg));
+}
+
+
+static __inline void
+e1000_wr32(struct e1000_osdep *osdep, uint32_t reg, uint32_t value)
+{
+
+ KASSERT(reg < osdep->mem_bus_space_size,
+ ("e1000: register offset %#jx too large (max is %#jx)",
+ (uintmax_t)reg, (uintmax_t)osdep->mem_bus_space_size));
+
+ bus_space_write_4(osdep->mem_bus_space_tag,
+ osdep->mem_bus_space_handle, reg, value);
+}
+
/* Register READ/WRITE macros */
-#define E1000_READ_REG(hw, reg) \
- bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \
- ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \
- E1000_REGISTER(hw, reg))
+#define E1000_READ_REG(hw, reg) \
+ e1000_rd32((hw)->back, E1000_REGISTER(hw, reg))
#define E1000_WRITE_REG(hw, reg, value) \
- bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \
- ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \
- E1000_REGISTER(hw, reg), value)
+ e1000_wr32((hw)->back, E1000_REGISTER(hw, reg), value)
#define E1000_READ_REG_ARRAY(hw, reg, index) \
- bus_space_read_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \
- ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \
- E1000_REGISTER(hw, reg) + ((index)<< 2))
+ e1000_rd32((hw)->back, E1000_REGISTER(hw, reg) + ((index) << 2))
#define E1000_WRITE_REG_ARRAY(hw, reg, index, value) \
- bus_space_write_4(((struct e1000_osdep *)(hw)->back)->mem_bus_space_tag, \
- ((struct e1000_osdep *)(hw)->back)->mem_bus_space_handle, \
- E1000_REGISTER(hw, reg) + ((index)<< 2), value)
+ e1000_wr32((hw)->back, E1000_REGISTER(hw, reg) + ((index) << 2), value)
#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY
#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY
diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c
index f0ef6051fab1..9040949b36c7 100644
--- a/sys/dev/e1000/if_em.c
+++ b/sys/dev/e1000/if_em.c
@@ -1575,7 +1575,7 @@ em_if_init(if_ctx_t ctx)
E1000_WRITE_REG(&sc->hw, E1000_VET, ETHERTYPE_VLAN);
/* Clear bad data from Rx FIFOs */
- if (sc->hw.mac.type >= igb_mac_min)
+ if (sc->hw.mac.type >= igb_mac_min && !sc->vf_ifp)
e1000_rx_fifo_flush_base(&sc->hw);
/* Configure for OS presence */
@@ -1595,7 +1595,9 @@ em_if_init(if_ctx_t ctx)
/* Don't lose promiscuous settings */
em_if_set_promisc(ctx, if_getflags(ifp));
- e1000_clear_hw_cntrs_base_generic(&sc->hw);
+
+ if (sc->hw.mac.ops.clear_hw_cntrs != NULL)
+ sc->hw.mac.ops.clear_hw_cntrs(&sc->hw);
/* MSI-X configuration for 82574 */
if (sc->hw.mac.type == e1000_82574) {
@@ -2374,7 +2376,7 @@ em_if_stop(if_ctx_t ctx)
em_flush_desc_rings(sc);
e1000_reset_hw(&sc->hw);
- if (sc->hw.mac.type >= e1000_82544)
+ if (sc->hw.mac.type >= e1000_82544 && !sc->vf_ifp)
E1000_WRITE_REG(&sc->hw, E1000_WUFC, 0);
e1000_led_off(&sc->hw);
@@ -2433,6 +2435,9 @@ em_allocate_pci_resources(if_ctx_t ctx)
}
sc->osdep.mem_bus_space_tag = rman_get_bustag(sc->memory);
sc->osdep.mem_bus_space_handle = rman_get_bushandle(sc->memory);
+#ifdef INVARIANTS
+ sc->osdep.mem_bus_space_size = rman_get_size(sc->memory);
+#endif
sc->hw.hw_addr = (u8 *)&sc->osdep.mem_bus_space_handle;
/* Only older adapters use IO mapping */
@@ -3284,11 +3289,13 @@ em_reset(if_ctx_t ctx)
/* Issue a global reset */
e1000_reset_hw(hw);
- if (hw->mac.type >= igb_mac_min) {
- E1000_WRITE_REG(hw, E1000_WUC, 0);
- } else {
- E1000_WRITE_REG(hw, E1000_WUFC, 0);
- em_disable_aspm(sc);
+ if (!sc->vf_ifp) {
+ if (hw->mac.type >= igb_mac_min) {
+ E1000_WRITE_REG(hw, E1000_WUC, 0);
+ } else {
+ E1000_WRITE_REG(hw, E1000_WUFC, 0);
+ em_disable_aspm(sc);
+ }
}
if (sc->flags & IGB_MEDIA_RESET) {
e1000_setup_init_funcs(hw, true);
@@ -3838,7 +3845,7 @@ em_initialize_receive_unit(if_ctx_t ctx)
sc->rx_int_delay.value);
}
- if (hw->mac.type >= em_mac_min) {
+ if (hw->mac.type >= em_mac_min && !sc->vf_ifp) {
uint32_t rfctl;
/* Use extended rx descriptor formats */
rfctl = E1000_READ_REG(hw, E1000_RFCTL);
@@ -3858,33 +3865,38 @@ em_initialize_receive_unit(if_ctx_t ctx)
E1000_WRITE_REG(hw, E1000_RFCTL, rfctl);
}
- /* Set up L3 and L4 csum Rx descriptor offloads */
- rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
- if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
- rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL;
- if (hw->mac.type > e1000_82575)
- rxcsum |= E1000_RXCSUM_CRCOFL;
- else if (hw->mac.type < em_mac_min &&
- if_getcapenable(ifp) & IFCAP_HWCSUM_IPV6)
- rxcsum |= E1000_RXCSUM_IPV6OFL;
- } else {
- rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
- if (hw->mac.type > e1000_82575)
- rxcsum &= ~E1000_RXCSUM_CRCOFL;
- else if (hw->mac.type < em_mac_min)
- rxcsum &= ~E1000_RXCSUM_IPV6OFL;
- }
+ /*
+ * Set up L3 and L4 csum Rx descriptor offloads only on Physical
+ * Functions. Virtual Functions have no access to this register.
+ */
+ if (!sc->vf_ifp) {
+ rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
+ if (if_getcapenable(ifp) & IFCAP_RXCSUM) {
+ rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPOFL;
+ if (hw->mac.type > e1000_82575)
+ rxcsum |= E1000_RXCSUM_CRCOFL;
+ else if (hw->mac.type < em_mac_min &&
+ if_getcapenable(ifp) & IFCAP_HWCSUM_IPV6)
+ rxcsum |= E1000_RXCSUM_IPV6OFL;
+ } else {
+ rxcsum &= ~(E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
+ if (hw->mac.type > e1000_82575)
+ rxcsum &= ~E1000_RXCSUM_CRCOFL;
+ else if (hw->mac.type < em_mac_min)
+ rxcsum &= ~E1000_RXCSUM_IPV6OFL;
+ }
- if (sc->rx_num_queues > 1) {
- /* RSS hash needed in the Rx descriptor */
- rxcsum |= E1000_RXCSUM_PCSD;
+ if (sc->rx_num_queues > 1) {
+ /* RSS hash needed in the Rx descriptor */
+ rxcsum |= E1000_RXCSUM_PCSD;
- if (hw->mac.type >= igb_mac_min)
- igb_initialize_rss_mapping(sc);
- else
- em_initialize_rss_mapping(sc);
+ if (hw->mac.type >= igb_mac_min)
+ igb_initialize_rss_mapping(sc);
+ else
+ em_initialize_rss_mapping(sc);
+ }
+ E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
}
- E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
for (i = 0, que = sc->rx_queues; i < sc->rx_num_queues; i++, que++) {
struct rx_ring *rxr = &que->rxr;
@@ -4392,6 +4404,8 @@ em_get_wakeup(if_ctx_t ctx)
switch (sc->hw.mac.type) {
case e1000_82542:
case e1000_82543:
+ case e1000_vfadapt:
+ case e1000_vfadapt_i350:
break;
case e1000_82544:
e1000_read_nvm(&sc->hw,
@@ -4437,8 +4451,6 @@ em_get_wakeup(if_ctx_t ctx)
case e1000_i354:
case e1000_i210:
case e1000_i211:
- case e1000_vfadapt:
- case e1000_vfadapt_i350:
apme_mask = E1000_WUC_APME;
sc->has_amt = true;
eeprom_data = E1000_READ_REG(&sc->hw, E1000_WUC);
@@ -4494,7 +4506,6 @@ em_get_wakeup(if_ctx_t ctx)
global_quad_port_a = 0;
break;
}
- return;
}
diff --git a/sys/dev/ice/ice_common.c b/sys/dev/ice/ice_common.c
index ad4ea4c8e7a1..b895f661bc46 100644
--- a/sys/dev/ice/ice_common.c
+++ b/sys/dev/ice/ice_common.c
@@ -213,6 +213,15 @@ int ice_set_mac_type(struct ice_hw *hw)
case ICE_DEV_ID_E830_L_QSFP:
case ICE_DEV_ID_E830C_SFP:
case ICE_DEV_ID_E830_L_SFP:
+ case ICE_DEV_ID_E835CC_BACKPLANE:
+ case ICE_DEV_ID_E835CC_QSFP56:
+ case ICE_DEV_ID_E835CC_SFP:
+ case ICE_DEV_ID_E835C_BACKPLANE:
+ case ICE_DEV_ID_E835C_QSFP:
+ case ICE_DEV_ID_E835C_SFP:
+ case ICE_DEV_ID_E835_L_BACKPLANE:
+ case ICE_DEV_ID_E835_L_QSFP:
+ case ICE_DEV_ID_E835_L_SFP:
hw->mac_type = ICE_MAC_E830;
break;
default:
diff --git a/sys/dev/ice/ice_devids.h b/sys/dev/ice/ice_devids.h
index 3f91e9dfbcaf..74712c61ae8e 100644
--- a/sys/dev/ice/ice_devids.h
+++ b/sys/dev/ice/ice_devids.h
@@ -62,6 +62,24 @@
#define ICE_DEV_ID_E830C_SFP 0x12DA
/* Intel(R) Ethernet Controller E830-L for SFP */
#define ICE_DEV_ID_E830_L_SFP 0x12DE
+/* Intel(R) Ethernet Controller E835-CC for backplane */
+#define ICE_DEV_ID_E835CC_BACKPLANE 0x1248
+/* Intel(R) Ethernet Controller E835-CC for QSFP */
+#define ICE_DEV_ID_E835CC_QSFP56 0x1249
+/* Intel(R) Ethernet Controller E835-CC for SFP */
+#define ICE_DEV_ID_E835CC_SFP 0x124A
+/* Intel(R) Ethernet Controller E835-C for backplane */
+#define ICE_DEV_ID_E835C_BACKPLANE 0x1261
+/* Intel(R) Ethernet Controller E835-C for QSFP */
+#define ICE_DEV_ID_E835C_QSFP 0x1262
+/* Intel(R) Ethernet Controller E835-C for SFP */
+#define ICE_DEV_ID_E835C_SFP 0x1263
+/* Intel(R) Ethernet Controller E835-L for backplane */
+#define ICE_DEV_ID_E835_L_BACKPLANE 0x1265
+/* Intel(R) Ethernet Controller E835-L for QSFP */
+#define ICE_DEV_ID_E835_L_QSFP 0x1266
+/* Intel(R) Ethernet Controller E835-L for SFP */
+#define ICE_DEV_ID_E835_L_SFP 0x1267
/* Intel(R) Ethernet Controller E810-C for backplane */
#define ICE_DEV_ID_E810C_BACKPLANE 0x1591
/* Intel(R) Ethernet Controller E810-C for QSFP */
diff --git a/sys/dev/ice/ice_drv_info.h b/sys/dev/ice/ice_drv_info.h
index 2a51a7394424..abb11bdb5fd9 100644
--- a/sys/dev/ice/ice_drv_info.h
+++ b/sys/dev/ice/ice_drv_info.h
@@ -218,6 +218,48 @@ static const pci_vendor_info_t ice_vendor_info_array[] = {
"Intel(R) Ethernet Network Adapter E830-XXV-2"),
PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E830_L_SFP,
"Intel(R) Ethernet Connection E830-L for SFP"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_BACKPLANE,
+ "Intel(R) Ethernet Connection E835-CC for backplane"),
+ PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+ ICE_INTEL_VENDOR_ID, 0x0001, 0,
+ "Intel(R) Ethernet Network Adapter E835-C-Q2"),
+ PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+ ICE_INTEL_VENDOR_ID, 0x0002, 0,
+ "Intel(R) Ethernet Network Adapter E835-C-Q2 for OCP 3.0"),
+ PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+ ICE_INTEL_VENDOR_ID, 0x0003, 0,
+ "Intel(R) Ethernet Network Adapter E835-CC-Q1"),
+ PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+ ICE_INTEL_VENDOR_ID, 0x0004, 0,
+ "Intel(R) Ethernet Network Adapter E835-CC-Q1 for OCP 3.0"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_QSFP56,
+ "Intel(R) Ethernet Connection E835-CC for QSFP56"),
+ PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+ ICE_INTEL_VENDOR_ID, 0x0001, 0,
+ "Intel(R) Ethernet Network Adapter E835-XXV-2 for OCP 3.0"),
+ PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+ ICE_INTEL_VENDOR_ID, 0x0002, 0,
+ "Intel(R) Ethernet Network Adapter E835-XXV-4"),
+ PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+ ICE_INTEL_VENDOR_ID, 0x0003, 0,
+ "Intel(R) Ethernet Network Adapter E835-XXV-2"),
+ PVIDV_OEM(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+ ICE_INTEL_VENDOR_ID, 0x0004, 0,
+ "Intel(R) Ethernet Network Adapter E835-XXV-4 for OCP 3.0"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835CC_SFP,
+ "Intel(R) Ethernet Connection E835-CC for SFP"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_BACKPLANE,
+ "Intel(R) Ethernet Connection E835-C for backplane"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_QSFP,
+ "Intel(R) Ethernet Connection E835-C for QSFP"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835C_SFP,
+ "Intel(R) Ethernet Connection E835-C for SFP"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_BACKPLANE,
+ "Intel(R) Ethernet Connection E835-L for backplane"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_QSFP,
+ "Intel(R) Ethernet Connection E835-L for QSFP"),
+ PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E835_L_SFP,
+ "Intel(R) Ethernet Connection E835-L for SFP"),
PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_BACKPLANE,
"Intel(R) Ethernet Connection E825-C for backplane"),
PVIDV(ICE_INTEL_VENDOR_ID, ICE_DEV_ID_E825C_QSFP,
diff --git a/sys/dev/ixl/if_ixl.c b/sys/dev/ixl/if_ixl.c
index 43c3af056b67..aa2e69ea0b5a 100644
--- a/sys/dev/ixl/if_ixl.c
+++ b/sys/dev/ixl/if_ixl.c
@@ -1480,17 +1480,33 @@ ixl_if_multi_set(if_ctx_t ctx)
struct ixl_pf *pf = iflib_get_softc(ctx);
struct ixl_vsi *vsi = &pf->vsi;
struct i40e_hw *hw = vsi->hw;
+ enum i40e_status_code status;
int mcnt;
+ if_t ifp = iflib_get_ifp(ctx);
IOCTL_DEBUGOUT("ixl_if_multi_set: begin");
/* Delete filters for removed multicast addresses */
ixl_del_multi(vsi, false);
- mcnt = min(if_llmaddr_count(iflib_get_ifp(ctx)), MAX_MULTICAST_ADDR);
+ mcnt = min(if_llmaddr_count(ifp), MAX_MULTICAST_ADDR);
if (__predict_false(mcnt == MAX_MULTICAST_ADDR)) {
- i40e_aq_set_vsi_multicast_promiscuous(hw,
+ /* Check if promisc mode is already enabled, if yes return */
+ if (vsi->flags & IXL_FLAGS_MC_PROMISC)
+ return;
+
+ status = i40e_aq_set_vsi_multicast_promiscuous(hw,
vsi->seid, TRUE, NULL);
+ if (status != I40E_SUCCESS)
+ if_printf(ifp, "Failed to enable multicast promiscuous "
+ "mode, status: %s\n", i40e_stat_str(hw, status));
+ else {
+ if_printf(ifp, "Enabled multicast promiscuous mode\n");
+
+ /* Set the flag to track promiscuous mode */
+ vsi->flags |= IXL_FLAGS_MC_PROMISC;
+ }
+ /* Delete all existing MC filters */
ixl_del_multi(vsi, true);
return;
}
@@ -1693,6 +1709,13 @@ ixl_if_promisc_set(if_ctx_t ctx, int flags)
return (err);
err = i40e_aq_set_vsi_multicast_promiscuous(hw,
vsi->seid, multi, NULL);
+
+ /* Update the multicast promiscuous flag based on the new state */
+ if (multi)
+ vsi->flags |= IXL_FLAGS_MC_PROMISC;
+ else
+ vsi->flags &= ~IXL_FLAGS_MC_PROMISC;
+
return (err);
}
diff --git a/sys/dev/ixl/ixl.h b/sys/dev/ixl/ixl.h
index f45354d29300..69925a131b35 100644
--- a/sys/dev/ixl/ixl.h
+++ b/sys/dev/ixl/ixl.h
@@ -202,6 +202,7 @@
#define IXL_FLAGS_KEEP_TSO6 (1 << 1)
#define IXL_FLAGS_USES_MSIX (1 << 2)
#define IXL_FLAGS_IS_VF (1 << 3)
+#define IXL_FLAGS_MC_PROMISC (1 << 4)
#define IXL_VSI_IS_PF(v) ((v->flags & IXL_FLAGS_IS_VF) == 0)
#define IXL_VSI_IS_VF(v) ((v->flags & IXL_FLAGS_IS_VF) != 0)
diff --git a/sys/dev/ixl/ixl_pf_main.c b/sys/dev/ixl/ixl_pf_main.c
index 4f384e7191af..99851af61cfe 100644
--- a/sys/dev/ixl/ixl_pf_main.c
+++ b/sys/dev/ixl/ixl_pf_main.c
@@ -592,24 +592,29 @@ ixl_add_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
* Routines for multicast and vlan filter management.
*
*********************************************************************/
+
+/**
+ * ixl_add_multi - Add multicast filters to the hardware
+ * @vsi: The VSI structure
+ *
+ * In case number of multicast filters in the IFP exceeds 127 entries,
+ * multicast promiscuous mode will be enabled and the filters will be removed
+ * from the hardware
+ */
void
ixl_add_multi(struct ixl_vsi *vsi)
{
if_t ifp = vsi->ifp;
- struct i40e_hw *hw = vsi->hw;
int mcnt = 0;
struct ixl_add_maddr_arg cb_arg;
IOCTL_DEBUGOUT("ixl_add_multi: begin");
- mcnt = if_llmaddr_count(ifp);
- if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) {
- i40e_aq_set_vsi_multicast_promiscuous(hw,
- vsi->seid, TRUE, NULL);
- /* delete all existing MC filters */
- ixl_del_multi(vsi, true);
- return;
- }
+ /*
+ * There is no need to check if the number of multicast addresses
+ * exceeds the MAX_MULTICAST_ADDR threshold and set promiscuous mode
+ * here, as all callers already handle this case.
+ */
cb_arg.vsi = vsi;
LIST_INIT(&cb_arg.to_add);
@@ -632,30 +637,103 @@ ixl_match_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
return (0);
}
+/**
+ * ixl_dis_multi_promisc - Disable multicast promiscuous mode
+ * @vsi: The VSI structure
+ * @vsi_mcnt: Number of multicast filters in the VSI
+ *
+ * Disable multicast promiscuous mode based on number of entries in the IFP
+ * and the VSI, then re-add multicast filters.
+ *
+ */
+static void
+ixl_dis_multi_promisc(struct ixl_vsi *vsi, int vsi_mcnt)
+{
+ struct ifnet *ifp = vsi->ifp;
+ struct i40e_hw *hw = vsi->hw;
+ int ifp_mcnt = 0;
+ enum i40e_status_code status;
+
+ /*
+ * Check if multicast promiscuous mode was actually enabled.
+ * If promiscuous mode was not enabled, don't attempt to disable it.
+ * Also, don't disable if IFF_PROMISC or IFF_ALLMULTI is set.
+ */
+ if (!(vsi->flags & IXL_FLAGS_MC_PROMISC) ||
+ (if_getflags(ifp) & (IFF_PROMISC | IFF_ALLMULTI)))
+ return;
+
+ ifp_mcnt = if_llmaddr_count(ifp);
+ /*
+ * Equal lists or empty ifp list mean the list has not been changed
+ * and in such case avoid disabling multicast promiscuous mode as it
+ * was not previously enabled. Case where multicast promiscuous mode has
+ * been enabled is when vsi_mcnt == 0 && ifp_mcnt > 0.
+ */
+ if (ifp_mcnt == vsi_mcnt || ifp_mcnt == 0 ||
+ ifp_mcnt >= MAX_MULTICAST_ADDR)
+ return;
+
+ status = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid,
+ FALSE, NULL);
+ if (status != I40E_SUCCESS) {
+ if_printf(ifp, "Failed to disable multicast promiscuous "
+ "mode, status: %s\n", i40e_stat_str(hw, status));
+
+ return;
+ }
+
+ /* Clear the flag since promiscuous mode is now disabled */
+ vsi->flags &= ~IXL_FLAGS_MC_PROMISC;
+ if_printf(ifp, "Disabled multicast promiscuous mode\n");
+
+ ixl_add_multi(vsi);
+}
+
+/**
+ * ixl_del_multi - Delete multicast filters from the hardware
+ * @vsi: The VSI structure
+ * @all: Bool to determine if all the multicast filters should be removed
+ *
+ * In case number of multicast filters in the IFP drops to 127 entries,
+ * multicast promiscuous mode will be disabled and the filters will be reapplied
+ * to the hardware.
+ */
void
ixl_del_multi(struct ixl_vsi *vsi, bool all)
{
- struct ixl_ftl_head to_del;
+ int to_del_cnt = 0, vsi_mcnt = 0;
if_t ifp = vsi->ifp;
struct ixl_mac_filter *f, *fn;
- int mcnt = 0;
+ struct ixl_ftl_head to_del;
IOCTL_DEBUGOUT("ixl_del_multi: begin");
LIST_INIT(&to_del);
/* Search for removed multicast addresses */
LIST_FOREACH_SAFE(f, &vsi->ftl, ftle, fn) {
- if ((f->flags & IXL_FILTER_MC) == 0 ||
- (!all && (if_foreach_llmaddr(ifp, ixl_match_maddr, f) == 0)))
+ if ((f->flags & IXL_FILTER_MC) == 0)
+ continue;
+
+ /* Count all the multicast filters in the VSI for comparison */
+ vsi_mcnt++;
+
+ if (!all && if_foreach_llmaddr(ifp, ixl_match_maddr, f) != 0)
continue;
LIST_REMOVE(f, ftle);
LIST_INSERT_HEAD(&to_del, f, ftle);
- mcnt++;
+ to_del_cnt++;
}
- if (mcnt > 0)
- ixl_del_hw_filters(vsi, &to_del, mcnt);
+ if (to_del_cnt > 0) {
+ ixl_del_hw_filters(vsi, &to_del, to_del_cnt);
+ return;
+ }
+
+ ixl_dis_multi_promisc(vsi, vsi_mcnt);
+
+ IOCTL_DEBUGOUT("ixl_del_multi: end");
}
void
diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
index 0f850cede292..75a1398ad6aa 100644
--- a/sys/fs/deadfs/dead_vnops.c
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -124,18 +124,18 @@ dead_read(struct vop_read_args *ap)
{
/*
- * Return EOF for tty devices, EIO for others
+ * Return EOF for tty devices, ENXIO for others
*/
- if ((ap->a_vp->v_vflag & VV_ISTTY) == 0)
- return (EIO);
- return (0);
+ if (ap->a_vp->v_vflag & VV_ISTTY)
+ return (0);
+ return (ENXIO);
}
int
dead_write(struct vop_write_args *ap)
{
- return (EIO);
+ return (ENXIO);
}
int
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
index bde9be0885e0..6a9a73b40fb0 100644
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -4033,10 +4033,15 @@ nfssvc_idname(struct nfsd_idargs *nidp)
nidp->nid_namelen);
if (error == 0 && nidp->nid_ngroup > 0 &&
(nidp->nid_flag & NFSID_ADDUID) != 0) {
- grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP,
- M_WAITOK);
- error = copyin(nidp->nid_grps, grps,
- sizeof(gid_t) * nidp->nid_ngroup);
+ grps = NULL;
+ if (nidp->nid_ngroup > NGROUPS_MAX)
+ error = EINVAL;
+ if (error == 0) {
+ grps = malloc(sizeof(gid_t) * nidp->nid_ngroup, M_TEMP,
+ M_WAITOK);
+ error = copyin(nidp->nid_grps, grps,
+ sizeof(gid_t) * nidp->nid_ngroup);
+ }
if (error == 0) {
/*
* Create a credential just like svc_getcred(),
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 3c580b90e6b9..387c5465618a 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -2208,7 +2208,7 @@ nfsrpc_writerpc(vnode_t vp, struct uio *uiop, int *iomode,
NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_UNSIGNED
+ NFSX_VERF);
rlen = fxdr_unsigned(int, *tl++);
- if (rlen == 0) {
+ if (rlen <= 0 || rlen > len) {
error = NFSERR_IO;
goto nfsmout;
} else if (rlen < len) {
@@ -5157,7 +5157,7 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
struct nfsrv_descript nfsd;
struct nfsrv_descript *nd = &nfsd;
u_char *cp, *cp2, *fhp;
- int error, cnt, len, setnil;
+ int error, cnt, i, len, setnil;
u_int32_t *opcntp;
nfscl_reqstart(nd, NFSPROC_PUTROOTFH, nmp, NULL, 0, &opcntp, NULL, 0,
@@ -5198,8 +5198,12 @@ nfsrpc_getdirpath(struct nfsmount *nmp, u_char *dirpath, struct ucred *cred,
if (error)
return (error);
if (nd->nd_repstat == 0) {
- NFSM_DISSECT(tl, u_int32_t *, (3 + 2 * cnt) * NFSX_UNSIGNED);
- tl += (2 + 2 * cnt);
+ NFSM_DISSECT(tl, uint32_t *, 3 * NFSX_UNSIGNED);
+ tl += 2;
+ for (i = 0; i < cnt; i++) {
+ NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
+ tl++;
+ }
if ((len = fxdr_unsigned(int, *tl)) <= 0 ||
len > NFSX_FHMAX) {
nd->nd_repstat = NFSERR_BADXDR;
@@ -5470,7 +5474,7 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
}
*tl++ = txdr_unsigned(4096); /* Max response size cached */
*tl++ = txdr_unsigned(20); /* Max operations */
- *tl++ = txdr_unsigned(64); /* Max slots */
+ *tl++ = txdr_unsigned(NFSV4_SLOTS); /* Max slots */
*tl = 0; /* No rdma ird */
/* Fill in back channel attributes. */
@@ -5539,6 +5543,11 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
tl++;
sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
+ if (sep->nfsess_foreslots == 0) {
+ error = NFSERR_BADXDR;
+ goto nfsmout;
+ } else if (sep->nfsess_foreslots > NFSV4_SLOTS)
+ sep->nfsess_foreslots = NFSV4_SLOTS;
NFSCL_DEBUG(4, "fore slots=%d\n", (int)sep->nfsess_foreslots);
irdcnt = fxdr_unsigned(int, *tl);
if (irdcnt < 0 || irdcnt > 1) {
@@ -5552,6 +5561,8 @@ nfsrpc_createsession(struct nfsmount *nmp, struct nfsclsession *sep,
NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
tl += 5;
sep->nfsess_backslots = fxdr_unsigned(uint16_t, *tl);
+ if (sep->nfsess_backslots > NFSV4_CBSLOTS)
+ sep->nfsess_backslots = NFSV4_CBSLOTS;
NFSCL_DEBUG(4, "back slots=%d\n", (int)sep->nfsess_backslots);
}
error = nd->nd_repstat;
@@ -5671,7 +5682,8 @@ nfsrpc_getdeviceinfo(struct nfsmount *nmp, uint8_t *deviceid, int layouttype,
NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED);
stripecnt = fxdr_unsigned(int, *tl);
NFSCL_DEBUG(4, "stripecnt=%d\n", stripecnt);
- if (stripecnt < 1 || stripecnt > 4096) {
+ if (stripecnt >= MHLEN / NFSX_UNSIGNED ||
+ stripecnt < 1) {
printf("pNFS File layout devinfo stripecnt %d:"
" out of range\n", stripecnt);
error = NFSERR_BADXDR;
@@ -7120,7 +7132,7 @@ nfsrpc_writeds(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED + NFSX_VERF);
rlen = fxdr_unsigned(int, *tl++);
NFSCL_DEBUG(4, "nfsrpc_writeds: len=%d rlen=%d\n", len, rlen);
- if (rlen == 0) {
+ if (rlen <= 0 || rlen > len) {
error = NFSERR_IO;
goto nfsmout;
} else if (rlen < len) {
@@ -8117,7 +8129,7 @@ nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
NFSPROC_T *p)
{
uint32_t *tl;
- char *cp, *str, str0[NFSV4_SMALLSTR + 1];
+ char *str, str0[NFSV4_SMALLSTR + 1];
uint32_t len = 0;
int error = 0;
@@ -8140,9 +8152,9 @@ nfsrv_parseug(struct nfsrv_descript *nd, int dogrp, uid_t *uidp, gid_t *gidp,
str = malloc(len + 1, M_TEMP, M_WAITOK);
else
str = str0;
- NFSM_DISSECT(cp, char *, NFSM_RNDUP(len));
- NFSBCOPY(cp, str, len);
- str[len] = '\0';
+ error = nfsrv_mtostr(nd, str, len);
+ if (error != 0)
+ goto nfsmout;
NFSCL_DEBUG(4, "nfsrv_parseug: str=%s\n", str);
if (dogrp != 0)
error = nfsv4_strtogid(nd, str, len, gidp);
diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c
index 88ae643d193e..6047e6f2970e 100644
--- a/sys/fs/nfsserver/nfs_nfsdserv.c
+++ b/sys/fs/nfsserver/nfs_nfsdserv.c
@@ -4950,6 +4950,11 @@ nfsrvd_layoutcommit(struct nfsrv_descript *nd, __unused int isdgram,
NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED);
layouttype = fxdr_unsigned(int, *tl++);
maxcnt = fxdr_unsigned(int, *tl);
+ /* There is no limit in the RFC, so use 1000 as a sanity limit. */
+ if (maxcnt < 0 || maxcnt > 1000) {
+ error = NFSERR_BADXDR;
+ goto nfsmout;
+ }
if (maxcnt > 0) {
layp = malloc(maxcnt + 1, M_TEMP, M_WAITOK);
error = nfsrv_mtostr(nd, layp, maxcnt);
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index 3ae0cd02b2b6..eebf6da571b8 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -37,7 +37,8 @@
#ifndef FS_NULL_H
#define FS_NULL_H
-#define NULLM_CACHE 0x0001
+#define NULLM_CACHE 0x0001
+#define NULLM_NOUNPBYPASS 0x0002
struct null_mount {
struct mount *nullm_vfs;
@@ -80,6 +81,16 @@ struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
#endif
extern struct vop_vector null_vnodeops;
+extern struct vop_vector null_vnodeops_no_unp_bypass;
+
+static inline bool
+null_is_nullfs_vnode(struct vnode *vp)
+{
+ const struct vop_vector *op;
+
+ op = vp->v_op;
+ return (op == &null_vnodeops || op == &null_vnodeops_no_unp_bypass);
+}
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_NULLFSNODE);
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 546f0aea3766..b0e3d15b4789 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -212,7 +212,9 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
*/
xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
- error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
+ error = getnewvnode("nullfs", mp, (MOUNTTONULLMOUNT(mp)->nullm_flags &
+ NULLM_NOUNPBYPASS) != 0 ? &null_vnodeops_no_unp_bypass :
+ &null_vnodeops, &vp);
if (error) {
vput(lowervp);
free(xp, M_NULLFSNODE);
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 8d980932623d..8b9e04775449 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -89,6 +89,10 @@ nullfs_mount(struct mount *mp)
char *target;
int error, len;
bool isvnunlocked;
+ static const char cache_opt_name[] = "cache";
+ static const char nocache_opt_name[] = "nocache";
+ static const char unixbypass_opt_name[] = "unixbypass";
+ static const char nounixbypass_opt_name[] = "nounixbypass";
NULLFSDEBUG("nullfs_mount(mp = %p)\n", (void *)mp);
@@ -120,7 +124,7 @@ nullfs_mount(struct mount *mp)
/*
* Unlock lower node to avoid possible deadlock.
*/
- if (mp->mnt_vnodecovered->v_op == &null_vnodeops &&
+ if (null_is_nullfs_vnode(mp->mnt_vnodecovered) &&
VOP_ISLOCKED(mp->mnt_vnodecovered) == LK_EXCLUSIVE) {
VOP_UNLOCK(mp->mnt_vnodecovered);
isvnunlocked = true;
@@ -154,7 +158,7 @@ nullfs_mount(struct mount *mp)
/*
* Check multi null mount to avoid `lock against myself' panic.
*/
- if (mp->mnt_vnodecovered->v_op == &null_vnodeops) {
+ if (null_is_nullfs_vnode(mp->mnt_vnodecovered)) {
nn = VTONULL(mp->mnt_vnodecovered);
if (nn == NULL || lowerrootvp == nn->null_lowervp) {
NULLFSDEBUG("nullfs_mount: multi null mount?\n");
@@ -209,9 +213,10 @@ nullfs_mount(struct mount *mp)
MNT_IUNLOCK(mp);
}
- if (vfs_getopt(mp->mnt_optnew, "cache", NULL, NULL) == 0) {
+ if (vfs_getopt(mp->mnt_optnew, cache_opt_name, NULL, NULL) == 0) {
xmp->nullm_flags |= NULLM_CACHE;
- } else if (vfs_getopt(mp->mnt_optnew, "nocache", NULL, NULL) == 0) {
+ } else if (vfs_getopt(mp->mnt_optnew, nocache_opt_name, NULL,
+ NULL) == 0) {
;
} else if (null_cache_vnodes &&
(xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) == 0) {
@@ -223,6 +228,13 @@ nullfs_mount(struct mount *mp)
&xmp->notify_node);
}
+ if (vfs_getopt(mp->mnt_optnew, unixbypass_opt_name, NULL, NULL) == 0) {
+ ;
+ } else if (vfs_getopt(mp->mnt_optnew, nounixbypass_opt_name, NULL,
+ NULL) == 0) {
+ xmp->nullm_flags |= NULLM_NOUNPBYPASS;
+ }
+
if (lowerrootvp == mp->mnt_vnodecovered) {
vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
lowerrootvp->v_vflag |= VV_CROSSLOCK;
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 41915da7f13c..2e34b77a5a7e 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -258,7 +258,7 @@ null_bypass(struct vop_generic_args *ap)
* that aren't. (We must always map first vp or vclean fails.)
*/
if (i != 0 && (*this_vp_p == NULLVP ||
- (*this_vp_p)->v_op != &null_vnodeops)) {
+ !null_is_nullfs_vnode(*this_vp_p))) {
old_vps[i] = NULLVP;
} else {
old_vps[i] = *this_vp_p;
@@ -1192,3 +1192,11 @@ struct vop_vector null_vnodeops = {
.vop_copy_file_range = VOP_PANIC,
};
VFS_VOP_VECTOR_REGISTER(null_vnodeops);
+
+struct vop_vector null_vnodeops_no_unp_bypass = {
+ .vop_default = &null_vnodeops,
+ .vop_unp_bind = vop_stdunp_bind,
+ .vop_unp_connect = vop_stdunp_connect,
+ .vop_unp_detach = vop_stdunp_detach,
+};
+VFS_VOP_VECTOR_REGISTER(null_vnodeops_no_unp_bypass);
diff --git a/sys/geom/zero/g_zero.c b/sys/geom/zero/g_zero.c
index 66cc884fab56..be31cc794cb5 100644
--- a/sys/geom/zero/g_zero.c
+++ b/sys/geom/zero/g_zero.c
@@ -47,11 +47,11 @@ static SYSCTL_NODE(_kern_geom, OID_AUTO, zero, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"GEOM_ZERO stuff");
static int g_zero_clear = 1;
SYSCTL_PROC(_kern_geom_zero, OID_AUTO, clear,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &g_zero_clear, 0,
+ CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &g_zero_clear, 0,
g_zero_clear_sysctl, "I",
"Clear read data buffer");
static int g_zero_byte = 0;
-SYSCTL_INT(_kern_geom_zero, OID_AUTO, byte, CTLFLAG_RW, &g_zero_byte, 0,
+SYSCTL_INT(_kern_geom_zero, OID_AUTO, byte, CTLFLAG_RWTUN, &g_zero_byte, 0,
"Byte (octet) value to clear the buffers with");
static struct g_provider *gpp;
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 0a09bb9e3891..f73fd997594d 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -2802,7 +2802,7 @@ __elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote,
}
if ((const char *)note_end - (const char *)note <
sizeof(Elf_Note)) {
- uprintf("ELF note to short\n");
+ uprintf("ELF note too short\n");
goto retf;
}
if (note->n_namesz != checknote->n_namesz ||
@@ -2810,9 +2810,9 @@ __elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote,
note->n_type != checknote->n_type)
goto nextnote;
note_name = (const char *)(note + 1);
- if (note_name + checknote->n_namesz >=
- (const char *)note_end || strncmp(note_vendor,
- note_name, checknote->n_namesz) != 0)
+ if (note_name + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
+ note->n_descsz >= (const char *)note_end ||
+ strncmp(note_vendor, note_name, checknote->n_namesz) != 0)
goto nextnote;
if (cb(note, cb_arg, &res))
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index f4197609a700..7f6d9a85c6bc 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1284,7 +1284,7 @@ exec_map_stack(struct image_params *imgp)
MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
} else {
sharedpage_addr = sv->sv_shared_page_base;
- vm_map_fixed(map, obj, 0,
+ error = vm_map_fixed(map, obj, 0,
sharedpage_addr, sv->sv_shared_page_len,
VM_PROT_READ | VM_PROT_EXECUTE,
VM_PROT_READ | VM_PROT_EXECUTE,
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index 86f392485a4b..13ab25ce2c06 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -330,7 +330,7 @@ kdb_reboot(void)
#define KEY_CRTLP 16 /* ^P */
#define KEY_CRTLR 18 /* ^R */
-/* States of th KDB "alternate break sequence" detecting state machine. */
+/* States of the KDB "alternate break sequence" detecting state machine. */
enum {
KDB_ALT_BREAK_SEEN_NONE,
KDB_ALT_BREAK_SEEN_CR,
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 9c5c5b9dfa80..99f9e129f4cd 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1074,7 +1074,7 @@ flags_to_rights(int flags, cap_rights_t *rightsp)
if (flags & O_TRUNC)
cap_rights_set_one(rightsp, CAP_FTRUNCATE);
- if (flags & (O_SYNC | O_FSYNC))
+ if (flags & (O_SYNC | O_FSYNC | O_DSYNC))
cap_rights_set_one(rightsp, CAP_FSYNC);
if (flags & (O_EXLOCK | O_SHLOCK))
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 89000a521bff..7d665c7d2a73 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -2702,10 +2702,13 @@ in_pcbinshash(struct inpcb *inp)
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
/*
- * Add entry to load balance group.
- * Only do this if SO_REUSEPORT_LB is set.
+ * Ignore SO_REUSEPORT_LB if the socket is connected. Really this case
+ * should be an error, but for UDP sockets it is not, and some
+ * applications erroneously set it on connected UDP sockets, so we can't
+ * change this without breaking compatibility.
*/
- if ((inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {
+ if (!connected &&
+ (inp->inp_socket->so_options & SO_REUSEPORT_LB) != 0) {
int error = in_pcbinslbgrouphash(inp, M_NODOM);
if (error != 0)
return (error);
@@ -2836,6 +2839,10 @@ in_pcbrehash(struct inpcb *inp)
connected = !in_nullhost(inp->inp_faddr);
}
+ /* See the comment in in_pcbinshash(). */
+ if (connected && (inp->inp_flags & INP_INLBGROUP) != 0)
+ in_pcbremlbgrouphash(inp);
+
/*
* When rehashing, the caller must ensure that either the new or the old
* foreign address was unspecified.
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index e534fdd77635..129dc1cfe892 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -110,6 +110,8 @@ SYSCTL_NODE(_net_inet, IPPROTO_ICMP, icmp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"ICMP");
SYSCTL_NODE(_net_inet, IPPROTO_UDP, udp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"UDP");
+SYSCTL_NODE(_net_inet, IPPROTO_UDPLITE, udplite, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "UDP-Lite");
SYSCTL_NODE(_net_inet, IPPROTO_TCP, tcp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"TCP");
#if defined(SCTP) || defined(SCTP_SUPPORT)
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index 97815ea089b5..cf8ebd8d4bfc 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -1200,18 +1200,31 @@ carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr)
static void
carp_send_na(struct carp_softc *sc)
{
- static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
struct ifaddr *ifa;
- struct in6_addr *in6;
+ int flags;
+ /*
+ * Sending Unsolicited Neighbor Advertisements
+ *
+ * If the node is a router, we MUST set the Router flag to one.
+ * We set the Override flag to one and send the link-layer address
+ * option, so neighboring nodes install the new link-layer address.
+ */
+ flags = ND_NA_FLAG_OVERRIDE;
+ if (V_ip6_forwarding)
+ flags |= ND_NA_FLAG_ROUTER;
CARP_FOREACH_IFA(sc, ifa) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
-
- in6 = IFA_IN6(ifa);
- nd6_na_output(sc->sc_carpdev, &mcast, in6,
- ND_NA_FLAG_OVERRIDE, 1, NULL);
- DELAY(1000); /* XXX */
+ /*
+ * We use the unspecified address as the destination here to
+ * avoid scope initialization for each call.
+ * nd6_na_output() will use the all-nodes multicast address if
+ * the destination address is unspecified.
+ */
+ nd6_na_output(sc->sc_carpdev, &in6addr_any, IFA_IN6(ifa),
+ flags, ND6_NA_OPT_LLA | ND6_NA_CARP_MASTER, NULL);
+ DELAY(1000); /* RetransTimer */
}
}
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 2a5839440d69..045412f6856a 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -1475,10 +1475,11 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
}
/* create sequence number */
- lc->lro_mbuf_data[lc->lro_mbuf_count].seq =
- (((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
- (((uint64_t)mb->m_pkthdr.flowid) << 24) |
- ((uint64_t)lc->lro_mbuf_count);
+ lc->lro_mbuf_data[lc->lro_mbuf_count].seq = lc->lro_mbuf_count;
+ if (M_HASHTYPE_ISHASH(mb))
+ lc->lro_mbuf_data[lc->lro_mbuf_count].seq |=
+ (((uint64_t)M_HASHTYPE_GET(mb)) << 56) |
+ (((uint64_t)mb->m_pkthdr.flowid) << 24);
/* enter mbuf */
lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb;
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 6829d2a743f2..81b378f496c9 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -654,7 +654,7 @@ tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp,
}
}
m->m_pkthdr.tcp_tun_port = port = uh->uh_sport;
- bcopy(th, uh, m->m_len - off);
+ bcopy(th, uh, m->m_len - off - sizeof(struct udphdr));
m->m_len -= sizeof(struct udphdr);
m->m_pkthdr.len -= sizeof(struct udphdr);
/*
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 1c585378dc5b..713f6a35ad45 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1056,6 +1056,8 @@ abort:
*
* On syncache_socket() success the newly created socket
* has its underlying inp locked.
+ *
+ * *lsop is updated, if and only if 1 is returned.
*/
int
syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
@@ -1103,12 +1105,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* a valid syncookie.
*/
SCH_UNLOCK(sch);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Spurious ACK, "
"segment rejected "
"(syncookies disabled)\n",
s, __func__);
- goto failed;
+ free(s, M_TCPLOG);
+ }
+ return (0);
}
if (sch->sch_last_overflow <
time_uptime - SYNCOOKIE_LIFETIME) {
@@ -1117,12 +1121,14 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* don't even check for a valid syncookie.
*/
SCH_UNLOCK(sch);
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Spurious ACK, "
"segment rejected "
"(no syncache entry)\n",
s, __func__);
- goto failed;
+ free(s, M_TCPLOG);
+ }
+ return (0);
}
SCH_UNLOCK(sch);
}
@@ -1135,11 +1141,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc = &scs;
TCPSTAT_INC(tcps_sc_recvcookie);
} else {
- if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+ if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: Segment failed "
"SYNCOOKIE authentication, segment rejected "
"(probably spoofed)\n", s, __func__);
- goto failed;
+ free(s, M_TCPLOG);
+ }
+ return (0);
}
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
/* If received ACK has MD5 signature, check it. */
@@ -1167,7 +1175,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/*
* If listening socket requested TCP digests, check that
* received ACK has signature and it is correct.
- * If not, drop the ACK and leave sc entry in th cache,
+ * If not, drop the ACK and leave sc entry in the cache,
* because SYN was received with correct signature.
*/
if (sc->sc_flags & SCF_SIGNATURE) {
@@ -1213,9 +1221,9 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
"%s; %s: SEG.TSval %u < TS.Recent %u, "
"segment dropped\n", s, __func__,
to->to_tsval, sc->sc_tsreflect);
- free(s, M_TCPLOG);
}
SCH_UNLOCK(sch);
+ free(s, M_TCPLOG);
return (-1); /* Do not send RST */
}
@@ -1232,7 +1240,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
"expected, segment processed normally\n",
s, __func__);
free(s, M_TCPLOG);
- s = NULL;
}
}
@@ -1319,16 +1326,6 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
if (sc != &scs)
syncache_free(sc);
return (1);
-failed:
- if (sc != NULL) {
- TCPSTATES_DEC(TCPS_SYN_RECEIVED);
- if (sc != &scs)
- syncache_free(sc);
- }
- if (s != NULL)
- free(s, M_TCPLOG);
- *lsop = NULL;
- return (0);
}
static struct socket *
@@ -1390,6 +1387,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct tcpcb *tp;
struct socket *rv = NULL;
struct syncache *sc = NULL;
+ struct ucred *cred;
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
u_int ltflags;
@@ -1418,6 +1416,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
*/
KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
tp = sototcpcb(so);
+ cred = V_tcp_syncache.see_other ? NULL : crhold(so->so_cred);
#ifdef INET6
if (inc->inc_flags & INC_ISIPV6) {
@@ -1646,16 +1645,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/*
* sc_cred is only used in syncache_pcblist() to list TCP endpoints in
* TCPS_SYN_RECEIVED state when V_tcp_syncache.see_other is false.
- * Therefore, store the credentials and take a reference count only
- * when needed:
+ * Therefore, store the credentials only when needed:
* - sc is allocated from the zone and not using the on stack instance.
* - the sysctl variable net.inet.tcp.syncache.see_other is false.
* The reference count is decremented when a zone allocated sc is
* freed in syncache_free().
*/
- if (sc != &scs && !V_tcp_syncache.see_other)
- sc->sc_cred = crhold(so->so_cred);
- else
+ if (sc != &scs && !V_tcp_syncache.see_other) {
+ sc->sc_cred = cred;
+ cred = NULL;
+ } else
sc->sc_cred = NULL;
sc->sc_port = port;
sc->sc_ipopts = ipopts;
@@ -1793,6 +1792,8 @@ donenoprobe:
tcp_fastopen_decrement_counter(tfo_pending);
tfo_expanded:
+ if (cred != NULL)
+ crfree(cred);
if (sc == NULL || sc == &scs) {
#ifdef MAC
mac_syncache_destroy(&maclabel);
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 65a9fbc84ff7..1c687e94bb4a 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -515,9 +515,12 @@ tcp_timer_persist(struct tcpcb *tp)
if (progdrop || (tp->t_rxtshift >= V_tcp_retries &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
- if (!progdrop)
+ if (progdrop) {
+ tcp_log_end_status(tp, TCP_EI_STATUS_PROGRESS);
+ } else {
TCPSTAT_INC(tcps_persistdrop);
- tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
+ tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
+ }
goto dropit;
}
/*
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index f44bd1838d07..a1000dadf583 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -786,7 +786,8 @@ udplite_ctlinput(struct icmp *icmp)
static int
udp_pcblist(SYSCTL_HANDLER_ARGS)
{
- struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_udbinfo,
+ struct inpcbinfo *pcbinfo = udp_get_inpcbinfo(arg2);
+ struct inpcb_iterator inpi = INP_ALL_ITERATOR(pcbinfo,
INPLOOKUP_RLOCKPCB);
struct xinpgen xig;
struct inpcb *inp;
@@ -798,7 +799,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
if (req->oldptr == 0) {
int n;
- n = V_udbinfo.ipi_count;
+ n = pcbinfo->ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
@@ -809,8 +810,8 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = V_udbinfo.ipi_count;
- xig.xig_gen = V_udbinfo.ipi_gencnt;
+ xig.xig_count = pcbinfo->ipi_count;
+ xig.xig_gen = pcbinfo->ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
@@ -837,9 +838,9 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- xig.xig_gen = V_udbinfo.ipi_gencnt;
+ xig.xig_gen = pcbinfo->ipi_gencnt;
xig.xig_sogen = so_gencnt;
- xig.xig_count = V_udbinfo.ipi_count;
+ xig.xig_count = pcbinfo->ipi_count;
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
@@ -847,10 +848,15 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
}
SYSCTL_PROC(_net_inet_udp, UDPCTL_PCBLIST, pcblist,
- CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDP,
udp_pcblist, "S,xinpcb",
"List of active UDP sockets");
+SYSCTL_PROC(_net_inet_udplite, OID_AUTO, pcblist,
+ CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, IPPROTO_UDPLITE,
+ udp_pcblist, "S,xinpcb",
+ "List of active UDP-Lite sockets");
+
#ifdef INET
static int
udp_getcred(SYSCTL_HANDLER_ARGS)
@@ -1140,7 +1146,19 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
else
INP_RLOCK(inp);
NET_EPOCH_ENTER(et);
+#ifdef INET6
+ if ((flags & PRUS_IPV6) != 0) {
+ if ((inp->in6p_outputopts != NULL) &&
+ (inp->in6p_outputopts->ip6po_tclass != -1))
+ tos = (u_char)inp->in6p_outputopts->ip6po_tclass;
+ else
+ tos = 0;
+ } else {
+ tos = inp->inp_ip_tos;
+ }
+#else
tos = inp->inp_ip_tos;
+#endif
if (control != NULL) {
/*
* XXX: Currently, we assume all the optional information is
@@ -1164,6 +1182,23 @@ udp_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
error = udp_v4mapped_pktinfo(cm, &src, inp, flags);
if (error != 0)
break;
+ if (((flags & PRUS_IPV6) != 0) &&
+ (cm->cmsg_level == IPPROTO_IPV6) &&
+ (cm->cmsg_type == IPV6_TCLASS)) {
+ int tclass;
+
+ if (cm->cmsg_len != CMSG_LEN(sizeof(int))) {
+ error = EINVAL;
+ break;
+ }
+ tclass = *(int *)CMSG_DATA(cm);
+ if (tclass < -1 || tclass > 255) {
+ error = EINVAL;
+ break;
+ }
+ if (tclass != -1)
+ tos = (u_char)tclass;
+ }
#endif
if (cm->cmsg_level != IPPROTO_IP)
continue;
diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h
index 51272e7c9349..569b7b8d5240 100644
--- a/sys/netinet/udp_var.h
+++ b/sys/netinet/udp_var.h
@@ -141,6 +141,7 @@ void kmod_udpstat_inc(int statnum);
kmod_udpstat_inc(offsetof(struct udpstat, name) / sizeof(uint64_t))
SYSCTL_DECL(_net_inet_udp);
+SYSCTL_DECL(_net_inet_udplite);
VNET_DECLARE(struct inpcbinfo, udbinfo);
VNET_DECLARE(struct inpcbinfo, ulitecbinfo);
diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c
index 52fe78a1a83b..91e75b1925b5 100644
--- a/sys/netinet6/in6_src.c
+++ b/sys/netinet6/in6_src.c
@@ -134,8 +134,8 @@ static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct ifnet **,
struct ifnet *, u_int);
static int in6_selectsrc(uint32_t, struct sockaddr_in6 *,
- struct ip6_pktopts *, struct inpcb *, struct ucred *,
- struct ifnet **, struct in6_addr *);
+ struct ip6_pktopts *, struct ip6_moptions *, struct inpcb *,
+ struct ucred *, struct ifnet **, struct in6_addr *);
static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);
@@ -175,8 +175,8 @@ static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);
static int
in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
- struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred,
- struct ifnet **ifpp, struct in6_addr *srcp)
+ struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct inpcb *inp,
+ struct ucred *cred, struct ifnet **ifpp, struct in6_addr *srcp)
{
struct rm_priotracker in6_ifa_tracker;
struct in6_addr dst, tmp;
@@ -188,7 +188,6 @@ in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
u_int32_t odstzone;
int prefer_tempaddr;
int error;
- struct ip6_moptions *mopts;
NET_EPOCH_ASSERT();
KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__));
@@ -207,13 +206,6 @@ in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock,
*ifpp = NULL;
}
- if (inp != NULL) {
- INP_LOCK_ASSERT(inp);
- mopts = inp->in6p_moptions;
- } else {
- mopts = NULL;
- }
-
/*
* If the source address is explicitly specified by the caller,
* check if the requested source address is indeed a unicast address
@@ -554,10 +546,13 @@ in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
uint32_t fibnum;
int error;
+ INP_LOCK_ASSERT(inp);
+
fibnum = inp->inp_inc.inc_fibnum;
retifp = NULL;
- error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp);
+ error = in6_selectsrc(fibnum, dstsock, opts, inp->in6p_moptions,
+ inp, cred, &retifp, srcp);
if (error != 0)
return (error);
@@ -585,7 +580,7 @@ in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* Stores selected address to @srcp.
* Returns 0 on success.
*
- * Used by non-socket based consumers (ND code mostly)
+ * Used by non-socket based consumers
*/
int
in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst,
@@ -604,13 +599,42 @@ in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst,
dst_sa.sin6_scope_id = scopeid;
sa6_embedscope(&dst_sa, 0);
- error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &retifp, srcp);
+ error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL,
+ NULL, NULL, &retifp, srcp);
if (hlim != NULL)
*hlim = in6_selecthlim(NULL, retifp);
return (error);
}
+/*
+ * Select source address based on @fibnum, @dst and @mopts.
+ * Stores selected address to @srcp.
+ * Returns 0 on success.
+ *
+ * Used by non-socket based consumers (ND code mostly)
+ */
+int
+in6_selectsrc_nbr(uint32_t fibnum, const struct in6_addr *dst,
+ struct ip6_moptions *mopts, struct ifnet *ifp, struct in6_addr *srcp)
+{
+ struct sockaddr_in6 dst_sa;
+ struct ifnet *retifp;
+ int error;
+
+ retifp = ifp;
+ bzero(&dst_sa, sizeof(dst_sa));
+ dst_sa.sin6_family = AF_INET6;
+ dst_sa.sin6_len = sizeof(dst_sa);
+ dst_sa.sin6_addr = *dst;
+ dst_sa.sin6_scope_id = ntohs(in6_getscope(dst));
+ sa6_embedscope(&dst_sa, 0);
+
+ error = in6_selectsrc(fibnum, &dst_sa, NULL, mopts,
+ NULL, NULL, &retifp, srcp);
+ return (error);
+}
+
static struct nhop_object *
cache_route(uint32_t fibnum, const struct sockaddr_in6 *dst, struct route_in6 *ro,
uint32_t flowid)
diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h
index 464b74c4fd88..a365c6bb6268 100644
--- a/sys/netinet6/ip6_var.h
+++ b/sys/netinet6/ip6_var.h
@@ -403,6 +403,8 @@ int in6_selectsrc_socket(struct sockaddr_in6 *, struct ip6_pktopts *,
struct inpcb *, struct ucred *, int, struct in6_addr *, int *);
int in6_selectsrc_addr(uint32_t, const struct in6_addr *,
uint32_t, struct ifnet *, struct in6_addr *, int *);
+int in6_selectsrc_nbr(uint32_t, const struct in6_addr *,
+ struct ip6_moptions *, struct ifnet *, struct in6_addr *);
int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
struct nhop_object **, u_int, uint32_t);
diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h
index 1db1b666c60b..6d717a321027 100644
--- a/sys/netinet6/nd6.h
+++ b/sys/netinet6/nd6.h
@@ -170,6 +170,10 @@ struct in6_ndifreq {
#define NDPRF_ONLINK 0x1
#define NDPRF_DETACHED 0x2
+/* ND6 NA output flags */
+#define ND6_NA_OPT_LLA 0x01
+#define ND6_NA_CARP_MASTER 0x02
+
/* protocol constants */
#define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */
#define RTR_SOLICITATION_INTERVAL 4 /* 4sec */
diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index 353db66c2323..db0894df4923 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
@@ -244,10 +244,9 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
* In implementation, we add target link-layer address by default.
* We do not add one in MUST NOT cases.
*/
- if (!IN6_IS_ADDR_MULTICAST(&daddr6))
- tlladdr = 0;
- else
- tlladdr = 1;
+ tlladdr = 0;
+ if (IN6_IS_ADDR_MULTICAST(&daddr6))
+ tlladdr |= ND6_NA_OPT_LLA;
/*
* Target address (taddr6) must be either:
@@ -256,9 +255,11 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
* (3) "tentative" address on which DAD is being performed.
*/
/* (1) and (3) check. */
- if (ifp->if_carp)
+ if (ifp->if_carp) {
ifa = (*carp_iamatch6_p)(ifp, &taddr6);
- else
+ if (ifa != NULL)
+ tlladdr |= ND6_NA_CARP_MASTER;
+ } else
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/* (2) check. */
@@ -322,32 +323,28 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
}
/*
+ * If the Target Address is either an anycast address or a unicast
+ * address for which the node is providing proxy service, or the Target
+ * Link-Layer Address option is not included, the Override flag SHOULD
+ * be set to zero. Otherwise, the Override flag SHOULD be set to one.
+ */
+ if (anycast == 0 && proxy == 0 && (tlladdr & ND6_NA_OPT_LLA) != 0)
+ rflag |= ND_NA_FLAG_OVERRIDE;
+ /*
* If the source address is unspecified address, entries must not
* be created or updated.
- * It looks that sender is performing DAD. Output NA toward
- * all-node multicast address, to tell the sender that I'm using
- * the address.
+ * It looks like the sender is performing DAD. nd6_na_output() will
+ * send the NA to the all-nodes multicast address, to tell the sender
+ * that I'm using the address.
* S bit ("solicited") must be zero.
*/
- if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
- struct in6_addr in6_all;
-
- in6_all = in6addr_linklocal_allnodes;
- if (in6_setscope(&in6_all, ifp, NULL) != 0)
- goto bad;
- nd6_na_output_fib(ifp, &in6_all, &taddr6,
- ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
- rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL,
- M_GETFIB(m));
- goto freeit;
+ if (!IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
+ nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
+ ND_NEIGHBOR_SOLICIT, 0);
+ rflag |= ND_NA_FLAG_SOLICITED;
}
- nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
- ND_NEIGHBOR_SOLICIT, 0);
-
- nd6_na_output_fib(ifp, &saddr6, &taddr6,
- ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
- rflag | ND_NA_FLAG_SOLICITED, tlladdr,
+ nd6_na_output_fib(ifp, &saddr6, &taddr6, rflag, tlladdr,
proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m));
freeit:
if (ifa != NULL)
@@ -439,13 +436,6 @@ nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6,
return;
M_SETFIB(m, fibnum);
- if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) {
- m->m_flags |= M_MCAST;
- im6o.im6o_multicast_ifp = ifp;
- im6o.im6o_multicast_hlim = 255;
- im6o.im6o_multicast_loop = 0;
- }
-
icmp6len = sizeof(*nd_ns);
m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len;
m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */
@@ -470,6 +460,12 @@ nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6,
if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
goto bad;
}
+ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
+ m->m_flags |= M_MCAST;
+ im6o.im6o_multicast_ifp = ifp;
+ im6o.im6o_multicast_hlim = 255;
+ im6o.im6o_multicast_loop = 0;
+ }
if (nonce == NULL) {
char ip6buf[INET6_ADDRSTRLEN];
struct ifaddr *ifa = NULL;
@@ -491,20 +487,16 @@ nd6_ns_output_fib(struct ifnet *ifp, const struct in6_addr *saddr6,
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, saddr6);
if (ifa == NULL) {
int error;
- struct in6_addr dst6, src6;
- uint32_t scopeid;
- in6_splitscope(&ip6->ip6_dst, &dst6, &scopeid);
- error = in6_selectsrc_addr(fibnum, &dst6,
- scopeid, ifp, &src6, NULL);
+ error = in6_selectsrc_nbr(fibnum, &ip6->ip6_dst, &im6o,
+ ifp, &ip6->ip6_src);
if (error) {
nd6log((LOG_DEBUG, "%s: source can't be "
"determined: dst=%s, error=%d\n", __func__,
- ip6_sprintf(ip6buf, &dst6),
+ ip6_sprintf(ip6buf, &ip6->ip6_dst),
error));
goto bad;
}
- ip6->ip6_src = src6;
} else
ip6->ip6_src = *saddr6;
@@ -967,7 +959,9 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
* - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
*
- * tlladdr - 1 if include target link-layer address
+ * tlladdr:
+ * - 0x01 to include the target link-layer address option
+ * - 0x02 if the target address is CARP MASTER
* sdl0 - sockaddr_dl (= proxy NA) or NULL
*/
static void
@@ -980,8 +974,7 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
struct ip6_hdr *ip6;
struct nd_neighbor_advert *nd_na;
struct ip6_moptions im6o;
- struct in6_addr daddr6, dst6, src6;
- uint32_t scopeid;
+ struct in6_addr daddr6;
NET_EPOCH_ASSERT();
@@ -1005,13 +998,6 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
return;
M_SETFIB(m, fibnum);
- if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
- m->m_flags |= M_MCAST;
- im6o.im6o_multicast_ifp = ifp;
- im6o.im6o_multicast_hlim = 255;
- im6o.im6o_multicast_loop = 0;
- }
-
icmp6len = sizeof(*nd_na);
m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len;
m->m_data += max_linkhdr; /* or M_ALIGN() equivalent? */
@@ -1023,26 +1009,24 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
ip6->ip6_vfc |= IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_ICMPV6;
ip6->ip6_hlim = 255;
+
if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) {
/* reply to DAD */
- daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
- daddr6.s6_addr16[1] = 0;
- daddr6.s6_addr32[1] = 0;
- daddr6.s6_addr32[2] = 0;
- daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE;
+ daddr6 = in6addr_linklocal_allnodes;
if (in6_setscope(&daddr6, ifp, NULL))
goto bad;
flags &= ~ND_NA_FLAG_SOLICITED;
}
- ip6->ip6_dst = daddr6;
+ if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
+ m->m_flags |= M_MCAST;
+ im6o.im6o_multicast_ifp = ifp;
+ im6o.im6o_multicast_hlim = 255;
+ im6o.im6o_multicast_loop = 0;
+ }
- /*
- * Select a source whose scope is the same as that of the dest.
- */
- in6_splitscope(&daddr6, &dst6, &scopeid);
- error = in6_selectsrc_addr(fibnum, &dst6,
- scopeid, ifp, &src6, NULL);
+ ip6->ip6_dst = daddr6;
+ error = in6_selectsrc_nbr(fibnum, &daddr6, &im6o, ifp, &ip6->ip6_src);
if (error) {
char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
@@ -1050,7 +1034,6 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
ip6_sprintf(ip6buf, &daddr6), error));
goto bad;
}
- ip6->ip6_src = src6;
nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
nd_na->nd_na_code = 0;
@@ -1058,20 +1041,24 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
in6_clearscope(&nd_na->nd_na_target); /* XXX */
/*
+ * If we respond from CARP address, we need to prepare mac address
+ * for carp_output().
+ */
+ if (ifp->if_carp && (tlladdr & ND6_NA_CARP_MASTER))
+ mac = (*carp_macmatch6_p)(ifp, m, taddr6);
+ /*
* "tlladdr" indicates NS's condition for adding tlladdr or not.
* see nd6_ns_input() for details.
* Basically, if NS packet is sent to unicast/anycast addr,
* target lladdr option SHOULD NOT be included.
*/
- if (tlladdr) {
+ if (tlladdr & ND6_NA_OPT_LLA) {
/*
* sdl0 != NULL indicates proxy NA. If we do proxy, use
* lladdr in sdl0. If we are not proxying (sending NA for
* my address) use lladdr configured for the interface.
*/
if (sdl0 == NULL) {
- if (ifp->if_carp)
- mac = (*carp_macmatch6_p)(ifp, m, taddr6);
if (mac == NULL)
mac = nd6_ifptomac(ifp);
} else if (sdl0->sa_family == AF_LINK) {
@@ -1081,7 +1068,7 @@ nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0,
mac = LLADDR(sdl);
}
}
- if (tlladdr && mac) {
+ if ((tlladdr & ND6_NA_OPT_LLA) && mac != NULL) {
int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1);
diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.c b/sys/netpfil/ipfilter/netinet/ip_htable.c
index 1907aa0f9f5f..be5b71d9f6d4 100644
--- a/sys/netpfil/ipfilter/netinet/ip_htable.c
+++ b/sys/netpfil/ipfilter/netinet/ip_htable.c
@@ -233,6 +233,8 @@ ipf_htable_stats_get(ipf_main_softc_t *softc, void *arg, iplookupop_t *op)
return (EINVAL);
}
+ bzero(&stats, sizeof(stats));
+
stats.iphs_tables = softh->ipf_htables[op->iplo_unit + 1];
stats.iphs_numtables = softh->ipf_nhtables[op->iplo_unit + 1];
stats.iphs_numnodes = softh->ipf_nhtnodes[op->iplo_unit + 1];
diff --git a/sys/netpfil/ipfilter/netinet/ip_nat.c b/sys/netpfil/ipfilter/netinet/ip_nat.c
index 290af20e4765..3f8f3c2a342c 100644
--- a/sys/netpfil/ipfilter/netinet/ip_nat.c
+++ b/sys/netpfil/ipfilter/netinet/ip_nat.c
@@ -1775,6 +1775,7 @@ ipf_nat_getent(ipf_main_softc_t *softc, caddr_t data, int getlock)
IPFERROR(60029);
return (ENOMEM);
}
+ bzero(ipn, ipns.ipn_dsize);
if (getlock) {
READ_ENTER(&softc->ipf_nat);
diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c
index 4c83c91cf918..c97bf4bf80a9 100644
--- a/sys/netpfil/ipfw/ip_fw_nat.c
+++ b/sys/netpfil/ipfw/ip_fw_nat.c
@@ -1000,9 +1000,11 @@ ipfw_nat_del(struct sockopt *sopt)
{
struct cfg_nat *ptr;
struct ip_fw_chain *chain = &V_layer3_chain;
- int i;
+ int error, i;
- sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ if (error != 0)
+ return (error);
/* XXX validate i */
IPFW_UH_WLOCK(chain);
ptr = lookup_nat(&chain->nat, i);
@@ -1105,7 +1107,7 @@ ipfw_nat_get_log(struct sockopt *sopt)
{
uint8_t *data;
struct cfg_nat *ptr;
- int i, size;
+ int error, i, size;
struct ip_fw_chain *chain;
IPFW_RLOCK_TRACKER;
@@ -1135,9 +1137,9 @@ ipfw_nat_get_log(struct sockopt *sopt)
i += LIBALIAS_BUF_SIZE;
}
IPFW_RUNLOCK(chain);
- sooptcopyout(sopt, data, size);
+ error = sooptcopyout(sopt, data, size);
free(data, M_IPFW);
- return(0);
+ return (error);
}
static int
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index 6a913883cc5c..bf05855439bc 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -371,9 +371,12 @@ lvt_mode_impl(struct lapic *la, struct lvt *lvt, u_int pin, uint32_t value)
case APIC_LVT_DM_SMI:
case APIC_LVT_DM_INIT:
case APIC_LVT_DM_EXTINT:
- if (!lvt->lvt_edgetrigger && bootverbose) {
- printf("lapic%u: Forcing LINT%u to edge trigger\n",
- la->la_id, pin);
+ if (!lvt->lvt_edgetrigger) {
+ if (bootverbose) {
+ printf(
+ "lapic%u: Forcing LINT%u to edge trigger\n",
+ la->la_id, pin);
+ }
value &= ~APIC_LVT_TM;
}
/* Use a vector of 0. */
diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c
index e43c88b3a27b..735efe307215 100644
--- a/sys/x86/x86/mca.c
+++ b/sys/x86/x86/mca.c
@@ -46,9 +46,11 @@
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <machine/intr_machdep.h>
@@ -124,6 +126,22 @@ SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTUN,
&workaround_erratum383, 0,
"Is the workaround for Erratum 383 on AMD Family 10h processors enabled?");
+#ifdef DIAGNOSTIC
+static uint64_t fake_status;
+SYSCTL_U64(_hw_mca, OID_AUTO, fake_status, CTLFLAG_RW,
+ &fake_status, 0,
+ "Insert artificial MCA with given status (testing purpose only)");
+static int fake_bank;
+SYSCTL_INT(_hw_mca, OID_AUTO, fake_bank, CTLFLAG_RW,
+ &fake_bank, 0,
+ "Bank to use for artificial MCAs (testing purpose only)");
+#endif
+
+static bool mca_uselog = false;
+SYSCTL_BOOL(_hw_mca, OID_AUTO, uselog, CTLFLAG_RWTUN, &mca_uselog, 0,
+ "Should the system send non-fatal machine check errors to the log "
+ "(instead of the console)?");
+
static STAILQ_HEAD(, mca_internal) mca_freelist;
static int mca_freecount;
static STAILQ_HEAD(, mca_internal) mca_records;
@@ -136,12 +154,40 @@ static struct timeout_task mca_scan_task;
static struct mtx mca_lock;
static bool mca_startup_done = false;
-/* Statistics on number of MCA events by type, updated atomically. */
+/* Static buffer to compose messages while in an interrupt context. */
+static char mca_msg_buf[1024];
+static struct mtx mca_msg_buf_lock;
+
+/* Statistics on number of MCA events by type, updated with the mca_lock. */
static uint64_t mca_stats[MCA_T_COUNT];
SYSCTL_OPAQUE(_hw_mca, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_SKIP,
mca_stats, MCA_T_COUNT * sizeof(mca_stats[0]),
"S", "Array of MCA events by type");
+/* Variables to track and control message rate limiting. */
+static struct timeval mca_last_log_time;
+static struct timeval mca_log_interval;
+static int mca_log_skipped;
+
+static int
+sysctl_mca_log_interval(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ u_int val;
+
+ val = mca_log_interval.tv_sec;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ mca_log_interval.tv_sec = val;
+ return (0);
+}
+SYSCTL_PROC(_hw_mca, OID_AUTO, log_interval,
+ CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &mca_log_interval, 0,
+ sysctl_mca_log_interval, "IU",
+ "Minimum number of seconds between logging correctable MCAs"
+ " (0 = no limit)");
+
static unsigned int
mca_ia32_ctl_reg(int bank)
{
@@ -437,98 +483,111 @@ mca_mute(const struct mca_record *rec)
/* Dump details about a single machine check. */
static void
-mca_log(const struct mca_record *rec)
+mca_log(enum scan_mode mode, const struct mca_record *rec, bool fatal)
{
+ int error, numskipped;
uint16_t mca_error;
enum mca_stat_types event_type;
+ struct sbuf sb;
+ bool uncor, using_shared_buf;
if (mca_mute(rec))
return;
- if (!log_corrected && (rec->mr_status & MC_STATUS_UC) == 0 &&
- (!tes_supported(rec->mr_mcg_cap) ||
+ uncor = (rec->mr_status & MC_STATUS_UC) != 0;
+
+ if (!log_corrected && !uncor && (!tes_supported(rec->mr_mcg_cap) ||
((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) != 0x2))
return;
- printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
+ /* Try to use an allocated buffer when not in an interrupt context. */
+ if (mode == POLLED && sbuf_new(&sb, NULL, 512, SBUF_AUTOEXTEND) != NULL)
+ using_shared_buf = false;
+ else {
+ using_shared_buf = true;
+ mtx_lock_spin(&mca_msg_buf_lock);
+ sbuf_new(&sb, mca_msg_buf, sizeof(mca_msg_buf), SBUF_FIXEDLEN);
+ }
+
+ sbuf_printf(&sb, "MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
(long long)rec->mr_status);
- printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+ sbuf_printf(&sb, "MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
(long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
- printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
- rec->mr_cpu_id, rec->mr_apic_id);
- printf("MCA: CPU %d ", rec->mr_cpu);
+ sbuf_printf(&sb, "MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n",
+ cpu_vendor, rec->mr_cpu_id, rec->mr_apic_id);
+ sbuf_printf(&sb, "MCA: CPU %d ", rec->mr_cpu);
if (rec->mr_status & MC_STATUS_UC)
- printf("UNCOR ");
+ sbuf_printf(&sb, "UNCOR ");
else {
- printf("COR ");
+ sbuf_printf(&sb, "COR ");
if (cmci_supported(rec->mr_mcg_cap))
- printf("(%lld) ", ((long long)rec->mr_status &
+ sbuf_printf(&sb, "(%lld) ", ((long long)rec->mr_status &
MC_STATUS_COR_COUNT) >> 38);
if (tes_supported(rec->mr_mcg_cap)) {
switch ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) {
case 0x1:
- printf("(Green) ");
+ sbuf_printf(&sb, "(Green) ");
break;
case 0x2:
- printf("(Yellow) ");
+ sbuf_printf(&sb, "(Yellow) ");
break;
}
}
}
if (rec->mr_status & MC_STATUS_EN)
- printf("EN ");
+ sbuf_printf(&sb, "EN ");
if (rec->mr_status & MC_STATUS_PCC)
- printf("PCC ");
+ sbuf_printf(&sb, "PCC ");
if (ser_supported(rec->mr_mcg_cap)) {
if (rec->mr_status & MC_STATUS_S)
- printf("S ");
+ sbuf_printf(&sb, "S ");
if (rec->mr_status & MC_STATUS_AR)
- printf("AR ");
+ sbuf_printf(&sb, "AR ");
}
if (rec->mr_status & MC_STATUS_OVER)
- printf("OVER ");
+ sbuf_printf(&sb, "OVER ");
mca_error = rec->mr_status & MC_STATUS_MCA_ERROR;
event_type = MCA_T_COUNT;
switch (mca_error) {
/* Simple error codes. */
case 0x0000:
- printf("no error");
+ sbuf_printf(&sb, "no error");
event_type = MCA_T_NONE;
break;
case 0x0001:
- printf("unclassified error");
+ sbuf_printf(&sb, "unclassified error");
event_type = MCA_T_UNCLASSIFIED;
break;
case 0x0002:
- printf("ucode ROM parity error");
+ sbuf_printf(&sb, "ucode ROM parity error");
event_type = MCA_T_UCODE_ROM_PARITY;
break;
case 0x0003:
- printf("external error");
+ sbuf_printf(&sb, "external error");
event_type = MCA_T_EXTERNAL;
break;
case 0x0004:
- printf("FRC error");
+ sbuf_printf(&sb, "FRC error");
event_type = MCA_T_FRC;
break;
case 0x0005:
- printf("internal parity error");
+ sbuf_printf(&sb, "internal parity error");
event_type = MCA_T_INTERNAL_PARITY;
break;
case 0x0006:
- printf("SMM handler code access violation");
+ sbuf_printf(&sb, "SMM handler code access violation");
event_type = MCA_T_SMM_HANDLER;
break;
case 0x0400:
- printf("internal timer error");
+ sbuf_printf(&sb, "internal timer error");
event_type = MCA_T_INTERNAL_TIMER;
break;
case 0x0e0b:
- printf("generic I/O error");
+ sbuf_printf(&sb, "generic I/O error");
event_type = MCA_T_GENERIC_IO;
if (rec->mr_cpu_vendor_id == CPU_VENDOR_INTEL &&
(rec->mr_status & MC_STATUS_MISCV)) {
- printf(" (pci%d:%d:%d:%d)",
+ sbuf_printf(&sb, " (pci%d:%d:%d:%d)",
(int)((rec->mr_misc & MC_MISC_PCIE_SEG) >> 32),
(int)((rec->mr_misc & MC_MISC_PCIE_BUS) >> 24),
(int)((rec->mr_misc & MC_MISC_PCIE_SLOT) >> 19),
@@ -537,7 +596,8 @@ mca_log(const struct mca_record *rec)
break;
default:
if ((mca_error & 0xfc00) == 0x0400) {
- printf("internal error %x", mca_error & 0x03ff);
+ sbuf_printf(&sb, "internal error %x",
+ mca_error & 0x03ff);
event_type = MCA_T_INTERNAL;
break;
}
@@ -546,14 +606,16 @@ mca_log(const struct mca_record *rec)
/* Memory hierarchy error. */
if ((mca_error & 0xeffc) == 0x000c) {
- printf("%s memory error", mca_error_level(mca_error));
+ sbuf_printf(&sb, "%s memory error",
+ mca_error_level(mca_error));
event_type = MCA_T_MEMORY;
break;
}
/* TLB error. */
if ((mca_error & 0xeff0) == 0x0010) {
- printf("%sTLB %s error", mca_error_ttype(mca_error),
+ sbuf_printf(&sb, "%sTLB %s error",
+ mca_error_ttype(mca_error),
mca_error_level(mca_error));
event_type = MCA_T_TLB;
break;
@@ -561,19 +623,19 @@ mca_log(const struct mca_record *rec)
/* Memory controller error. */
if ((mca_error & 0xef80) == 0x0080) {
- printf("%s channel ", mca_error_mmtype(mca_error,
- &event_type));
+ sbuf_printf(&sb, "%s channel ",
+ mca_error_mmtype(mca_error, &event_type));
if ((mca_error & 0x000f) != 0x000f)
- printf("%d", mca_error & 0x000f);
+ sbuf_printf(&sb, "%d", mca_error & 0x000f);
else
- printf("??");
- printf(" memory error");
+ sbuf_printf(&sb, "??");
+ sbuf_printf(&sb, " memory error");
break;
}
/* Cache error. */
if ((mca_error & 0xef00) == 0x0100) {
- printf("%sCACHE %s %s error",
+ sbuf_printf(&sb, "%sCACHE %s %s error",
mca_error_ttype(mca_error),
mca_error_level(mca_error),
mca_error_request(mca_error));
@@ -583,77 +645,129 @@ mca_log(const struct mca_record *rec)
/* Extended memory error. */
if ((mca_error & 0xef80) == 0x0280) {
- printf("%s channel ", mca_error_mmtype(mca_error,
- &event_type));
+ sbuf_printf(&sb, "%s channel ",
+ mca_error_mmtype(mca_error, &event_type));
if ((mca_error & 0x000f) != 0x000f)
- printf("%d", mca_error & 0x000f);
+ sbuf_printf(&sb, "%d", mca_error & 0x000f);
else
- printf("??");
- printf(" extended memory error");
+ sbuf_printf(&sb, "??");
+ sbuf_printf(&sb, " extended memory error");
break;
}
/* Bus and/or Interconnect error. */
if ((mca_error & 0xe800) == 0x0800) {
- printf("BUS%s ", mca_error_level(mca_error));
+ sbuf_printf(&sb, "BUS%s ", mca_error_level(mca_error));
event_type = MCA_T_BUS;
switch ((mca_error & 0x0600) >> 9) {
case 0:
- printf("Source");
+ sbuf_printf(&sb, "Source");
break;
case 1:
- printf("Responder");
+ sbuf_printf(&sb, "Responder");
break;
case 2:
- printf("Observer");
+ sbuf_printf(&sb, "Observer");
break;
default:
- printf("???");
+ sbuf_printf(&sb, "???");
break;
}
- printf(" %s ", mca_error_request(mca_error));
+ sbuf_printf(&sb, " %s ", mca_error_request(mca_error));
switch ((mca_error & 0x000c) >> 2) {
case 0:
- printf("Memory");
+ sbuf_printf(&sb, "Memory");
break;
case 2:
- printf("I/O");
+ sbuf_printf(&sb, "I/O");
break;
case 3:
- printf("Other");
+ sbuf_printf(&sb, "Other");
break;
default:
- printf("???");
+ sbuf_printf(&sb, "???");
break;
}
if (mca_error & 0x0100)
- printf(" timed out");
+ sbuf_printf(&sb, " timed out");
break;
}
- printf("unknown error %x", mca_error);
+ sbuf_printf(&sb, "unknown error %x", mca_error);
event_type = MCA_T_UNKNOWN;
break;
}
- printf("\n");
+ sbuf_printf(&sb, "\n");
if (rec->mr_status & MC_STATUS_ADDRV) {
- printf("MCA: Address 0x%llx", (long long)rec->mr_addr);
+ sbuf_printf(&sb, "MCA: Address 0x%llx",
+ (long long)rec->mr_addr);
if (ser_supported(rec->mr_mcg_cap) &&
(rec->mr_status & MC_STATUS_MISCV)) {
- printf(" (Mode: %s, LSB: %d)",
+ sbuf_printf(&sb, " (Mode: %s, LSB: %d)",
mca_addres_mode(rec->mr_misc),
(int)(rec->mr_misc & MC_MISC_RA_LSB));
}
- printf("\n");
+ sbuf_printf(&sb, "\n");
}
if (rec->mr_status & MC_STATUS_MISCV)
- printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
+ sbuf_printf(&sb, "MCA: Misc 0x%llx\n", (long long)rec->mr_misc);
+
if (event_type < 0 || event_type >= MCA_T_COUNT) {
KASSERT(0, ("%s: invalid event type (%d)", __func__,
event_type));
event_type = MCA_T_UNKNOWN;
}
- atomic_add_64(&mca_stats[event_type], 1);
+ numskipped = 0;
+ if (!fatal && !uncor) {
+ /*
+ * Update statistics and check the rate limit for
+ * correctable errors. The rate limit is only applied
+ * after the system records a reasonable number of errors
+ * of the same type. The goal is to reduce the impact of
+ * the system seeing and attempting to log a burst of
+ * similar errors, which (especially when printed to the
+ * console) can be expensive.
+ */
+ mtx_lock_spin(&mca_lock);
+ mca_stats[event_type]++;
+ if (mca_log_interval.tv_sec > 0 && mca_stats[event_type] > 50 &&
+ ratecheck(&mca_last_log_time, &mca_log_interval) == 0) {
+ mca_log_skipped++;
+ mtx_unlock_spin(&mca_lock);
+ goto done;
+ }
+ numskipped = mca_log_skipped;
+ mca_log_skipped = 0;
+ mtx_unlock_spin(&mca_lock);
+ }
+
+ error = sbuf_finish(&sb);
+ if (fatal || !mca_uselog) {
+ if (numskipped > 0)
+ printf("MCA: %d events skipped due to rate limit\n",
+ numskipped);
+ if (error)
+ printf("MCA: error logging message (sbuf error %d)\n",
+ error);
+ else
+ sbuf_putbuf(&sb);
+ } else {
+ if (numskipped > 0)
+ log(LOG_ERR,
+ "MCA: %d events skipped due to rate limit\n",
+ numskipped);
+ if (error)
+ log(LOG_ERR,
+ "MCA: error logging message (sbuf error %d)\n",
+ error);
+ else
+ log(uncor ? LOG_CRIT : LOG_ERR, "%s", sbuf_data(&sb));
+ }
+
+done:
+ sbuf_delete(&sb);
+ if (using_shared_buf)
+ mtx_unlock_spin(&mca_msg_buf_lock);
}
static bool
@@ -701,8 +815,24 @@ mca_check_status(enum scan_mode mode, uint64_t mcg_cap, int bank,
bool mce, recover;
status = rdmsr(mca_msr_ops.status(bank));
- if (!(status & MC_STATUS_VAL))
+ if (!(status & MC_STATUS_VAL)) {
+#ifdef DIAGNOSTIC
+ /*
+ * Check if we have a pending artificial event to generate.
+ * Note that this is potentially racy with the sysctl. The
+ * tradeoff is deemed acceptable given the test nature
+ * of the code.
+ */
+ if (fake_status && bank == fake_bank) {
+ status = fake_status;
+ fake_status = 0;
+ }
+ if (!(status & MC_STATUS_VAL))
+ return (0);
+#else
return (0);
+#endif
+ }
recover = *recoverablep;
mce = mca_is_mce(mcg_cap, status, &recover);
@@ -796,9 +926,9 @@ mca_record_entry(enum scan_mode mode, const struct mca_record *record)
mtx_lock_spin(&mca_lock);
rec = STAILQ_FIRST(&mca_freelist);
if (rec == NULL) {
- printf("MCA: Unable to allocate space for an event.\n");
- mca_log(record);
mtx_unlock_spin(&mca_lock);
+ printf("MCA: Unable to allocate space for an event.\n");
+ mca_log(mode, record, false);
return;
}
STAILQ_REMOVE_HEAD(&mca_freelist, link);
@@ -955,7 +1085,7 @@ mca_scan(enum scan_mode mode, bool *recoverablep)
if (*recoverablep)
mca_record_entry(mode, &rec);
else
- mca_log(&rec);
+ mca_log(mode, &rec, true);
}
#ifdef DEV_APIC
@@ -1017,6 +1147,7 @@ static void
mca_process_records(enum scan_mode mode)
{
struct mca_internal *mca;
+ STAILQ_HEAD(, mca_internal) tmplist;
/*
* If in an interrupt context, defer the post-scan activities to a
@@ -1028,10 +1159,21 @@ mca_process_records(enum scan_mode mode)
return;
}
+ /*
+ * Copy the pending list to the stack so we can drop the spin lock
+ * while we are emitting logs.
+ */
+ STAILQ_INIT(&tmplist);
+ mtx_lock_spin(&mca_lock);
+ STAILQ_SWAP(&mca_pending, &tmplist, mca_internal);
+ mtx_unlock_spin(&mca_lock);
+
+ STAILQ_FOREACH(mca, &tmplist, link)
+ mca_log(mode, &mca->rec, false);
+
mtx_lock_spin(&mca_lock);
- while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) {
- STAILQ_REMOVE_HEAD(&mca_pending, link);
- mca_log(&mca->rec);
+ while ((mca = STAILQ_FIRST(&tmplist)) != NULL) {
+ STAILQ_REMOVE_HEAD(&tmplist, link);
mca_store_record(mca);
}
mtx_unlock_spin(&mca_lock);
@@ -1192,6 +1334,7 @@ mca_setup(uint64_t mcg_cap)
mca_banks = mcg_cap & MCG_CAP_COUNT;
mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
+ mtx_init(&mca_msg_buf_lock, "mca_msg_buf", NULL, MTX_SPIN);
STAILQ_INIT(&mca_records);
STAILQ_INIT(&mca_pending);
mca_tq = taskqueue_create_fast("mca", M_WAITOK,