-rw-r--r--  Makefile.inc1 | 16
-rw-r--r--  RELNOTES | 2
-rw-r--r--  UPDATING | 16
-rw-r--r--  contrib/blocklist/bin/blacklistctl.8 | 136
-rw-r--r--  contrib/blocklist/bin/blacklistd.8 | 308
-rw-r--r--  contrib/blocklist/bin/blacklistd.conf.5 | 242
-rw-r--r--  contrib/blocklist/lib/libblacklist.3 | 188
-rwxr-xr-x  contrib/blocklist/libexec/blocklistd-helper | 2
-rw-r--r--  crypto/openssh/sshd-session.c | 2
-rw-r--r--  etc/Makefile | 2
-rw-r--r--  etc/mtree/BSD.var.dist | 4
-rw-r--r--  lib/libblacklist/Makefile | 17
-rw-r--r--  lib/libc/stdlib/realpath.3 | 12
-rw-r--r--  lib/libc/stdlib/realpath.c | 14
-rw-r--r--  lib/libc/stdtime/strptime.3 | 2
-rw-r--r--  lib/libc/stdtime/strptime.c | 3
-rw-r--r--  lib/libc/tests/gen/realpath2_test.c | 106
-rw-r--r--  lib/libpam/modules/modules.inc | 2
-rw-r--r--  lib/libpam/modules/pam_xdg/pam_xdg.8 | 1
-rw-r--r--  lib/libsys/socket.2 | 302
-rw-r--r--  lib/libunbound/Makefile | 6
-rw-r--r--  lib/ncurses/Makefile.inc | 1
-rw-r--r--  libexec/blocklistd-helper/blacklistd-helper | 2
-rw-r--r--  libexec/rc/rc.d/Makefile | 2
-rw-r--r--  release/packages/ucl/local-unbound-all.ucl (renamed from release/packages/ucl/unbound-all.ucl) | 0
-rw-r--r--  release/packages/ucl/local-unbound.ucl | 27
-rw-r--r--  release/tools/vmimage.subr | 5
-rw-r--r--  sbin/fsck_ffs/setup.c | 47
-rw-r--r--  sbin/geom/Makefile | 2
-rw-r--r--  sbin/geom/core/geom.c | 232
-rw-r--r--  sbin/ifconfig/ifconfig.8 | 4
-rw-r--r--  sbin/ipfw/tables.c | 3
-rwxr-xr-x  sbin/mdconfig/tests/mdconfig_test.sh | 15
-rw-r--r--  sbin/ping/tests/Makefile | 3
-rwxr-xr-x  share/examples/bhyve/vmrun.sh | 6
-rw-r--r--  share/man/man4/bridge.4 | 210
-rw-r--r--  sys/amd64/amd64/elf_machdep.c | 14
-rw-r--r--  sys/amd64/linux/linux_sysvec.c | 12
-rw-r--r--  sys/amd64/linux32/linux32_sysvec.c | 12
-rw-r--r--  sys/arm/arm/pmap-v6.c | 2
-rw-r--r--  sys/arm/arm/unwind.c | 4
-rw-r--r--  sys/arm64/arm64/cpu_errata.c | 96
-rw-r--r--  sys/arm64/arm64/elf_machdep.c | 7
-rw-r--r--  sys/arm64/arm64/spec_workaround.c | 166
-rw-r--r--  sys/arm64/coresight/coresight.c | 2
-rw-r--r--  sys/arm64/linux/linux_sysvec.c | 10
-rw-r--r--  sys/cam/scsi/scsi_all.c | 4
-rw-r--r--  sys/cddl/compat/opensolaris/kern/opensolaris.c | 2
-rw-r--r--  sys/compat/ia32/ia32_sysvec.c | 24
-rw-r--r--  sys/compat/linux/linux.c | 26
-rw-r--r--  sys/compat/linux/linux_common.h | 2
-rw-r--r--  sys/compat/linux/linux_futex.c | 2
-rw-r--r--  sys/compat/linux/linux_socket.c | 32
-rw-r--r--  sys/conf/NOTES | 1
-rw-r--r--  sys/conf/files.arm64 | 1
-rw-r--r--  sys/conf/options | 1
-rw-r--r--  sys/conf/std.debug | 1
-rw-r--r--  sys/conf/std.nodebug | 1
-rw-r--r--  sys/contrib/libnv/bsd_nvpair.c | 8
-rw-r--r--  sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c | 41
-rw-r--r--  sys/dev/fdt/fdt_slicer.c | 6
-rw-r--r--  sys/dev/iommu/iommu_gas.c | 2
-rw-r--r--  sys/dev/mii/mv88e151x.c | 8
-rw-r--r--  sys/dev/nvme/nvme.c | 4
-rw-r--r--  sys/dev/nvme/nvme.h | 4
-rw-r--r--  sys/dev/nvme/nvme_sim.c | 4
-rw-r--r--  sys/dev/xdma/xdma.c | 2
-rw-r--r--  sys/dev/xen/bus/xen_intr.c | 6
-rw-r--r--  sys/fs/fuse/fuse_vnops.c | 4
-rw-r--r--  sys/fs/p9fs/p9_transport.c | 3
-rw-r--r--  sys/fs/unionfs/union_subr.c | 2
-rw-r--r--  sys/fs/unionfs/union_vnops.c | 12
-rw-r--r--  sys/i386/i386/machdep.c | 2
-rw-r--r--  sys/i386/i386/pmap.c | 2
-rw-r--r--  sys/kern/imgact_elf.c | 36
-rw-r--r--  sys/kern/kern_boottrace.c | 2
-rw-r--r--  sys/kern/kern_devctl.c | 2
-rw-r--r--  sys/kern/kern_event.c | 4
-rw-r--r--  sys/kern/kern_exec.c | 4
-rw-r--r--  sys/kern/kern_jailmeta.c | 8
-rw-r--r--  sys/kern/kern_linker.c | 2
-rw-r--r--  sys/kern/kern_malloc.c | 2
-rw-r--r--  sys/kern/kern_racct.c | 4
-rw-r--r--  sys/kern/kern_rangelock.c | 2
-rw-r--r--  sys/kern/kern_rctl.c | 4
-rw-r--r--  sys/kern/kern_sharedpage.c | 3
-rw-r--r--  sys/kern/kern_sig.c | 4
-rw-r--r--  sys/kern/kern_time.c | 4
-rw-r--r--  sys/kern/subr_pcpu.c | 2
-rw-r--r--  sys/kern/sys_socket.c | 2
-rw-r--r--  sys/kern/uipc_usrreq.c | 40
-rw-r--r--  sys/libkern/arc4random.c | 4
-rw-r--r--  sys/libkern/x86/crc32_sse42.c | 4
-rw-r--r--  sys/modules/ktest/Makefile | 3
-rw-r--r--  sys/modules/ktest/ktest_tcphpts/Makefile | 13
-rw-r--r--  sys/net/route.c | 2
-rw-r--r--  sys/net/route/route_tables.c | 2
-rw-r--r--  sys/net/rtsock.c | 2
-rw-r--r--  sys/net80211/ieee80211_ht.c | 2
-rw-r--r--  sys/net80211/ieee80211_hwmp.c | 2
-rw-r--r--  sys/net80211/ieee80211_mesh.c | 2
-rw-r--r--  sys/net80211/ieee80211_phy.c | 2
-rw-r--r--  sys/net80211/ieee80211_proto.c | 2
-rw-r--r--  sys/net80211/ieee80211_vht.c | 2
-rw-r--r--  sys/netinet/cc/cc.c | 2
-rw-r--r--  sys/netinet/in_fib_algo.c | 2
-rw-r--r--  sys/netinet/in_mcast.c | 113
-rw-r--r--  sys/netinet/tcp_hpts.c | 933
-rw-r--r--  sys/netinet/tcp_hpts.h | 50
-rw-r--r--  sys/netinet/tcp_hpts_internal.h | 184
-rw-r--r--  sys/netinet/tcp_hpts_test.c | 1682
-rw-r--r--  sys/netinet/tcp_input.c | 2
-rw-r--r--  sys/netinet/tcp_lro_hpts.c | 3
-rw-r--r--  sys/netinet/tcp_output.c | 2
-rw-r--r--  sys/netinet/tcp_stacks/bbr.c | 131
-rw-r--r--  sys/netinet/tcp_stacks/rack.c | 252
-rw-r--r--  sys/netinet6/in6_fib_algo.c | 2
-rw-r--r--  sys/netipsec/xform_ipcomp.c | 4
-rw-r--r--  sys/netpfil/ipfw/ip_fw2.c | 9
-rw-r--r--  sys/netpfil/ipfw/ip_fw_nat.c | 16
-rw-r--r--  sys/netpfil/pf/pf_ioctl.c | 4
-rw-r--r--  sys/nfs/nfs_diskless.c | 2
-rw-r--r--  sys/powerpc/aim/mmu_oea64.c | 4
-rw-r--r--  sys/powerpc/cpufreq/pmcr.c | 3
-rw-r--r--  sys/rpc/auth.h | 4
-rw-r--r--  sys/rpc/authunix_prot.c | 93
-rw-r--r--  sys/rpc/svc_auth_unix.c | 94
-rw-r--r--  sys/security/audit/audit.c | 2
-rw-r--r--  sys/security/mac/mac_framework.c | 4
-rw-r--r--  sys/sys/imgact_elf.h | 8
-rw-r--r--  sys/sys/proc.h | 2
-rw-r--r--  sys/sys/sockbuf.h | 2
-rw-r--r--  sys/sys/socket.h | 1
-rw-r--r--  sys/sys/sockopt.h | 6
-rw-r--r--  sys/sys/sysent.h | 3
-rw-r--r--  sys/sys/tree.h | 57
-rw-r--r--  sys/tests/ktest.h | 10
-rw-r--r--  sys/ufs/ffs/ffs_inode.c | 4
-rw-r--r--  sys/vm/vm_meter.c | 2
-rw-r--r--  sys/vm/vm_pageout.c | 4
-rw-r--r--  sys/x86/x86/tsc.c | 2
-rw-r--r--  sys/x86/xen/xen_apic.c | 2
-rw-r--r--  tests/atf_python/ktest.py | 12
-rw-r--r--  tests/sys/fs/fusefs/bad_server.cc | 5
-rw-r--r--  tests/sys/kern/unix_stream.c | 27
-rw-r--r--  tests/sys/netinet/Makefile | 1
-rwxr-xr-x  tests/sys/netinet/carp.sh | 3
-rw-r--r--  tests/sys/netinet/multicast-receive.c | 16
-rwxr-xr-x  tests/sys/netinet/multicast.sh | 76
-rw-r--r--  tests/sys/netinet/tcp_hpts_test.py | 4
-rw-r--r--  tests/sys/vm/mmap_test.c | 31
-rw-r--r--  usr.bin/login/login.conf | 4
-rw-r--r--  usr.bin/sockstat/main.c | 8
-rw-r--r--  usr.bin/sockstat/sockstat.1 | 5
-rw-r--r--  usr.sbin/blacklistctl/Makefile | 3
-rw-r--r--  usr.sbin/blacklistd/Makefile | 4
-rw-r--r--  usr.sbin/bsdinstall/partedit/part_wizard.c | 25
-rw-r--r--  usr.sbin/certctl/certctl.8 | 6
-rw-r--r--  usr.sbin/fwget/pci/pci_network_mediatek | 36
-rw-r--r--  usr.sbin/unbound/Makefile.inc | 2
160 files changed, 5061 insertions(+), 1548 deletions(-)
diff --git a/Makefile.inc1 b/Makefile.inc1
index 74c4598dd092..a86dead09aa1 100644
--- a/Makefile.inc1
+++ b/Makefile.inc1
@@ -1964,6 +1964,7 @@ REPODIR?= ${OBJROOT}repo
PKG_FORMAT?= tzst
PKG_LEVEL?= -1
PKG_CLEVEL?= ${"${PKG_FORMAT:Mtar}" != "":?:-l ${PKG_LEVEL}}
+PKG_CTHREADS?= 0
PKG_REPO_SIGNING_KEY?= # empty
PKG_OUTPUT_DIR?= ${PKG_VERSION}
PKG_ABI_FILE?= ${WSTAGEDIR}/usr/bin/uname
@@ -2094,6 +2095,7 @@ create-packages-world: _pkgbootstrap _repodir .PHONY
.ORDER: create-packages-world create-packages-sets
.ORDER: create-packages-kernel create-packages-sets
+.ORDER: create-packages-source create-packages-sets
create-packages-sets: _pkgbootstrap _repodir .PHONY
${_+_}@cd ${.CURDIR}; \
${MAKE} -f Makefile.inc1 \
@@ -2143,7 +2145,7 @@ create-source-src-package: _pkgbootstrap .PHONY
${SSTAGEDIR}/src.ucl
${PKG_CMD} -o ABI=${PKG_ABI} \
-o OSVERSION="${SRCRELDATE}" \
- create -f ${PKG_FORMAT} ${PKG_CLEVEL} \
+ create -f ${PKG_FORMAT} ${PKG_CLEVEL} -T${PKG_CTHREADS} \
-M ${SSTAGEDIR}/src.ucl \
-p ${SSTAGEDIR}/src.plist \
-r ${SRCDIR} \
@@ -2169,7 +2171,7 @@ create-source-src-sys-package: _pkgbootstrap .PHONY
${SSTAGEDIR}/src-sys.ucl
${PKG_CMD} -o ABI=${PKG_ABI} \
-o OSVERSION="${SRCRELDATE}" \
- create -f ${PKG_FORMAT} ${PKG_CLEVEL} \
+ create -f ${PKG_FORMAT} ${PKG_CLEVEL} -T${PKG_CTHREADS} \
-M ${SSTAGEDIR}/src-sys.ucl \
-p ${SSTAGEDIR}/src-sys.plist \
-r ${SRCDIR} \
@@ -2209,7 +2211,7 @@ create-world-package-${pkgname}: .PHONY
' ${WSTAGEDIR}/${pkgname}.ucl
${PKG_CMD} -o ABI=${PKG_ABI} -o ALLOW_BASE_SHLIBS=yes \
-o OSVERSION="${SRCRELDATE}" \
- create -f ${PKG_FORMAT} ${PKG_CLEVEL} \
+ create -f ${PKG_FORMAT} ${PKG_CLEVEL} -T${PKG_CTHREADS} \
-M ${WSTAGEDIR}/${pkgname}.ucl \
-p ${WSTAGEDIR}/${pkgname}.plist \
-r ${WSTAGEDIR} \
@@ -2228,7 +2230,7 @@ create-sets-packages: .PHONY
@for manifest in ${WSTAGEDIR}/set-*.ucl; do \
echo "--> Processing manifest: $$manifest"; \
${PKG_CMD} -o ABI=${PKG_ABI} -o OSVERSION="${SRCRELDATE}" \
- create -f ${PKG_FORMAT} ${PKG_CLEVEL} \
+ create -f ${PKG_FORMAT} ${PKG_CLEVEL} -T${PKG_CTHREADS} \
-M $$manifest \
-o "${REPODIR}/${PKG_ABI}/${PKG_OUTPUT_DIR}" \
|| exit 1; \
@@ -2258,7 +2260,7 @@ create-dtb-package: .PHONY
${KSTAGEDIR}/${DISTDIR}/dtb.ucl ; \
${PKG_CMD} -o ABI=${PKG_ABI} -o ALLOW_BASE_SHLIBS=yes \
-o OSVERSION="${SRCRELDATE}" \
- create -f ${PKG_FORMAT} ${PKG_CLEVEL} \
+ create -f ${PKG_FORMAT} ${PKG_CLEVEL} -T${PKG_CTHREADS} \
-M ${KSTAGEDIR}/${DISTDIR}/dtb.ucl \
-p ${KSTAGEDIR}/${DISTDIR}/dtb.plist \
-r ${KSTAGEDIR}/${DISTDIR} \
@@ -2295,7 +2297,7 @@ create-kernel-packages-flavor${flavor:C,^""$,${_default_flavor},}: _pkgbootstrap
${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl ; \
${PKG_CMD} -o ABI=${PKG_ABI} -o ALLOW_BASE_SHLIBS=yes \
-o OSVERSION="${SRCRELDATE}" \
- create -f ${PKG_FORMAT} ${PKG_CLEVEL} \
+ create -f ${PKG_FORMAT} ${PKG_CLEVEL} -T${PKG_CTHREADS} \
-M ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.ucl \
-p ${KSTAGEDIR}/${DISTDIR}/kernel.${INSTALLKERNEL}${flavor}.plist \
-r ${KSTAGEDIR}/${DISTDIR} \
@@ -2338,7 +2340,7 @@ create-kernel-packages-extra-flavor${flavor:C,^""$,${_default_flavor},}-${_kerne
${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl ; \
${PKG_CMD} -o ABI=${PKG_ABI} -o ALLOW_BASE_SHLIBS=yes \
-o OSVERSION="${SRCRELDATE}" \
- create -f ${PKG_FORMAT} ${PKG_CLEVEL} \
+ create -f ${PKG_FORMAT} ${PKG_CLEVEL} -T${PKG_CTHREADS} \
-M ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.ucl \
-p ${KSTAGEDIR}/kernel.${_kernel}/kernel.${_kernel}${flavor}.plist \
-r ${KSTAGEDIR}/kernel.${_kernel} \
diff --git a/RELNOTES b/RELNOTES
index 174ce12e4148..e34a5b23a005 100644
--- a/RELNOTES
+++ b/RELNOTES
@@ -11,7 +11,7 @@ newline. Entries should be separated by a newline.
Changes to this file should not be MFCed.
5000d023a446, 03da141d59ae:
- Add a "-f" option to "kadmin -l dump" with can be used to
+ Add a "-f" option to "kadmin -l dump" which can be used to
dump the Heimdal KDC database in a format that can be loaded
into the MIT KDC.
See https://wiki.freebsd.org/Kerberos/Heimdal2MIT_KDC_Migration
diff --git a/UPDATING b/UPDATING
index 4460898fca2d..ed49bd76e917 100644
--- a/UPDATING
+++ b/UPDATING
@@ -27,6 +27,20 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 16.x IS SLOW:
world, or to merely disable the most expensive debugging functionality
at runtime, run "ln -s 'abort:false,junk:false' /etc/malloc.conf".)
+20251015:
+ The "FreeBSD-unbound" package is renamed to "FreeBSD-local-unbound".
+ If you have set-optional or set-base installed, the new package will
+	be installed automatically; otherwise, you should manually install the
+ new package and remove the old one.
+
+ This change only affects pkgbase users.
+
+20251012:
+ Blacklist has been renamed upstream to Blocklist. If you have it
+	configured, rename all configuration files, firewall anchors, or
+	sentinel files to reflect the new nomenclature. Old setups will
+	continue to work, but will emit a warning.
+
20251002:
Audio-related utilities including mixer(8) and virtual_oss(8) have
moved to the new FreeBSD-sound package. If you have set-optional or
@@ -700,7 +714,7 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 16.x IS SLOW:
Bump _FreeBSD_version to 1400078 to be able to detect this change.
20221212:
- llvm-objump is now always installed as objdump. Previously there was
+ llvm-objdump is now always installed as objdump. Previously there was
no /usr/bin/objdump unless the WITH_LLVM_BINUTILS knob was used.
Some LLVM objdump options have a different output format compared to
diff --git a/contrib/blocklist/bin/blacklistctl.8 b/contrib/blocklist/bin/blacklistctl.8
new file mode 100644
index 000000000000..4d557c0c979d
--- /dev/null
+++ b/contrib/blocklist/bin/blacklistctl.8
@@ -0,0 +1,136 @@
+.\" $NetBSD: blocklistctl.8,v 1.4 2025/02/07 01:35:38 kre Exp $
+.\"
+.\" Copyright (c) 2015 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Christos Zoulas.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd January 27, 2025
+.Dt BLACKLISTCTL 8
+.Os
+.Sh NAME
+.Nm blacklistctl
+.Nd display and change the state of the blacklistd database
+.Sh SYNOPSIS
+.Nm
+.Cm dump
+.Op Fl abdnrw
+.Op Fl D Ar dbname
+.Sh DESCRIPTION
+.Nm
+is a program used to display and change the state of the
+.Xr blacklistd 8
+database.
+The following sub-commands are supported:
+.Ss dump
+The following options are available for the
+.Cm dump
+sub-command:
+.Bl -tag -width indent
+.It Fl a
+Show all database entries; by default only the active ones are shown.
+Inactive entries will be shown with a last-access (or, with
+.Fl r ,
+the remaining) time of
+.Ql never .
+.It Fl b
+Show only the blocked entries.
+.It Fl D Ar dbname
+Specify the location of the
+.Ic blacklistd
+database file to use.
+The default is
+.Pa /var/db/blocklistd.db .
+.It Fl d
+Increase debugging level.
+.It Fl n
+Don't display a header.
+.It Fl r
+Show the remaining blocked time instead of the last activity time.
+.It Fl w
+Normally the width of addresses is only wide enough for IPv4; the
+.Fl w
+flag makes the display wide enough for IPv6 addresses.
+.El
+.Pp
+The output of the
+.Cm dump
+sub-command consists of a header (unless
+.Fl n
+was given) and one line for each record in the database, where each line
+has the following columns:
+.Bl -tag -width indent
+.It Ql address/ma:port
+The remote address, mask, and local port number of the client connection
+associated with the database entry.
+.It Ql id
+The identifier for the packet filter rule associated
+with the database entry, though this may only be the word
+.Ql OK
+for packet filters which do not create a unique identifier for each rule.
+.It Ql nfail
+The number of
+.Em failures
+reported for the client on the noted port, as well as the number of
+failures allowed before blocking (or, with
+.Fl a ,
+an asterisk
+.Aq * ) .
+.It So last access Sc | So remaining time Sc
+The last time the client was reported as attempting access, or, with
+.Fl r ,
+the time remaining before the rule blocking the client will be removed.
+.El
+.Sh SEE ALSO
+.Xr blacklistd 8
+.Sh NOTES
+The
+.Nm
+program has been renamed to
+.Xr blocklistctl 8 .
+.Pp
+Sometimes the reported number of failed attempts can exceed the number
+of attempts that
+.Xr blacklistd 8
+is configured to block.
+This can happen either because the rule has been removed manually, or
+because there were more attempts in flight while the rule block was being
+added.
+This condition is normal; in that case
+.Xr blacklistd 8
+will first attempt to remove the existing rule, and then it will re-add
+it to make sure that there is only one rule active.
+.Sh HISTORY
+.Nm
+first appeared in
+.Nx 7 .
+.Fx
+support for
+.Nm
+was implemented in
+.Fx 11 .
+.Sh AUTHORS
+.An Christos Zoulas
diff --git a/contrib/blocklist/bin/blacklistd.8 b/contrib/blocklist/bin/blacklistd.8
new file mode 100644
index 000000000000..9ca886e9c4d3
--- /dev/null
+++ b/contrib/blocklist/bin/blacklistd.8
@@ -0,0 +1,308 @@
+.\" $NetBSD: blocklistd.8,v 1.8 2025/02/25 22:13:34 christos Exp $
+.\"
+.\" Copyright (c) 2015 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Christos Zoulas.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd February 25, 2025
+.Dt BLACKLISTD 8
+.Os
+.Sh NAME
+.Nm blacklistd
+.Nd block and release ports on demand to avoid DoS abuse
+.Sh SYNOPSIS
+.Nm
+.Op Fl dfrv
+.Op Fl C Ar controlprog
+.Op Fl c Ar configfile
+.Op Fl D Ar dbfile
+.Op Fl P Ar sockpathsfile
+.Op Fl R Ar rulename
+.Op Fl s Ar sockpath
+.Op Fl t Ar timeout
+.Sh DESCRIPTION
+.Nm
+is a daemon similar to
+.Xr syslogd 8
+that listens to sockets at paths specified in the
+.Ar sockpathsfile
+for notifications from other daemons about successful or failed connection
+attempts.
+If no such file is specified, then it only listens to the socket path
+specified by
+.Ar sockpath
+or, if that is not specified, to
+.Pa /var/run/blocklistd.sock .
+Each notification contains an (action, port, protocol, address, owner) tuple
+that identifies the remote connection and the action.
+This tuple is consulted against entries from the
+.Ar configfile ,
+with the syntax specified in
+.Xr blacklistd.conf 5 .
+If an entry is matched, a state entry is created for that tuple.
+Each entry contains a number of tries limit and a duration.
+.Pp
+If
+.Ar configfile
+is a directory, or a directory exists with the same name as
+.Ar configfile
+with
+.Qq .d
+appended to it, each file in the directory will be read as a configuration file.
+If
+.Ar configfile
+exists as a file, it will be processed before the contents of the
+.Ar configfile Ns .d
+directory if that also exists.
+.Pp
+The way
+.Nm
+does configuration entry matching is by having the client side pass the
+file descriptor associated with the connection the client wants to blacklist
+as well as passing socket credentials.
+.Pp
+The file descriptor is used to retrieve information (address and port)
+about the remote side with
+.Xr getpeername 2
+and the local side with
+.Xr getsockname 2 .
+.Pp
+By examining the port of the local side,
+.Nm
+can determine if the client program
+.Dq owns
+the port.
+By examining the optional address portion on the local side, it can match
+interfaces.
+By examining the remote address, it can match specific allow or deny rules.
+.Pp
+Finally
+.Nm
+can examine the socket credentials to match the user in the configuration file.
+.Pp
+While this works well for TCP sockets, it cannot be relied on for unbound
+UDP sockets.
+It is also less meaningful when it comes to connections using non-privileged
+ports.
+On the other hand, if we receive a request that has a local endpoint indicating
+a UDP privileged port, we can presume that the client was privileged to be
+able to acquire that port.
+.Pp
+Once an entry is matched
+.Nm
+can perform various actions.
+If the action is
+.Dq add
+and the number of tries limit is reached, then a
+control script
+.Ar controlprog
+is invoked with arguments:
+.Bd -literal -offset indent
+control add <rulename> <proto> <address> <mask> <port>
+.Ed
+.Pp
+and should invoke a packet filter command to block the connection
+specified by the arguments.
+The
+.Ar rulename
+argument can be set from the command line (default
+.Dv blacklistd ) .
+The script could print a numerical id to stdout as a handle for
+the rule that can be used later to remove that connection, but
+that is not required, as all the information needed to remove the rule is
+kept.
+.Pp
+If the action is
+.Dq rem ,
+then the same control script is invoked as:
+.Bd -literal -offset indent
+control rem <rulename> <proto> <address> <mask> <port> <id>
+.Ed
+.Pp
+where
+.Ar id
+is the number returned from the
+.Dq add
+action.
+.Pp
+.Nm
+maintains a database of known connections in
+.Ar dbfile .
+On startup it reads entries from that file, and updates its internal state.
+.Pp
+.Nm
+checks the list of active entries every
+.Ar timeout
+seconds (default
+.Dv 15 )
+and removes entries and block rules using the control program as necessary.
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl C Ar controlprog
+Use
+.Ar controlprog
+to communicate with the packet filter, instead of the default, which is
+.Pa /usr/libexec/blacklistd-helper .
+The following arguments are passed to the control program:
+.Bl -tag -width protocol
+.It action
+The action to perform:
+.Dv add ,
+.Dv rem ,
+or
+.Dv flush ;
+to add, remove or flush a firewall rule.
+.It name
+The rule name.
+.It protocol
+The optional protocol name (can be empty):
+.Dv tcp ,
+.Dv tcp6 ,
+.Dv udp ,
+.Dv udp6 .
+.It address
+The IPv4 or IPv6 numeric address to be blocked or released.
+.It mask
+The numeric mask to be applied to the blocked or released address.
+.It port
+The optional numeric port to be blocked (can be empty).
+.It id
+For packet filters that support removal of rules by rule identifier, the
+identifier of the rule to be removed.
+The add command is expected to return the rule identifier string to stdout.
+.El
+.It Fl c Ar configuration
+The name of the configuration file to read.
+The default when
+.Fl c
+is not given is
+.Pa /etc/blacklistd.conf .
+.It Fl D Ar dbfile
+The Berkeley DB file where
+.Nm
+stores its state.
+It defaults to
+.Pa /var/db/blocklistd.db .
+.It Fl d
+Normally,
+.Nm
+disassociates itself from the terminal unless the
+.Fl d
+flag is specified, in which case it stays in the foreground.
+.It Fl f
+Truncate the state database; all the rules named
+.Ar rulename
+are deleted by invoking the control script as:
+.Bd -literal -offset indent
+control flush <rulename>
+.Ed
+.It Fl P Ar sockpathsfile
+A file containing a list of pathnames, one per line, that
+.Nm
+will create sockets to listen to.
+This is useful for chrooted environments.
+.It Fl R Ar rulename
+Specify the default rule name for the packet filter rules, usually
+.Dv blacklistd .
+.It Fl r
+Re-read the firewall rules from the internal database, then
+remove and re-add them.
+This helps for packet filters that do not retain state across reboots.
+.It Fl s Ar sockpath
+Add
+.Ar sockpath
+to the list of Unix sockets
+.Nm
+listens to.
+.It Fl t Ar timeout
+The interval in seconds at which
+.Nm
+polls the state file to update the rules.
+.It Fl v
+Cause
+.Nm
+to print
+diagnostic messages to
+.Dv stdout
+instead of
+.Xr syslogd 8 .
+.El
+.Sh SIGNAL HANDLING
+.Nm
+deals with the following signals:
+.Bl -tag -width "USR2"
+.It Dv HUP
+Receipt of this signal causes
+.Nm
+to re-read the configuration file.
+.It Dv INT , Dv TERM & Dv QUIT
+These signals tell
+.Nm
+to exit in an orderly fashion.
+.It Dv USR1
+This signal tells
+.Nm
+to increase the internal debugging level by 1.
+.It Dv USR2
+This signal tells
+.Nm
+to decrease the internal debugging level by 1.
+.El
+.Sh FILES
+.Bl -tag -width /usr/libexec/blacklistd-helper -compact
+.It Pa /usr/libexec/blacklistd-helper
+Shell script invoked to interface with the packet filter.
+.It Pa /etc/blacklistd.conf
+Configuration file.
+.It Pa /var/db/blocklistd.db
+Database of current connection entries.
+.It Pa /var/run/blocklistd.sock
+Socket to receive connection notifications.
+.El
+.Sh SEE ALSO
+.Xr blacklistd.conf 5 ,
+.Xr blacklistctl 8 ,
+.Xr ipf 8 ,
+.Xr ipfw 8 ,
+.Xr pfctl 8 ,
+.Xr syslogd 8
+.Sh NOTES
+The
+.Nm
+daemon has been renamed to
+.Xr blocklistd 8 .
+.Sh HISTORY
+.Nm
+first appeared in
+.Nx 7 .
+.Fx
+support for
+.Nm
+was implemented in
+.Fx 11 .
+.Sh AUTHORS
+.An Christos Zoulas
diff --git a/contrib/blocklist/bin/blacklistd.conf.5 b/contrib/blocklist/bin/blacklistd.conf.5
new file mode 100644
index 000000000000..e775d30e7e8e
--- /dev/null
+++ b/contrib/blocklist/bin/blacklistd.conf.5
@@ -0,0 +1,242 @@
+.\" $NetBSD: blocklistd.conf.5,v 1.7 2025/02/11 17:47:05 christos Exp $
+.\"
+.\" Copyright (c) 2015, 2025 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Christos Zoulas.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd February 5, 2025
+.Dt BLACKLISTD.CONF 5
+.Os
+.Sh NAME
+.Nm blacklistd.conf
+.Nd configuration file format for blacklistd
+.Sh DESCRIPTION
+The
+.Nm
+file contains configuration entries for
+.Xr blacklistd 8
+in a fashion similar to
+.Xr inetd.conf 5 .
+Only one entry per line is permitted.
+Every entry must have all fields populated.
+Each field can be separated by a tab or a space.
+Comments are denoted by a
+.Dq #
+at the beginning of a line.
+.Pp
+There are two kinds of configuration lines,
+.Va [local]
+and
+.Va [remote] .
+By default, configuration lines are
+.Va [local] ,
+i.e., the address specified refers to the addresses on the local machine.
+To switch between
+.Va [local]
+and
+.Va [remote]
+configuration lines, you can specify the stanzas:
+.Dq [local]
+and
+.Dq [remote] .
+.Pp
+On
+.Va [local]
+and
+.Va [remote]
+lines
+.Dq *
+means use the default, or wildcard match.
+In addition, for
+.Va [remote]
+lines
+.Dq =
+means use the values from the matched
+.Va [local]
+configuration line.
+.Pp
+The first four fields,
+.Va location ,
+.Va type ,
+.Va proto ,
+and
+.Va owner
+are used to match the
+.Va [local]
+or
+.Va [remote]
+addresses, whereas the last three fields
+.Va name ,
+.Va nfail ,
+and
+.Va duration
+are used to modify the filtering action.
+.Pp
+The first field denotes the
+.Va location
+as an address, mask, and port.
+The syntax for the
+.Va location
+is:
+.Bd -literal -offset indent
+ [<address>|<interface>][/<mask>][:<port>]
+.Ed
+.Pp
+The
+.Dv address
+can be an IPv4 address in numeric format, an IPv6 address
+in numeric format and enclosed by square brackets, or an interface name.
+Mask modifiers are not allowed on interfaces because interfaces
+can have multiple addresses in different protocols where the mask has a
+different size.
+.Pp
+The
+.Dv mask
+is always numeric, but the
+.Dv port
+can be either numeric or symbolic.
+.Pp
+The second field is the socket
+.Va type :
+.Dv stream ,
+.Dv dgram ,
+or numeric.
+The third field is the
+.Va protocol :
+.Dv tcp ,
+.Dv udp ,
+.Dv tcp6 ,
+.Dv udp6 ,
+or numeric.
+The fourth field is the effective user
+.Va ( owner )
+of the daemon process reporting the event,
+either as a username or a userid.
+.Pp
+The rest of the fields control the behavior of the filter.
+.Pp
+The
+.Va name
+field, is the name of the packet filter rule to be used.
+If the
+.Va name
+starts with a hyphen
+.Pq Dq - ,
+then the default rulename is prepended to the given name.
+If the
+.Dv name
+contains a
+.Dq / ,
+the remaining portion of the name is interpreted as the mask to be
+applied to the address specified in the rule, causing a single rule violation to
+block the entire subnet for the configured prefix.
+.Pp
+The
+.Va nfail
+field contains the number of failed attempts before access is blocked,
+defaulting to
+.Dq *
+meaning never, and the last field
+.Va duration
+specifies the amount of time since the last access that the blocking
+rule should be active, defaulting to
+.Dq *
+meaning forever.
+The default unit for
+.Va duration
+is seconds, but one can specify suffixes for different units, such as
+.Dq m
+for minutes,
+.Dq h
+for hours, and
+.Dq d
+for days.
+.Pp
+Matching is done first by checking the
+.Va [local]
+rules individually, in the order of the most specific to the least specific.
+If a match is found, then the matching
+.Va [remote]
+rules are applied.
+The
+.Va name ,
+.Va nfail ,
+and
+.Va duration
+fields can be altered by the
+.Va [remote]
+rule that matched.
+.Pp
+The
+.Va [remote]
+rules can be used for allowing specific addresses, changing the mask
+size (via
+.Va name ) ,
+the rule that the packet filter uses (also via
+.Va name ) ,
+the number of failed attempts (via
+.Va nfail ) ,
+or the duration to block (via
+.Va duration ) .
+.Sh FILES
+.Bl -tag -width /etc/blacklistd.conf -compact
+.It Pa /etc/blacklistd.conf
+Configuration file.
+.El
+.Sh EXAMPLES
+.Bd -literal -offset 8n
+# Block ssh, after 3 attempts for 6 hours on the bnx0 interface
+[local]
+# location type proto owner name nfail duration
+bnx0:ssh * * * * 3 6h
+[remote]
+# Never block 1.2.3.4
+1.2.3.4:ssh * * * * * *
+# Never block the example IPv6 subnet either
+[2001:db8::]/32:ssh * * * * * *
+# For addresses coming from 8.8.0.0/16 block whole /24 networks instead of
+# individual hosts, but keep the rest of the blocking parameters the same.
+8.8.0.0/16:ssh * * * /24 = =
+.Ed
+.Sh SEE ALSO
+.Xr blacklistctl 8 ,
+.Xr blacklistd 8
+.Sh NOTES
+The
+.Nm
+file has been renamed to
+.Xr blocklistd.conf 5 .
+.Sh HISTORY
+.Nm
+first appeared in
+.Nx 7 .
+.Fx
+support for
+.Nm
+was implemented in
+.Fx 11 .
+.Sh AUTHORS
+.An Christos Zoulas
diff --git a/contrib/blocklist/lib/libblacklist.3 b/contrib/blocklist/lib/libblacklist.3
new file mode 100644
index 000000000000..5bc093c38f79
--- /dev/null
+++ b/contrib/blocklist/lib/libblacklist.3
@@ -0,0 +1,188 @@
+.\" $NetBSD: libblocklist.3,v 1.7 2025/02/05 20:14:30 christos Exp $
+.\"
+.\" Copyright (c) 2015 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Christos Zoulas.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd February 5, 2025
+.Dt LIBBLACKLIST 3
+.Os
+.Sh NAME
+.Nm blacklist_open ,
+.Nm blacklist_open2 ,
+.Nm blacklist_close ,
+.Nm blacklist_r ,
+.Nm blacklist ,
+.Nm blacklist_sa ,
+.Nm blacklist_sa_r
+.Nd blacklistd notification library
+.Sh LIBRARY
+.Lb libblacklist
+.Sh SYNOPSIS
+.In blacklist.h
+.Ft struct blacklist *
+.Fn blacklist_open "void"
+.Ft struct blacklist *
+.Fn blacklist_open2 "void (*logger)(int, struct syslog_data *, va_list)"
+.Ft void
+.Fn blacklist_close "struct blacklist *cookie"
+.Ft int
+.Fn blacklist "int action" "int fd" "const char *msg"
+.Ft int
+.Fn blacklist_r "struct blacklist *cookie" "int action" "int fd" "const char *msg"
+.Ft int
+.Fn blacklist_sa "int action" "int fd" "const struct sockaddr *sa" "socklen_t salen" "const char *msg"
+.Ft int
+.Fn blacklist_sa_r "struct blacklist *cookie" "int action" "int fd" "const struct sockaddr *sa" "socklen_t salen" "const char *msg"
+.Sh DESCRIPTION
+These functions can be used by daemons to notify
+.Xr blacklistd 8
+about successful and failed remote connections so that blacklistd can
+block or release port access to prevent Denial of Service attacks.
+.Pp
+The function
+.Fn blacklist_open
+creates the necessary state to communicate with
+.Xr blacklistd 8
+and returns a pointer to it, or
+.Dv NULL
+on failure.
+.Pp
+The function
+.Fn blacklist_open2
+is similar to
+.Fn blacklist_open
+but allows a
+.Fa logger
+to be specified.
+If the
+.Fa logger
+is
+.Dv NULL ,
+then no logging is performed.
+.Pp
+The
+.Fn blacklist_close
+function frees all memory and resources used.
+.Pp
+The
+.Fn blacklist
+function sends a message to
+.Xr blacklistd 8 ,
+with an integer
+.Ar action
+argument specifying the type of notification,
+a file descriptor
+.Ar fd
+specifying the accepted file descriptor connected to the client,
+and an optional message in the
+.Ar msg
+argument.
+.Pp
+The
+.Ar action
+parameter can take these values:
+.Bl -tag -width ".Dv BLACKLIST_ABUSIVE_BEHAVIOR"
+.It Dv BLACKLIST_AUTH_FAIL
+There was an unsuccessful authentication attempt.
+.It Dv BLACKLIST_AUTH_OK
+A user successfully authenticated.
+.It Dv BLACKLIST_ABUSIVE_BEHAVIOR
+The sending daemon has detected abusive behavior
+from the remote system.
+The remote address should
+be blocked as soon as possible.
+.It Dv BLACKLIST_BAD_USER
+The sending daemon has determined the username
+presented for authentication is invalid.
+The
+.Xr blacklistd 8
+daemon compares the username to a configured list of forbidden
+usernames and
+blocks the address immediately if a forbidden username matches.
+(The
+.Dv BLACKLIST_BAD_USER
+support is not currently available.)
+.El
+.Pp
+The
+.Fn blacklist_r
+function is more efficient because it keeps the blacklist state around.
+.Pp
+The
+.Fn blacklist_sa
+and
+.Fn blacklist_sa_r
+functions can be used with unconnected sockets, where
+.Xr getpeername 2
+will not work; in that case the server will pass the peer name in the message.
+.Pp
+In all cases the file descriptor passed in the
+.Fa fd
+argument must be pointing to a valid socket so that
+.Xr blacklistd 8
+can establish ownership of the local endpoint
+using
+.Xr getsockname 2 .
+.Pp
+By default,
+.Xr syslogd 8
+is used for message logging.
+The internal
+.Fn bl_create
+function can be used to create the required internal
+state and specify a custom logging function.
+.Sh RETURN VALUES
+The function
+.Fn blacklist_open
+returns a cookie on success and
+.Dv NULL
+on failure, setting
+.Dv errno
+to an appropriate value.
+.Pp
+The functions
+.Fn blacklist ,
+.Fn blacklist_sa ,
+and
+.Fn blacklist_sa_r
+return
+.Dv 0
+on success and
+.Dv \-1
+on failure, setting
+.Dv errno
+to an appropriate value.
+.Sh NOTES
+The
+.Lb libblacklist
+has been renamed to
+.Xr libblocklist 3 .
+.Sh SEE ALSO
+.Xr blacklistd.conf 5 ,
+.Xr blacklistd 8
+.Sh AUTHORS
+.An Christos Zoulas
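As a worked illustration of the API documented above (not part of this diff), a daemon's authentication path might report results through blacklist_r(). The auth_result() wrapper and its callers are hypothetical; the calls and constants are those from the SYNOPSIS, and the program links with -lblacklist:

    /*
     * Illustrative sketch: report one authentication attempt to
     * blacklistd(8), which counts failures per address/port tuple and
     * decides when to install a firewall rule.
     */
    #include <blacklist.h>
    #include <stddef.h>

    static struct blacklist *bl;

    void
    auth_result(int ok, int clientfd)
    {
        if (bl == NULL)
            bl = blacklist_open();
        if (bl == NULL)
            return;    /* keep serving even without blacklistd */
        (void)blacklist_r(bl,
            ok ? BLACKLIST_AUTH_OK : BLACKLIST_AUTH_FAIL,
            clientfd, ok ? "login ok" : "login failed");
    }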
diff --git a/contrib/blocklist/libexec/blocklistd-helper b/contrib/blocklist/libexec/blocklistd-helper
index f27cde4ed4ea..14a192ee35ce 100755
--- a/contrib/blocklist/libexec/blocklistd-helper
+++ b/contrib/blocklist/libexec/blocklistd-helper
@@ -258,7 +258,7 @@ flush)
pf)
# dynamically determine which anchors exist
for anchor in $(/sbin/pfctl -a "$2" -s Anchors 2> /dev/null); do
- /sbin/pfctl -a "$anchor" -t "port${anchor##*/}" -T flush
+ /sbin/pfctl -a "$anchor" -t "port${anchor##*/}" -T flush 2> /dev/null
/sbin/pfctl -a "$anchor" -F rules
done
echo OK
diff --git a/crypto/openssh/sshd-session.c b/crypto/openssh/sshd-session.c
index e8299c254567..ca35790149ac 100644
--- a/crypto/openssh/sshd-session.c
+++ b/crypto/openssh/sshd-session.c
@@ -217,8 +217,6 @@ mm_is_monitor(void)
static void
grace_alarm_handler(int sig)
{
- BLOCKLIST_NOTIFY(the_active_state, BLOCKLIST_AUTH_FAIL,
- "Grace period expired");
/*
* Try to kill any processes that we have spawned, E.g. authorized
* keys command helpers or privsep children.
diff --git a/etc/Makefile b/etc/Makefile
index 93d4b489ec7d..bda70b8af391 100644
--- a/etc/Makefile
+++ b/etc/Makefile
@@ -68,7 +68,7 @@ distribution:
# install the /etc/unbound symlink, otherwise, don't overwrite the user's
# existing symlink.
if [ "${NO_ROOT:Dtrue}" = true -o ! -e ${DESTDIR}/etc/unbound ]; then \
- ${INSTALL_SYMLINK} -Tpackage=unbound ../var/unbound \
+ ${INSTALL_SYMLINK} -Tpackage=local-unbound ../var/unbound \
${DESTDIR}/etc/unbound; \
fi
.endif
diff --git a/etc/mtree/BSD.var.dist b/etc/mtree/BSD.var.dist
index b3372196f5f9..b55c0150848e 100644
--- a/etc/mtree/BSD.var.dist
+++ b/etc/mtree/BSD.var.dist
@@ -104,8 +104,8 @@
vi.recover mode=01777
..
..
- unbound uname=unbound gname=unbound mode=0755 tags=package=unbound
- conf.d uname=unbound gname=unbound mode=0755 tags=package=unbound
+ unbound uname=unbound gname=unbound mode=0755 tags=package=local-unbound
+ conf.d uname=unbound gname=unbound mode=0755 tags=package=local-unbound
..
..
yp
diff --git a/lib/libblacklist/Makefile b/lib/libblacklist/Makefile
index 07c770883eab..cac023d69bb7 100644
--- a/lib/libblacklist/Makefile
+++ b/lib/libblacklist/Makefile
@@ -18,14 +18,13 @@ CFLAGS+=-I${BLOCKLIST_DIR}/include -I${BLOCKLIST_DIR}/port \
SRCS= old_bl.c blacklist.c vsyslog_r.c
INCS= blacklist.h
-MAN= libblocklist.3
-
-MLINKS+=libblocklist.3 libblacklist.3 \
- libblocklist.3 blacklist_open.3 \
- libblocklist.3 blacklist_close.3 \
- libblocklist.3 blacklist.3 \
- libblocklist.3 blacklist_r.3 \
- libblocklist.3 blacklist_sa.3 \
- libblocklist.3 blacklist_sa_r.3
+MAN= libblacklist.3
+
+MLINKS= libblacklist.3 blacklist_open.3 \
+ libblacklist.3 blacklist_close.3 \
+ libblacklist.3 blacklist.3 \
+ libblacklist.3 blacklist_r.3 \
+ libblacklist.3 blacklist_sa.3 \
+ libblacklist.3 blacklist_sa_r.3
.include <bsd.lib.mk>
diff --git a/lib/libc/stdlib/realpath.3 b/lib/libc/stdlib/realpath.3
index 065ba312c2ef..76f40249963b 100644
--- a/lib/libc/stdlib/realpath.3
+++ b/lib/libc/stdlib/realpath.3
@@ -28,7 +28,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd May 11, 2012
+.Dd October 10, 2025
.Dt REALPATH 3
.Os
.Sh NAME
@@ -108,11 +108,11 @@ and
.Xr getcwd 3 .
.Sh SEE ALSO
.Xr getcwd 3
-.\" .Sh STANDARDS
-.\" The
-.\" .Fn realpath
-.\" function conforms to
-.\" .St -p1003.1-2001 .
+.Sh STANDARDS
+The
+.Fn realpath
+function conforms to
+.St -p1003.1-2001 .
.Sh HISTORY
The
.Fn realpath
diff --git a/lib/libc/stdlib/realpath.c b/lib/libc/stdlib/realpath.c
index 4c52b73319ab..18f29e95ee6b 100644
--- a/lib/libc/stdlib/realpath.c
+++ b/lib/libc/stdlib/realpath.c
@@ -49,7 +49,7 @@ realpath1(const char *path, char *resolved)
{
struct stat sb;
char *p, *q;
- size_t left_len, resolved_len, next_token_len;
+ size_t left_len, prev_len, resolved_len, next_token_len;
unsigned symlinks;
ssize_t slen;
char left[PATH_MAX], next_token[PATH_MAX], symlink[PATH_MAX];
@@ -98,6 +98,7 @@ realpath1(const char *path, char *resolved)
left_len = 0;
}
+ prev_len = resolved_len;
if (resolved[resolved_len - 1] != '/') {
if (resolved_len + 1 >= PATH_MAX) {
errno = ENAMETOOLONG;
@@ -133,8 +134,17 @@ realpath1(const char *path, char *resolved)
errno = ENAMETOOLONG;
return (NULL);
}
- if (lstat(resolved, &sb) != 0)
+ if (lstat(resolved, &sb) != 0) {
+ /*
+ * EACCES means the parent directory is not
+ * readable, while ENOTDIR means the parent
+ * directory is not a directory. Rewind the path
+ * to correctly indicate where the error lies.
+ */
+ if (errno == EACCES || errno == ENOTDIR)
+ resolved[prev_len] = '\0';
return (NULL);
+ }
if (S_ISLNK(sb.st_mode)) {
if (symlinks++ > MAXSYMLINKS) {
errno = ELOOP;
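With the rewind above, a failed realpath(3) call leaves the prefix that was successfully resolved in the output buffer, truncated at the component that caused EACCES or ENOTDIR. A small illustrative sketch (not part of this diff) of a caller using that partial result to report where resolution stopped:

    #include <errno.h>
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int
    main(int argc, char *argv[])
    {
        char resolved[PATH_MAX];

        if (argc != 2) {
            fprintf(stderr, "usage: resolve path\n");
            return (EXIT_FAILURE);
        }
        if (realpath(argv[1], resolved) == NULL) {
            /* resolved now names the component where it failed */
            fprintf(stderr, "stopped at %s: %s\n", resolved,
                strerror(errno));
            return (EXIT_FAILURE);
        }
        printf("%s\n", resolved);
        return (EXIT_SUCCESS);
    }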
diff --git a/lib/libc/stdtime/strptime.3 b/lib/libc/stdtime/strptime.3
index 7df73d2d080a..9456fa757b85 100644
--- a/lib/libc/stdtime/strptime.3
+++ b/lib/libc/stdtime/strptime.3
@@ -171,7 +171,7 @@ is taken as noon.
The
.Fa %Z
format specifier only accepts time zone abbreviations of the local time zone,
-or the value "GMT".
+and the values "GMT", "UTC", or "Z".
 This limitation is because of ambiguity due to the overloading of time
zone abbreviations.
One such example is
diff --git a/lib/libc/stdtime/strptime.c b/lib/libc/stdtime/strptime.c
index 5f1293c7a267..375e49146639 100644
--- a/lib/libc/stdtime/strptime.c
+++ b/lib/libc/stdtime/strptime.c
@@ -546,7 +546,8 @@ label:
zonestr[cp - buf] = '\0';
tzset();
if (0 == strcmp(zonestr, "GMT") ||
- 0 == strcmp(zonestr, "UTC")) {
+ 0 == strcmp(zonestr, "UTC") ||
+ 0 == strcmp(zonestr, "Z")) {
*GMTp = 1;
} else if (0 == strcmp(zonestr, tzname[0])) {
tm->tm_isdst = 0;
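A small illustrative sketch (not part of this diff) of what the added comparison enables: an ISO 8601-style timestamp with a bare "Z" zone designator now parses with %Z, just as "GMT" and "UTC" already did:

    #include <stdio.h>
    #include <string.h>
    #include <time.h>

    int
    main(void)
    {
        struct tm tm;

        memset(&tm, 0, sizeof(tm));
        if (strptime("2025-10-12 10:30:00 Z", "%F %T %Z", &tm) == NULL) {
            fprintf(stderr, "parse failed\n");
            return (1);
        }
        printf("%04d-%02d-%02d %02d:%02d UTC\n",
            tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
            tm.tm_hour, tm.tm_min);
        return (0);
    }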
diff --git a/lib/libc/tests/gen/realpath2_test.c b/lib/libc/tests/gen/realpath2_test.c
index f89dd99cbb72..431df8721ae0 100644
--- a/lib/libc/tests/gen/realpath2_test.c
+++ b/lib/libc/tests/gen/realpath2_test.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2017 Jan Kokemüller
* All rights reserved.
+ * Copyright (c) 2025 Klara, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -25,6 +26,8 @@
*/
#include <sys/param.h>
+#include <sys/stat.h>
+
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
@@ -34,6 +37,31 @@
#include <atf-c.h>
+ATF_TC(realpath_null);
+ATF_TC_HEAD(realpath_null, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test null input");
+}
+ATF_TC_BODY(realpath_null, tc)
+{
+ ATF_REQUIRE_ERRNO(EINVAL, realpath(NULL, NULL) == NULL);
+}
+
+ATF_TC(realpath_empty);
+ATF_TC_HEAD(realpath_empty, tc)
+{
+ atf_tc_set_md_var(tc, "descr", "Test empty input");
+}
+ATF_TC_BODY(realpath_empty, tc)
+{
+ char resb[PATH_MAX] = "";
+
+ ATF_REQUIRE_EQ(0, mkdir("foo", 0755));
+ ATF_REQUIRE_EQ(0, chdir("foo"));
+ ATF_REQUIRE_ERRNO(ENOENT, realpath("", resb) == NULL);
+ ATF_REQUIRE_STREQ("", resb);
+}
+
ATF_TC(realpath_buffer_overflow);
ATF_TC_HEAD(realpath_buffer_overflow, tc)
{
@@ -44,16 +72,11 @@ ATF_TC_HEAD(realpath_buffer_overflow, tc)
ATF_TC_BODY(realpath_buffer_overflow, tc)
{
- char path[MAXPATHLEN] = { 0 };
- char resb[MAXPATHLEN] = { 0 };
- size_t i;
+ char path[PATH_MAX] = "";
+ char resb[PATH_MAX] = "";
- path[0] = 'a';
+ memset(path, 'a', sizeof(path) - 1);
path[1] = '/';
- for (i = 2; i < sizeof(path) - 1; ++i) {
- path[i] = 'a';
- }
-
ATF_REQUIRE(realpath(path, resb) == NULL);
}
@@ -66,9 +89,9 @@ ATF_TC_HEAD(realpath_empty_symlink, tc)
ATF_TC_BODY(realpath_empty_symlink, tc)
{
- char path[MAXPATHLEN] = { 0 };
- char slnk[MAXPATHLEN] = { 0 };
- char resb[MAXPATHLEN] = { 0 };
+ char path[PATH_MAX] = "";
+ char slnk[PATH_MAX] = "";
+ char resb[PATH_MAX] = "";
int fd;
(void)strlcat(slnk, "empty_symlink", sizeof(slnk));
@@ -89,11 +112,70 @@ ATF_TC_BODY(realpath_empty_symlink, tc)
ATF_REQUIRE(unlink(slnk) == 0);
}
-ATF_TP_ADD_TCS(tp)
+ATF_TC(realpath_partial);
+ATF_TC_HEAD(realpath_partial, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "Test that failure leaves a partial result");
+ atf_tc_set_md_var(tc, "require.user", "unprivileged");
+}
+
+ATF_TC_BODY(realpath_partial, tc)
{
+ char resb[PATH_MAX] = "";
+ size_t len;
+
+ /* scenario 1: missing directory */
+ ATF_REQUIRE_EQ(0, mkdir("foo", 0755));
+ ATF_REQUIRE_ERRNO(ENOENT, realpath("foo/bar/baz", resb) == NULL);
+ len = strnlen(resb, sizeof(resb));
+ ATF_REQUIRE(len > 8 && len < sizeof(resb));
+ ATF_REQUIRE_STREQ("/foo/bar", resb + len - 8);
+
+ /* scenario 2: dead link 1 */
+ ATF_REQUIRE_EQ(0, symlink("nix", "foo/bar"));
+ ATF_REQUIRE_ERRNO(ENOENT, realpath("foo/bar/baz", resb) == NULL);
+ len = strnlen(resb, sizeof(resb));
+ ATF_REQUIRE(len > 8 && len < sizeof(resb));
+ ATF_REQUIRE_STREQ("/foo/nix", resb + len - 8);
+
+ /* scenario 3: missing file */
+ ATF_REQUIRE_EQ(0, unlink("foo/bar"));
+ ATF_REQUIRE_EQ(0, mkdir("foo/bar", 0755));
+ ATF_REQUIRE_ERRNO(ENOENT, realpath("foo/bar/baz", resb) == NULL);
+ len = strnlen(resb, sizeof(resb));
+ ATF_REQUIRE(len > 12 && len < sizeof(resb));
+ ATF_REQUIRE_STREQ("/foo/bar/baz", resb + len - 12);
+
+ /* scenario 4: dead link 2 */
+ ATF_REQUIRE_EQ(0, symlink("nix", "foo/bar/baz"));
+ ATF_REQUIRE_ERRNO(ENOENT, realpath("foo/bar/baz", resb) == NULL);
+ len = strnlen(resb, sizeof(resb));
+ ATF_REQUIRE(len > 12 && len < sizeof(resb));
+ ATF_REQUIRE_STREQ("/foo/bar/nix", resb + len - 12);
+
+ /* scenario 5: unreadable directory */
+ ATF_REQUIRE_EQ(0, chmod("foo", 000));
+ ATF_REQUIRE_ERRNO(EACCES, realpath("foo/bar/baz", resb) == NULL);
+ len = strnlen(resb, sizeof(resb));
+ ATF_REQUIRE(len > 4 && len < sizeof(resb));
+ ATF_REQUIRE_STREQ("/foo", resb + len - 4);
+
+ /* scenario 6: not a directory */
+ ATF_REQUIRE_EQ(0, close(creat("bar", 0644)));
+ ATF_REQUIRE_ERRNO(ENOTDIR, realpath("bar/baz", resb) == NULL);
+ len = strnlen(resb, sizeof(resb));
+ ATF_REQUIRE(len > 4 && len < sizeof(resb));
+ ATF_REQUIRE_STREQ("/bar", resb + len - 4);
+}
+ATF_TP_ADD_TCS(tp)
+{
+ ATF_TP_ADD_TC(tp, realpath_null);
+ ATF_TP_ADD_TC(tp, realpath_empty);
ATF_TP_ADD_TC(tp, realpath_buffer_overflow);
ATF_TP_ADD_TC(tp, realpath_empty_symlink);
+ ATF_TP_ADD_TC(tp, realpath_partial);
return atf_no_error();
}
diff --git a/lib/libpam/modules/modules.inc b/lib/libpam/modules/modules.inc
index f3ab65333f4f..1e9eb8970317 100644
--- a/lib/libpam/modules/modules.inc
+++ b/lib/libpam/modules/modules.inc
@@ -30,4 +30,4 @@ MODULES += pam_ssh
.endif
MODULES += pam_tacplus
MODULES += pam_unix
-MODULES += pam_xdg
\ No newline at end of file
+MODULES += pam_xdg
diff --git a/lib/libpam/modules/pam_xdg/pam_xdg.8 b/lib/libpam/modules/pam_xdg/pam_xdg.8
index 9b335751a9fb..031010953e98 100644
--- a/lib/libpam/modules/pam_xdg/pam_xdg.8
+++ b/lib/libpam/modules/pam_xdg/pam_xdg.8
@@ -50,7 +50,6 @@ Use an alternate base directory
.Bl -tag -width indent
.It Ev XDG_RUNTIME_DIR
The location of the runtime files base directory created by this module.
-Note that the module does not set this environment variable.
.El
.Sh STANDARDS
The directory created by this module conforms to the
diff --git a/lib/libsys/socket.2 b/lib/libsys/socket.2
index b211611c6354..48b8f4e87489 100644
--- a/lib/libsys/socket.2
+++ b/lib/libsys/socket.2
@@ -25,7 +25,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd May 17, 2025
+.Dd September 28, 2025
.Dt SOCKET 2
.Os
.Sh NAME
@@ -64,7 +64,7 @@ PF_NETGRAPH Netgraph sockets,
PF_NETLINK Netlink protocols,
PF_BLUETOOTH Bluetooth protocols,
PF_INET_SDP OFED socket direct protocol (IPv4),
-AF_HYPERV HyperV sockets
+PF_HYPERV HyperV sockets
.Ed
.Pp
Each protocol family is connected to an address family, which has the
@@ -89,32 +89,6 @@ SOCK_RAW Raw-protocol interface,
SOCK_SEQPACKET Sequenced packet stream
.Ed
.Pp
-A
-.Dv SOCK_STREAM
-type provides sequenced, reliable,
-two-way connection based byte streams.
-An out-of-band data transmission mechanism may be supported.
-A
-.Dv SOCK_DGRAM
-socket supports
-datagrams (connectionless, unreliable messages of
-a fixed (typically small) maximum length).
-A
-.Dv SOCK_SEQPACKET
-socket may provide a sequenced, reliable,
-two-way connection-based data transmission path for datagrams
-of fixed maximum length; a consumer may be required to read
-an entire packet with each read system call.
-This facility may have protocol-specific properties.
-.Dv SOCK_RAW
-sockets provide access to internal network protocols and interfaces.
-The
-.Dv SOCK_RAW
-type is available only to the super-user and is described in
-.Xr ip 4
-and
-.Xr ip6 4 .
-.Pp
Additionally, the following flags are allowed in the
.Fa type
argument:
@@ -140,32 +114,23 @@ particular to the
in which communication
is to take place; see
.Xr protocols 5 .
-.Pp
The
.Fa protocol
argument may be set to zero (0) to request the default
implementation of a socket type for the protocol, if any.
-.Pp
-Sockets of type
+.Sh STREAM SOCKET TYPE
+The
+.Dv SOCK_STREAM
+socket type provides reliable, sequenced, full-duplex octet streams between
+the socket and a peer to which the socket is connected.
+A socket of type
.Dv SOCK_STREAM
-are full-duplex byte streams, similar
-to pipes.
-A stream socket must be in a
+needs to be in a
.Em connected
-state before any data may be sent or received
-on it.
+state before any data can be sent or received.
A connection to another socket is created with a
.Xr connect 2
system call.
-Once connected, data may be transferred using
-.Xr read 2
-and
-.Xr write 2
-calls or some variant of the
-.Xr send 2
-and
-.Xr recv 2
-functions.
(Some protocol families, such as the Internet family,
support the notion of an
.Dq implied connect ,
@@ -173,62 +138,210 @@ which permits data to be sent piggybacked onto a connect operation by
using the
.Xr sendto 2
system call.)
-When a session has been completed a
-.Xr close 2
-may be performed.
-Out-of-band data may also be transmitted as described in
+Once connected, data may be sent using
+.Xr send 2 ,
+.Xr sendto 2 ,
+.Xr sendmsg 2
+and
+.Xr write 2
+system calls.
+Data may be received using
+.Xr recv 2 ,
+.Xr recvfrom 2 ,
+.Xr recvmsg 2 ,
+and
+.Xr read 2
+system calls.
+Record boundaries are not maintained; data sent on a stream socket using output
+operations of one size can be received using input operations of smaller or
+larger sizes without loss of data.
+Data may be buffered; successful return from an output function does not imply
+that the data has been delivered to the peer or even transmitted from the local
+system.
+For certain protocols out-of-band data may also be transmitted as described in
.Xr send 2
and received as described in
.Xr recv 2 .
.Pp
-The communications protocols used to implement a
-.Dv SOCK_STREAM
-ensure that data
-is not lost or duplicated.
-If a piece of data for which the
-peer protocol has buffer space cannot be successfully transmitted
-within a reasonable length of time, then
-the connection is considered broken and calls
-will indicate an error with
--1 returns and with
-.Er ETIMEDOUT
-as the specific code
-in the global variable
-.Va errno .
-The protocols optionally keep sockets
-.Dq warm
-by forcing transmissions
-roughly every minute in the absence of other activity.
-An error is then indicated if no response can be
-elicited on an otherwise
-idle connection for an extended period (e.g.\& 5 minutes).
-By default, a
+If data cannot be successfully transmitted within a given time, then the
+connection is considered broken, and subsequent operations shall fail with
+a protocol-specific error code.
+A
.Dv SIGPIPE
-signal is raised if a process sends
-on a broken stream, but this behavior may be inhibited via
+signal is raised if a thread attempts to send data on a broken stream (one that
+is no longer connected).
+The signal can be suppressed by the
+.Dv MSG_NOSIGNAL
+flag on individual
+.Xr send 2 ,
+.Xr sendto 2 ,
+and
+.Xr sendmsg 2
+system calls or by the
+.Dv SO_NOSIGPIPE
+socket option set on the socket with
.Xr setsockopt 2 .
.Pp
-.Dv SOCK_SEQPACKET
-sockets employ the same system calls
-as
+The
.Dv SOCK_STREAM
-sockets.
-The only difference
-is that
-.Xr read 2
-calls will return only the amount of data requested,
-and any remaining in the arriving packet will be discarded.
+socket is supported by the following protocol families:
+.Dv PF_INET ,
+.Dv PF_INET6 ,
+.Dv PF_UNIX ,
+.Dv PF_BLUETOOTH ,
+.Dv PF_HYPERV ,
+and
+.Dv PF_INET_SDP .
+The out-of-band data transmission mechanism is supported for stream sockets of the
+.Dv PF_INET
+and
+.Dv PF_INET6
+protocol families.
+.Sh DATAGRAM SOCKET TYPE
+The
+.Dv SOCK_DGRAM
+socket type supports connectionless data transfer which is not necessarily
+acknowledged or reliable.
+Datagrams can be sent to the address specified (possibly multicast or
+broadcast) in each output operation, and incoming datagrams can be received
+from multiple sources.
+The source address of each datagram is available when receiving the datagram
+with
+.Xr recvfrom 2
+or
+.Xr recvmsg 2 .
+An application can also pre-specify a peer address with
+.Xr sendto 2
+or
+.Xr sendmsg 2 ,
+in which case calls to output functions that do not specify a peer address
+shall send to the pre-specified peer.
+If a peer has been specified, only datagrams from that peer shall be received.
+A datagram shall be sent in a single output operation, and needs to be received
+in a single input operation.
+The maximum size of a datagram is protocol-specific.
+Output datagrams may be buffered within the system; thus, a successful return
+from an output function does not guarantee that a datagram is actually sent or
+received.
.Pp
+The
.Dv SOCK_DGRAM
+socket is supported by the following protocol families:
+.Dv PF_INET ,
+.Dv PF_INET6 ,
+.Dv PF_UNIX ,
+.Dv PF_NETGRAPH ,
and
-.Dv SOCK_RAW
-sockets allow sending of datagrams to correspondents
-named in
+.Dv PF_NETLINK .
+.Sh SEQUENCED PACKET SOCKET TYPE
+The
+.Dv SOCK_SEQPACKET
+socket type is similar to the
+.Dv SOCK_STREAM
+type, and is also connection-oriented.
+The only difference between these types is that record boundaries are
+maintained using the
+.Dv SOCK_SEQPACKET
+type.
+A record can be sent using one or more output operations and received using one
+or more input operations, but a single operation never transfers parts of more
+than one record.
+Record boundaries are set by the sender with the
+.Dv MSG_EOR
+flag of
.Xr send 2
-calls.
-Datagrams are generally received with
+or
+.Xr sendmsg 2
+functions.
+There is no possibility to set a record boundary with
+.Xr write 2 .
+Record boundaries are visible to the receiver via the
+.Dv MSG_EOR
+flag in the received message flags returned by the
+.Xr recvmsg 2
+function.
+It is protocol-specific whether a maximum record size is imposed.
+.Pp
+The
+.Dv SOCK_SEQPACKET
+socket is supported by the following protocol families:
+.Dv PF_INET ,
+.Dv PF_INET6 ,
+and
+.Dv PF_UNIX .
+.Sh RAW SOCKET TYPE
+The
+.Dv SOCK_RAW
+socket type provides access to internal network protocols and interfaces.
+It is a datagram socket in its nature, and thus has the same semantics for
+read and write operations.
+The
+.Dv SOCK_RAW
+type is available only to the super-user and is described in
+.Xr ip 4
+and
+.Xr ip6 4 .
+.Sh NON-BLOCKING MODE
+A socket can be created in
+.Em non-blocking mode
+with the help of the
+.Dv SOCK_NONBLOCK
+flag.
+Alternatively, the non-blocking mode on a socket can be turned on and off with
+the help of the
+.Dv O_NONBLOCK
+flag of the
+.Xr fcntl 2
+system call.
+.Pp
+When a non-blocking socket does not have enough data in its receive buffer to
+fill the application-supplied buffer, data-receiving system calls like
+.Xr recv 2 ,
.Xr recvfrom 2 ,
-which returns the next datagram with its return address.
+.Xr recvmsg 2
+and
+.Xr read 2
+will not block waiting for the data, but will return immediately.
+The return value indicates the number of bytes read into the supplied buffer.
+If no data is available at all, the calls fail and
+.Va errno
+is set to
+.Dv EAGAIN
+.Po
+which has the same value as
+.Dv EWOULDBLOCK
+.Pc .
+.Pp
+If an application tries to send more data on a non-blocking socket than the
+socket send buffer can accommodate with the
+.Xr send 2 ,
+.Xr sendto 2 ,
+.Xr sendmsg 2
+or
+.Xr write 2
+system calls, partial data will be sent.
+The return value indicates the number of bytes sent.
+If no data can be sent at all, the calls fail and
+.Va errno
+is set to
+.Dv EAGAIN .
+Note that sockets of the
+.Dv SOCK_DGRAM
+type are unreliable; for these sockets sending operations will never fail with
+.Dv EAGAIN
+in non-blocking mode, nor block in blocking mode.
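For illustration, a minimal C sketch of non-blocking receiving as described
above (the descriptor "s" is assumed to be an already connected socket; this
sketch is not part of the patch):

	#include <sys/socket.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <err.h>

	/* Switch a socket to non-blocking mode, drain its receive buffer. */
	static void
	drain(int s)
	{
		char buf[1024];
		ssize_t n;

		if (fcntl(s, F_SETFL, fcntl(s, F_GETFL) | O_NONBLOCK) == -1)
			err(1, "fcntl");
		for (;;) {
			n = recv(s, buf, sizeof(buf), 0);
			if (n == -1 && errno == EAGAIN)
				break;		/* buffer empty, would block */
			if (n == -1)
				err(1, "recv");
			if (n == 0)
				break;		/* connection closed by peer */
		}
	}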
+.Sh OTHER OPERATIONS ON SOCKETS
+Since socket descriptors are file descriptors, many generic file operations,
+such as those performed by
+.Xr fcntl 2 ,
+apply.
+Socket descriptors can be used with all event engines, such as
+.Xr kevent 2 ,
+.Xr select 2
+and
+.Xr poll 2 .
.Pp
An
.Xr fcntl 2
@@ -250,6 +363,12 @@ The
and
.Xr getsockopt 2
system calls are used to set and get options, respectively.
+.Pp
+The connection associated with a socket can be terminated with the
+.Xr close 2
+system call.
+One direction of communication can be disabled with
+.Xr shutdown 2 .
.Sh RETURN VALUES
A -1 is returned if an error occurs, otherwise the return
value is a descriptor referencing the socket.
@@ -282,16 +401,23 @@ The socket type is not supported by the protocol.
.Sh SEE ALSO
.Xr accept 2 ,
.Xr bind 2 ,
+.Xr close 2 ,
.Xr connect 2 ,
+.Xr fcntl 2 ,
.Xr getpeername 2 ,
.Xr getsockname 2 ,
.Xr getsockopt 2 ,
.Xr ioctl 2 ,
+.Xr kevent 2 ,
.Xr listen 2 ,
+.Xr poll 2 ,
.Xr read 2 ,
.Xr recv 2 ,
.Xr select 2 ,
.Xr send 2 ,
+.Xr sendmsg 2 ,
+.Xr sendto 2 ,
+.Xr signal 3 ,
.Xr shutdown 2 ,
.Xr socketpair 2 ,
.Xr write 2 ,
diff --git a/lib/libunbound/Makefile b/lib/libunbound/Makefile
index e2cd25ea8b34..1a31e50e6416 100644
--- a/lib/libunbound/Makefile
+++ b/lib/libunbound/Makefile
@@ -1,4 +1,3 @@
-PACKAGE=lib${LIB}
# Vendor sources and generated files
LDNSDIR= ${SRCTOP}/contrib/ldns
UNBOUNDDIR= ${SRCTOP}/contrib/unbound
@@ -6,9 +5,10 @@ UNBOUNDDIR= ${SRCTOP}/contrib/unbound
# Hold my beer and watch this
.PATH: ${UNBOUNDDIR} ${UNBOUNDDIR}/cachedb ${UNBOUNDDIR}/dns64 ${UNBOUNDDIR}/iterator ${UNBOUNDDIR}/sldns ${UNBOUNDDIR}/libunbound ${UNBOUNDDIR}/services ${UNBOUNDDIR}/services/cache ${UNBOUNDDIR}/util ${UNBOUNDDIR}/util/data ${UNBOUNDDIR}/respip ${UNBOUNDDIR}/util/storage ${UNBOUNDDIR}/validator
-LIB= unbound
+PACKAGE= local-unbound
+
+LIB= unbound
PRIVATELIB=
-PACKAGE= unbound
CFLAGS+= -I${UNBOUNDDIR} -I${LDNSDIR} -I${.OBJDIR} -I${.CURDIR}
CFLAGS+= -DOPENSSL_API_COMPAT=0x10100000L
diff --git a/lib/ncurses/Makefile.inc b/lib/ncurses/Makefile.inc
index eea49908474c..e14867696834 100644
--- a/lib/ncurses/Makefile.inc
+++ b/lib/ncurses/Makefile.inc
@@ -1,6 +1,7 @@
# This is to include src/lib/Makefile.inc
PACKAGE?= ncurses
+LIB_PACKAGE=
WARNS?= 3
.include "../Makefile.inc"
diff --git a/libexec/blocklistd-helper/blacklistd-helper b/libexec/blocklistd-helper/blacklistd-helper
index 4195f070e8ee..92f768e86cdf 100644
--- a/libexec/blocklistd-helper/blacklistd-helper
+++ b/libexec/blocklistd-helper/blacklistd-helper
@@ -279,7 +279,7 @@ flush)
pf)
# dynamically determine which anchors exist
for anchor in $(/sbin/pfctl -a "$2" -s Anchors 2> /dev/null); do
- /sbin/pfctl -a "$anchor" -t "port${anchor##*/}" -T flush
+ /sbin/pfctl -a "$anchor" -t "port${anchor##*/}" -T flush 2> /dev/null
/sbin/pfctl -a "$anchor" -F rules
done
echo OK
diff --git a/libexec/rc/rc.d/Makefile b/libexec/rc/rc.d/Makefile
index 093da31ed787..3b7f45e8f101 100644
--- a/libexec/rc/rc.d/Makefile
+++ b/libexec/rc/rc.d/Makefile
@@ -294,7 +294,7 @@ SSHPACKAGE= ssh
SSH= sshd
CONFGROUPS.${MK_UNBOUND}+= UNBOUND
-UNBOUNDPACKAGE= unbound
+UNBOUNDPACKAGE= local-unbound
UNBOUND= local_unbound
CONFGROUPS.${MK_VI}+= VI
diff --git a/release/packages/ucl/unbound-all.ucl b/release/packages/ucl/local-unbound-all.ucl
index e66f00be16a7..e66f00be16a7 100644
--- a/release/packages/ucl/unbound-all.ucl
+++ b/release/packages/ucl/local-unbound-all.ucl
diff --git a/release/packages/ucl/local-unbound.ucl b/release/packages/ucl/local-unbound.ucl
new file mode 100644
index 000000000000..0f1c77a0d0ee
--- /dev/null
+++ b/release/packages/ucl/local-unbound.ucl
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: ISC
+ *
+ * Copyright (c) 2025 Lexi Winter <ivy@FreeBSD.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+messages = [
+ {
+ type: upgrade
+ message: <<EOM
+After upgrading local-unbound, the configuration file should be regenerated
+by running "service local_unbound setup" before restarting the service.
+EOM
+ }
+]
diff --git a/release/tools/vmimage.subr b/release/tools/vmimage.subr
index 15c4dd53e70b..c3c917dcd642 100644
--- a/release/tools/vmimage.subr
+++ b/release/tools/vmimage.subr
@@ -290,6 +290,11 @@ buildfs() {
cat ${DESTDIR}/METALOG.pkg >> ${DESTDIR}/METALOG
fi
+ # Sort METALOG file; makefs produces directories with 000 permissions
+ # if their contents are seen before the directories themselves.
+ env -i LC_COLLATE=C sort -u ${DESTDIR}/METALOG > ${DESTDIR}/METALOG.sorted
+ mv ${DESTDIR}/METALOG.sorted ${DESTDIR}/METALOG
+
case "${VMFS}" in
ufs)
cd ${DESTDIR} && ${MAKEFS} ${MAKEFSARGS} -o label=rootfs -o version=2 -o softupdates=1 \
diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c
index f10f02d159c3..41b4a5336350 100644
--- a/sbin/fsck_ffs/setup.c
+++ b/sbin/fsck_ffs/setup.c
@@ -58,7 +58,6 @@ char *copybuf; /* buffer to copy snapshot blocks */
static int sbhashfailed;
#define POWEROF2(num) (((num) & ((num) - 1)) == 0)
-static int calcsb(char *dev, int devfd, struct fs *fs);
static void saverecovery(int readfd, int writefd);
static int chkrecovery(int devfd);
static int getlbnblkno(struct inodesc *);
@@ -501,52 +500,6 @@ sblock_init(void)
}
/*
- * Calculate a prototype superblock based on information in the boot area.
- * When done the cgsblock macro can be calculated and the fs_ncg field
- * can be used. Do NOT attempt to use other macros without verifying that
- * their needed information is available!
- */
-static int
-calcsb(char *dev, int devfd, struct fs *fs)
-{
- struct fsrecovery *fsr;
- char *fsrbuf;
- u_int secsize;
-
- /*
- * We need fragments-per-group and the partition-size.
- *
- * Newfs stores these details at the end of the boot block area
- * at the start of the filesystem partition. If they have been
- * overwritten by a boot block, we fail. But usually they are
- * there and we can use them.
- */
- if (ioctl(devfd, DIOCGSECTORSIZE, &secsize) == -1)
- return (0);
- fsrbuf = Balloc(secsize);
- if (fsrbuf == NULL)
- errx(EEXIT, "calcsb: cannot allocate recovery buffer");
- if (blread(devfd, fsrbuf,
- (SBLOCK_UFS2 - secsize) / dev_bsize, secsize) != 0) {
- free(fsrbuf);
- return (0);
- }
- fsr = (struct fsrecovery *)&fsrbuf[secsize - sizeof *fsr];
- if (fsr->fsr_magic != FS_UFS2_MAGIC) {
- free(fsrbuf);
- return (0);
- }
- memset(fs, 0, sizeof(struct fs));
- fs->fs_fpg = fsr->fsr_fpg;
- fs->fs_fsbtodb = fsr->fsr_fsbtodb;
- fs->fs_sblkno = fsr->fsr_sblkno;
- fs->fs_magic = fsr->fsr_magic;
- fs->fs_ncg = fsr->fsr_ncg;
- free(fsrbuf);
- return (1);
-}
-
-/*
* Check to see if recovery information exists.
* Return 1 if it exists or cannot be created.
* Return 0 if it does not exist and can be created.
diff --git a/sbin/geom/Makefile b/sbin/geom/Makefile
index 078503d3ae67..61561ef1ff1b 100644
--- a/sbin/geom/Makefile
+++ b/sbin/geom/Makefile
@@ -9,7 +9,7 @@ MAN= geom.8
CFLAGS+= -I${.CURDIR} -I${.CURDIR}/core
CFLAGS+= -DGEOM_CLASS_DIR=\"${GEOM_CLASS_DIR}\"
-LIBADD= geom util
+LIBADD= geom util xo
.if defined(RESCUE)
.PATH: ${SRCTOP}/lib/geom/part \
diff --git a/sbin/geom/core/geom.c b/sbin/geom/core/geom.c
index b78021194ddd..496123f08274 100644
--- a/sbin/geom/core/geom.c
+++ b/sbin/geom/core/geom.c
@@ -49,9 +49,12 @@
#include <assert.h>
#include <libgeom.h>
#include <geom.h>
+#include <libxo/xo.h>
#include "misc/subr.h"
+#define GEOM_XO_VERSION "1"
+
#ifdef STATIC_GEOM_CLASSES
extern uint32_t gpart_version;
extern struct g_command gpart_class_commands[];
@@ -513,6 +516,7 @@ run_command(int argc, char *argv[])
gctl_free(req);
if (verbose)
printf("Done.\n");
+ xo_finish();
exit(EXIT_SUCCESS);
}
@@ -810,6 +814,10 @@ main(int argc, char *argv[])
provider_name = NULL;
tflag = false;
+ argc = xo_parse_args(argc, argv);
+ if (argc < 0)
+ return (argc);
+
if (strcmp(getprogname(), "geom") == 0) {
while ((ch = getopt(argc, argv, "hp:t")) != -1) {
switch (ch) {
@@ -831,6 +839,7 @@ main(int argc, char *argv[])
* Don't adjust argc and argv, it would break get_class().
*/
}
+ xo_set_version(GEOM_XO_VERSION);
if (tflag && provider_name != NULL) {
errx(EXIT_FAILURE,
@@ -839,6 +848,7 @@ main(int argc, char *argv[])
if (provider_name != NULL) {
list_one_geom_by_provider(provider_name);
+ xo_finish();
return (0);
}
@@ -882,29 +892,33 @@ find_geom(struct gclass *classp, const char *name)
}
static void
-list_one_provider(struct gprovider *pp, const char *prefix)
+list_one_provider(struct gprovider *pp, const char *padding)
{
struct gconfig *conf;
char buf[5];
- printf("Name: %s\n", pp->lg_name);
+ xo_emit("{Lcw:Name}{:Name}\n", pp->lg_name);
humanize_number(buf, sizeof(buf), (int64_t)pp->lg_mediasize, "",
HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
- printf("%sMediasize: %jd (%s)\n", prefix, (intmax_t)pp->lg_mediasize,
- buf);
- printf("%sSectorsize: %u\n", prefix, pp->lg_sectorsize);
+ xo_emit("{P:/%s}{Lcw:Mediasize}{:Mediasize/%jd} ({N:/%s})\n",
+ padding, (intmax_t)pp->lg_mediasize, buf);
+	xo_emit("{P:/%s}{Lcw:Sectorsize}{:Sectorsize/%u}\n",
+ padding, pp->lg_sectorsize);
if (pp->lg_stripesize > 0 || pp->lg_stripeoffset > 0) {
- printf("%sStripesize: %ju\n", prefix, pp->lg_stripesize);
- printf("%sStripeoffset: %ju\n", prefix, pp->lg_stripeoffset);
+		xo_emit("{P:/%s}{Lcw:Stripesize}{:Stripesize/%ju}\n",
+		    padding, pp->lg_stripesize);
+		xo_emit("{P:/%s}{Lcw:Stripeoffset}{:Stripeoffset/%ju}\n",
+		    padding, pp->lg_stripeoffset);
}
- printf("%sMode: %s\n", prefix, pp->lg_mode);
+	xo_emit("{P:/%s}{Lcw:Mode}{:Mode}\n", padding, pp->lg_mode);
LIST_FOREACH(conf, &pp->lg_config, lg_config) {
- printf("%s%s: %s\n", prefix, conf->lg_name, conf->lg_val);
+ xo_emit("{P:/%s}{Lcwa:}{a:}\n", padding, conf->lg_name,
+ conf->lg_name, conf->lg_val);
}
}
static void
-list_one_consumer(struct gconsumer *cp, const char *prefix)
+list_one_consumer(struct gconsumer *cp, const char *padding)
{
struct gprovider *pp;
struct gconfig *conf;
@@ -915,20 +929,24 @@ list_one_consumer(struct gconsumer *cp, const char *prefix)
else {
char buf[5];
- printf("Name: %s\n", pp->lg_name);
+ xo_emit("{Lcw:Name}{:Name}\n", pp->lg_name);
humanize_number(buf, sizeof(buf), (int64_t)pp->lg_mediasize, "",
HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
- printf("%sMediasize: %jd (%s)\n", prefix,
- (intmax_t)pp->lg_mediasize, buf);
- printf("%sSectorsize: %u\n", prefix, pp->lg_sectorsize);
+ xo_emit("{P:/%s}{Lcw:Mediasize}{:Mediasize/%jd} ({N:/%s})\n",
+ padding, (intmax_t)pp->lg_mediasize, buf);
+ xo_emit("{P:/%s}{Lcw:Sectorsize}{:Sectorsize/%u}\n",
+ padding, pp->lg_sectorsize);
if (pp->lg_stripesize > 0 || pp->lg_stripeoffset > 0) {
- printf("%sStripesize: %ju\n", prefix, pp->lg_stripesize);
- printf("%sStripeoffset: %ju\n", prefix, pp->lg_stripeoffset);
+ xo_emit("{P:/%s}{Lcw:Stripesize}{:Stripesize/%ju}\n",
+ padding, pp->lg_stripesize);
+			xo_emit("{P:/%s}{Lcw:Stripeoffset}{:Stripeoffset/%ju}\n",
+ padding, pp->lg_stripeoffset);
}
- printf("%sMode: %s\n", prefix, cp->lg_mode);
+		xo_emit("{P:/%s}{Lcw:Mode}{:Mode}\n", padding, cp->lg_mode);
}
LIST_FOREACH(conf, &cp->lg_config, lg_config) {
- printf("%s%s: %s\n", prefix, conf->lg_name, conf->lg_val);
+ xo_emit("{P:/%s}{Lcwa:}{a:}\n", padding, conf->lg_name,
+ conf->lg_name, conf->lg_val);
}
}
@@ -940,27 +958,36 @@ list_one_geom(struct ggeom *gp)
struct gconfig *conf;
unsigned n;
- printf("Geom name: %s\n", gp->lg_name);
+ xo_emit("{Lcw:Geom name}{:Name}\n", gp->lg_name);
LIST_FOREACH(conf, &gp->lg_config, lg_config) {
- printf("%s: %s\n", conf->lg_name, conf->lg_val);
+ xo_emit("{Lcwa:}{a:}\n", conf->lg_name, conf->lg_name,
+ conf->lg_val);
}
if (!LIST_EMPTY(&gp->lg_provider)) {
- printf("Providers:\n");
+ xo_open_list("Providers");
+ xo_emit("{Tc:Providers}\n");
n = 1;
LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
- printf("%u. ", n++);
+ xo_emit("{T:/%u} ", n++);
+ xo_open_instance("provider");
list_one_provider(pp, " ");
+ xo_close_instance("provider");
}
+ xo_close_list("Providers");
}
if (!LIST_EMPTY(&gp->lg_consumer)) {
- printf("Consumers:\n");
+ xo_open_list("Consumers");
+ xo_emit("{Tc:Consumers}\n");
n = 1;
LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) {
- printf("%u. ", n++);
+ xo_emit("{T:/%u} ", n++);
+ xo_open_instance("consumer");
list_one_consumer(cp, " ");
+ xo_close_instance("consumer");
}
+ xo_close_list("Consumers");
}
- printf("\n");
+ xo_emit("\n");
}
static void
@@ -978,8 +1005,10 @@ list_one_geom_by_provider(const char *provider_name)
if (gp == NULL)
errx(EXIT_FAILURE, "Cannot find provider '%s'.", provider_name);
- printf("Geom class: %s\n", gp->lg_class->lg_name);
+ xo_open_container("Geom");
+ xo_emit("{Lwc:Geom class}{:Class}\n", gp->lg_class->lg_name);
list_one_geom(gp);
+ xo_close_container("Geom");
}
static void
@@ -1038,14 +1067,20 @@ std_list(struct gctl_req *req, unsigned flags __unused)
"an instance named '%s'.",
gclass_name, name);
}
+ xo_open_container("Geom");
list_one_geom(gp);
+ xo_close_container("Geom");
}
} else {
+ xo_open_list("Geoms");
LIST_FOREACH(gp, &classp->lg_geom, lg_geom) {
if (LIST_EMPTY(&gp->lg_provider) && !all)
continue;
+ xo_open_instance("geom");
list_one_geom(gp);
+ xo_close_instance("geom");
}
+ xo_close_list("Geoms");
}
geom_deletetree(&mesh);
}
@@ -1115,34 +1150,24 @@ status_update_len_prs(struct ggeom *gp, int *name_len, int *status_len)
}
static char *
-status_one_consumer(struct gconsumer *cp)
+status_one_consumer(struct gconsumer *cp, const char *value)
{
- static char buf[256];
struct gprovider *pp;
struct gconfig *conf;
- const char *state, *syncr;
+ char *ret;
pp = cp->lg_provider;
if (pp == NULL)
return (NULL);
- state = NULL;
- syncr = NULL;
+ ret = NULL;
LIST_FOREACH(conf, &cp->lg_config, lg_config) {
- if (strcasecmp(conf->lg_name, "state") == 0)
- state = conf->lg_val;
- if (strcasecmp(conf->lg_name, "synchronized") == 0)
- syncr = conf->lg_val;
- }
- if (state == NULL && syncr == NULL)
- snprintf(buf, sizeof(buf), "%s", pp->lg_name);
- else if (state != NULL && syncr != NULL) {
- snprintf(buf, sizeof(buf), "%s (%s, %s)", pp->lg_name,
- state, syncr);
- } else {
- snprintf(buf, sizeof(buf), "%s (%s)", pp->lg_name,
- state ? state : syncr);
+ if (strcasecmp(conf->lg_name, value) == 0)
+ ret = conf->lg_val;
}
- return (buf);
+
+ if (ret == NULL)
+ return (NULL);
+ return (ret);
}
static void
@@ -1150,8 +1175,8 @@ status_one_geom(struct ggeom *gp, int script, int name_len, int status_len)
{
struct gconsumer *cp;
struct gconfig *conf;
- const char *name, *status, *component;
- int gotone;
+ const char *name, *status, *cstate, *csyncr;
+ int gotone, len;
name = gp->lg_name;
status = "N/A";
@@ -1161,21 +1186,49 @@ status_one_geom(struct ggeom *gp, int script, int name_len, int status_len)
break;
}
}
- gotone = 0;
+ gotone = len = 0;
+ xo_open_instance("status");
LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) {
- component = status_one_consumer(cp);
- if (component == NULL)
+ cstate = status_one_consumer(cp, "state");
+ csyncr = status_one_consumer(cp, "synchronized");
+ if (cstate == NULL && csyncr == NULL)
continue;
+ if (!gotone || script) {
+ if (!gotone) {
+ xo_emit("{:name/%*s} {:status/%*s} ",
+ name_len, name, status_len, status);
+ } else {
+ xo_emit("{d:name/%*s} {d:status/%*s} ",
+ name_len, name, status_len, status);
+ }
+ xo_open_list("components");
+ }
+
+ xo_open_instance("components");
+ if (cstate != NULL && csyncr != NULL) {
+			xo_emit("{P:/%*s}{:component} ({:state}, {:synchronized})\n",
+ len, "", cp->lg_provider->lg_name, cstate, csyncr);
+ } else if (cstate != NULL) {
+			xo_emit("{P:/%*s}{:component} ({:state})\n",
+ len, "", cp->lg_provider->lg_name, cstate);
+ } else {
+			xo_emit("{P:/%*s}{:component} ({:synchronized})\n",
+ len, "", cp->lg_provider->lg_name, csyncr);
+ }
+ xo_close_instance("components");
gotone = 1;
- printf("%*s %*s %s\n", name_len, name, status_len, status,
- component);
- if (!script)
- name = status = "";
+ if (!len && !script)
+ len = name_len + status_len + 4;
}
if (!gotone) {
- printf("%*s %*s %s\n", name_len, name, status_len, status,
- "N/A");
+ xo_emit("{:name/%*s} {:status/%*s} ", name_len, name, status_len, status);
+ xo_open_list("components");
+ xo_open_instance("components");
+		xo_emit("{P:/%*s}{d:component}\n", len, "", "N/A");
+ xo_close_instance("components");
}
+ xo_close_list("components");
+ xo_close_instance("status");
}
static void
@@ -1184,9 +1237,10 @@ status_one_geom_prs(struct ggeom *gp, int script, int name_len, int status_len)
struct gprovider *pp;
struct gconsumer *cp;
struct gconfig *conf;
- const char *name, *status, *component;
- int gotone;
+ const char *name, *status, *cstate, *csyncr;
+ int gotone, len;
+ xo_open_instance("status");
LIST_FOREACH(pp, &gp->lg_provider, lg_provider) {
name = pp->lg_name;
status = "N/A";
@@ -1202,22 +1256,50 @@ status_one_geom_prs(struct ggeom *gp, int script, int name_len, int status_len)
break;
}
}
- gotone = 0;
+ gotone = len = 0;
LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) {
- component = status_one_consumer(cp);
- if (component == NULL)
+ cstate = status_one_consumer(cp, "state");
+ csyncr = status_one_consumer(cp, "synchronized");
+ if (cstate == NULL && csyncr == NULL)
continue;
+
+ if (!gotone || script) {
+ if (!gotone) {
+ xo_emit("{:name/%*s} {:status/%*s} ",
+ name_len, name, status_len, status);
+ } else {
+ xo_emit("{d:name/%*s} {d:status/%*s} ",
+ name_len, name, status_len, status);
+ }
+ xo_open_list("components");
+ }
+
+			xo_open_instance("components");
+ if (cstate != NULL && csyncr != NULL) {
+				xo_emit("{P:/%*s}{:component} ({:state}, {:synchronized})\n",
+ len, "", cp->lg_provider->lg_name, cstate, csyncr);
+ } else if (cstate != NULL) {
+				xo_emit("{P:/%*s}{:component} ({:state})\n",
+ len, "", cp->lg_provider->lg_name, cstate);
+ } else {
+				xo_emit("{P:/%*s}{:component} ({:synchronized})\n",
+ len, "", cp->lg_provider->lg_name, csyncr);
+ }
+			xo_close_instance("components");
gotone = 1;
- printf("%*s %*s %s\n", name_len, name,
- status_len, status, component);
- if (!script)
- name = status = "";
+ if (!len && !script)
+ len = name_len + status_len + 4;
}
if (!gotone) {
- printf("%*s %*s %s\n", name_len, name,
- status_len, status, "N/A");
+ xo_emit("{:name/%*s} {:status/%*s} ", name_len, name, status_len, status);
+ xo_open_list("components");
+ xo_open_instance("components");
+			xo_emit("{P:/%*s}{d:component}\n", len, "", "N/A");
+ xo_close_instance("components");
}
+ xo_close_list("components");
}
+ xo_close_instance("status");
}
static void
@@ -1240,13 +1322,9 @@ std_status(struct gctl_req *req, unsigned flags __unused)
all = gctl_get_int(req, "all");
geoms = gctl_get_int(req, "geoms");
script = gctl_get_int(req, "script");
- if (script) {
- name_len = 0;
- status_len = 0;
- } else {
- name_len = strlen("Name");
- status_len = strlen("Status");
- }
+ name_len = strlen("Name");
+ status_len = strlen("Status");
+
if (nargs > 0) {
for (i = 0, n = 0; i < nargs; i++) {
name = gctl_get_ascii(req, "arg%d", i);
@@ -1282,9 +1360,10 @@ std_status(struct gctl_req *req, unsigned flags __unused)
goto end;
}
if (!script) {
- printf("%*s %*s %s\n", name_len, "Name", status_len, "Status",
- "Components");
+ xo_emit("{T:/%*s} {T:/%*s} {T:Components}\n",
+ name_len, "Name", status_len, "Status");
}
+ xo_open_list("status");
if (nargs > 0) {
for (i = 0; i < nargs; i++) {
name = gctl_get_ascii(req, "arg%d", i);
@@ -1312,6 +1391,7 @@ std_status(struct gctl_req *req, unsigned flags __unused)
}
}
}
+ xo_close_list("status");
end:
geom_deletetree(&mesh);
}
diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8
index fafb77e1ca6c..d4f8d2b5747a 100644
--- a/sbin/ifconfig/ifconfig.8
+++ b/sbin/ifconfig/ifconfig.8
@@ -28,7 +28,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd September 12, 2025
+.Dd October 12, 2025
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -2799,7 +2799,7 @@ Set the untagged VLAN identifier for an interface.
Frames received on this interface without an 802.1Q tag will be assigned
to this VLAN instead of the default VLAN 0,
and outgoing frames on this VLAN will have their 802.1Q tag removed.
-.It Cm -ifuntagged Ar interface Ar vlan-id
+.It Cm -ifuntagged Ar interface
Clear the untagged VLAN identifier for an interface.
.It Cm defuntagged Ar vlan-id
Enable the
diff --git a/sbin/ipfw/tables.c b/sbin/ipfw/tables.c
index 7c3b1bb35a01..245c0c9e0399 100644
--- a/sbin/ipfw/tables.c
+++ b/sbin/ipfw/tables.c
@@ -1037,9 +1037,6 @@ table_modify_record(ipfw_obj_header *oh, int ac, char *av[], int add,
}
}
- /* Get real OS error */
- error = errno;
-
/* Report results back */
ptent = tent_buf;
for (i = 0; i < count; ptent++, i++) {
diff --git a/sbin/mdconfig/tests/mdconfig_test.sh b/sbin/mdconfig/tests/mdconfig_test.sh
index ea87ff5d542d..cc29c188cbd8 100755
--- a/sbin/mdconfig/tests/mdconfig_test.sh
+++ b/sbin/mdconfig/tests/mdconfig_test.sh
@@ -274,22 +274,23 @@ attach_size_rounddown()
attach_size_rounddown_body()
{
local md
- local ss=8192
- local ms=$(($ss + 4096))
- local ms2=$((2 * $ss + 4096))
+ local pgsz=$(pagesize)
+ local ss=$(($pgsz * 2))
+ local ms=$(($ss + $pgsz))
+ local ms2=$((2 * $ss + $pgsz))
- # Use a sector size that's a likely multiple of PAGE_SIZE, as md(4)
+ # Use a sector size that's a multiple of the kernel page size, as md(4)
# expects that for swap MDs.
atf_check -s exit:0 -o save:mdconfig.out -e empty \
-x "mdconfig -a -t swap -S $ss -s ${ms}b"
md=$(cat mdconfig.out)
- # 12288 bytes should be rounded down to one sector.
- check_diskinfo "$md" 8192 1 $ss
+	# One sector plus one page should be rounded down to one sector.
+ check_diskinfo "$md" $ss 1 $ss
# Resize and verify that the new size was also rounded down.
atf_check -s exit:0 -o empty -e empty \
-x "mdconfig -r -u ${md#md} -s ${ms2}b"
- check_diskinfo "$md" 16384 2 $ss
+ check_diskinfo "$md" $((2 * $ss)) 2 $ss
}
attach_size_rounddown_cleanup()
{
diff --git a/sbin/ping/tests/Makefile b/sbin/ping/tests/Makefile
index 0520b1d634cf..7d3ab02b9a86 100644
--- a/sbin/ping/tests/Makefile
+++ b/sbin/ping/tests/Makefile
@@ -1,5 +1,6 @@
ATF_TESTS_C+= in_cksum_test
-SRCS.in_cksum_test= in_cksum_test.c ../utils.c
+.PATH: ${.CURDIR:H}
+SRCS.in_cksum_test= in_cksum_test.c utils.c
PACKAGE= tests
diff --git a/share/examples/bhyve/vmrun.sh b/share/examples/bhyve/vmrun.sh
index 52935363023a..e0052e781dc0 100755
--- a/share/examples/bhyve/vmrun.sh
+++ b/share/examples/bhyve/vmrun.sh
@@ -268,8 +268,10 @@ fi
if [ -z "$firmware" ]; then
case ${platform} in
amd64)
- firmware="${efi_firmware}"
- firmware_pkg="edk2-bhyve"
+ if [ ${efi_mode} -ne 0 ]; then
+ firmware="${efi_firmware}"
+ firmware_pkg="edk2-bhyve"
+ fi
;;
arm64)
firmware="${uboot_firmware}"
diff --git a/share/man/man4/bridge.4 b/share/man/man4/bridge.4
index 7048df4593bf..3af952256d3a 100644
--- a/share/man/man4/bridge.4
+++ b/share/man/man4/bridge.4
@@ -36,7 +36,7 @@
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd July 28, 2025
+.Dd October 13, 2025
.Dt IF_BRIDGE 4
.Os
.Sh NAME
@@ -272,53 +272,149 @@ by setting the
node using
.Xr sysctl 8 .
.Sh VLAN SUPPORT
-The
+Virtual LANs (VLANs), defined in the IEEE 802.1Q standard, allow traffic
+on a bridge to be segregated into separate logical networks which cannot
+communicate with each other.
+For example, two interfaces in VLAN 10 would be able to communicate
+with each other, but not with another interface in VLAN 20.
+.Pp
+Each VLAN is identified by a number between 1 and 4094 inclusive.
+By default, all traffic on the bridge is assigned to "VLAN 0",
+a pseudo-VLAN used for historical compatibility.
+When VLANs are in use on a bridge, it is recommended to explicitly
+assign all traffic to a VLAN rather than using VLAN 0.
+.Pp
+The bridge implements Independent VLAN Learning (IVL), meaning that
+host addresses are learned separately for each VLAN, and the same host
+address may exist on several different ports in different VLANs.
+.Pp
+If a
+.Xr vlan 4
+interface is configured on an interface which is also an
.Nm
-driver has full support for virtual LANs (VLANs).
-The bridge implements independent VLAN learning, i.e. MAC addresses are
-learned on a per-VLAN basis, and the same MAC address may be learned on
-multiple interfaces on different VLANs.
-Incoming frames with an 802.1Q tag will be assigned to the appropriate
-VLAN.
-.Pp
-Traffic sent to or from the host is not assigned to a VLAN by default.
-To allow the host to communicate on a VLAN, configure a
+member interface, all tagged frames will be processed by the
.Xr vlan 4
-interface on the bridge and (if necessary) assign IP addresses there.
-.Pp
-By default no access control is enabled, so any interface may
-participate in any VLAN.
-.Pp
-VLAN filtering may be enabled on a bridge using the
+interface and will not be visible to the bridge.
+This configuration is not recommended and may be unsupported in a
+future release.
+.Ss Tagged and untagged traffic
+Incoming frames on a member interface may be either tagged or untagged.
+Tagged frames contain an 802.1Q header indicating which VLAN the
+frame belongs to, while untagged frames do not.
+When a tagged frame is received, the frame is automatically assigned to
+the VLAN in the tag (subject to any configured VLAN access list),
+while untagged frames are assigned to the interface's configured
+Port VLAN ID (PVID), or to VLAN 0 if no PVID is configured.
+.Ss Assigning interfaces to VLANs
+An interface's PVID may be configured using the
.Xr ifconfig 8
-.Cm vlanfilter
-option.
-When VLAN filtering is enabled, an interface may only send and receive
-frames based on its configured VLAN access list.
+.Cm ifuntagged
+command:
+.Bd -literal -offset indent
+ifconfig bridge0 ifuntagged ix0 10
+.Ed
.Pp
-The interface's untagged VLAN ID may be configured using the
-.Xr ifconfig 8
+Or by using the
.Cm untagged
-option.
-If an untagged VLAN ID is configured, incoming frames will be assigned
-to that VLAN, and the interface may receive outgoing untagged frames
-in that VLAN.
-.Pp
-The tagged VLAN access list may be configured using the
-.Cm tagged ,
-.Cm +tagged
-and
-.Cm -tagged
-options to
-.Xr ifconfig 8 .
-An interface may send and receive tagged frames for any VLAN in its
-access list.
+option to
+.Cm addm :
+.Bd -literal -offset indent
+ifconfig bridge0 addm ix0 untagged 10
+.Ed
.Pp
-The bridge will automatically insert or remove 802.1q tags as needed,
-based on the interface configuration, when forwarding frames between
-interfaces.
-This tag processing is only done for interfaces with VLAN filtering
-enabled.
+This will assign all untagged traffic received on the interface to the
+specified VLAN, and any traffic transmitted on the interface in this
+VLAN will have its VLAN tag (if present) removed.
+Conversely, any traffic transmitted on the interface in a different
+VLAN will have a tag added, to allow the remote system to assign the
+traffic to the appropriate VLAN.
+.Ss Host communication in a VLAN
+Sometimes it is useful to allow the host itself to communicate in a VLAN,
+for example to provide routing to other hosts in the VLAN.
+To do this, create a
+.Xr vlan 4
+interface on top of the
+.Nm
+interface with the appropriate VLAN tag.
+For example, to allow the host to communicate in VLAN 10:
+.Bd -literal -offset indent
+ifconfig bridge0.10 create inet6 2001:db8::1/64
+.Ed
+.Ss Configuring the VLAN access list (VLAN filtering)
+For historical reasons, the default
+.Nm
+configuration allows all interfaces to send tagged traffic for any VLAN,
+meaning that VLANs do not provide security separation.
+To restrict which interfaces may communicate in which VLANs,
+enable VLAN filtering on the bridge:
+.Bd -literal -offset indent
+ifconfig bridge0 vlanfilter
+.Ed
+.Pp
+This has the following effects on bridge members:
+.Bl -bullet -offset indent
+.It
+No untagged frames will be accepted from a member interface unless
+the interface has a PVID configured.
+.It
+No tagged frames will be accepted from a member interface unless
+the VLAN identifier is present in the interface's VLAN access list.
+.It
+Frames with stacked tags (Q-in-Q) will not be accepted from a
+member interface unless the
+.Cm qinq
+option (see below) has been configured for that member.
+.El
+.Pp
+To configure the VLAN access list, use the
+.Xr ifconfig 8
+.Cm iftagged ,
+.Cm +iftagged
+or
+.Cm -iftagged
+commands.
+For example, to allow an interface to communicate in VLANs 10, 20,
+and any VLAN from 100 to 199:
+.Bd -literal -offset indent
+ifconfig bridge0 iftagged ix0 10,20,100-199
+.Ed
+.Ss IEEE 802.1ad (Q-in-Q) configuration
+IEEE 802.1ad, also called Q-in-Q or
+.Dq tag stacking ,
+allows a single Ethernet frame to contain multiple tags.
+This allows one Ethernet network to transport traffic between endpoints
+using its own VLAN tags without interfering with any pre-existing tags,
+and is often used in service provider networks to provide
+.Dq virtual wire
+Ethernet services.
+.Pp
+When VLAN filtering is enabled,
+.Nm
+does not permit member interfaces to send Q-in-Q frames, because in
+certain configurations this allows
+.Dq VLAN-hopping
+attacks on the bridge.
+For example, consider a bridge with port ix0 configured as a tagged
+port in VLAN 10, and port ix1 configured as untagged in VLAN 10 and
+tagged in VLAN 20.
+If ix0 is allowed to send Q-in-Q frames, then it can send a frame with
+two tags: one for VLAN 10, followed by one for VLAN 20.
+When the bridge forwards the frame to ix1, it will strip the VLAN tag
+for VLAN 10, then forward the frame to ix1 with the tag for VLAN 20
+intact, effectively allowing ix1 to send traffic on VLAN 20 even
+though the bridge configuration should not permit that.
+.Pp
+To permit an interface to send Q-in-Q frames, set the
+.Xr ifconfig 8
+.Cm qinq
+flag on the interface.
+This is only required on the interface which will send Q-in-Q frames,
+not the interface receiving the frames.
+.Pp
+Alternatively, set the
+.Cm defqinq
+flag on the bridge itself to enable Q-in-Q for all newly-added
+interfaces by default.
.Sh PACKET FILTERING
Packet filtering can be used with any firewall package that hooks in via the
.Xr pfil 9
@@ -537,6 +633,36 @@ ifconfig_wlan0="up ssid my_ap mode 11g"
ifconfig_fxp0="up"
.Ed
.Pp
+The following will cause a bridge to be created with two VLANs,
+10 and 20, where the
+.Dq Li em
+interfaces can only communicate in their assigned VLANs,
+while
+.Dq Li ix0
+is a trunk port which can communicate in either VLAN:
+.Bd -literal -offset indent
+cloned_interfaces="bridge0"
+ifconfig_bridge0="vlanfilter \e
+ addm em0 untagged 10 \e
+ addm em1 untagged 10 \e
+ addm em2 untagged 20 \e
+ addm em3 untagged 20 \e
+ addm ix0 tagged 10,20"
+ifconfig_em0="up"
+ifconfig_em1="up"
+ifconfig_em2="up"
+ifconfig_em3="up"
+ifconfig_ix0="up"
+.Ed
+.Pp
+The previous example could be extended to allow the host to
+communicate in VLANs 10 and 20:
+.Bd -literal -offset indent
+vlans_bridge0="10 20"
+ifconfig_bridge0_10_ipv6="inet6 2001:db8:0:10::1/64"
+ifconfig_bridge0_20_ipv6="inet6 2001:db8:0:20::1/64"
+.Ed
+.Pp
Consider a system with two 4-port Ethernet boards.
The following will cause a bridge consisting of all 8 ports with
Rapid Spanning Tree enabled to be created:
diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
index 6cc2d58bbbcc..933f1ac0051f 100644
--- a/sys/amd64/amd64/elf_machdep.c
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -179,7 +179,7 @@ freebsd_brand_info_la57_img_compat(const struct image_params *imgp,
return (!prefer_uva_la48);
}
-static Elf64_Brandinfo freebsd_brand_info_la48 = {
+static const Elf64_Brandinfo freebsd_brand_info_la48 = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_X86_64,
.compat_3_brand = "FreeBSD",
@@ -190,7 +190,7 @@ static Elf64_Brandinfo freebsd_brand_info_la48 = {
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE,
};
-static Elf64_Brandinfo freebsd_brand_info_la57 = {
+static const Elf64_Brandinfo freebsd_brand_info_la57 = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_X86_64,
.compat_3_brand = "FreeBSD",
@@ -216,7 +216,7 @@ sysinit_register_elf64_brand_entries(void *arg __unused)
SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
sysinit_register_elf64_brand_entries, NULL);
-static Elf64_Brandinfo freebsd_brand_oinfo = {
+static const Elf64_Brandinfo freebsd_brand_oinfo = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_X86_64,
.compat_3_brand = "FreeBSD",
@@ -226,11 +226,10 @@ static Elf64_Brandinfo freebsd_brand_oinfo = {
.brand_note = &elf64_freebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-
-SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
+C_SYSINIT(oelf64, SI_SUB_EXEC, SI_ORDER_ANY,
(sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_oinfo);
-static Elf64_Brandinfo kfreebsd_brand_info = {
+static const Elf64_Brandinfo kfreebsd_brand_info = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_X86_64,
.compat_3_brand = "FreeBSD",
@@ -240,8 +239,7 @@ static Elf64_Brandinfo kfreebsd_brand_info = {
.brand_note = &elf64_kfreebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
};
-
-SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY,
+C_SYSINIT(kelf64, SI_SUB_EXEC, SI_ORDER_ANY,
(sysinit_cfunc_t)elf64_insert_brand_entry, &kfreebsd_brand_info);
void
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index c8579c5da4ad..890cf01c46a0 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -857,7 +857,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset)
}
}
-static Elf_Brandnote linux64_brandnote = {
+static const Elf_Brandnote linux64_brandnote = {
.hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
.hdr.n_descsz = 16,
.hdr.n_type = 1,
@@ -866,7 +866,7 @@ static Elf_Brandnote linux64_brandnote = {
.trans_osrel = linux_trans_osrel
};
-static Elf64_Brandinfo linux_glibc2brand = {
+static const Elf64_Brandinfo linux_glibc2brand = {
.brand = ELFOSABI_LINUX,
.machine = EM_X86_64,
.compat_3_brand = "Linux",
@@ -877,7 +877,7 @@ static Elf64_Brandinfo linux_glibc2brand = {
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-static Elf64_Brandinfo linux_glibc2brandshort = {
+static const Elf64_Brandinfo linux_glibc2brandshort = {
.brand = ELFOSABI_LINUX,
.machine = EM_X86_64,
.compat_3_brand = "Linux",
@@ -888,7 +888,7 @@ static Elf64_Brandinfo linux_glibc2brandshort = {
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-static Elf64_Brandinfo linux_muslbrand = {
+static const Elf64_Brandinfo linux_muslbrand = {
.brand = ELFOSABI_LINUX,
.machine = EM_X86_64,
.compat_3_brand = "Linux",
@@ -900,7 +900,7 @@ static Elf64_Brandinfo linux_muslbrand = {
LINUX_BI_FUTEX_REQUEUE
};
-static Elf64_Brandinfo *linux_brandlist[] = {
+static const Elf64_Brandinfo *linux_brandlist[] = {
&linux_glibc2brand,
&linux_glibc2brandshort,
&linux_muslbrand,
@@ -910,7 +910,7 @@ static Elf64_Brandinfo *linux_brandlist[] = {
static int
linux64_elf_modevent(module_t mod, int type, void *data)
{
- Elf64_Brandinfo **brandinfo;
+ const Elf64_Brandinfo **brandinfo;
int error;
struct linux_ioctl_handler **lihp;
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 8fac626f9053..735ebb151017 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -954,7 +954,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset)
}
}
-static Elf_Brandnote linux32_brandnote = {
+static const Elf_Brandnote linux32_brandnote = {
.hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
.hdr.n_descsz = 16, /* XXX at least 16 */
.hdr.n_type = 1,
@@ -963,7 +963,7 @@ static Elf_Brandnote linux32_brandnote = {
.trans_osrel = linux_trans_osrel
};
-static Elf32_Brandinfo linux_brand = {
+static const Elf32_Brandinfo linux_brand = {
.brand = ELFOSABI_LINUX,
.machine = EM_386,
.compat_3_brand = "Linux",
@@ -974,7 +974,7 @@ static Elf32_Brandinfo linux_brand = {
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-static Elf32_Brandinfo linux_glibc2brand = {
+static const Elf32_Brandinfo linux_glibc2brand = {
.brand = ELFOSABI_LINUX,
.machine = EM_386,
.compat_3_brand = "Linux",
@@ -985,7 +985,7 @@ static Elf32_Brandinfo linux_glibc2brand = {
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-static Elf32_Brandinfo linux_muslbrand = {
+static const Elf32_Brandinfo linux_muslbrand = {
.brand = ELFOSABI_LINUX,
.machine = EM_386,
.compat_3_brand = "Linux",
@@ -997,7 +997,7 @@ static Elf32_Brandinfo linux_muslbrand = {
LINUX_BI_FUTEX_REQUEUE
};
-static Elf32_Brandinfo *linux_brandlist[] = {
+static const Elf32_Brandinfo *linux_brandlist[] = {
&linux_brand,
&linux_glibc2brand,
&linux_muslbrand,
@@ -1007,7 +1007,7 @@ static Elf32_Brandinfo *linux_brandlist[] = {
static int
linux_elf_modevent(module_t mod, int type, void *data)
{
- Elf32_Brandinfo **brandinfo;
+ const Elf32_Brandinfo **brandinfo;
int error;
struct linux_ioctl_handler **lihp;
diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c
index 78883296c5b7..6a0ece1e4d98 100644
--- a/sys/arm/arm/pmap-v6.c
+++ b/sys/arm/arm/pmap-v6.c
@@ -1246,7 +1246,7 @@ pmap_bootstrap(vm_offset_t firstaddr)
}
static void
-pmap_init_reserved_pages(void)
+pmap_init_reserved_pages(void *dummy __unused)
{
struct pcpu *pc;
vm_offset_t pages;
diff --git a/sys/arm/arm/unwind.c b/sys/arm/arm/unwind.c
index 7ad91a3e01a5..0d77074fae34 100644
--- a/sys/arm/arm/unwind.c
+++ b/sys/arm/arm/unwind.c
@@ -278,7 +278,7 @@ unwind_module_unloaded(struct linker_file *lf)
* the unwind tables might be stripped, so instead we have to use the
* _exidx_start/end symbols created by ldscript.arm.
*/
-static int
+static void
module_info_init(void *arg __unused)
{
struct linker_file thekernel;
@@ -291,8 +291,6 @@ module_info_init(void *arg __unused)
thekernel.exidx_addr = CADDR(&_exidx_start);
thekernel.exidx_size = UADDR(&_exidx_end) - UADDR(&_exidx_start);
populate_module_info(create_module_info(), &thekernel);
-
- return (0);
}
SYSINIT(unwind_init, SI_SUB_KMEM, SI_ORDER_ANY, module_info_init, NULL);
diff --git a/sys/arm64/arm64/cpu_errata.c b/sys/arm64/arm64/cpu_errata.c
index 989924bc0567..b876703a2a15 100644
--- a/sys/arm64/arm64/cpu_errata.c
+++ b/sys/arm64/arm64/cpu_errata.c
@@ -52,56 +52,11 @@ struct cpu_quirks {
u_int flags;
};
-static enum {
- SSBD_FORCE_ON,
- SSBD_FORCE_OFF,
- SSBD_KERNEL,
-} ssbd_method = SSBD_KERNEL;
-
-static cpu_quirk_install install_psci_bp_hardening;
-static cpu_quirk_install install_ssbd_workaround;
static cpu_quirk_install install_thunderx_bcast_tlbi_workaround;
static struct cpu_quirks cpu_quirks[] = {
{
.midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
- .midr_value =
- CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0),
- .quirk_install = install_psci_bp_hardening,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = 0,
- .midr_value = 0,
- .quirk_install = install_ssbd_workaround,
- .flags = CPU_QUIRK_POST_DEVICE,
- },
- {
- .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
.midr_value =
CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX, 0, 0),
.quirk_install = install_thunderx_bcast_tlbi_workaround,
@@ -114,57 +69,6 @@ static struct cpu_quirks cpu_quirks[] = {
},
};
-static void
-install_psci_bp_hardening(void)
-{
- /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */
- if (!psci_present)
- return;
-
- if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) != SMCCC_RET_SUCCESS)
- return;
-
- PCPU_SET(bp_harden, smccc_arch_workaround_1);
-}
-
-static void
-install_ssbd_workaround(void)
-{
- char *env;
-
- if (PCPU_GET(cpuid) == 0) {
- env = kern_getenv("kern.cfg.ssbd");
- if (env != NULL) {
- if (strcmp(env, "force-on") == 0) {
- ssbd_method = SSBD_FORCE_ON;
- } else if (strcmp(env, "force-off") == 0) {
- ssbd_method = SSBD_FORCE_OFF;
- }
- }
- }
-
- /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */
- if (!psci_present)
- return;
-
- /* Enable the workaround on this CPU if it's enabled in the firmware */
- if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS)
- return;
-
- switch(ssbd_method) {
- case SSBD_FORCE_ON:
- smccc_arch_workaround_2(1);
- break;
- case SSBD_FORCE_OFF:
- smccc_arch_workaround_2(0);
- break;
- case SSBD_KERNEL:
- default:
- PCPU_SET(ssbd, smccc_arch_workaround_2);
- break;
- }
-}
-
/*
* Workaround Cavium erratum 27456.
*
diff --git a/sys/arm64/arm64/elf_machdep.c b/sys/arm64/arm64/elf_machdep.c
index 13af5c5065d6..207b37180a26 100644
--- a/sys/arm64/arm64/elf_machdep.c
+++ b/sys/arm64/arm64/elf_machdep.c
@@ -121,7 +121,7 @@ static struct sysentvec elf64_freebsd_sysvec = {
};
INIT_SYSENTVEC(elf64_sysvec, &elf64_freebsd_sysvec);
-static Elf64_Brandinfo freebsd_brand_info = {
+static const Elf64_Brandinfo freebsd_brand_info = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_AARCH64,
.compat_3_brand = "FreeBSD",
@@ -131,8 +131,7 @@ static Elf64_Brandinfo freebsd_brand_info = {
.brand_note = &elf64_freebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-
-SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
+C_SYSINIT(elf64, SI_SUB_EXEC, SI_ORDER_FIRST,
(sysinit_cfunc_t)elf64_insert_brand_entry, &freebsd_brand_info);
static bool
@@ -336,7 +335,7 @@ elf_cpu_parse_dynamic(caddr_t loadbase __unused, Elf_Dyn *dynamic __unused)
return (0);
}
-static Elf_Note gnu_property_note = {
+static const Elf_Note gnu_property_note = {
.n_namesz = sizeof(GNU_ABI_VENDOR),
.n_descsz = 16,
.n_type = NT_GNU_PROPERTY_TYPE_0,
diff --git a/sys/arm64/arm64/spec_workaround.c b/sys/arm64/arm64/spec_workaround.c
new file mode 100644
index 000000000000..7f4f86cdb48c
--- /dev/null
+++ b/sys/arm64/arm64/spec_workaround.c
@@ -0,0 +1,166 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Arm Ltd
+ * Copyright (c) 2018 Andrew Turner
+ *
+ * This software was developed by SRI International and the University of
+ * Cambridge Computer Laboratory under DARPA/AFRL contract FA8750-10-C-0237
+ * ("CTSRD"), as part of the DARPA CRASH research programme.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/pcpu.h>
+#include <sys/systm.h>
+
+#include <machine/cpu.h>
+#include <machine/cpu_feat.h>
+
+#include <dev/psci/psci.h>
+#include <dev/psci/smccc.h>
+
+static enum {
+ SSBD_FORCE_ON,
+ SSBD_FORCE_OFF,
+ SSBD_KERNEL,
+} ssbd_method = SSBD_KERNEL;
+
+struct psci_bp_hardening_impl {
+ u_int midr_mask;
+ u_int midr_value;
+};
+
+static struct psci_bp_hardening_impl psci_bp_hardening_impl[] = {
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A57,0,0),
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A72,0,0),
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A73,0,0),
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value = CPU_ID_RAW(CPU_IMPL_ARM, CPU_PART_CORTEX_A75,0,0),
+ },
+ {
+ .midr_mask = CPU_IMPL_MASK | CPU_PART_MASK,
+ .midr_value =
+ CPU_ID_RAW(CPU_IMPL_CAVIUM, CPU_PART_THUNDERX2, 0,0),
+ }
+};
+
+static cpu_feat_en
+psci_bp_hardening_check(const struct cpu_feat *feat __unused, u_int midr)
+{
+ size_t i;
+
+ for (i = 0; i < nitems(psci_bp_hardening_impl); i++) {
+ if ((midr & psci_bp_hardening_impl[i].midr_mask) ==
+ psci_bp_hardening_impl[i].midr_value) {
+ /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */
+ if (!psci_present)
+ return (FEAT_ALWAYS_DISABLE);
+
+ if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_1) !=
+ SMCCC_RET_SUCCESS)
+ return (FEAT_ALWAYS_DISABLE);
+
+ return (FEAT_DEFAULT_ENABLE);
+ }
+ }
+
+ return (FEAT_ALWAYS_DISABLE);
+}
+
+static bool
+psci_bp_hardening_enable(const struct cpu_feat *feat __unused,
+ cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
+ u_int errata_count __unused)
+{
+ PCPU_SET(bp_harden, smccc_arch_workaround_1);
+
+ return (true);
+}
+
+CPU_FEAT(feat_csv2_missing, "Branch Predictor Hardening",
+ psci_bp_hardening_check, NULL, psci_bp_hardening_enable, NULL,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
+
+static cpu_feat_en
+ssbd_workaround_check(const struct cpu_feat *feat __unused, u_int midr __unused)
+{
+ char *env;
+
+ if (PCPU_GET(cpuid) == 0) {
+ env = kern_getenv("kern.cfg.ssbd");
+ if (env != NULL) {
+ if (strcmp(env, "force-on") == 0) {
+ ssbd_method = SSBD_FORCE_ON;
+ } else if (strcmp(env, "force-off") == 0) {
+ ssbd_method = SSBD_FORCE_OFF;
+ }
+ }
+ }
+
+ /* SMCCC depends on PSCI. If PSCI is missing so is SMCCC */
+ if (!psci_present)
+ return (FEAT_ALWAYS_DISABLE);
+
+ /* Enable the workaround on this CPU if it's enabled in the firmware */
+ if (smccc_arch_features(SMCCC_ARCH_WORKAROUND_2) != SMCCC_RET_SUCCESS)
+ return (FEAT_ALWAYS_DISABLE);
+
+ return (FEAT_DEFAULT_ENABLE);
+}
+
+static bool
+ssbd_workaround_enable(const struct cpu_feat *feat __unused,
+ cpu_feat_errata errata_status __unused, u_int *errata_list __unused,
+ u_int errata_count __unused)
+{
+	switch (ssbd_method) {
+ case SSBD_FORCE_ON:
+ smccc_arch_workaround_2(1);
+ break;
+ case SSBD_FORCE_OFF:
+ smccc_arch_workaround_2(0);
+ break;
+ case SSBD_KERNEL:
+ default:
+ PCPU_SET(ssbd, smccc_arch_workaround_2);
+ break;
+ }
+
+ return (true);
+}
+
+CPU_FEAT(feat_ssbs_missing, "Speculative Store Bypass Disable Workaround",
+ ssbd_workaround_check, NULL, ssbd_workaround_enable, NULL,
+ CPU_FEAT_AFTER_DEV | CPU_FEAT_PER_CPU);
diff --git a/sys/arm64/coresight/coresight.c b/sys/arm64/coresight/coresight.c
index 5928c153f4ae..9b9d3c65ecc9 100644
--- a/sys/arm64/coresight/coresight.c
+++ b/sys/arm64/coresight/coresight.c
@@ -113,7 +113,7 @@ coresight_get_output_device(struct endpoint *endp, struct endpoint **out_endp)
}
static void
-coresight_init(void)
+coresight_init(void *dummy __unused)
{
mtx_init(&cs_mtx, "ARM Coresight", NULL, MTX_DEF);
diff --git a/sys/arm64/linux/linux_sysvec.c b/sys/arm64/linux/linux_sysvec.c
index 084b7a11b01f..ac05820f89bc 100644
--- a/sys/arm64/linux/linux_sysvec.c
+++ b/sys/arm64/linux/linux_sysvec.c
@@ -584,7 +584,7 @@ linux_vdso_reloc(char *mapping, Elf_Addr offset)
}
}
-static Elf_Brandnote linux64_brandnote = {
+static const Elf_Brandnote linux64_brandnote = {
.hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
.hdr.n_descsz = 16,
.hdr.n_type = 1,
@@ -593,7 +593,7 @@ static Elf_Brandnote linux64_brandnote = {
.trans_osrel = linux_trans_osrel
};
-static Elf64_Brandinfo linux_glibc2brand = {
+static const Elf64_Brandinfo linux_glibc2brand = {
.brand = ELFOSABI_LINUX,
.machine = EM_AARCH64,
.compat_3_brand = "Linux",
@@ -604,7 +604,7 @@ static Elf64_Brandinfo linux_glibc2brand = {
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
-Elf64_Brandinfo *linux_brandlist[] = {
+const Elf64_Brandinfo *linux_brandlist[] = {
&linux_glibc2brand,
NULL
};
@@ -612,8 +612,8 @@ Elf64_Brandinfo *linux_brandlist[] = {
static int
linux64_elf_modevent(module_t mod, int type, void *data)
{
- Elf64_Brandinfo **brandinfo;
- struct linux_ioctl_handler**lihp;
+ const Elf64_Brandinfo **brandinfo;
+ struct linux_ioctl_handler **lihp;
int error;
error = 0;
diff --git a/sys/cam/scsi/scsi_all.c b/sys/cam/scsi/scsi_all.c
index b518f84454ad..9c097f52d136 100644
--- a/sys/cam/scsi/scsi_all.c
+++ b/sys/cam/scsi/scsi_all.c
@@ -112,7 +112,7 @@ static void fetchtableentries(int sense_key, int asc, int ascq,
const struct asc_table_entry **);
#ifdef _KERNEL
-static void init_scsi_delay(void);
+static void init_scsi_delay(void *);
static int sysctl_scsi_delay(SYSCTL_HANDLER_ARGS);
static int set_scsi_delay(int delay);
#endif
@@ -9379,7 +9379,7 @@ scsi_vpd_supported_page(struct cam_periph *periph, uint8_t page_id)
}
static void
-init_scsi_delay(void)
+init_scsi_delay(void *dummy __unused)
{
int delay;
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris.c b/sys/cddl/compat/opensolaris/kern/opensolaris.c
index 10924977c20d..898b2ea49f96 100644
--- a/sys/cddl/compat/opensolaris/kern/opensolaris.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris.c
@@ -67,7 +67,7 @@ opensolaris_load(void *dummy)
SYSINIT(opensolaris_register, SI_SUB_OPENSOLARIS, SI_ORDER_FIRST, opensolaris_load, NULL);
static void
-opensolaris_unload(void)
+opensolaris_unload(void *dummy __unused)
{
mutex_destroy(&cpu_lock);
}
diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c
index 0ea7d072e911..b9dada4eee7b 100644
--- a/sys/compat/ia32/ia32_sysvec.c
+++ b/sys/compat/ia32/ia32_sysvec.c
@@ -145,7 +145,7 @@ struct sysentvec ia32_freebsd_sysvec = {
};
INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec);
-static Elf32_Brandinfo ia32_brand_info = {
+static const Elf32_Brandinfo ia32_brand_info = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_386,
.compat_3_brand = "FreeBSD",
@@ -155,12 +155,10 @@ static Elf32_Brandinfo ia32_brand_info = {
.brand_note = &elf32_freebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
+C_SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE,
+ (sysinit_cfunc_t)elf32_insert_brand_entry, &ia32_brand_info);
-SYSINIT(ia32, SI_SUB_EXEC, SI_ORDER_MIDDLE,
- (sysinit_cfunc_t) elf32_insert_brand_entry,
- &ia32_brand_info);
-
-static Elf32_Brandinfo ia32_brand_oinfo = {
+static const Elf32_Brandinfo ia32_brand_oinfo = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_386,
.compat_3_brand = "FreeBSD",
@@ -170,12 +168,10 @@ static Elf32_Brandinfo ia32_brand_oinfo = {
.brand_note = &elf32_freebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};
+C_SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY,
+ (sysinit_cfunc_t)elf32_insert_brand_entry, &ia32_brand_oinfo);
-SYSINIT(oia32, SI_SUB_EXEC, SI_ORDER_ANY,
- (sysinit_cfunc_t) elf32_insert_brand_entry,
- &ia32_brand_oinfo);
-
-static Elf32_Brandinfo kia32_brand_info = {
+static const Elf32_Brandinfo kia32_brand_info = {
.brand = ELFOSABI_FREEBSD,
.machine = EM_386,
.compat_3_brand = "FreeBSD",
@@ -184,10 +180,8 @@ static Elf32_Brandinfo kia32_brand_info = {
.brand_note = &elf32_kfreebsd_brandnote,
.flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE_MANDATORY
};
-
-SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY,
- (sysinit_cfunc_t) elf32_insert_brand_entry,
- &kia32_brand_info);
+C_SYSINIT(kia32, SI_SUB_EXEC, SI_ORDER_ANY,
+ (sysinit_cfunc_t)elf32_insert_brand_entry, &kia32_brand_info);
void
elf32_dump_thread(struct thread *td, void *dst, size_t *off)
diff --git a/sys/compat/linux/linux.c b/sys/compat/linux/linux.c
index 61b207070963..a40f110634f7 100644
--- a/sys/compat/linux/linux.c
+++ b/sys/compat/linux/linux.c
@@ -578,8 +578,13 @@ bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
return (0);
}
+/*
+ * If sap is NULL, then osa points at an already copied in Linux sockaddr
+ * that should be edited in place.  Otherwise memory is allocated, the
+ * sockaddr is copied in, and the result is returned in *sap.
+ */
int
-linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
+linux_to_bsd_sockaddr(struct l_sockaddr *osa, struct sockaddr **sap,
socklen_t *len)
{
struct sockaddr *sa;
@@ -609,10 +614,12 @@ linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
}
#endif
- kosa = malloc(salen, M_SONAME, M_WAITOK);
-
- if ((error = copyin(osa, kosa, *len)))
- goto out;
+ if (sap != NULL) {
+ kosa = malloc(salen, M_SONAME, M_WAITOK);
+ if ((error = copyin(osa, kosa, *len)))
+ goto out;
+ } else
+ kosa = osa;
bdom = linux_to_bsd_domain(kosa->sa_family);
if (bdom == AF_UNKNOWN) {
@@ -686,12 +693,15 @@ linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
sa->sa_family = bdom;
sa->sa_len = salen;
- *sap = sa;
- *len = salen;
+ if (sap != NULL) {
+ *sap = sa;
+ *len = salen;
+ }
return (0);
out:
- free(kosa, M_SONAME);
+ if (sap != NULL)
+ free(kosa, M_SONAME);
return (error);
}
diff --git a/sys/compat/linux/linux_common.h b/sys/compat/linux/linux_common.h
index 97f5a259f300..814c183b338a 100644
--- a/sys/compat/linux/linux_common.h
+++ b/sys/compat/linux/linux_common.h
@@ -43,7 +43,7 @@ sa_family_t bsd_to_linux_domain(sa_family_t domain);
#define AF_UNKNOWN UINT8_MAX
int bsd_to_linux_sockaddr(const struct sockaddr *sa,
struct l_sockaddr **lsa, socklen_t len);
-int linux_to_bsd_sockaddr(const struct l_sockaddr *lsa,
+int linux_to_bsd_sockaddr(struct l_sockaddr *lsa,
struct sockaddr **sap, socklen_t *len);
void linux_to_bsd_poll_events(struct thread *td, int fd,
short lev, short *bev);
diff --git a/sys/compat/linux/linux_futex.c b/sys/compat/linux/linux_futex.c
index 37d0142bae8b..0586eb55a8f3 100644
--- a/sys/compat/linux/linux_futex.c
+++ b/sys/compat/linux/linux_futex.c
@@ -251,7 +251,7 @@ linux_futex(struct thread *td, struct linux_futex_args *args)
* set LINUX_BI_FUTEX_REQUEUE bit of Brandinfo flags.
*/
p = td->td_proc;
- Elf_Brandinfo *bi = p->p_elf_brandinfo;
+ const Elf_Brandinfo *bi = p->p_elf_brandinfo;
if (bi == NULL || ((bi->flags & LINUX_BI_FUTEX_REQUEUE)) == 0)
return (EINVAL);
args->val3_compare = false;
diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c
index 0e07b0a60ced..b1a483ce611c 100644
--- a/sys/compat/linux/linux_socket.c
+++ b/sys/compat/linux/linux_socket.c
@@ -2146,7 +2146,8 @@ linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
return (ENOPROTOOPT);
}
- if (name == IPV6_NEXTHOP) {
+ switch (name) {
+ case IPV6_NEXTHOP: {
len = args->optlen;
error = linux_to_bsd_sockaddr(PTRIN(args->optval), &sa, &len);
if (error != 0)
@@ -2155,7 +2156,34 @@ linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
error = kern_setsockopt(td, args->s, level,
name, sa, UIO_SYSSPACE, len);
free(sa, M_SONAME);
- } else {
+ break;
+ }
+ case MCAST_JOIN_GROUP:
+ case MCAST_LEAVE_GROUP:
+ case MCAST_JOIN_SOURCE_GROUP:
+ case MCAST_LEAVE_SOURCE_GROUP: {
+ struct group_source_req req;
+ size_t size;
+
+ size = (name == MCAST_JOIN_SOURCE_GROUP ||
+ name == MCAST_LEAVE_SOURCE_GROUP) ?
+ sizeof(struct group_source_req) : sizeof(struct group_req);
+
+ if ((error = copyin(PTRIN(args->optval), &req, size)))
+ return (error);
+ len = sizeof(struct sockaddr_storage);
+ if ((error = linux_to_bsd_sockaddr(
+ (struct l_sockaddr *)&req.gsr_group, NULL, &len)))
+ return (error);
+ if (size == sizeof(struct group_source_req) &&
+ (error = linux_to_bsd_sockaddr(
+ (struct l_sockaddr *)&req.gsr_source, NULL, &len)))
+ return (error);
+ error = kern_setsockopt(td, args->s, level, name, &req,
+ UIO_SYSSPACE, size);
+ break;
+ }
+ default:
error = kern_setsockopt(td, args->s, level,
name, PTRIN(args->optval), UIO_USERSPACE, args->optlen);
}
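For reference, the userland side of the new cases is the protocol-independent multicast API; a sketch of a join that exercises the path above (standard socket API only, error handling trimmed):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <string.h>

    /*
     * MCAST_JOIN_GROUP carries the group address inside a sockaddr_storage
     * embedded in struct group_req, which is why the emulation above must
     * translate the sockaddr in place rather than allocate a new one.
     */
    int
    join_group(int s, const char *grp, unsigned int ifindex)
    {
            struct group_req greq;
            struct sockaddr_in *sin;

            memset(&greq, 0, sizeof(greq));
            greq.gr_interface = ifindex;    /* 0 lets the kernel choose */
            sin = (struct sockaddr_in *)&greq.gr_group;
            sin->sin_family = AF_INET;
            sin->sin_len = sizeof(*sin);    /* BSD sockaddrs carry a length */
            inet_pton(AF_INET, grp, &sin->sin_addr);
            return (setsockopt(s, IPPROTO_IP, MCAST_JOIN_GROUP,
                &greq, sizeof(greq)));
    }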
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index ea9b2667607e..a25ee8f6e1af 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -679,6 +679,7 @@ options TCP_OFFLOAD # TCP offload support.
options TCP_RFC7413 # TCP Fast Open
options TCPHPTS
+#options TCP_HPTS_KTEST # Add KTEST support for HPTS
# In order to enable IPSEC you MUST also add device crypto to
# your kernel configuration
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 856ea3af1372..2f412fa3cb1b 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -73,6 +73,7 @@ arm64/arm64/pmap.c standard
arm64/arm64/ptrace_machdep.c standard
arm64/arm64/sdt_machdep.c optional kdtrace_hooks
arm64/arm64/sigtramp.S standard
+arm64/arm64/spec_workaround.c standard
arm64/arm64/stack_machdep.c optional ddb | stack
arm64/arm64/strcmp.S standard
arm64/arm64/strncmp.S standard
diff --git a/sys/conf/options b/sys/conf/options
index b48ad1cf42cf..0b795a8d28fb 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -231,6 +231,7 @@ SYSVSEM opt_sysvipc.h
SYSVSHM opt_sysvipc.h
SW_WATCHDOG opt_watchdog.h
TCPHPTS
+TCP_HPTS_KTEST opt_inet.h
TCP_REQUEST_TRK opt_global.h
TCP_ACCOUNTING opt_global.h
TCP_BBR opt_inet.h
diff --git a/sys/conf/std.debug b/sys/conf/std.debug
index f5ed5582c78d..0149779b3e5c 100644
--- a/sys/conf/std.debug
+++ b/sys/conf/std.debug
@@ -16,3 +16,4 @@ options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones
options VERBOSE_SYSINIT=0 # Support debug.verbose_sysinit, off by default
options ALT_BREAK_TO_DEBUGGER # Enter debugger on keyboard escape sequence
options KDTRACE_MIB_SDT # Add SDT probes to network counters
+options TCP_HPTS_KTEST # Add KTEST support for HPTS
diff --git a/sys/conf/std.nodebug b/sys/conf/std.nodebug
index 4035e28d2a62..79676a1d618f 100644
--- a/sys/conf/std.nodebug
+++ b/sys/conf/std.nodebug
@@ -16,6 +16,7 @@ nooptions KCOV
nooptions MALLOC_DEBUG_MAXZONES
nooptions QUEUE_MACRO_DEBUG_TRASH
nooptions KDTRACE_MIB_SDT
+nooptions TCP_HPTS_KTEST
# Net80211 debugging
nooptions IEEE80211_DEBUG
diff --git a/sys/contrib/libnv/bsd_nvpair.c b/sys/contrib/libnv/bsd_nvpair.c
index c73bc2189121..b884dd260b84 100644
--- a/sys/contrib/libnv/bsd_nvpair.c
+++ b/sys/contrib/libnv/bsd_nvpair.c
@@ -985,13 +985,13 @@ nvpair_unpack_string_array(bool isbe __unused, nvpair_t *nvp,
size = nvp->nvp_datasize;
tmp = (const char *)ptr;
for (ii = 0; ii < nvp->nvp_nitems; ii++) {
- len = strnlen(tmp, size - 1) + 1;
- size -= len;
- if (tmp[len - 1] != '\0') {
+ if (size <= 0) {
ERRNO_SET(EINVAL);
return (NULL);
}
- if (size < 0) {
+ len = strnlen(tmp, size - 1) + 1;
+ size -= len;
+ if (tmp[len - 1] != '\0') {
ERRNO_SET(EINVAL);
return (NULL);
}
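The fix reorders the checks so the remaining-byte test happens before the bound is used; a standalone re-creation of the corrected loop shape (not the libnv code itself):

    #include <sys/types.h>
    #include <errno.h>
    #include <string.h>

    /*
     * Validate that bytes remain before calling strnlen(), so that
     * strnlen(tmp, size - 1) is never handed a wrapped/negative bound.
     */
    static int
    check_packed_strings(const char *buf, ssize_t size, size_t nitems)
    {
            const char *tmp = buf;
            size_t ii, len;

            for (ii = 0; ii < nitems; ii++) {
                    if (size <= 0)
                            return (EINVAL);        /* out of bytes early */
                    len = strnlen(tmp, size - 1) + 1;
                    size -= len;
                    if (tmp[len - 1] != '\0')
                            return (EINVAL);        /* unterminated string */
                    tmp += len;
            }
            return (0);
    }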
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
index ace2360c032d..ebc2c0eeb6d2 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@@ -163,6 +163,13 @@ param_set_arc_int(SYSCTL_HANDLER_ARGS)
return (0);
}
+static void
+warn_deprecated_sysctl(const char *old, const char *new)
+{
+ printf("WARNING: sysctl vfs.zfs.%s is deprecated. Use vfs.zfs.%s instead.\n",
+ old, new);
+}
+
int
param_set_arc_max(SYSCTL_HANDLER_ARGS)
{
@@ -185,12 +192,15 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS)
if (val != 0)
zfs_arc_max = arc_c_max;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("arc_max", "arc.max");
+
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max,
CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_max, "LU",
+ NULL, 1, param_set_arc_max, "LU",
"Maximum ARC size in bytes (LEGACY)");
int
@@ -214,12 +224,15 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS)
if (val != 0)
zfs_arc_min = arc_c_min;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("arc_min", "arc.min");
+
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min,
CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_min, "LU",
+ NULL, 1, param_set_arc_min, "LU",
"Minimum ARC size in bytes (LEGACY)");
extern uint_t zfs_arc_free_target;
@@ -242,6 +255,9 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
zfs_arc_free_target = val;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("arc_free_target", "arc.free_target");
+
return (0);
}
@@ -251,7 +267,7 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS)
*/
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target,
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_free_target, "IU",
+ NULL, 1, param_set_arc_free_target, "IU",
"Desired number of free pages below which ARC triggers reclaim"
" (LEGACY)");
@@ -270,12 +286,15 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS)
arc_no_grow_shift = val;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("arc_no_grow_shift", "arc.no_grow_shift");
+
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift,
CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- NULL, 0, param_set_arc_no_grow_shift, "I",
+ NULL, 1, param_set_arc_no_grow_shift, "I",
"log2(fraction of ARC which must be free to allow growing) (LEGACY)");
extern uint64_t l2arc_write_max;
@@ -746,12 +765,15 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS)
zfs_vdev_min_auto_ashift = val;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("min_auto_ashift",
+ "vdev.min_auto_ashift");
+
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift,
- CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift),
+ CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 1,
param_set_min_auto_ashift, "IU",
"Min ashift used when creating new top-level vdev. (LEGACY)");
@@ -771,12 +793,15 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS)
zfs_vdev_max_auto_ashift = val;
+ if (arg2 != 0)
+ warn_deprecated_sysctl("max_auto_ashift",
+ "vdev.max_auto_ashift");
+
return (0);
}
SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift,
- CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
- &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift),
+ CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 1,
param_set_max_auto_ashift, "IU",
"Max ashift used when optimizing for logical -> physical sector size on"
" new top-level vdevs. (LEGACY)");
diff --git a/sys/dev/fdt/fdt_slicer.c b/sys/dev/fdt/fdt_slicer.c
index 3ba4eddf8b61..50112db5cfae 100644
--- a/sys/dev/fdt/fdt_slicer.c
+++ b/sys/dev/fdt/fdt_slicer.c
@@ -45,7 +45,7 @@
static int fill_slices(device_t dev, const char *provider,
struct flash_slice *slices, int *slices_num);
-static void fdt_slicer_init(void);
+static void fdt_slicer_init(void *);
static int
fill_slices_from_node(phandle_t node, struct flash_slice *slices, int *count)
@@ -138,7 +138,7 @@ fill_slices(device_t dev, const char *provider __unused,
}
static void
-fdt_slicer_init(void)
+fdt_slicer_init(void *dummy __unused)
{
flash_register_slicer(fill_slices, FLASH_SLICES_TYPE_NAND, false);
@@ -147,7 +147,7 @@ fdt_slicer_init(void)
}
static void
-fdt_slicer_cleanup(void)
+fdt_slicer_cleanup(void *dummy __unused)
{
flash_register_slicer(NULL, FLASH_SLICES_TYPE_NAND, true);
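This hunk and the many similar ones below bring init routines in line with the signature SYSINIT(9) expects; the canonical shape (illustrative module-local example, not taken from any one of the files touched here):

    #include <sys/param.h>
    #include <sys/kernel.h>

    /*
     * SYSINIT handlers take a (usually unused) argument and return void,
     * so no function-pointer cast is needed at registration time.
     */
    static void
    example_init(void *dummy __unused)
    {
            /* one-time setup goes here */
    }
    SYSINIT(example_init, SI_SUB_DRIVERS, SI_ORDER_ANY, example_init, NULL);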
diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c
index ffa8dc096adc..80e37341b3dc 100644
--- a/sys/dev/iommu/iommu_gas.c
+++ b/sys/dev/iommu/iommu_gas.c
@@ -77,7 +77,7 @@ static int iommu_check_free;
#endif
static void
-intel_gas_init(void)
+intel_gas_init(void *dummy __unused)
{
iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY",
diff --git a/sys/dev/mii/mv88e151x.c b/sys/dev/mii/mv88e151x.c
index 618ad81471c9..fb03b2a7a917 100644
--- a/sys/dev/mii/mv88e151x.c
+++ b/sys/dev/mii/mv88e151x.c
@@ -97,7 +97,7 @@ mv88e151x_attach(device_t dev)
{
const struct mii_attach_args *ma;
struct mii_softc *sc;
- uint32_t cop_cap, cop_extcap;
+ uint32_t cop_cap = 0, cop_extcap = 0;
sc = device_get_softc(dev);
ma = device_get_ivars(dev);
@@ -224,10 +224,12 @@ mv88e151x_fiber_status(struct mii_softc *phy)
else if (reg & MV88E151X_STATUS_LINK &&
reg & MV88E151X_STATUS_SYNC &&
(reg & MV88E151X_STATUS_ENERGY) == 0) {
- if ((reg & MV88E151X_STATUS_SPEED_MASK) ==
+ if (((reg & MV88E151X_STATUS_SPEED_MASK) >>
+ MV88E151X_STATUS_SPEED_SHIFT) ==
MV88E151X_STATUS_SPEED_1000)
mii->mii_media_active |= IFM_1000_SX;
- else if ((reg & MV88E151X_STATUS_SPEED_MASK) ==
+ else if (((reg & MV88E151X_STATUS_SPEED_MASK) >>
+ MV88E151X_STATUS_SPEED_SHIFT) ==
MV88E151X_STATUS_SPEED_100)
mii->mii_media_active |= IFM_100_FX;
else
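The bug fixed above is the classic mask-without-shift comparison; the corrected idiom in one helper (mask, shift, and value here are invented stand-ins for the MV88E151X definitions):

    #include <stdint.h>

    #define STATUS_SPEED_MASK       0xc000u
    #define STATUS_SPEED_SHIFT      14
    #define STATUS_SPEED_1000       2

    /*
     * A register field must be shifted down after masking before it can
     * be compared against an enumerated value.
     */
    static inline unsigned int
    status_speed(uint16_t reg)
    {
            return ((reg & STATUS_SPEED_MASK) >> STATUS_SPEED_SHIFT);
    }
    /* usage: if (status_speed(reg) == STATUS_SPEED_1000) ... */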
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c
index ead91f0d01fe..d119f9877aaa 100644
--- a/sys/dev/nvme/nvme.c
+++ b/sys/dev/nvme/nvme.c
@@ -51,7 +51,7 @@ int32_t nvme_retry_count;
MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
static void
-nvme_init(void)
+nvme_init(void *dummy __unused)
{
uint32_t i;
@@ -62,7 +62,7 @@ nvme_init(void)
SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
static void
-nvme_uninit(void)
+nvme_uninit(void *dummy __unused)
{
}
diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h
index 57cb37907e65..f4ea08f129c0 100644
--- a/sys/dev/nvme/nvme.h
+++ b/sys/dev/nvme/nvme.h
@@ -2153,8 +2153,6 @@ static inline
void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused)
{
#if _BYTE_ORDER != _LITTLE_ENDIAN
- int i;
-
s->nsze = le64toh(s->nsze);
s->ncap = le64toh(s->ncap);
s->nuse = le64toh(s->nuse);
@@ -2173,7 +2171,7 @@ void nvme_namespace_data_swapbytes(struct nvme_namespace_data *s __unused)
s->anagrpid = le32toh(s->anagrpid);
s->nvmsetid = le16toh(s->nvmsetid);
s->endgid = le16toh(s->endgid);
- for (i = 0; i < 64; i++)
+ for (unsigned i = 0; i < nitems(s->lbaf); i++)
s->lbaf[i] = le32toh(s->lbaf[i]);
#endif
}
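Replacing the literal 64 with nitems() ties the loop bound to the array definition; the idiom in isolation (standalone sketch, struct and field are placeholders):

    #include <sys/param.h>          /* nitems() */
    #include <sys/endian.h>

    struct rec { uint32_t lbaf[64]; };

    static void
    rec_swap(struct rec *r)
    {
            /* the bound follows the array if its size ever changes */
            for (unsigned int i = 0; i < nitems(r->lbaf); i++)
                    r->lbaf[i] = le32toh(r->lbaf[i]);
    }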
diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c
index a06774a64761..7693aa6d54d3 100644
--- a/sys/dev/nvme/nvme_sim.c
+++ b/sys/dev/nvme/nvme_sim.c
@@ -391,7 +391,7 @@ nvme_sim_controller_fail(void *ctrlr_arg)
struct nvme_consumer *consumer_cookie;
static void
-nvme_sim_init(void)
+nvme_sim_init(void *dummy __unused)
{
if (nvme_use_nvd)
return;
@@ -404,7 +404,7 @@ SYSINIT(nvme_sim_register, SI_SUB_DRIVERS, SI_ORDER_ANY,
nvme_sim_init, NULL);
static void
-nvme_sim_uninit(void)
+nvme_sim_uninit(void *dummy __unused)
{
if (nvme_use_nvd)
return;
diff --git a/sys/dev/xdma/xdma.c b/sys/dev/xdma/xdma.c
index 62b781159d03..cdd9ad0b8f39 100644
--- a/sys/dev/xdma/xdma.c
+++ b/sys/dev/xdma/xdma.c
@@ -555,7 +555,7 @@ xdma_put(xdma_controller_t *xdma)
}
static void
-xdma_init(void)
+xdma_init(void *dummy __unused)
{
mtx_init(&xdma_mtx, "xDMA", NULL, MTX_DEF);
diff --git a/sys/dev/xen/bus/xen_intr.c b/sys/dev/xen/bus/xen_intr.c
index cb30b6efa484..2b5fa8fb7cd1 100644
--- a/sys/dev/xen/bus/xen_intr.c
+++ b/sys/dev/xen/bus/xen_intr.c
@@ -460,7 +460,7 @@ xen_intr_handle_upcall(void *unused __unused)
return (FILTER_HANDLED);
}
-static int
+static void
xen_intr_init(void *dummy __unused)
{
shared_info_t *s = HYPERVISOR_shared_info;
@@ -468,7 +468,7 @@ xen_intr_init(void *dummy __unused)
int i;
if (!xen_domain())
- return (0);
+ return;
_Static_assert(is_valid_evtchn(0),
"is_valid_evtchn(0) fails (unused by Xen, but valid by interface");
@@ -502,8 +502,6 @@ xen_intr_init(void *dummy __unused)
if (bootverbose)
printf("Xen interrupt system initialized\n");
-
- return (0);
}
SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_SECOND, xen_intr_init, NULL);
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index 5c28db29fc63..683ee2f7ad56 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -284,7 +284,7 @@ fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
struct mount *mp = vnode_mount(vp);
int err;
- if (fsess_not_impl(vnode_mount(vp), FUSE_FLUSH))
+ if (fsess_not_impl(mp, FUSE_FLUSH))
return 0;
err = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
@@ -292,7 +292,7 @@ fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag)
return err;
if (fufh->fuse_open_flags & FOPEN_NOFLUSH &&
- (!fsess_opt_writeback(vnode_mount(vp))))
+ (!fsess_opt_writeback(mp)))
return (0);
fdisp_init(&fdi, sizeof(*ffi));
diff --git a/sys/fs/p9fs/p9_transport.c b/sys/fs/p9fs/p9_transport.c
index c82d81fedcd7..25eee984265c 100644
--- a/sys/fs/p9fs/p9_transport.c
+++ b/sys/fs/p9fs/p9_transport.c
@@ -34,9 +34,8 @@
TAILQ_HEAD(, p9_trans_module) transports;
static void
-p9_transport_init(void)
+p9_transport_init(void *dummy __unused)
{
-
TAILQ_INIT(&transports);
}
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index a14f9ca74305..b6d6db60ca3d 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -587,6 +587,7 @@ unionfs_find_node_status(struct unionfs_node *unp, struct thread *td)
struct unionfs_node_status *unsp;
pid_t pid;
+ MPASS(td != NULL);
pid = td->td_proc->p_pid;
ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__);
@@ -612,6 +613,7 @@ unionfs_get_node_status(struct unionfs_node *unp, struct thread *td,
struct unionfs_node_status *unsp;
pid_t pid;
+ MPASS(td != NULL);
pid = td->td_proc->p_pid;
KASSERT(NULL != unspp, ("%s: NULL status", __func__));
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
index 627b2f6e9a1d..66fee97a07d5 100644
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -814,7 +814,7 @@ unionfs_close(struct vop_close_args *ap)
unp = VTOUNIONFS(vp);
lvp = unp->un_lowervp;
uvp = unp->un_uppervp;
- unsp = unionfs_find_node_status(unp, td);
+ unsp = (td != NULL) ? unionfs_find_node_status(unp, td) : NULL;
if (unsp == NULL ||
(unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0)) {
@@ -2208,7 +2208,6 @@ unionfs_lock_restart:
vholdnz(tvp);
VI_UNLOCK(vp);
error = VOP_LOCK(tvp, flags);
- vdrop(tvp);
if (error == 0 && (lvp_locked || VTOUNIONFS(vp) == NULL)) {
/*
* After dropping the interlock above, there exists a window
@@ -2234,6 +2233,7 @@ unionfs_lock_restart:
unp = VTOUNIONFS(vp);
if (unp == NULL || unp->un_uppervp != NULL) {
VOP_UNLOCK(tvp);
+ vdrop(tvp);
/*
* If we previously held the lock, the upgrade may
* have temporarily dropped the lock, in which case
@@ -2249,6 +2249,7 @@ unionfs_lock_restart:
goto unionfs_lock_restart;
}
}
+ vdrop(tvp);
return (error);
}
@@ -2259,7 +2260,6 @@ unionfs_unlock(struct vop_unlock_args *ap)
struct vnode *vp;
struct vnode *tvp;
struct unionfs_node *unp;
- int error;
KASSERT_UNIONFS_VNODE(ap->a_vp);
@@ -2271,11 +2271,7 @@ unionfs_unlock(struct vop_unlock_args *ap)
tvp = (unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp);
- vholdnz(tvp);
- error = VOP_UNLOCK(tvp);
- vdrop(tvp);
-
- return (error);
+ return (VOP_UNLOCK(tvp));
}
static int
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 6aac0e968362..3f659432552c 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1605,7 +1605,7 @@ init386(int first)
}
static void
-machdep_init_trampoline(void)
+machdep_init_trampoline(void *dummy __unused)
{
struct region_descriptor r_gdt, r_idt;
struct i386tss *tss;
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index b44f5e08bbcf..1cf0867d57c3 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -720,7 +720,7 @@ __CONCAT(PMTYPE, bootstrap)(vm_paddr_t firstaddr)
}
static void
-pmap_init_reserved_pages(void)
+pmap_init_reserved_pages(void *dummy __unused)
{
struct pcpu *pc;
vm_offset_t pages;
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 1bc2491a1a12..a1fabbc86f27 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -92,7 +92,7 @@
#define ELF_ABI_ID __CONCAT(elf, __ELF_WORD_SIZE)
static int __elfN(check_header)(const Elf_Ehdr *hdr);
-static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
+static const Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
const char *interp, int32_t *osrel, uint32_t *fctl0);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
u_long *entry);
@@ -104,7 +104,7 @@ static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note,
int32_t *osrel);
static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
static bool __elfN(check_note)(struct image_params *imgp,
- Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0,
+ const Elf_Brandnote *checknote, int32_t *osrel, bool *has_fctl0,
uint32_t *fctl0);
static vm_prot_t __elfN(trans_prot)(Elf_Word);
static Elf_Word __elfN(untrans_prot)(vm_prot_t);
@@ -227,7 +227,7 @@ SYSCTL_BOOL(ELF_NODE_OID, OID_AUTO, allow_wx,
CTLFLAG_RWTUN, &__elfN(allow_wx), 0,
"Allow pages to be mapped simultaneously writable and executable");
-static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
+static const Elf_Brandinfo *elf_brand_list[MAX_BRANDS];
#define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a))
@@ -286,7 +286,7 @@ kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
}
int
-__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
+__elfN(insert_brand_entry)(const Elf_Brandinfo *entry)
{
int i;
@@ -305,7 +305,7 @@ __elfN(insert_brand_entry)(Elf_Brandinfo *entry)
}
int
-__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
+__elfN(remove_brand_entry)(const Elf_Brandinfo *entry)
{
int i;
@@ -321,7 +321,7 @@ __elfN(remove_brand_entry)(Elf_Brandinfo *entry)
}
bool
-__elfN(brand_inuse)(Elf_Brandinfo *entry)
+__elfN(brand_inuse)(const Elf_Brandinfo *entry)
{
struct proc *p;
bool rval = false;
@@ -338,12 +338,12 @@ __elfN(brand_inuse)(Elf_Brandinfo *entry)
return (rval);
}
-static Elf_Brandinfo *
+static const Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
int32_t *osrel, uint32_t *fctl0)
{
const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
- Elf_Brandinfo *bi, *bi_m;
+ const Elf_Brandinfo *bi, *bi_m;
bool ret, has_fctl0;
int i, interp_name_len;
@@ -492,7 +492,7 @@ __elfN(phdr_in_zero_page)(const Elf_Ehdr *hdr)
static int
__elfN(check_header)(const Elf_Ehdr *hdr)
{
- Elf_Brandinfo *bi;
+ const Elf_Brandinfo *bi;
int i;
if (!IS_ELF(*hdr) ||
@@ -1109,7 +1109,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
struct vmspace *vmspace;
vm_map_t map;
char *interp;
- Elf_Brandinfo *brand_info;
+ const Elf_Brandinfo *brand_info;
struct sysentvec *sv;
u_long addr, baddr, entry, proghdr;
u_long maxalign, maxsalign, mapsz, maxv, maxv1, anon_loc;
@@ -1925,7 +1925,7 @@ __elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs,
Elf_Phdr *phdr;
Elf_Shdr *shdr;
struct phdr_closure phc;
- Elf_Brandinfo *bi;
+ const Elf_Brandinfo *bi;
ehdr = (Elf_Ehdr *)hdr;
bi = td->td_proc->p_elf_brandinfo;
@@ -2831,7 +2831,7 @@ __elfN(parse_notes)(const struct image_params *imgp, const Elf_Note *checknote,
}
if ((const char *)note_end - (const char *)note <
sizeof(Elf_Note)) {
- uprintf("ELF note to short\n");
+ uprintf("ELF note too short\n");
goto retf;
}
if (note->n_namesz != checknote->n_namesz ||
@@ -2839,9 +2839,9 @@ __elfN(parse_notes)(const struct image_params *imgp, const Elf_Note *checknote,
note->n_type != checknote->n_type)
goto nextnote;
note_name = (const char *)(note + 1);
- if (note_name + checknote->n_namesz >=
- (const char *)note_end || strncmp(note_vendor,
- note_name, checknote->n_namesz) != 0)
+ if (note_name + roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
+ note->n_descsz >= (const char *)note_end ||
+ strncmp(note_vendor, note_name, checknote->n_namesz) != 0)
goto nextnote;
if (cb(note, cb_arg, &res))
@@ -2861,7 +2861,7 @@ ret:
}
struct brandnote_cb_arg {
- Elf_Brandnote *brandnote;
+ const Elf_Brandnote *brandnote;
int32_t *osrel;
};
@@ -2883,7 +2883,7 @@ brandnote_cb(const Elf_Note *note, void *arg0, bool *res)
return (true);
}
-static Elf_Note fctl_note = {
+static const Elf_Note fctl_note = {
.n_namesz = sizeof(FREEBSD_ABI_VENDOR),
.n_descsz = sizeof(uint32_t),
.n_type = NT_FREEBSD_FEATURE_CTL,
@@ -2918,7 +2918,7 @@ note_fctl_cb(const Elf_Note *note, void *arg0, bool *res)
* as for headers.
*/
static bool
-__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote,
+__elfN(check_note)(struct image_params *imgp, const Elf_Brandnote *brandnote,
int32_t *osrel, bool *has_fctl0, uint32_t *fctl0)
{
const Elf_Phdr *phdr;
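The tightened check accounts for the full padded note record; the layout it assumes, reduced to a standalone helper (illustration, not the imgact_elf.c code):

    #include <stddef.h>
    #include <stdint.h>

    /*
     * An ELF note is a 12-byte header followed by a name and a descriptor,
     * each padded to a 4-byte (ELF_NOTE_ROUNDSIZE) boundary.
     */
    typedef struct { uint32_t n_namesz, n_descsz, n_type; } note_hdr;
    #define NOTE_ROUND(x)   (((x) + 3) & ~(uint32_t)3)

    static const note_hdr *
    next_note(const note_hdr *n)
    {
            return ((const note_hdr *)((const char *)(n + 1) +
                NOTE_ROUND(n->n_namesz) + NOTE_ROUND(n->n_descsz)));
    }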
diff --git a/sys/kern/kern_boottrace.c b/sys/kern/kern_boottrace.c
index 1fa87955a299..c83255bc74ee 100644
--- a/sys/kern/kern_boottrace.c
+++ b/sys/kern/kern_boottrace.c
@@ -579,7 +579,7 @@ sysctl_boottrace_reset(SYSCTL_HANDLER_ARGS)
}
static void
-boottrace_init(void)
+boottrace_init(void *dummy __unused)
{
if (!boottrace_enabled)
diff --git a/sys/kern/kern_devctl.c b/sys/kern/kern_devctl.c
index 7a2818c29b1a..a1696225df32 100644
--- a/sys/kern/kern_devctl.c
+++ b/sys/kern/kern_devctl.c
@@ -140,7 +140,7 @@ static struct devctlbridge {
} devctl_notify_hook = { .send_f = NULL };
static void
-devctl_init(void)
+devctl_init(void *dummy __unused)
{
int reserve;
uma_zone_t z;
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 23d8dc9cf54a..a6333d8011b1 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -156,7 +156,7 @@ static void knote_drop(struct knote *kn, struct thread *td);
static void knote_drop_detached(struct knote *kn, struct thread *td);
static void knote_enqueue(struct knote *kn);
static void knote_dequeue(struct knote *kn);
-static void knote_init(void);
+static void knote_init(void *);
static struct knote *knote_alloc(int mflag);
static void knote_free(struct knote *kn);
@@ -2887,7 +2887,7 @@ knote_dequeue(struct knote *kn)
}
static void
-knote_init(void)
+knote_init(void *dummy __unused)
{
knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 0fc2d0e7f1bc..2bdd6faa025a 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -418,7 +418,7 @@ do_execve(struct thread *td, struct image_args *args, struct mac *mac_p,
#endif
int error, i, orig_osrel;
uint32_t orig_fctl0;
- Elf_Brandinfo *orig_brandinfo;
+ const Elf_Brandinfo *orig_brandinfo;
size_t freepath_size;
static const char fexecv_proc_title[] = "(fexecv)";
@@ -1314,7 +1314,7 @@ exec_map_stack(struct image_params *imgp)
MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
} else {
sharedpage_addr = sv->sv_shared_page_base;
- vm_map_fixed(map, obj, 0,
+ error = vm_map_fixed(map, obj, 0,
sharedpage_addr, sv->sv_shared_page_len,
VM_PROT_READ | VM_PROT_EXECUTE,
VM_PROT_READ | VM_PROT_EXECUTE,
diff --git a/sys/kern/kern_jailmeta.c b/sys/kern/kern_jailmeta.c
index 4e37eccad03a..91bb7155820d 100644
--- a/sys/kern/kern_jailmeta.c
+++ b/sys/kern/kern_jailmeta.c
@@ -599,22 +599,18 @@ SYSCTL_PROC(_security_jail, OID_AUTO, env,
/* Setup and tear down. */
-static int
+static void
jm_sysinit(void *arg __unused)
{
meta.osd_slot = osd_jail_register(jm_osd_destructor, meta.methods);
env.osd_slot = osd_jail_register(jm_osd_destructor, env.methods);
-
- return (0);
}
-static int
+static void
jm_sysuninit(void *arg __unused)
{
osd_jail_deregister(meta.osd_slot);
osd_jail_deregister(env.osd_slot);
-
- return (0);
}
SYSINIT(jailmeta, SI_SUB_DRIVERS, SI_ORDER_ANY, jm_sysinit, NULL);
diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c
index d566bc01bc5e..e2f63cbc0c5a 100644
--- a/sys/kern/kern_linker.c
+++ b/sys/kern/kern_linker.c
@@ -435,7 +435,7 @@ linker_file_register_modules(linker_file_t lf)
}
static void
-linker_init_kernel_modules(void)
+linker_init_kernel_modules(void *dummy __unused)
{
sx_xlock(&kld_sx);
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 653ce1ee556b..e919b15543b2 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -303,7 +303,7 @@ sysctl_vm_malloc_zone_sizes(SYSCTL_HANDLER_ARGS)
*/
#if MALLOC_DEBUG_MAXZONES > 1
static void
-tunable_set_numzones(void)
+tunable_set_numzones(void *dummy __unused)
{
TUNABLE_INT_FETCH("debug.malloc.numzones",
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
index 7351e9cb6313..2aab151aba08 100644
--- a/sys/kern/kern_racct.c
+++ b/sys/kern/kern_racct.c
@@ -1312,7 +1312,7 @@ static struct kproc_desc racctd_kp = {
};
static void
-racctd_init(void)
+racctd_init(void *dummy __unused)
{
if (!racct_enable)
return;
@@ -1322,7 +1322,7 @@ racctd_init(void)
SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, racctd_init, NULL);
static void
-racct_init(void)
+racct_init(void *dummy __unused)
{
if (!racct_enable)
return;
diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c
index 3854ffbeec29..cd66bff62608 100644
--- a/sys/kern/kern_rangelock.c
+++ b/sys/kern/kern_rangelock.c
@@ -300,7 +300,7 @@ static void rangelock_free_free(struct rl_q_entry *free);
static void rangelock_noncheating_destroy(struct rangelock *lock);
static void
-rangelock_sys_init(void)
+rangelock_sys_init(void *dummy __unused)
{
rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c
index 4232c71f86fb..682ba86d23ff 100644
--- a/sys/kern/kern_rctl.c
+++ b/sys/kern/kern_rctl.c
@@ -209,7 +209,7 @@ static struct dict actionnames[] = {
{ "throttle", RCTL_ACTION_THROTTLE },
{ NULL, -1 }};
-static void rctl_init(void);
+static void rctl_init(void *);
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
static uma_zone_t rctl_rule_zone;
@@ -2175,7 +2175,7 @@ rctl_racct_release(struct racct *racct)
}
static void
-rctl_init(void)
+rctl_init(void *dummy __unused)
{
if (!racct_enable)
diff --git a/sys/kern/kern_sharedpage.c b/sys/kern/kern_sharedpage.c
index 5b8398caaca9..f48d0e3d616b 100644
--- a/sys/kern/kern_sharedpage.c
+++ b/sys/kern/kern_sharedpage.c
@@ -130,8 +130,7 @@ shared_page_init(void *dummy __unused)
shared_page_mapping = (char *)addr;
}
-SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)shared_page_init,
- NULL);
+SYSINIT(shp, SI_SUB_EXEC, SI_ORDER_FIRST, shared_page_init, NULL);
/*
* Push the timehands update to the shared page.
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 8efc0886988b..21f765b17f62 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -113,7 +113,7 @@ static int filt_sigattach(struct knote *kn);
static void filt_sigdetach(struct knote *kn);
static int filt_signal(struct knote *kn, long hint);
static struct thread *sigtd(struct proc *p, int sig, bool fast_sigblock);
-static void sigqueue_start(void);
+static void sigqueue_start(void *);
static void sigfastblock_setpend(struct thread *td, bool resched);
static void sig_handle_first_stop(struct thread *td, struct proc *p,
int sig);
@@ -344,7 +344,7 @@ ast_sigsuspend(struct thread *td, int tda __unused)
}
static void
-sigqueue_start(void)
+sigqueue_start(void *dummy __unused)
{
ksiginfo_zone = uma_zcreate("ksiginfo", sizeof(ksiginfo_t),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index 2a6f0989f6aa..5b7485c25cd7 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -90,7 +90,7 @@ static int user_clock_nanosleep(struct thread *td, clockid_t clock_id,
int flags, const struct timespec *ua_rqtp,
struct timespec *ua_rmtp);
-static void itimer_start(void);
+static void itimer_start(void *);
static int itimer_init(void *, int, int);
static void itimer_fini(void *, int);
static void itimer_enter(struct itimer *);
@@ -1170,7 +1170,7 @@ eventratecheck(struct timeval *lasttime, int *cureps, int maxeps)
}
static void
-itimer_start(void)
+itimer_start(void *dummy __unused)
{
static const struct kclock rt_clock = {
.timer_create = realtimer_create,
diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c
index 5c14e15830f4..c9a387a5e87b 100644
--- a/sys/kern/subr_pcpu.c
+++ b/sys/kern/subr_pcpu.c
@@ -140,7 +140,7 @@ uma_zone_t pcpu_zone_32;
uma_zone_t pcpu_zone_64;
static void
-pcpu_zones_startup(void)
+pcpu_zones_startup(void *dummy __unused)
{
pcpu_zone_4 = uma_zcreate("pcpu-4", 4,
diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c
index c221106ae067..bc0725230cca 100644
--- a/sys/kern/sys_socket.c
+++ b/sys/kern/sys_socket.c
@@ -586,7 +586,7 @@ soaio_enqueue(struct task *task)
}
static void
-soaio_init(void)
+soaio_init(void *dummy __unused)
{
soaio_lifetime = AIOD_LIFETIME_DEFAULT;
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 340d84666459..90489e99491a 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1069,6 +1069,21 @@ uipc_stream_sbspace(struct sockbuf *sb)
return (min(space, mbspace));
}
+/*
+ * UNIX version of the generic sbwait() for writes.  We wait on the peer's
+ * receive buffer, using our own timeout.
+ */
+static int
+uipc_stream_sbwait(struct socket *so, sbintime_t timeo)
+{
+ struct sockbuf *sb = &so->so_rcv;
+
+ SOCK_RECVBUF_LOCK_ASSERT(so);
+ sb->sb_flags |= SB_WAIT;
+ return (msleep_sbt(&sb->sb_acc, SOCK_RECVBUF_MTX(so), PSOCK | PCATCH,
+ "sbwait", timeo, 0, 0));
+}
+
static int
uipc_sosend_stream_or_seqpacket(struct socket *so, struct sockaddr *addr,
struct uio *uio0, struct mbuf *m, struct mbuf *c, int flags,
@@ -1203,7 +1218,8 @@ restart:
error = EWOULDBLOCK;
goto out4;
}
- if ((error = sbwait(so2, SO_RCV)) != 0) {
+ if ((error = uipc_stream_sbwait(so2,
+ so->so_snd.sb_timeo)) != 0) {
SOCK_RECVBUF_UNLOCK(so2);
goto out4;
} else
@@ -1543,15 +1559,19 @@ restart:
mc_init_m(&cmc, control);
SOCK_RECVBUF_LOCK(so);
- MPASS(!(sb->sb_state & SBS_CANTRCVMORE));
-
- if (__predict_false(cmc.mc_len + sb->sb_ccc +
- sb->sb_ctl > sb->sb_hiwat)) {
+ if (__predict_false(
+ (sb->sb_state & SBS_CANTRCVMORE) ||
+ cmc.mc_len + sb->sb_ccc + sb->sb_ctl >
+ sb->sb_hiwat)) {
/*
- * Too bad, while unp_externalize() was
- * failing, the other side had filled
- * the buffer and we can't prepend data
- * back. Losing data!
+ * While the lock was dropped and we
+ * were failing in unp_externalize(),
+					 * the peer could have a) disconnected,
+ * b) filled the buffer so that we
+ * can't prepend data back.
+ * These are two edge conditions that
+ * we just can't handle, so lose the
+ * data and return the error.
*/
SOCK_RECVBUF_UNLOCK(so);
SOCK_IO_RECV_UNLOCK(so);
@@ -2397,7 +2417,7 @@ uipc_sendfile_wait(struct socket *so, off_t need, int *space)
}
if (!sockref)
soref(so2);
- error = sbwait(so2, SO_RCV);
+ error = uipc_stream_sbwait(so2, so->so_snd.sb_timeo);
if (error == 0 &&
__predict_false(sb->sb_state & SBS_CANTRCVMORE))
error = EPIPE;
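uipc_stream_sbwait() sleeps on sb_acc with SB_WAIT set, the same channel the generic socket wakeup path signals; the other half of that handshake, reduced (kernel-context sketch, not the actual sowakeup() code):

    /*
     * A wakeup path checks SB_WAIT and wakes sleepers on sb_acc, which is
     * what uipc_stream_sbwait() msleep()s on with the sender's timeout.
     */
    static void
    sb_wakeup_sketch(struct sockbuf *sb)
    {
            if (sb->sb_flags & SB_WAIT) {
                    sb->sb_flags &= ~SB_WAIT;
                    wakeup(&sb->sb_acc);
            }
    }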
diff --git a/sys/libkern/arc4random.c b/sys/libkern/arc4random.c
index 016822e9f03c..6fca7c3c4e9d 100644
--- a/sys/libkern/arc4random.c
+++ b/sys/libkern/arc4random.c
@@ -156,7 +156,7 @@ chacha20_randomstir(struct chacha20_s *chacha20)
* Initialize the contexts.
*/
static void
-chacha20_init(void)
+chacha20_init(void *dummy __unused)
{
struct chacha20_s *chacha20;
@@ -176,7 +176,7 @@ SYSINIT(chacha20, SI_SUB_LOCK, SI_ORDER_ANY, chacha20_init, NULL);
static void
-chacha20_uninit(void)
+chacha20_uninit(void *dummy __unused)
{
struct chacha20_s *chacha20;
diff --git a/sys/libkern/x86/crc32_sse42.c b/sys/libkern/x86/crc32_sse42.c
index b79c7afbeeb1..94ffdc178910 100644
--- a/sys/libkern/x86/crc32_sse42.c
+++ b/sys/libkern/x86/crc32_sse42.c
@@ -199,8 +199,10 @@ crc32c_shift(uint32_t zeros[][256], uint32_t crc)
static void
#ifndef _KERNEL
__attribute__((__constructor__))
-#endif
crc32c_init_hw(void)
+#else
+crc32c_init_hw(void *dummy __unused)
+#endif
{
crc32c_zeros(crc32c_long, LONG);
crc32c_zeros(crc32c_2long, 2 * LONG);
diff --git a/sys/modules/ktest/Makefile b/sys/modules/ktest/Makefile
index a3052efa9ed9..d5f15576f38b 100644
--- a/sys/modules/ktest/Makefile
+++ b/sys/modules/ktest/Makefile
@@ -1,5 +1,6 @@
SUBDIR= ktest \
ktest_example \
- ktest_netlink_message_writer
+ ktest_netlink_message_writer \
+ ktest_tcphpts
.include <bsd.subdir.mk>
diff --git a/sys/modules/ktest/ktest_tcphpts/Makefile b/sys/modules/ktest/ktest_tcphpts/Makefile
new file mode 100644
index 000000000000..b642c0cb4209
--- /dev/null
+++ b/sys/modules/ktest/ktest_tcphpts/Makefile
@@ -0,0 +1,13 @@
+PACKAGE= tests
+WARNS?= 6
+
+SYSDIR?=${SRCTOP}/sys
+.include "${SYSDIR}/conf/kern.opts.mk"
+
+.PATH: ${SYSDIR}/netinet
+
+KMOD= ktest_tcphpts
+SRCS= tcp_hpts_test.c
+
+.include <bsd.kmod.mk>
+
diff --git a/sys/net/route.c b/sys/net/route.c
index 7a50bcc43e06..d2c9f3e39c17 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -89,7 +89,7 @@ static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *,
* SI_ORDER_MIDDLE.
*/
static void
-route_init(void)
+route_init(void *dummy __unused)
{
nhops_init();
diff --git a/sys/net/route/route_tables.c b/sys/net/route/route_tables.c
index 176ca43fa1c5..3b7bb1385d0e 100644
--- a/sys/net/route/route_tables.c
+++ b/sys/net/route/route_tables.c
@@ -186,7 +186,7 @@ rtables_prison_destructor(void *data)
}
static void
-rtables_init(void)
+rtables_init(void *dummy __unused)
{
osd_method_t methods[PR_MAXMETHOD] = {
[PR_METHOD_ATTACH] = rtables_check_proc_fib,
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index f0dcc973ca7c..be858428bb3e 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -309,7 +309,7 @@ rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc)
}
static void
-rtsock_init(void)
+rtsock_init(void *dummy __unused)
{
rtsbridge_orig_p = rtsock_callback_p;
rtsock_callback_p = &rtsbridge;
diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c
index 3af56a228295..a8a767785fce 100644
--- a/sys/net80211/ieee80211_ht.c
+++ b/sys/net80211/ieee80211_ht.c
@@ -167,7 +167,7 @@ static ieee80211_send_action_func ht_send_action_ba_delba;
static ieee80211_send_action_func ht_send_action_ht_txchwidth;
static void
-ieee80211_ht_init(void)
+ieee80211_ht_init(void *dummy __unused)
{
/*
* Setup HT parameters that depends on the clock frequency.
diff --git a/sys/net80211/ieee80211_hwmp.c b/sys/net80211/ieee80211_hwmp.c
index b69210768c54..084e67da13db 100644
--- a/sys/net80211/ieee80211_hwmp.c
+++ b/sys/net80211/ieee80211_hwmp.c
@@ -212,7 +212,7 @@ SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact,
"mesh route inactivity timeout (ms)");
static void
-ieee80211_hwmp_init(void)
+ieee80211_hwmp_init(void *dummy __unused)
{
/* Default values as per amendment */
ieee80211_hwmp_pathtimeout = msecs_to_ticks(5*1000);
diff --git a/sys/net80211/ieee80211_mesh.c b/sys/net80211/ieee80211_mesh.c
index 3f0410a69e3c..7f2e8bdcb963 100644
--- a/sys/net80211/ieee80211_mesh.c
+++ b/sys/net80211/ieee80211_mesh.c
@@ -548,7 +548,7 @@ mesh_gatemode_cb(void *arg)
}
static void
-ieee80211_mesh_init(void)
+ieee80211_mesh_init(void *dummy __unused)
{
memset(mesh_proto_paths, 0, sizeof(mesh_proto_paths));
diff --git a/sys/net80211/ieee80211_phy.c b/sys/net80211/ieee80211_phy.c
index 7f53c717152b..b4d9b16907d2 100644
--- a/sys/net80211/ieee80211_phy.c
+++ b/sys/net80211/ieee80211_phy.c
@@ -348,7 +348,7 @@ ieee80211_setup_ratetable(struct ieee80211_rate_table *rt)
/* Setup all rate tables */
static void
-ieee80211_phy_init(void)
+ieee80211_phy_init(void *dummy __unused)
{
static struct ieee80211_rate_table * const ratetables[] = {
&ieee80211_half_table,
diff --git a/sys/net80211/ieee80211_proto.c b/sys/net80211/ieee80211_proto.c
index 0c161d98a55a..4918bf7d025f 100644
--- a/sys/net80211/ieee80211_proto.c
+++ b/sys/net80211/ieee80211_proto.c
@@ -459,7 +459,7 @@ static const struct ieee80211_authenticator auth_internal = {
* Setup internal authenticators once; they are never unregistered.
*/
static void
-ieee80211_auth_setup(void)
+ieee80211_auth_setup(void *dummy __unused)
{
ieee80211_authenticator_register(IEEE80211_AUTH_OPEN, &auth_internal);
ieee80211_authenticator_register(IEEE80211_AUTH_SHARED, &auth_internal);
diff --git a/sys/net80211/ieee80211_vht.c b/sys/net80211/ieee80211_vht.c
index 10a5fc7f08ab..095c4108c768 100644
--- a/sys/net80211/ieee80211_vht.c
+++ b/sys/net80211/ieee80211_vht.c
@@ -102,7 +102,7 @@ vht_send_action_placeholder(struct ieee80211_node *ni,
}
static void
-ieee80211_vht_init(void)
+ieee80211_vht_init(void *dummy __unused)
{
ieee80211_recv_action_register(IEEE80211_ACTION_CAT_VHT,
diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c
index c20a20cd983d..bc06616dbf93 100644
--- a/sys/netinet/cc/cc.c
+++ b/sys/netinet/cc/cc.c
@@ -271,7 +271,7 @@ cc_check_default(struct cc_algo *remove_cc)
* Initialise CC subsystem on system boot.
*/
static void
-cc_init(void)
+cc_init(void *dummy __unused)
{
CC_LIST_LOCK_INIT();
STAILQ_INIT(&cc_list);
diff --git a/sys/netinet/in_fib_algo.c b/sys/netinet/in_fib_algo.c
index 123dacb409e7..95621c300064 100644
--- a/sys/netinet/in_fib_algo.c
+++ b/sys/netinet/in_fib_algo.c
@@ -767,7 +767,7 @@ struct fib_lookup_module flm_radix4 = {
};
static void
-fib4_algo_init(void)
+fib4_algo_init(void *dummy __unused)
{
fib_module_register(&flm_bsearch4);
diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c
index f5b20c49ffd2..ba112afbf002 100644
--- a/sys/netinet/in_mcast.c
+++ b/sys/netinet/in_mcast.c
@@ -159,9 +159,6 @@ static struct ip_moptions *
static int inp_get_source_filters(struct inpcb *, struct sockopt *);
static int inp_join_group(struct inpcb *, struct sockopt *);
static int inp_leave_group(struct inpcb *, struct sockopt *);
-static struct ifnet *
- inp_lookup_mcast_ifp(const struct inpcb *,
- const struct sockaddr_in *, const struct in_addr);
static int inp_block_unblock_source(struct inpcb *, struct sockopt *);
static int inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int inp_set_source_filters(struct inpcb *, struct sockopt *);
@@ -1832,69 +1829,55 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
}
/*
- * Look up the ifnet to use for a multicast group membership,
- * given the IPv4 address of an interface, and the IPv4 group address.
- *
- * This routine exists to support legacy multicast applications
- * which do not understand that multicast memberships are scoped to
- * specific physical links in the networking stack, or which need
- * to join link-scope groups before IPv4 addresses are configured.
- *
- * Use this socket's current FIB number for any required FIB lookup.
- * If ina is INADDR_ANY, look up the group address in the unicast FIB,
- * and use its ifp; usually, this points to the default next-hop.
- *
- * If the FIB lookup fails, attempt to use the first non-loopback
- * interface with multicast capability in the system as a
- * last resort. The legacy IPv4 ASM API requires that we do
- * this in order to allow groups to be joined when the routing
- * table has not yet been populated during boot.
- *
- * Returns NULL if no ifp could be found, otherwise return referenced ifp.
+ * Look up the ifnet to use for a multicast group membership, requested via
+ * legacy IP_ADD_MEMBERSHIP or via the more modern MCAST_JOIN_GROUP.
*
- * FUTURE: Implement IPv4 source-address selection.
+ * If the interface index was specified explicitly, just use it.  If the
+ * address was specified (legacy), try to find a matching interface.  Else
+ * (index == 0 && no address) do a route lookup.  If that fails for a modern
+ * MCAST_JOIN_GROUP, return failure; for legacy IP_ADD_MEMBERSHIP, fall back
+ * to the first multicast-capable interface.
*/
static struct ifnet *
-inp_lookup_mcast_ifp(const struct inpcb *inp,
- const struct sockaddr_in *gsin, const struct in_addr ina)
+inp_lookup_mcast_ifp(const struct inpcb *inp, const struct in_addr maddr,
+    const struct in_addr *ina, const u_int index)
{
struct ifnet *ifp;
struct nhop_object *nh;
NET_EPOCH_ASSERT();
- KASSERT(inp != NULL, ("%s: inp must not be NULL", __func__));
- KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
- KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
- ("%s: not multicast", __func__));
- ifp = NULL;
- if (!in_nullhost(ina)) {
- INADDR_TO_IFP(ina, ifp);
+ if (index != 0)
+ return (ifnet_byindex_ref(index));
+
+ if (ina != NULL && !in_nullhost(*ina)) {
+ INADDR_TO_IFP(*ina, ifp);
if (ifp != NULL)
if_ref(ifp);
- } else {
- nh = fib4_lookup(inp->inp_inc.inc_fibnum, gsin->sin_addr, 0, NHR_NONE, 0);
- if (nh != NULL) {
- ifp = nh->nh_ifp;
- if_ref(ifp);
- } else {
- struct in_ifaddr *ia;
- struct ifnet *mifp;
-
- mifp = NULL;
- CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
- mifp = ia->ia_ifp;
- if (!(mifp->if_flags & IFF_LOOPBACK) &&
- (mifp->if_flags & IFF_MULTICAST)) {
- ifp = mifp;
- if_ref(ifp);
- break;
- }
+ return (ifp);
+ }
+
+ nh = fib4_lookup(inp->inp_inc.inc_fibnum, maddr, 0, NHR_NONE, 0);
+ if (nh != NULL) {
+ ifp = nh->nh_ifp;
+ if_ref(ifp);
+ return (ifp);
+ }
+
+ if (ina != NULL) {
+ struct in_ifaddr *ia;
+
+ CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
+ if (!(ia->ia_ifp->if_flags & IFF_LOOPBACK) &&
+ (ia->ia_ifp->if_flags & IFF_MULTICAST)) {
+ ifp = ia->ia_ifp;
+ if_ref(ifp);
+ return (ifp);
}
}
}
- return (ifp);
+ return (NULL);
}
/*
@@ -1926,13 +1909,13 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
switch (sopt->sopt_name) {
case IP_ADD_MEMBERSHIP: {
struct ip_mreqn mreqn;
+ bool mreq;
- if (sopt->sopt_valsize == sizeof(struct ip_mreqn))
- error = sooptcopyin(sopt, &mreqn,
- sizeof(struct ip_mreqn), sizeof(struct ip_mreqn));
- else
- error = sooptcopyin(sopt, &mreqn,
- sizeof(struct ip_mreq), sizeof(struct ip_mreq));
+ mreq = (sopt->sopt_valsize != sizeof(struct ip_mreqn));
+
+ error = sooptcopyin(sopt, &mreqn,
+ mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn),
+ mreq ? sizeof(struct ip_mreq) : sizeof(struct ip_mreqn));
if (error)
return (error);
@@ -1943,12 +1926,9 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
return (EINVAL);
NET_EPOCH_ENTER(et);
- if (sopt->sopt_valsize == sizeof(struct ip_mreqn) &&
- mreqn.imr_ifindex != 0)
- ifp = ifnet_byindex_ref(mreqn.imr_ifindex);
- else
- ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
- mreqn.imr_address);
+ ifp = inp_lookup_mcast_ifp(inp, mreqn.imr_multiaddr,
+ mreq ? &mreqn.imr_address : NULL,
+ mreq ? 0 : mreqn.imr_ifindex);
NET_EPOCH_EXIT(et);
break;
}
@@ -1971,8 +1951,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
ssa->sin.sin_addr = mreqs.imr_sourceaddr;
NET_EPOCH_ENTER(et);
- ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
- mreqs.imr_interface);
+ ifp = inp_lookup_mcast_ifp(inp, mreqs.imr_multiaddr,
+ &mreqs.imr_interface, 0);
NET_EPOCH_EXIT(et);
CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
__func__, ntohl(mreqs.imr_interface.s_addr), ifp);
@@ -2013,7 +1993,8 @@ inp_join_group(struct inpcb *inp, struct sockopt *sopt)
return (EINVAL);
NET_EPOCH_ENTER(et);
- ifp = ifnet_byindex_ref(gsr.gsr_interface);
+ ifp = inp_lookup_mcast_ifp(inp, gsa->sin.sin_addr, NULL,
+ gsr.gsr_interface);
NET_EPOCH_EXIT(et);
if (ifp == NULL)
return (EADDRNOTAVAIL);
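The legacy joins that now funnel through the same helper look like this from userland (sketch with trimmed error handling; standard socket API):

    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <arpa/inet.h>
    #include <string.h>

    /*
     * struct ip_mreqn selects the interface by index, the older struct
     * ip_mreq by local address -- the two cases inp_lookup_mcast_ifp()
     * distinguishes via its ina and index arguments.
     */
    int
    join_legacy(int s, const char *grp, int ifindex)
    {
            struct ip_mreqn mr;

            memset(&mr, 0, sizeof(mr));
            inet_pton(AF_INET, grp, &mr.imr_multiaddr);
            mr.imr_ifindex = ifindex;       /* 0 falls back to route lookup */
            return (setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP,
                &mr, sizeof(mr)));
    }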
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index 63bbe4bba11b..c54459bb5f01 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -39,15 +39,14 @@
 * First, and probably the main way it is used by Rack and BBR, it can
* be used to call tcp_output() of a transport stack at some time in the future.
* The normal way this is done is that tcp_output() of the stack schedules
- * itself to be called again by calling tcp_hpts_insert(tcpcb, slot). The
- * slot is the time from now that the stack wants to be called but it
- * must be converted to tcp_hpts's notion of slot. This is done with
- * one of the macros HPTS_MS_TO_SLOTS or HPTS_USEC_TO_SLOTS. So a typical
+ * itself to be called again by calling tcp_hpts_insert(tcpcb, usecs). The
+ * usecs argument is the time from now, expressed directly in microseconds,
+ * at which the stack wants to be called. So a typical
* call from the tcp_output() routine might look like:
*
- * tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(550));
+ * tcp_hpts_insert(tp, 550, NULL);
*
- * The above would schedule tcp_output() to be called in 550 useconds.
+ * The above would schedule tcp_output() to be called in 550 microseconds.
* Note that if using this mechanism the stack will want to add near
* its top a check to prevent unwanted calls (from user land or the
* arrival of incoming ack's). So it would add something like:
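A sketch of such a guard, assuming the TF2_HPTS_CALLS flag that HPTS sets before invoking tcp_output() (kernel-context illustration; exact placement varies by stack):

    /* Return 1 only when the call really came from the pacer. */
    static int
    stack_output_is_hpts_call(struct tcpcb *tp)
    {
            if ((tp->t_flags2 & TF2_HPTS_CALLS) == 0)
                    return (0);             /* userland or inbound ACK */
            tp->t_flags2 &= ~TF2_HPTS_CALLS;
            return (1);
    }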
@@ -149,27 +148,44 @@
#include <netinet/tcpip.h>
#include <netinet/cc/cc.h>
#include <netinet/tcp_hpts.h>
+#include <netinet/tcp_hpts_internal.h>
#include <netinet/tcp_log_buf.h>
#ifdef tcp_offload
#include <netinet/tcp_offload.h>
#endif
-/*
- * The hpts uses a 102400 wheel. The wheel
- * defines the time in 10 usec increments (102400 x 10).
- * This gives a range of 10usec - 1024ms to place
- * an entry within. If the user requests more than
- * 1.024 second, a remaineder is attached and the hpts
- * when seeing the remainder will re-insert the
- * inpcb forward in time from where it is until
- * the remainder is zero.
- */
+/* Global instance for TCP HPTS */
+struct tcp_hptsi *tcp_hptsi_pace;
+
+/* Default function table for production use. */
+const struct tcp_hptsi_funcs tcp_hptsi_default_funcs = {
+ .microuptime = microuptime,
+ .swi_add = swi_add,
+ .swi_remove = swi_remove,
+ .swi_sched = swi_sched,
+ .intr_event_bind = intr_event_bind,
+ .intr_event_bind_ithread_cpuset = intr_event_bind_ithread_cpuset,
+ .callout_init = callout_init,
+ .callout_reset_sbt_on = callout_reset_sbt_on,
+ ._callout_stop_safe = _callout_stop_safe,
+};
-#define NUM_OF_HPTSI_SLOTS 102400
+#ifdef TCP_HPTS_KTEST
+#define microuptime pace->funcs->microuptime
+#define swi_add pace->funcs->swi_add
+#define swi_remove pace->funcs->swi_remove
+#define swi_sched pace->funcs->swi_sched
+#define intr_event_bind pace->funcs->intr_event_bind
+#define intr_event_bind_ithread_cpuset pace->funcs->intr_event_bind_ithread_cpuset
+#define callout_init pace->funcs->callout_init
+#define callout_reset_sbt_on pace->funcs->callout_reset_sbt_on
+#define _callout_stop_safe pace->funcs->_callout_stop_safe
+#endif
-/* The number of connections after which the dynamic sleep logic kicks in. */
-#define DEFAULT_CONNECTION_THRESHOLD 100
+static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");
+
+static void tcp_hpts_thread(void *ctx);
/*
* When using the hpts, a TCP stack must make sure
@@ -204,87 +220,22 @@
*
* When we are in the "new" mode i.e. conn_cnt > conn_cnt_thresh
* then we do a dynamic adjustment on the time we sleep.
- * Our threshold is if the lateness of the first client served (in ticks) is
+ * Our threshold is if the lateness of the first client served (in slots) is
 * greater than or equal to slots_indicate_more_sleep (10ms
- * or 10000 ticks). If we were that late, the actual sleep time
- * is adjusted down by 50%. If the ticks_ran is less than
- * slots_indicate_more_sleep (100 ticks or 1000usecs).
+ * or 10000 slots). If we were that late, the actual sleep time
+ * is adjusted down by 50%. If the slots_ran is less than
+ * slots_indicate_more_sleep (100 slots or 1000usecs).
*
*/
-/* Each hpts has its own p_mtx which is used for locking */
-#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED)
-#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx)
-#define HPTS_TRYLOCK(hpts) mtx_trylock(&(hpts)->p_mtx)
-#define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx)
-struct tcp_hpts_entry {
- /* Cache line 0x00 */
- struct mtx p_mtx; /* Mutex for hpts */
- struct timeval p_mysleep; /* Our min sleep time */
- uint64_t syscall_cnt;
- uint64_t sleeping; /* What the actual sleep was (if sleeping) */
- uint16_t p_hpts_active; /* Flag that says hpts is awake */
- uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */
- uint32_t p_curtick; /* Tick in 10 us the hpts is going to */
- uint32_t p_runningslot; /* Current tick we are at if we are running */
- uint32_t p_prev_slot; /* Previous slot we were on */
- uint32_t p_cur_slot; /* Current slot in wheel hpts is draining */
- uint32_t p_nxt_slot; /* The next slot outside the current range of
- * slots that the hpts is running on. */
- int32_t p_on_queue_cnt; /* Count on queue in this hpts */
- uint32_t p_lasttick; /* Last tick before the current one */
- uint8_t p_direct_wake :1, /* boolean */
- p_on_min_sleep:1, /* boolean */
- p_hpts_wake_scheduled:1, /* boolean */
- hit_callout_thresh:1,
- p_avail:4;
- uint8_t p_fill[3]; /* Fill to 32 bits */
- /* Cache line 0x40 */
- struct hptsh {
- TAILQ_HEAD(, tcpcb) head;
- uint32_t count;
- uint32_t gencnt;
- } *p_hptss; /* Hptsi wheel */
- uint32_t p_hpts_sleep_time; /* Current sleep interval having a max
- * of 255ms */
- uint32_t overidden_sleep; /* what was overrided by min-sleep for logging */
- uint32_t saved_lasttick; /* for logging */
- uint32_t saved_curtick; /* for logging */
- uint32_t saved_curslot; /* for logging */
- uint32_t saved_prev_slot; /* for logging */
- uint32_t p_delayed_by; /* How much were we delayed by */
- /* Cache line 0x80 */
- struct sysctl_ctx_list hpts_ctx;
- struct sysctl_oid *hpts_root;
- struct intr_event *ie;
- void *ie_cookie;
- uint16_t p_num; /* The hpts number one per cpu */
- uint16_t p_cpu; /* The hpts CPU */
- /* There is extra space in here */
- /* Cache line 0x100 */
- struct callout co __aligned(CACHE_LINE_SIZE);
-} __aligned(CACHE_LINE_SIZE);
-
-static struct tcp_hptsi {
- struct cpu_group **grps;
- struct tcp_hpts_entry **rp_ent; /* Array of hptss */
- uint32_t *cts_last_ran;
- uint32_t grp_cnt;
- uint32_t rp_num_hptss; /* Number of hpts threads */
-} tcp_pace;
-
-static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");
#ifdef RSS
-static int tcp_bind_threads = 1;
+int tcp_bind_threads = 1;
#else
-static int tcp_bind_threads = 2;
+int tcp_bind_threads = 2;
#endif
static int tcp_use_irq_cpu = 0;
static int hpts_does_tp_logging = 0;
-
-static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout);
-static void tcp_hpts_thread(void *ctx);
-
+static int32_t tcp_hpts_precision = 120;
int32_t tcp_min_hptsi_time = DEFAULT_MIN_SLEEP;
static int conn_cnt_thresh = DEFAULT_CONNECTION_THRESHOLD;
static int32_t dynamic_min_sleep = DYNAMIC_MIN_SLEEP;
@@ -295,23 +246,6 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
SYSCTL_NODE(_net_inet_tcp_hpts, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"TCP Hpts statistics");
-#define timersub(tvp, uvp, vvp) \
- do { \
- (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
- (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
- if ((vvp)->tv_usec < 0) { \
- (vvp)->tv_sec--; \
- (vvp)->tv_usec += 1000000; \
- } \
- } while (0)
-
-static int32_t tcp_hpts_precision = 120;
-
-static struct hpts_domain_info {
- int count;
- int cpu[MAXCPU];
-} hpts_domains[MAXMEMDOM];
-
counter_u64_t hpts_hopelessly_behind;
SYSCTL_COUNTER_U64(_net_inet_tcp_hpts_stats, OID_AUTO, hopeless, CTLFLAG_RD,
@@ -459,14 +393,14 @@ SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, nowake_over_thresh, CTLFLAG_RW,
&tcp_hpts_no_wake_over_thresh, 0,
"When we are over the threshold on the pacer do we prohibit wakeups?");
-static uint16_t
-hpts_random_cpu(void)
+uint16_t
+tcp_hptsi_random_cpu(struct tcp_hptsi *pace)
{
uint16_t cpuid;
uint32_t ran;
ran = arc4random();
- cpuid = (((ran & 0xffff) % mp_ncpus) % tcp_pace.rp_num_hptss);
+ cpuid = (((ran & 0xffff) % mp_ncpus) % pace->rp_num_hptss);
return (cpuid);
}
@@ -487,13 +421,11 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv,
log.u_bbr.flex2 = hpts->p_cur_slot;
log.u_bbr.flex3 = hpts->p_prev_slot;
log.u_bbr.flex4 = idx;
- log.u_bbr.flex5 = hpts->p_curtick;
log.u_bbr.flex6 = hpts->p_on_queue_cnt;
log.u_bbr.flex7 = hpts->p_cpu;
log.u_bbr.flex8 = (uint8_t)from_callout;
log.u_bbr.inflight = slots_to_run;
log.u_bbr.applimited = hpts->overidden_sleep;
- log.u_bbr.delivered = hpts->saved_curtick;
log.u_bbr.timeStamp = tcp_tv_to_usec(tv);
log.u_bbr.epoch = hpts->saved_curslot;
log.u_bbr.lt_epoch = hpts->saved_prev_slot;
@@ -510,11 +442,67 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv,
}
}
+/*
+ * Timeout handler for the HPTS sleep callout. It immediately schedules the SWI
+ * for the HPTS entry to run.
+ */
static void
-tcp_wakehpts(struct tcp_hpts_entry *hpts)
+tcp_hpts_sleep_timeout(void *arg)
{
+#ifdef TCP_HPTS_KTEST
+ struct tcp_hptsi *pace;
+#endif
+ struct tcp_hpts_entry *hpts;
+
+ hpts = (struct tcp_hpts_entry *)arg;
+#ifdef TCP_HPTS_KTEST
+ pace = hpts->p_hptsi;
+#endif
+ swi_sched(hpts->ie_cookie, 0);
+}
+
+/*
+ * Reset the HPTS callout timer with the provided timeval. Returns the result
+ * of the callout_reset_sbt_on() function.
+ */
+static int
+tcp_hpts_sleep(struct tcp_hpts_entry *hpts, struct timeval *tv)
+{
+#ifdef TCP_HPTS_KTEST
+ struct tcp_hptsi *pace;
+#endif
+ sbintime_t sb;
+
+#ifdef TCP_HPTS_KTEST
+ pace = hpts->p_hptsi;
+#endif
+
+ /* Store off to make visible the actual sleep time */
+ hpts->sleeping = tv->tv_usec;
+
+ sb = tvtosbt(*tv);
+ return (callout_reset_sbt_on(
+ &hpts->co, sb, 0, tcp_hpts_sleep_timeout, hpts, hpts->p_cpu,
+ (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision))));
+}
+
+/*
+ * Schedules the SWI for the HPTS entry to run, if not already scheduled or
+ * running.
+ */
+void
+tcp_hpts_wake(struct tcp_hpts_entry *hpts)
+{
+#ifdef TCP_HPTS_KTEST
+ struct tcp_hptsi *pace;
+#endif
+
HPTS_MTX_ASSERT(hpts);
+#ifdef TCP_HPTS_KTEST
+ pace = hpts->p_hptsi;
+#endif
+
if (tcp_hpts_no_wake_over_thresh && (hpts->p_on_queue_cnt >= conn_cnt_thresh)) {
hpts->p_direct_wake = 0;
return;
@@ -526,15 +514,6 @@ tcp_wakehpts(struct tcp_hpts_entry *hpts)
}
static void
-hpts_timeout_swi(void *arg)
-{
- struct tcp_hpts_entry *hpts;
-
- hpts = (struct tcp_hpts_entry *)arg;
- swi_sched(hpts->ie_cookie, 0);
-}
-
-static void
tcp_hpts_insert_internal(struct tcpcb *tp, struct tcp_hpts_entry *hpts)
{
struct inpcb *inp = tptoinpcb(tp);
@@ -562,13 +541,13 @@ tcp_hpts_insert_internal(struct tcpcb *tp, struct tcp_hpts_entry *hpts)
}
static struct tcp_hpts_entry *
-tcp_hpts_lock(struct tcpcb *tp)
+tcp_hpts_lock(struct tcp_hptsi *pace, struct tcpcb *tp)
{
struct tcp_hpts_entry *hpts;
INP_LOCK_ASSERT(tptoinpcb(tp));
- hpts = tcp_pace.rp_ent[tp->t_hpts_cpu];
+ hpts = pace->rp_ent[tp->t_hpts_cpu];
HPTS_LOCK(hpts);
return (hpts);
@@ -595,11 +574,10 @@ tcp_hpts_release(struct tcpcb *tp)
* and has never received a first packet.
*/
void
-tcp_hpts_init(struct tcpcb *tp)
+__tcp_hpts_init(struct tcp_hptsi *pace, struct tcpcb *tp)
{
-
if (__predict_true(tp->t_hpts_cpu == HPTS_CPU_NONE)) {
- tp->t_hpts_cpu = hpts_random_cpu();
+ tp->t_hpts_cpu = tcp_hptsi_random_cpu(pace);
MPASS(!(tp->t_flags2 & TF2_HPTS_CPU_SET));
}
}
@@ -611,14 +589,14 @@ tcp_hpts_init(struct tcpcb *tp)
* INP lock and then get the hpts lock.
*/
void
-tcp_hpts_remove(struct tcpcb *tp)
+__tcp_hpts_remove(struct tcp_hptsi *pace, struct tcpcb *tp)
{
struct tcp_hpts_entry *hpts;
struct hptsh *hptsh;
INP_WLOCK_ASSERT(tptoinpcb(tp));
- hpts = tcp_hpts_lock(tp);
+ hpts = tcp_hpts_lock(pace, tp);
if (tp->t_in_hpts == IHPTS_ONQUEUE) {
hptsh = &hpts->p_hptss[tp->t_hpts_slot];
tp->t_hpts_request = 0;
@@ -662,23 +640,19 @@ hpts_slot(uint32_t wheel_slot, uint32_t plus)
{
/*
* Given a slot on the wheel, what slot
- * is that plus ticks out?
+ * is that plus slots out?
*/
- KASSERT(wheel_slot < NUM_OF_HPTSI_SLOTS, ("Invalid tick %u not on wheel", wheel_slot));
+ KASSERT(wheel_slot < NUM_OF_HPTSI_SLOTS, ("Invalid slot %u not on wheel", wheel_slot));
return ((wheel_slot + plus) % NUM_OF_HPTSI_SLOTS);
}
static inline int
-tick_to_wheel(uint32_t cts_in_wticks)
+cts_to_wheel(uint32_t cts)
{
/*
- * Given a timestamp in ticks (so by
- * default to get it to a real time one
- * would multiply by 10.. i.e the number
- * of ticks in a slot) map it to our limited
- * space wheel.
+ * Given a timestamp in useconds map it to our limited space wheel.
*/
- return (cts_in_wticks % NUM_OF_HPTSI_SLOTS);
+ return ((cts / HPTS_USECS_PER_SLOT) % NUM_OF_HPTSI_SLOTS);
}
static inline int
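A worked instance of the new mapping, assuming the 10 usec-per-slot granularity and 102400-slot wheel described in the comment block removed earlier in this file:

    #include <stdint.h>

    #define USECS_PER_SLOT  10
    #define WHEEL_SLOTS     102400

    static inline uint32_t
    usec_ts_to_wheel(uint32_t cts)
    {
            /* scale the usec timestamp to slots, then wrap onto the wheel */
            return ((cts / USECS_PER_SLOT) % WHEEL_SLOTS);
    }
    /* e.g. cts = 1024010 usec -> 102401 slots -> wheel slot 1 */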
@@ -721,7 +695,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t *
if ((hpts->p_hpts_active == 1) &&
(hpts->p_wheel_complete == 0)) {
end_slot = hpts->p_runningslot;
- /* Back up one tick */
+ /* Back up one slot */
if (end_slot == 0)
end_slot = NUM_OF_HPTSI_SLOTS - 1;
else
@@ -734,7 +708,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t *
* not active, or we have
* completed the pass over
* the wheel, we can use the
- * prev tick and subtract one from it. This puts us
+ * prev slot and subtract one from it. This puts us
* as far out as possible on the wheel.
*/
end_slot = hpts->p_prev_slot;
@@ -747,7 +721,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t *
/*
* Now we have close to the full wheel left minus the
* time it has been since the pacer went to sleep. Note
- * that wheel_tick, passed in, should be the current time
+ * that wheel_slot, passed in, should be the current time
* from the perspective of the caller, mapped to the wheel.
*/
if (hpts->p_prev_slot != wheel_slot)
@@ -824,7 +798,7 @@ max_slots_available(struct tcp_hpts_entry *hpts, uint32_t wheel_slot, uint32_t *
#ifdef INVARIANTS
static void
check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct tcpcb *tp,
- uint32_t hptsslot, int line)
+ uint32_t hptsslot)
{
/*
* Sanity checks for the pacer with invariants
@@ -855,12 +829,13 @@ check_if_slot_would_be_wrong(struct tcp_hpts_entry *hpts, struct tcpcb *tp,
}
#endif
-uint32_t
-tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_diag *diag)
+void
+__tcp_hpts_insert(struct tcp_hptsi *pace, struct tcpcb *tp, uint32_t usecs,
+ struct hpts_diag *diag)
{
struct tcp_hpts_entry *hpts;
struct timeval tv;
- uint32_t slot_on, wheel_cts, last_slot, need_new_to = 0;
+ uint32_t slot, wheel_cts, last_slot, need_new_to = 0;
int32_t wheel_slot, maxslots;
bool need_wakeup = false;
@@ -869,11 +844,13 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
MPASS(!(tp->t_in_hpts == IHPTS_ONQUEUE));
/*
+ * Convert microseconds to slots for internal use.
	 * The connection is inserted beyond the hpts's current run (if it
	 * is up) or beyond where it was when it stopped if it is
	 * sleeping.
*/
- hpts = tcp_hpts_lock(tp);
+ slot = HPTS_USEC_TO_SLOTS(usecs);
+ hpts = tcp_hpts_lock(pace, tp);
microuptime(&tv);
if (diag) {
memset(diag, 0, sizeof(struct hpts_diag));
@@ -882,8 +859,6 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
diag->p_runningslot = hpts->p_runningslot;
diag->p_nxt_slot = hpts->p_nxt_slot;
diag->p_cur_slot = hpts->p_cur_slot;
- diag->p_curtick = hpts->p_curtick;
- diag->p_lasttick = hpts->p_lasttick;
diag->slot_req = slot;
diag->p_on_min_sleep = hpts->p_on_min_sleep;
diag->hpts_sleep_time = hpts->p_hpts_sleep_time;
@@ -910,17 +885,15 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
* timeout is not 1.
*/
hpts->p_direct_wake = 1;
- tcp_wakehpts(hpts);
+ tcp_hpts_wake(hpts);
}
- slot_on = hpts->p_nxt_slot;
HPTS_UNLOCK(hpts);
- return (slot_on);
+ return;
}
- /* Get the current time relative to the wheel */
- wheel_cts = tcp_tv_to_hpts_slot(&tv);
- /* Map it onto the wheel */
- wheel_slot = tick_to_wheel(wheel_cts);
+ /* Get the current time stamp and map it onto the wheel */
+ wheel_cts = tcp_tv_to_usec(&tv);
+ wheel_slot = cts_to_wheel(wheel_cts);
/* Now what's the max we can place it at? */
maxslots = max_slots_available(hpts, wheel_slot, &last_slot);
if (diag) {
@@ -952,11 +925,11 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
tp->t_hpts_slot = last_slot;
}
if (diag) {
- diag->slot_remaining = tp->t_hpts_request;
+ diag->time_remaining = tp->t_hpts_request;
diag->inp_hptsslot = tp->t_hpts_slot;
}
#ifdef INVARIANTS
- check_if_slot_would_be_wrong(hpts, tp, tp->t_hpts_slot, line);
+ check_if_slot_would_be_wrong(hpts, tp, tp->t_hpts_slot);
#endif
if (__predict_true(tp->t_in_hpts != IHPTS_MOVING))
tcp_hpts_insert_internal(tp, hpts);
@@ -995,12 +968,12 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
}
/*
	 * Now how far out is the hpts sleeping? If active is 1, it's
-	 * up and ticking we do nothing, otherwise we may need to
+	 * up and running and we do nothing; otherwise we may need to
* reschedule its callout if need_new_to is set from above.
*/
if (need_wakeup) {
hpts->p_direct_wake = 1;
- tcp_wakehpts(hpts);
+ tcp_hpts_wake(hpts);
if (diag) {
diag->need_new_to = 0;
diag->co_ret = 0xffff0000;
@@ -1008,7 +981,6 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
} else if (need_new_to) {
int32_t co_ret;
struct timeval tv;
- sbintime_t sb;
tv.tv_sec = 0;
tv.tv_usec = 0;
@@ -1016,24 +988,18 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
tv.tv_sec++;
need_new_to -= HPTS_USEC_IN_SEC;
}
- tv.tv_usec = need_new_to;
- sb = tvtosbt(tv);
- co_ret = callout_reset_sbt_on(&hpts->co, sb, 0,
- hpts_timeout_swi, hpts, hpts->p_cpu,
- (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision)));
+ tv.tv_usec = need_new_to; /* XXX: Why is this sleeping over the max? */
+ co_ret = tcp_hpts_sleep(hpts, &tv);
if (diag) {
diag->need_new_to = need_new_to;
diag->co_ret = co_ret;
}
}
- slot_on = hpts->p_nxt_slot;
HPTS_UNLOCK(hpts);
-
- return (slot_on);
}
static uint16_t
-hpts_cpuid(struct tcpcb *tp, int *failed)
+hpts_cpuid(struct tcp_hptsi *pace, struct tcpcb *tp, int *failed)
{
struct inpcb *inp = tptoinpcb(tp);
u_int cpuid;
@@ -1060,7 +1026,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed)
#ifdef RSS
cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
if (cpuid == NETISR_CPUID_NONE)
- return (hpts_random_cpu());
+ return (tcp_hptsi_random_cpu(pace));
else
return (cpuid);
#endif
@@ -1071,7 +1037,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed)
*/
if (inp->inp_flowtype == M_HASHTYPE_NONE) {
counter_u64_add(cpu_uses_random, 1);
- return (hpts_random_cpu());
+ return (tcp_hptsi_random_cpu(pace));
}
/*
* Hash to a thread based on the flowid. If we are using numa,
@@ -1086,7 +1052,7 @@ hpts_cpuid(struct tcpcb *tp, int *failed)
#ifdef NUMA
} else {
/* Hash into the cpu's that use that domain */
- di = &hpts_domains[inp->inp_numa_domain];
+ di = &pace->domains[inp->inp_numa_domain];
cpuid = di->cpu[inp->inp_flowid % di->count];
}
#endif
@@ -1118,9 +1084,16 @@ tcp_hpts_set_max_sleep(struct tcp_hpts_entry *hpts, int wrap_loop_cnt)
}
}
-static int32_t
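+/*
+ * Return true when two usec timestamps fall into different wheel slots,
+ * i.e. enough time has passed that the pacer has a new slot to process.
+ */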
+static bool
+tcp_hpts_different_slots(uint32_t cts, uint32_t cts_last_run)
+{
+ return ((cts / HPTS_USECS_PER_SLOT) != (cts_last_run / HPTS_USECS_PER_SLOT));
+}
+
+int32_t
tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout)
{
+ struct tcp_hptsi *pace;
struct tcpcb *tp;
struct timeval tv;
int32_t slots_to_run, i, error;
@@ -1130,6 +1103,7 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout)
int32_t wrap_loop_cnt = 0;
int32_t slot_pos_of_endpoint = 0;
int32_t orig_exit_slot;
+ uint32_t cts, cts_last_run;
bool completed_measure, seen_endpoint;
completed_measure = false;
@@ -1137,32 +1111,34 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout)
HPTS_MTX_ASSERT(hpts);
NET_EPOCH_ASSERT();
+
+ pace = hpts->p_hptsi;
+ MPASS(pace != NULL);
+
/* record previous info for any logging */
- hpts->saved_lasttick = hpts->p_lasttick;
- hpts->saved_curtick = hpts->p_curtick;
hpts->saved_curslot = hpts->p_cur_slot;
hpts->saved_prev_slot = hpts->p_prev_slot;
- hpts->p_lasttick = hpts->p_curtick;
- hpts->p_curtick = tcp_gethptstick(&tv);
- tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usec(&tv);
- orig_exit_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
+ microuptime(&tv);
+ cts_last_run = pace->cts_last_ran[hpts->p_cpu];
+ pace->cts_last_ran[hpts->p_cpu] = cts = tcp_tv_to_usec(&tv);
+
+ orig_exit_slot = hpts->p_cur_slot = cts_to_wheel(cts);
if ((hpts->p_on_queue_cnt == 0) ||
- (hpts->p_lasttick == hpts->p_curtick)) {
+ !tcp_hpts_different_slots(cts, cts_last_run)) {
/*
- * No time has yet passed,
- * or nothing to do.
+		 * Not enough time has passed yet, or there is nothing to do.
*/
hpts->p_prev_slot = hpts->p_cur_slot;
- hpts->p_lasttick = hpts->p_curtick;
goto no_run;
}
again:
hpts->p_wheel_complete = 0;
HPTS_MTX_ASSERT(hpts);
slots_to_run = hpts_slots_diff(hpts->p_prev_slot, hpts->p_cur_slot);
- if (((hpts->p_curtick - hpts->p_lasttick) > (NUM_OF_HPTSI_SLOTS - 1)) &&
- (hpts->p_on_queue_cnt != 0)) {
+ if ((hpts->p_on_queue_cnt != 0) &&
+ ((cts - cts_last_run) >
+ ((NUM_OF_HPTSI_SLOTS-1) * HPTS_USECS_PER_SLOT))) {
/*
		 * Wheel wrap is occurring, basically we
* are behind and the distance between
@@ -1238,7 +1214,7 @@ again:
uint32_t runningslot;
/*
- * Calculate our delay, if there are no extra ticks there
+	 * Calculate our delay; if there are no extra slots, there
	 * is none (i.e. if slots_to_run == 1, no delay).
*/
hpts->p_delayed_by = (slots_to_run - (i + 1)) *
@@ -1391,7 +1367,7 @@ again:
* gets added to the hpts (not this one)
* :-)
*/
- tcp_set_hpts(tp);
+ __tcp_set_hpts(pace, tp);
}
CURVNET_SET(inp->inp_vnet);
/* Lets do any logging that we might want to */
@@ -1450,10 +1426,12 @@ no_one:
hpts->p_delayed_by = 0;
/*
* Check to see if we took an excess amount of time and need to run
- * more ticks (if we did not hit eno-bufs).
+	 * more slots (if we did not hit ENOBUFS).
*/
hpts->p_prev_slot = hpts->p_cur_slot;
- hpts->p_lasttick = hpts->p_curtick;
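+	/* Re-sample the clock to account for the time spent running the wheel. */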
+ microuptime(&tv);
+ cts_last_run = cts;
+ cts = tcp_tv_to_usec(&tv);
if (!from_callout || (loop_cnt > max_pacer_loops)) {
/*
		 * Something is seriously slow; we have
@@ -1465,7 +1443,7 @@ no_one:
* can never catch up :(
*
* We will just lie to this thread
- * and let it thing p_curtick is
+ * and let it think p_curslot is
* correct. When it next awakens
* it will find itself further behind.
*/
@@ -1473,20 +1451,19 @@ no_one:
counter_u64_add(hpts_hopelessly_behind, 1);
goto no_run;
}
- hpts->p_curtick = tcp_gethptstick(&tv);
- hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
+
+ hpts->p_cur_slot = cts_to_wheel(cts);
if (!seen_endpoint) {
/* We saw no endpoint but we may be looping */
orig_exit_slot = hpts->p_cur_slot;
}
- if ((wrap_loop_cnt < 2) &&
- (hpts->p_lasttick != hpts->p_curtick)) {
+ if ((wrap_loop_cnt < 2) && tcp_hpts_different_slots(cts, cts_last_run)) {
counter_u64_add(hpts_loops, 1);
loop_cnt++;
goto again;
}
no_run:
- tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usec(&tv);
+ pace->cts_last_ran[hpts->p_cpu] = cts;
/*
* Set flag to tell that we are done for
* any slot input that happens during
@@ -1494,25 +1471,36 @@ no_run:
*/
hpts->p_wheel_complete = 1;
/*
- * Now did we spend too long running input and need to run more ticks?
- * Note that if wrap_loop_cnt < 2 then we should have the conditions
- * in the KASSERT's true. But if the wheel is behind i.e. wrap_loop_cnt
- * is greater than 2, then the condtion most likely are *not* true.
- * Also if we are called not from the callout, we don't run the wheel
- * multiple times so the slots may not align either.
- */
- KASSERT(((hpts->p_prev_slot == hpts->p_cur_slot) ||
- (wrap_loop_cnt >= 2) || !from_callout),
- ("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts,
- hpts->p_prev_slot, hpts->p_cur_slot));
- KASSERT(((hpts->p_lasttick == hpts->p_curtick)
- || (wrap_loop_cnt >= 2) || !from_callout),
- ("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts,
- hpts->p_lasttick, hpts->p_curtick));
- if (from_callout && (hpts->p_lasttick != hpts->p_curtick)) {
- hpts->p_curtick = tcp_gethptstick(&tv);
+ * If enough time has elapsed that we should be processing the next
+ * slot(s), then we should have kept running and not marked the wheel as
+ * complete.
+ *
+ * But there are several other conditions where we would have stopped
+ * processing, so the prev/cur slots and cts variables won't match.
+ * These conditions are:
+ *
+ * - Calls not from callouts don't run multiple times
+ * - The wheel is empty
+ * - We've processed more than max_pacer_loops times
+ * - We've wrapped more than 2 times
+ *
+ * This assert catches when the logic above has violated this design.
+ *
+ */
+ KASSERT((!from_callout || (hpts->p_on_queue_cnt == 0) ||
+ (loop_cnt > max_pacer_loops) || (wrap_loop_cnt >= 2) ||
+ ((hpts->p_prev_slot == hpts->p_cur_slot) &&
+ !tcp_hpts_different_slots(cts, cts_last_run))),
+ ("H:%p Shouldn't be done! prev_slot:%u, cur_slot:%u, "
+ "cts_last_run:%u, cts:%u, loop_cnt:%d, wrap_loop_cnt:%d",
+ hpts, hpts->p_prev_slot, hpts->p_cur_slot,
+ cts_last_run, cts, loop_cnt, wrap_loop_cnt));
+
+ if (from_callout && tcp_hpts_different_slots(cts, cts_last_run)) {
+ microuptime(&tv);
+ cts = tcp_tv_to_usec(&tv);
+ hpts->p_cur_slot = cts_to_wheel(cts);
counter_u64_add(hpts_loops, 1);
- hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
goto again;
}
@@ -1526,16 +1514,16 @@ no_run:
}
void
-tcp_set_hpts(struct tcpcb *tp)
+__tcp_set_hpts(struct tcp_hptsi *pace, struct tcpcb *tp)
{
struct tcp_hpts_entry *hpts;
int failed;
INP_WLOCK_ASSERT(tptoinpcb(tp));
- hpts = tcp_hpts_lock(tp);
+ hpts = tcp_hpts_lock(pace, tp);
if (tp->t_in_hpts == IHPTS_NONE && !(tp->t_flags2 & TF2_HPTS_CPU_SET)) {
- tp->t_hpts_cpu = hpts_cpuid(tp, &failed);
+ tp->t_hpts_cpu = hpts_cpuid(pace, tp, &failed);
if (failed == 0)
tp->t_flags2 |= TF2_HPTS_CPU_SET;
}
@@ -1543,33 +1531,35 @@ tcp_set_hpts(struct tcpcb *tp)
}
static struct tcp_hpts_entry *
-tcp_choose_hpts_to_run(void)
+tcp_choose_hpts_to_run(struct tcp_hptsi *pace)
{
+ struct timeval tv;
int i, oldest_idx, start, end;
uint32_t cts, time_since_ran, calc;
- cts = tcp_get_usecs(NULL);
+ microuptime(&tv);
+ cts = tcp_tv_to_usec(&tv);
time_since_ran = 0;
/* Default is all one group */
start = 0;
- end = tcp_pace.rp_num_hptss;
+ end = pace->rp_num_hptss;
/*
* If we have more than one L3 group figure out which one
* this CPU is in.
*/
- if (tcp_pace.grp_cnt > 1) {
- for (i = 0; i < tcp_pace.grp_cnt; i++) {
- if (CPU_ISSET(curcpu, &tcp_pace.grps[i]->cg_mask)) {
- start = tcp_pace.grps[i]->cg_first;
- end = (tcp_pace.grps[i]->cg_last + 1);
+ if (pace->grp_cnt > 1) {
+ for (i = 0; i < pace->grp_cnt; i++) {
+ if (CPU_ISSET(curcpu, &pace->grps[i]->cg_mask)) {
+ start = pace->grps[i]->cg_first;
+ end = (pace->grps[i]->cg_last + 1);
break;
}
}
}
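+	/* Choose the hpts in this group that has gone longest without running. */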
oldest_idx = -1;
for (i = start; i < end; i++) {
- if (TSTMP_GT(cts, tcp_pace.cts_last_ran[i]))
- calc = cts - tcp_pace.cts_last_ran[i];
+ if (TSTMP_GT(cts, pace->cts_last_ran[i]))
+ calc = cts - pace->cts_last_ran[i];
else
calc = 0;
if (calc > time_since_ran) {
@@ -1578,9 +1568,9 @@ tcp_choose_hpts_to_run(void)
}
}
if (oldest_idx >= 0)
- return(tcp_pace.rp_ent[oldest_idx]);
+ return(pace->rp_ent[oldest_idx]);
else
- return(tcp_pace.rp_ent[(curcpu % tcp_pace.rp_num_hptss)]);
+ return(pace->rp_ent[(curcpu % pace->rp_num_hptss)]);
}
static void
@@ -1588,9 +1578,9 @@ __tcp_run_hpts(void)
{
struct epoch_tracker et;
struct tcp_hpts_entry *hpts;
- int ticks_ran;
+ int slots_ran;
- hpts = tcp_choose_hpts_to_run();
+ hpts = tcp_choose_hpts_to_run(tcp_hptsi_pace);
if (hpts->p_hpts_active) {
/* Already active */
@@ -1606,12 +1596,11 @@ __tcp_run_hpts(void)
hpts->syscall_cnt++;
counter_u64_add(hpts_direct_call, 1);
hpts->p_hpts_active = 1;
- ticks_ran = tcp_hptsi(hpts, false);
+ slots_ran = tcp_hptsi(hpts, false);
/* We may want to adjust the sleep values here */
if (hpts->p_on_queue_cnt >= conn_cnt_thresh) {
- if (ticks_ran > slots_indicate_less_sleep) {
+ if (slots_ran > slots_indicate_less_sleep) {
struct timeval tv;
- sbintime_t sb;
hpts->p_mysleep.tv_usec /= 2;
if (hpts->p_mysleep.tv_usec < dynamic_min_sleep)
@@ -1635,13 +1624,8 @@ __tcp_run_hpts(void)
* the dynamic value and set the on_min_sleep
* flag so we will not be awoken.
*/
- sb = tvtosbt(tv);
- /* Store off to make visible the actual sleep time */
- hpts->sleeping = tv.tv_usec;
- callout_reset_sbt_on(&hpts->co, sb, 0,
- hpts_timeout_swi, hpts, hpts->p_cpu,
- (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision)));
- } else if (ticks_ran < slots_indicate_more_sleep) {
+ (void)tcp_hpts_sleep(hpts, &tv);
+ } else if (slots_ran < slots_indicate_more_sleep) {
		/* For a longer sleep, don't reschedule the hpts */
hpts->p_mysleep.tv_usec *= 2;
if (hpts->p_mysleep.tv_usec > dynamic_max_sleep)
@@ -1658,17 +1642,22 @@ out_with_mtx:
static void
tcp_hpts_thread(void *ctx)
{
+#ifdef TCP_HPTS_KTEST
+ struct tcp_hptsi *pace;
+#endif
struct tcp_hpts_entry *hpts;
struct epoch_tracker et;
struct timeval tv;
- sbintime_t sb;
- int ticks_ran;
+ int slots_ran;
hpts = (struct tcp_hpts_entry *)ctx;
+#ifdef TCP_HPTS_KTEST
+ pace = hpts->p_hptsi;
+#endif
HPTS_LOCK(hpts);
if (hpts->p_direct_wake) {
/* Signaled by input or output with low occupancy count. */
- callout_stop(&hpts->co);
+ _callout_stop_safe(&hpts->co, 0);
counter_u64_add(hpts_direct_awakening, 1);
} else {
/* Timed out, the normal case. */
@@ -1721,7 +1710,7 @@ tcp_hpts_thread(void *ctx)
}
hpts->sleeping = 0;
hpts->p_hpts_active = 1;
- ticks_ran = tcp_hptsi(hpts, true);
+ slots_ran = tcp_hptsi(hpts, true);
tv.tv_sec = 0;
tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT;
if ((hpts->p_on_queue_cnt > conn_cnt_thresh) && (hpts->hit_callout_thresh == 0)) {
@@ -1737,11 +1726,11 @@ tcp_hpts_thread(void *ctx)
* Only adjust sleep time if we were
* called from the callout i.e. direct_wake == 0.
*/
- if (ticks_ran < slots_indicate_more_sleep) {
+ if (slots_ran < slots_indicate_more_sleep) {
hpts->p_mysleep.tv_usec *= 2;
if (hpts->p_mysleep.tv_usec > dynamic_max_sleep)
hpts->p_mysleep.tv_usec = dynamic_max_sleep;
- } else if (ticks_ran > slots_indicate_less_sleep) {
+ } else if (slots_ran > slots_indicate_less_sleep) {
hpts->p_mysleep.tv_usec /= 2;
if (hpts->p_mysleep.tv_usec < dynamic_min_sleep)
hpts->p_mysleep.tv_usec = dynamic_min_sleep;
@@ -1797,18 +1786,11 @@ tcp_hpts_thread(void *ctx)
hpts->p_hpts_active = 0;
back_to_sleep:
hpts->p_direct_wake = 0;
- sb = tvtosbt(tv);
- /* Store off to make visible the actual sleep time */
- hpts->sleeping = tv.tv_usec;
- callout_reset_sbt_on(&hpts->co, sb, 0,
- hpts_timeout_swi, hpts, hpts->p_cpu,
- (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision)));
+ (void)tcp_hpts_sleep(hpts, &tv);
NET_EPOCH_EXIT(et);
HPTS_UNLOCK(hpts);
}
-#undef timersub
-
static int32_t
hpts_count_level(struct cpu_group *cg)
{
@@ -1845,57 +1827,63 @@ hpts_gather_grps(struct cpu_group **grps, int32_t *at, int32_t max, struct cpu_g
}
}
-static void
-tcp_hpts_mod_load(void)
+/*
+ * Initialize a tcp_hptsi structure. This performs the core initialization
+ * without starting threads.
+ */
+struct tcp_hptsi*
+tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs, bool enable_sysctl)
{
+ struct tcp_hptsi *pace;
struct cpu_group *cpu_top;
- int32_t error __diagused;
- int32_t i, j, bound = 0, created = 0;
+ uint32_t i, j, cts;
+ int32_t count;
size_t sz, asz;
struct timeval tv;
- sbintime_t sb;
struct tcp_hpts_entry *hpts;
- struct pcpu *pc;
char unit[16];
uint32_t ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
- int count, domain;
+ KASSERT(funcs != NULL, ("funcs is NULL"));
+
+	/*
+	 * Allocate the main structure. M_WAITOK cannot fail and M_ZERO
+	 * already returns zeroed memory, so no NULL check or memset is
+	 * needed.
+	 */
+	pace = malloc(sizeof(struct tcp_hptsi), M_TCPHPTS, M_WAITOK | M_ZERO);
+ pace->funcs = funcs;
+
+ /* Setup CPU topology information */
#ifdef SMP
cpu_top = smp_topo();
#else
cpu_top = NULL;
#endif
- tcp_pace.rp_num_hptss = ncpus;
- hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK);
- hpts_loops = counter_u64_alloc(M_WAITOK);
- back_tosleep = counter_u64_alloc(M_WAITOK);
- combined_wheel_wrap = counter_u64_alloc(M_WAITOK);
- wheel_wrap = counter_u64_alloc(M_WAITOK);
- hpts_wake_timeout = counter_u64_alloc(M_WAITOK);
- hpts_direct_awakening = counter_u64_alloc(M_WAITOK);
- hpts_back_tosleep = counter_u64_alloc(M_WAITOK);
- hpts_direct_call = counter_u64_alloc(M_WAITOK);
- cpu_uses_flowid = counter_u64_alloc(M_WAITOK);
- cpu_uses_random = counter_u64_alloc(M_WAITOK);
+ pace->rp_num_hptss = ncpus;
- sz = (tcp_pace.rp_num_hptss * sizeof(struct tcp_hpts_entry *));
- tcp_pace.rp_ent = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO);
- sz = (sizeof(uint32_t) * tcp_pace.rp_num_hptss);
- tcp_pace.cts_last_ran = malloc(sz, M_TCPHPTS, M_WAITOK);
- tcp_pace.grp_cnt = 0;
+ /* Allocate hpts entry array */
+ sz = (pace->rp_num_hptss * sizeof(struct tcp_hpts_entry *));
+ pace->rp_ent = malloc(sz, M_TCPHPTS, M_WAITOK | M_ZERO);
+
+ /* Allocate timestamp tracking array */
+ sz = (sizeof(uint32_t) * pace->rp_num_hptss);
+ pace->cts_last_ran = malloc(sz, M_TCPHPTS, M_WAITOK);
+
+ /* Setup CPU groups */
if (cpu_top == NULL) {
- tcp_pace.grp_cnt = 1;
+ pace->grp_cnt = 1;
} else {
/* Find out how many cache level 3 domains we have */
count = 0;
- tcp_pace.grp_cnt = hpts_count_level(cpu_top);
- if (tcp_pace.grp_cnt == 0) {
- tcp_pace.grp_cnt = 1;
+ pace->grp_cnt = hpts_count_level(cpu_top);
+ if (pace->grp_cnt == 0) {
+ pace->grp_cnt = 1;
}
- sz = (tcp_pace.grp_cnt * sizeof(struct cpu_group *));
- tcp_pace.grps = malloc(sz, M_TCPHPTS, M_WAITOK);
+ sz = (pace->grp_cnt * sizeof(struct cpu_group *));
+ pace->grps = malloc(sz, M_TCPHPTS, M_WAITOK);
/* Now populate the groups */
- if (tcp_pace.grp_cnt == 1) {
+ if (pace->grp_cnt == 1) {
/*
* All we need is the top level all cpu's are in
* the same cache so when we use grp[0]->cg_mask
@@ -1903,193 +1891,290 @@ tcp_hpts_mod_load(void)
* all cpu's in it. The level here is probably
* zero which is ok.
*/
- tcp_pace.grps[0] = cpu_top;
+ pace->grps[0] = cpu_top;
} else {
/*
* Here we must find all the level three cache domains
* and setup our pointers to them.
*/
count = 0;
- hpts_gather_grps(tcp_pace.grps, &count, tcp_pace.grp_cnt, cpu_top);
+ hpts_gather_grps(pace->grps, &count, pace->grp_cnt, cpu_top);
}
}
+
+ /* Cache the current time for initializing the hpts entries */
+ microuptime(&tv);
+ cts = tcp_tv_to_usec(&tv);
+
+ /* Initialize each hpts entry */
asz = sizeof(struct hptsh) * NUM_OF_HPTSI_SLOTS;
- for (i = 0; i < tcp_pace.rp_num_hptss; i++) {
- tcp_pace.rp_ent[i] = malloc(sizeof(struct tcp_hpts_entry),
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ pace->rp_ent[i] = malloc(sizeof(struct tcp_hpts_entry),
M_TCPHPTS, M_WAITOK | M_ZERO);
- tcp_pace.rp_ent[i]->p_hptss = malloc(asz, M_TCPHPTS, M_WAITOK);
- hpts = tcp_pace.rp_ent[i];
- /*
- * Init all the hpts structures that are not specifically
- * zero'd by the allocations. Also lets attach them to the
- * appropriate sysctl block as well.
- */
- mtx_init(&hpts->p_mtx, "tcp_hpts_lck",
- "hpts", MTX_DEF | MTX_DUPOK);
- for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) {
- TAILQ_INIT(&hpts->p_hptss[j].head);
- hpts->p_hptss[j].count = 0;
- hpts->p_hptss[j].gencnt = 0;
- }
- sysctl_ctx_init(&hpts->hpts_ctx);
- sprintf(unit, "%d", i);
- hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx,
- SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts),
- OID_AUTO,
- unit,
- CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
- "");
- SYSCTL_ADD_INT(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "out_qcnt", CTLFLAG_RD,
- &hpts->p_on_queue_cnt, 0,
- "Count TCB's awaiting output processing");
- SYSCTL_ADD_U16(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "active", CTLFLAG_RD,
- &hpts->p_hpts_active, 0,
- "Is the hpts active");
- SYSCTL_ADD_UINT(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "curslot", CTLFLAG_RD,
- &hpts->p_cur_slot, 0,
- "What the current running pacers goal");
- SYSCTL_ADD_UINT(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "runtick", CTLFLAG_RD,
- &hpts->p_runningslot, 0,
- "What the running pacers current slot is");
- SYSCTL_ADD_UINT(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "curtick", CTLFLAG_RD,
- &hpts->p_curtick, 0,
- "What the running pacers last tick mapped to the wheel was");
- SYSCTL_ADD_UINT(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "lastran", CTLFLAG_RD,
- &tcp_pace.cts_last_ran[i], 0,
- "The last usec tick that this hpts ran");
- SYSCTL_ADD_LONG(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "cur_min_sleep", CTLFLAG_RD,
- &hpts->p_mysleep.tv_usec,
- "What the running pacers is using for p_mysleep.tv_usec");
- SYSCTL_ADD_U64(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "now_sleeping", CTLFLAG_RD,
- &hpts->sleeping, 0,
- "What the running pacers is actually sleeping for");
- SYSCTL_ADD_U64(&hpts->hpts_ctx,
- SYSCTL_CHILDREN(hpts->hpts_root),
- OID_AUTO, "syscall_cnt", CTLFLAG_RD,
- &hpts->syscall_cnt, 0,
- "How many times we had syscalls on this hpts");
+ pace->rp_ent[i]->p_hptss = malloc(asz, M_TCPHPTS,
+ M_WAITOK | M_ZERO);
+ hpts = pace->rp_ent[i];
+ /* Basic initialization */
hpts->p_hpts_sleep_time = hpts_sleep_max;
- hpts->p_num = i;
- hpts->p_curtick = tcp_gethptstick(&tv);
- tcp_pace.cts_last_ran[i] = tcp_tv_to_usec(&tv);
- hpts->p_prev_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
- hpts->p_cpu = 0xffff;
+ hpts->p_cpu = i;
+ pace->cts_last_ran[i] = cts;
+ hpts->p_cur_slot = cts_to_wheel(cts);
+ hpts->p_prev_slot = hpts->p_cur_slot;
hpts->p_nxt_slot = hpts_slot(hpts->p_cur_slot, 1);
callout_init(&hpts->co, 1);
+ hpts->p_hptsi = pace;
+ mtx_init(&hpts->p_mtx, "tcp_hpts_lck", "hpts",
+ MTX_DEF | MTX_DUPOK);
+ for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) {
+ TAILQ_INIT(&hpts->p_hptss[j].head);
+ }
+
+ /* Setup SYSCTL if requested */
+ if (enable_sysctl) {
+ sysctl_ctx_init(&hpts->hpts_ctx);
+			sprintf(unit, "%u", i);
+ hpts->hpts_root = SYSCTL_ADD_NODE(&hpts->hpts_ctx,
+ SYSCTL_STATIC_CHILDREN(_net_inet_tcp_hpts),
+ OID_AUTO,
+ unit,
+ CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "");
+ SYSCTL_ADD_INT(&hpts->hpts_ctx,
+ SYSCTL_CHILDREN(hpts->hpts_root),
+ OID_AUTO, "out_qcnt", CTLFLAG_RD,
+ &hpts->p_on_queue_cnt, 0,
+ "Count TCB's awaiting output processing");
+ SYSCTL_ADD_U16(&hpts->hpts_ctx,
+ SYSCTL_CHILDREN(hpts->hpts_root),
+ OID_AUTO, "active", CTLFLAG_RD,
+ &hpts->p_hpts_active, 0,
+ "Is the hpts active");
+ SYSCTL_ADD_UINT(&hpts->hpts_ctx,
+ SYSCTL_CHILDREN(hpts->hpts_root),
+ OID_AUTO, "curslot", CTLFLAG_RD,
+ &hpts->p_cur_slot, 0,
+			    "What the current running pacer's goal is");
+ SYSCTL_ADD_UINT(&hpts->hpts_ctx,
+ SYSCTL_CHILDREN(hpts->hpts_root),
+ OID_AUTO, "runslot", CTLFLAG_RD,
+ &hpts->p_runningslot, 0,
+			    "What the running pacer's current slot is");
+ SYSCTL_ADD_UINT(&hpts->hpts_ctx,
+ SYSCTL_CHILDREN(hpts->hpts_root),
+ OID_AUTO, "lastran", CTLFLAG_RD,
+ &pace->cts_last_ran[i], 0,
+ "The last usec timestamp that this hpts ran");
+ SYSCTL_ADD_LONG(&hpts->hpts_ctx,
+ SYSCTL_CHILDREN(hpts->hpts_root),
+ OID_AUTO, "cur_min_sleep", CTLFLAG_RD,
+ &hpts->p_mysleep.tv_usec,
+			    "What the running pacer is using for p_mysleep.tv_usec");
+ SYSCTL_ADD_U64(&hpts->hpts_ctx,
+ SYSCTL_CHILDREN(hpts->hpts_root),
+ OID_AUTO, "now_sleeping", CTLFLAG_RD,
+ &hpts->sleeping, 0,
+			    "What the running pacer is actually sleeping for");
+ SYSCTL_ADD_U64(&hpts->hpts_ctx,
+ SYSCTL_CHILDREN(hpts->hpts_root),
+ OID_AUTO, "syscall_cnt", CTLFLAG_RD,
+ &hpts->syscall_cnt, 0,
+ "How many times we had syscalls on this hpts");
+ }
}
- /* Don't try to bind to NUMA domains if we don't have any */
- if (vm_ndomains == 1 && tcp_bind_threads == 2)
- tcp_bind_threads = 0;
- /*
- * Now lets start ithreads to handle the hptss.
- */
- for (i = 0; i < tcp_pace.rp_num_hptss; i++) {
- hpts = tcp_pace.rp_ent[i];
- hpts->p_cpu = i;
+ return (pace);
+}
+
+/*
+ * Create threads for a tcp_hptsi structure and start timers for the current
+ * (minimum) sleep interval.
+ */
+void
+tcp_hptsi_start(struct tcp_hptsi *pace)
+{
+ struct tcp_hpts_entry *hpts;
+ struct pcpu *pc;
+ struct timeval tv;
+ uint32_t i, j;
+ int count, domain;
+ int error __diagused;
+
+ KASSERT(pace != NULL, ("tcp_hptsi_start: pace is NULL"));
+
+ /* Start threads for each hpts entry */
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ hpts = pace->rp_ent[i];
+
+ KASSERT(hpts->ie_cookie == NULL,
+ ("tcp_hptsi_start: hpts[%d]->ie_cookie is not NULL", i));
error = swi_add(&hpts->ie, "hpts",
tcp_hpts_thread, (void *)hpts,
SWI_NET, INTR_MPSAFE, &hpts->ie_cookie);
KASSERT(error == 0,
- ("Can't add hpts:%p i:%d err:%d",
- hpts, i, error));
- created++;
- hpts->p_mysleep.tv_sec = 0;
- hpts->p_mysleep.tv_usec = tcp_min_hptsi_time;
+ ("Can't add hpts:%p i:%d err:%d", hpts, i, error));
+
if (tcp_bind_threads == 1) {
- if (intr_event_bind(hpts->ie, i) == 0)
- bound++;
+ (void)intr_event_bind(hpts->ie, i);
} else if (tcp_bind_threads == 2) {
/* Find the group for this CPU (i) and bind into it */
- for (j = 0; j < tcp_pace.grp_cnt; j++) {
- if (CPU_ISSET(i, &tcp_pace.grps[j]->cg_mask)) {
+ for (j = 0; j < pace->grp_cnt; j++) {
+ if (CPU_ISSET(i, &pace->grps[j]->cg_mask)) {
if (intr_event_bind_ithread_cpuset(hpts->ie,
- &tcp_pace.grps[j]->cg_mask) == 0) {
- bound++;
+ &pace->grps[j]->cg_mask) == 0) {
pc = pcpu_find(i);
domain = pc->pc_domain;
- count = hpts_domains[domain].count;
- hpts_domains[domain].cpu[count] = i;
- hpts_domains[domain].count++;
+ count = pace->domains[domain].count;
+ pace->domains[domain].cpu[count] = i;
+ pace->domains[domain].count++;
break;
}
}
}
}
+
+ hpts->p_mysleep.tv_sec = 0;
+ hpts->p_mysleep.tv_usec = tcp_min_hptsi_time;
tv.tv_sec = 0;
tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT;
- hpts->sleeping = tv.tv_usec;
- sb = tvtosbt(tv);
- callout_reset_sbt_on(&hpts->co, sb, 0,
- hpts_timeout_swi, hpts, hpts->p_cpu,
- (C_DIRECT_EXEC | C_PREL(tcp_hpts_precision)));
- }
- /*
- * If we somehow have an empty domain, fall back to choosing
- * among all htps threads.
- */
- for (i = 0; i < vm_ndomains; i++) {
- if (hpts_domains[i].count == 0) {
- tcp_bind_threads = 0;
- break;
- }
+ (void)tcp_hpts_sleep(hpts, &tv);
}
- tcp_hpts_softclock = __tcp_run_hpts;
- tcp_lro_hpts_init();
- printf("TCP Hpts created %d swi interrupt threads and bound %d to %s\n",
- created, bound,
- tcp_bind_threads == 2 ? "NUMA domains" : "cpus");
}
-static void
-tcp_hpts_mod_unload(void)
+/*
+ * Stop all callouts/threads for a tcp_hptsi structure.
+ */
+void
+tcp_hptsi_stop(struct tcp_hptsi *pace)
{
+ struct tcp_hpts_entry *hpts;
int rv __diagused;
+ uint32_t i;
- tcp_lro_hpts_uninit();
- atomic_store_ptr(&tcp_hpts_softclock, NULL);
+ KASSERT(pace != NULL, ("tcp_hptsi_stop: pace is NULL"));
- for (int i = 0; i < tcp_pace.rp_num_hptss; i++) {
- struct tcp_hpts_entry *hpts = tcp_pace.rp_ent[i];
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ hpts = pace->rp_ent[i];
+ KASSERT(hpts != NULL, ("tcp_hptsi_stop: hpts[%d] is NULL", i));
+ KASSERT(hpts->ie_cookie != NULL,
+ ("tcp_hptsi_stop: hpts[%d]->ie_cookie is NULL", i));
- rv = callout_drain(&hpts->co);
+ rv = _callout_stop_safe(&hpts->co, CS_DRAIN);
MPASS(rv != 0);
rv = swi_remove(hpts->ie_cookie);
MPASS(rv == 0);
+ hpts->ie_cookie = NULL;
+ }
+}
- rv = sysctl_ctx_free(&hpts->hpts_ctx);
- MPASS(rv == 0);
+/*
+ * Destroy a tcp_hptsi structure initialized by tcp_hptsi_create.
+ */
+void
+tcp_hptsi_destroy(struct tcp_hptsi *pace)
+{
+ struct tcp_hpts_entry *hpts;
+ uint32_t i;
+
+ KASSERT(pace != NULL, ("tcp_hptsi_destroy: pace is NULL"));
+ KASSERT(pace->rp_ent != NULL, ("tcp_hptsi_destroy: pace->rp_ent is NULL"));
+
+ /* Cleanup each hpts entry */
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ hpts = pace->rp_ent[i];
+ if (hpts != NULL) {
+ /* Cleanup SYSCTL if it was initialized */
+ if (hpts->hpts_root != NULL) {
+ sysctl_ctx_free(&hpts->hpts_ctx);
+ }
- mtx_destroy(&hpts->p_mtx);
- free(hpts->p_hptss, M_TCPHPTS);
- free(hpts, M_TCPHPTS);
+ mtx_destroy(&hpts->p_mtx);
+ free(hpts->p_hptss, M_TCPHPTS);
+ free(hpts, M_TCPHPTS);
+ }
}
- free(tcp_pace.rp_ent, M_TCPHPTS);
- free(tcp_pace.cts_last_ran, M_TCPHPTS);
+ /* Cleanup main arrays */
+ free(pace->rp_ent, M_TCPHPTS);
+ free(pace->cts_last_ran, M_TCPHPTS);
#ifdef SMP
- free(tcp_pace.grps, M_TCPHPTS);
+ free(pace->grps, M_TCPHPTS);
#endif
+ /* Free the main structure */
+ free(pace, M_TCPHPTS);
+}
+
+static int
+tcp_hpts_mod_load(void)
+{
+ int i;
+
+ /* Don't try to bind to NUMA domains if we don't have any */
+ if (vm_ndomains == 1 && tcp_bind_threads == 2)
+ tcp_bind_threads = 0;
+
+ /* Create the tcp_hptsi structure */
+ tcp_hptsi_pace = tcp_hptsi_create(&tcp_hptsi_default_funcs, true);
+ if (tcp_hptsi_pace == NULL)
+ return (ENOMEM);
+
+ /* Initialize global counters */
+ hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK);
+ hpts_loops = counter_u64_alloc(M_WAITOK);
+ back_tosleep = counter_u64_alloc(M_WAITOK);
+ combined_wheel_wrap = counter_u64_alloc(M_WAITOK);
+ wheel_wrap = counter_u64_alloc(M_WAITOK);
+ hpts_wake_timeout = counter_u64_alloc(M_WAITOK);
+ hpts_direct_awakening = counter_u64_alloc(M_WAITOK);
+ hpts_back_tosleep = counter_u64_alloc(M_WAITOK);
+ hpts_direct_call = counter_u64_alloc(M_WAITOK);
+ cpu_uses_flowid = counter_u64_alloc(M_WAITOK);
+ cpu_uses_random = counter_u64_alloc(M_WAITOK);
+
+ /* Start the threads */
+ tcp_hptsi_start(tcp_hptsi_pace);
+
+ /* Enable the global HPTS softclock function */
+ tcp_hpts_softclock = __tcp_run_hpts;
+
+ /* Initialize LRO HPTS */
+ tcp_lro_hpts_init();
+
+ /*
+ * If we somehow have an empty domain, fall back to choosing among all
+ * HPTS threads.
+ */
+ for (i = 0; i < vm_ndomains; i++) {
+ if (tcp_hptsi_pace->domains[i].count == 0) {
+ tcp_bind_threads = 0;
+ break;
+ }
+ }
+
+ printf("TCP HPTS started %u (%s) swi interrupt threads\n",
+ tcp_hptsi_pace->rp_num_hptss, (tcp_bind_threads == 0) ?
+	    "unbound" :
+ (tcp_bind_threads == 1 ? "per-cpu" : "per-NUMA-domain"));
+
+ return (0);
+}
+
+static void
+tcp_hpts_mod_unload(void)
+{
+ tcp_lro_hpts_uninit();
+
+ /* Disable the global HPTS softclock function */
+ atomic_store_ptr(&tcp_hpts_softclock, NULL);
+
+ tcp_hptsi_stop(tcp_hptsi_pace);
+ tcp_hptsi_destroy(tcp_hptsi_pace);
+ tcp_hptsi_pace = NULL;
+
+ /* Cleanup global counters */
counter_u64_free(hpts_hopelessly_behind);
counter_u64_free(hpts_loops);
counter_u64_free(back_tosleep);
@@ -2104,13 +2189,11 @@ tcp_hpts_mod_unload(void)
}
static int
-tcp_hpts_modevent(module_t mod, int what, void *arg)
+tcp_hpts_mod_event(module_t mod, int what, void *arg)
{
-
switch (what) {
case MOD_LOAD:
- tcp_hpts_mod_load();
- return (0);
+ return (tcp_hpts_mod_load());
case MOD_QUIESCE:
/*
* Since we are a dependency of TCP stack modules, they should
@@ -2130,7 +2213,7 @@ tcp_hpts_modevent(module_t mod, int what, void *arg)
static moduledata_t tcp_hpts_module = {
.name = "tcphpts",
- .evhand = tcp_hpts_modevent,
+ .evhand = tcp_hpts_mod_event,
};
DECLARE_MODULE(tcphpts, tcp_hpts_module, SI_SUB_SOFTINTR, SI_ORDER_ANY);
diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
index 6172baf2a062..6b05f9701ac2 100644
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -28,19 +28,11 @@
/* Number of useconds represented by an hpts slot */
#define HPTS_USECS_PER_SLOT 10
-#define HPTS_MS_TO_SLOTS(x) ((x * 100) + 1)
-#define HPTS_USEC_TO_SLOTS(x) ((x+9) /10)
#define HPTS_USEC_IN_SEC 1000000
#define HPTS_MSEC_IN_SEC 1000
#define HPTS_USEC_IN_MSEC 1000
static inline uint32_t
-tcp_tv_to_hpts_slot(const struct timeval *sv)
-{
- return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_USECS_PER_SLOT));
-}
-
-static inline uint32_t
tcp_tv_to_usec(const struct timeval *sv)
{
return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec));
@@ -66,7 +58,7 @@ struct hpts_diag {
uint32_t p_runningslot; /* bbr->inflight */
uint32_t slot_req; /* bbr->flex3 x */
uint32_t inp_hptsslot; /* bbr->flex4 x */
- uint32_t slot_remaining; /* bbr->flex5 x */
+ uint32_t time_remaining; /* bbr->flex5 x */
uint32_t have_slept; /* bbr->epoch x */
uint32_t hpts_sleep_time; /* bbr->applimited x */
uint32_t yet_to_sleep; /* bbr->lt_epoch x */
@@ -75,8 +67,6 @@ struct hpts_diag {
uint32_t maxslots; /* bbr->delRate x */
uint32_t wheel_cts; /* bbr->rttProp x */
int32_t co_ret; /* bbr->pkts_out x */
- uint32_t p_curtick; /* upper bbr->cur_del_rate */
- uint32_t p_lasttick; /* lower bbr->cur_del_rate */
uint8_t p_on_min_sleep; /* bbr->flex8 x */
};
@@ -92,13 +82,18 @@ struct hpts_diag {
#ifdef _KERNEL
+extern struct tcp_hptsi *tcp_hptsi_pace;
+
/*
* The following are the definitions for the kernel HPTS interface for managing
* the HPTS ring and the TCBs on it.
*/
-void tcp_hpts_init(struct tcpcb *);
-void tcp_hpts_remove(struct tcpcb *);
+void __tcp_hpts_init(struct tcp_hptsi *pace, struct tcpcb *);
+#define tcp_hpts_init(tp) __tcp_hpts_init(tcp_hptsi_pace, tp)
+
+void __tcp_hpts_remove(struct tcp_hptsi *pace, struct tcpcb *);
+#define tcp_hpts_remove(tp) __tcp_hpts_remove(tcp_hptsi_pace, tp)
static inline bool
tcp_in_hpts(struct tcpcb *tp)
@@ -132,12 +127,13 @@ tcp_in_hpts(struct tcpcb *tp)
* that INP_WLOCK() or from destroying your TCB where again
* you should already have the INP_WLOCK().
*/
-uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line,
- struct hpts_diag *diag);
-#define tcp_hpts_insert(inp, slot) \
- tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL)
+void __tcp_hpts_insert(struct tcp_hptsi *pace, struct tcpcb *tp, uint32_t usecs,
+ struct hpts_diag *diag);
+#define tcp_hpts_insert(tp, usecs, diag) \
+ __tcp_hpts_insert(tcp_hptsi_pace, (tp), (usecs), (diag))
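+
+/*
+ * Note: the pacing delay is now expressed in microseconds, not wheel slots;
+ * e.g. tcp_hpts_insert(tp, 250, NULL) requests a wakeup roughly 250 usec out
+ * (25 slots at the 10 usec slot granularity, rounded up).
+ */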
-void tcp_set_hpts(struct tcpcb *tp);
+void __tcp_set_hpts(struct tcp_hptsi *pace, struct tcpcb *tp);
+#define tcp_set_hpts(tp) __tcp_set_hpts(tcp_hptsi_pace, tp)
extern int32_t tcp_min_hptsi_time;
@@ -147,17 +143,6 @@ get_hpts_min_sleep_time(void)
return (tcp_min_hptsi_time + HPTS_USECS_PER_SLOT);
}
-static inline uint32_t
-tcp_gethptstick(struct timeval *sv)
-{
- struct timeval tv;
-
- if (sv == NULL)
- sv = &tv;
- microuptime(sv);
- return (tcp_tv_to_hpts_slot(sv));
-}
-
static inline uint64_t
tcp_get_u64_usecs(struct timeval *tv)
{
@@ -180,12 +165,5 @@ tcp_get_usecs(struct timeval *tv)
return (tcp_tv_to_usec(tv));
}
-/*
- * LRO HPTS initialization and uninitialization, only for internal use by the
- * HPTS code.
- */
-void tcp_lro_hpts_init(void);
-void tcp_lro_hpts_uninit(void);
-
#endif /* _KERNEL */
#endif /* __tcp_hpts_h__ */
diff --git a/sys/netinet/tcp_hpts_internal.h b/sys/netinet/tcp_hpts_internal.h
new file mode 100644
index 000000000000..8b33e03a6981
--- /dev/null
+++ b/sys/netinet/tcp_hpts_internal.h
@@ -0,0 +1,184 @@
+/*-
+ * Copyright (c) 2025 Netflix, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __tcp_hpts_internal_h__
+#define __tcp_hpts_internal_h__
+
+/*
+ * TCP High Precision Timer System (HPTS) - Internal Definitions
+ *
+ * This header contains internal structures, constants, and interfaces that are
+ * implemented in tcp_hpts.c but exposed to enable comprehensive unit testing of
+ * the HPTS subsystem.
+ */
+
+#if defined(_KERNEL)
+
+/*
+ * The hpts uses a 102400-slot wheel. The wheel
+ * defines the time in 10 usec increments (102400 x 10).
+ * This gives a range of 10 usec - 1024 ms to place
+ * an entry within. If the user requests more than
+ * 1.024 seconds, a remainder is attached and the hpts,
+ * when seeing the remainder, will re-insert the
+ * inpcb forward in time from where it is until
+ * the remainder is zero.
+ */
+
+#define NUM_OF_HPTSI_SLOTS 102400
+
+/* The number of connections after which the dynamic sleep logic kicks in. */
+#define DEFAULT_CONNECTION_THRESHOLD 100
+
+/* Convert microseconds to HPTS slots (10 usec each), rounding up. */
+#define HPTS_USEC_TO_SLOTS(x) (((x) + 9) / 10)
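+/* e.g. HPTS_USEC_TO_SLOTS(25) == 3: 25 usec rounds up to three 10 usec slots. */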
+
+extern int tcp_bind_threads; /* Thread binding configuration
+ * (0=none, 1=cpu, 2=numa) */
+
+/*
+ * Abstraction layer controlling time, interrupts and callouts.
+ */
+struct tcp_hptsi_funcs {
+ void (*microuptime)(struct timeval *tv);
+ int (*swi_add)(struct intr_event **eventp, const char *name,
+ driver_intr_t handler, void *arg, int pri, enum intr_type flags,
+ void **cookiep);
+ int (*swi_remove)(void *cookie);
+ void (*swi_sched)(void *cookie, int flags);
+ int (*intr_event_bind)(struct intr_event *ie, int cpu);
+ int (*intr_event_bind_ithread_cpuset)(struct intr_event *ie,
+ struct _cpuset *mask);
+ void (*callout_init)(struct callout *c, int mpsafe);
+ int (*callout_reset_sbt_on)(struct callout *c, sbintime_t sbt,
+ sbintime_t precision, void (*func)(void *), void *arg, int cpu,
+ int flags);
+ int (*_callout_stop_safe)(struct callout *c, int flags);
+};
+
+/* Default function table for system operation */
+extern const struct tcp_hptsi_funcs tcp_hptsi_default_funcs;
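+
+/*
+ * Unit tests can substitute their own table (see tcp_hpts_test.c) to
+ * virtualize time, software interrupts and callouts.
+ */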
+
+/* Each hpts has its own p_mtx which is used for locking */
+#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED)
+#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx)
+#define HPTS_TRYLOCK(hpts) mtx_trylock(&(hpts)->p_mtx)
+#define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx)
+
+struct tcp_hpts_entry {
+ /* Cache line 0x00 */
+ struct mtx p_mtx; /* Mutex for hpts */
+ struct timeval p_mysleep; /* Our min sleep time */
+ uint64_t syscall_cnt;
+ uint64_t sleeping; /* What the actual sleep was (if sleeping) */
+ uint16_t p_hpts_active; /* Flag that says hpts is awake */
+ uint8_t p_wheel_complete; /* have we completed the wheel arc walk? */
+ uint32_t p_runningslot; /* Current slot we are at if we are running */
+ uint32_t p_prev_slot; /* Previous slot we were on */
+ uint32_t p_cur_slot; /* Current slot in wheel hpts is draining */
+ uint32_t p_nxt_slot; /* The next slot outside the current range
+ * of slots that the hpts is running on. */
+ int32_t p_on_queue_cnt; /* Count on queue in this hpts */
+ uint8_t p_direct_wake :1, /* boolean */
+ p_on_min_sleep:1, /* boolean */
+ p_hpts_wake_scheduled:1,/* boolean */
+ hit_callout_thresh:1,
+ p_avail:4;
+ uint8_t p_fill[3]; /* Fill to 32 bits */
+ /* Cache line 0x40 */
+ struct hptsh {
+ TAILQ_HEAD(, tcpcb) head;
+ uint32_t count;
+ uint32_t gencnt;
+ } *p_hptss; /* Hptsi wheel */
+ uint32_t p_hpts_sleep_time; /* Current sleep interval having a max
+ * of 255ms */
+	uint32_t overidden_sleep;	/* what was overridden by min-sleep, for logging */
+ uint32_t saved_curslot; /* for logging */
+ uint32_t saved_prev_slot; /* for logging */
+ uint32_t p_delayed_by; /* How much were we delayed by */
+ /* Cache line 0x80 */
+ struct sysctl_ctx_list hpts_ctx;
+ struct sysctl_oid *hpts_root;
+ struct intr_event *ie;
+ void *ie_cookie;
+ uint16_t p_cpu; /* The hpts CPU */
+ struct tcp_hptsi *p_hptsi; /* Back pointer to parent hptsi structure */
+ /* There is extra space in here */
+ /* Cache line 0x100 */
+ struct callout co __aligned(CACHE_LINE_SIZE);
+} __aligned(CACHE_LINE_SIZE);
+
+struct tcp_hptsi {
+ struct cpu_group **grps;
+ struct tcp_hpts_entry **rp_ent; /* Array of hptss */
+ uint32_t *cts_last_ran;
+ uint32_t grp_cnt;
+ uint32_t rp_num_hptss; /* Number of hpts threads */
+ struct hpts_domain_info {
+ int count;
+ int cpu[MAXCPU];
+ } domains[MAXMEMDOM]; /* Per-NUMA domain CPU assignments */
+ const struct tcp_hptsi_funcs *funcs; /* Function table for testability */
+};
+
+/*
+ * Core tcp_hptsi structure manipulation functions.
+ */
+struct tcp_hptsi* tcp_hptsi_create(const struct tcp_hptsi_funcs *funcs,
+ bool enable_sysctl);
+void tcp_hptsi_destroy(struct tcp_hptsi *pace);
+void tcp_hptsi_start(struct tcp_hptsi *pace);
+void tcp_hptsi_stop(struct tcp_hptsi *pace);
+uint16_t tcp_hptsi_random_cpu(struct tcp_hptsi *pace);
+int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout);
+
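+/* Wake an hpts entry by scheduling its SWI handler, if not already scheduled. */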
+void tcp_hpts_wake(struct tcp_hpts_entry *hpts);
+
+/*
+ * LRO HPTS initialization and uninitialization, only for internal use by the
+ * HPTS code.
+ */
+void tcp_lro_hpts_init(void);
+void tcp_lro_hpts_uninit(void);
+
+#endif /* defined(_KERNEL) */
+#endif /* __tcp_hpts_internal_h__ */
diff --git a/sys/netinet/tcp_hpts_test.c b/sys/netinet/tcp_hpts_test.c
new file mode 100644
index 000000000000..c5dc9cb5b03b
--- /dev/null
+++ b/sys/netinet/tcp_hpts_test.c
@@ -0,0 +1,1682 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Netflix, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <tests/ktest.h>
+#include <sys/cdefs.h>
+#include "opt_inet.h"
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/refcount.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_hpts.h>
+#include <netinet/tcp_hpts_internal.h>
+#include <dev/tcp_log/tcp_log_dev.h>
+#include <netinet/tcp_log_buf.h>
+
+#undef tcp_hpts_init
+#undef tcp_hpts_remove
+#undef tcp_hpts_insert
+#undef tcp_set_hpts
+
+/* Custom definitions that take the tcp_hptsi */
+#define tcp_hpts_init(pace, tp) __tcp_hpts_init((pace), (tp))
+#define tcp_hpts_remove(pace, tp) __tcp_hpts_remove((pace), (tp))
+#define tcp_hpts_insert(pace, tp, usecs, diag) \
+ __tcp_hpts_insert((pace), (tp), (usecs), (diag))
+#define tcp_set_hpts(pace, tp) __tcp_set_hpts((pace), (tp))
+
+static MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts_test", "TCP hpts test");
+
+static int test_exit_on_failure = true;
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts_test, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "TCP HPTS test controls");
+SYSCTL_INT(_net_inet_tcp_hpts_test, OID_AUTO, exit_on_failure, CTLFLAG_RW,
+ &test_exit_on_failure, 0,
+ "Exit HPTS test immediately on first failure (1) or continue running all tests (0)");
+
+#define KTEST_VERIFY(x) do { \
+ if (!(x)) { \
+ KTEST_ERR(ctx, "FAIL: %s", #x); \
+ if (test_exit_on_failure) \
+ return (EINVAL); \
+ } else { \
+ KTEST_LOG(ctx, "PASS: %s", #x); \
+ } \
+} while (0)
+
+#define KTEST_EQUAL(x, y) do { \
+ if ((x) != (y)) { \
+ KTEST_ERR(ctx, "FAIL: %s != %s (%d != %d)", #x, #y, (x), (y)); \
+ if (test_exit_on_failure) \
+ return (EINVAL); \
+ } else { \
+ KTEST_LOG(ctx, "PASS: %s == %s", #x, #y); \
+ } \
+} while (0)
+
+#define KTEST_NEQUAL(x, y) do { \
+ if ((x) == (y)) { \
+ KTEST_ERR(ctx, "FAIL: %s == %s (%d == %d)", #x, #y, (x), (y)); \
+ if (test_exit_on_failure) \
+ return (EINVAL); \
+ } else { \
+ KTEST_LOG(ctx, "PASS: %s != %s", #x, #y); \
+ } \
+} while (0)
+
+#define KTEST_GREATER_THAN(x, y) do { \
+ if ((x) <= (y)) { \
+ KTEST_ERR(ctx, "FAIL: %s <= %s (%d <= %d)", #x, #y, (x), (y)); \
+ if (test_exit_on_failure) \
+ return (EINVAL); \
+ } else { \
+ KTEST_LOG(ctx, "PASS: %s > %s", #x, #y); \
+ } \
+} while (0)
+
+#define KTEST_VERIFY_RET(x, y) do { \
+ if (!(x)) { \
+ KTEST_ERR(ctx, "FAIL: %s", #x); \
+ if (test_exit_on_failure) \
+ return (y); \
+ } else { \
+ KTEST_LOG(ctx, "PASS: %s", #x); \
+ } \
+} while (0)
+
+#ifdef TCP_HPTS_KTEST
+
+static void
+dump_hpts_entry(struct ktest_test_context *ctx, struct tcp_hpts_entry *hpts)
+{
+ KTEST_LOG(ctx, "tcp_hpts_entry(%p)", hpts);
+ KTEST_LOG(ctx, " p_cur_slot: %u", hpts->p_cur_slot);
+ KTEST_LOG(ctx, " p_prev_slot: %u", hpts->p_prev_slot);
+ KTEST_LOG(ctx, " p_nxt_slot: %u", hpts->p_nxt_slot);
+ KTEST_LOG(ctx, " p_runningslot: %u", hpts->p_runningslot);
+ KTEST_LOG(ctx, " p_on_queue_cnt: %d", hpts->p_on_queue_cnt);
+ KTEST_LOG(ctx, " p_hpts_active: %u", hpts->p_hpts_active);
+ KTEST_LOG(ctx, " p_wheel_complete: %u", hpts->p_wheel_complete);
+ KTEST_LOG(ctx, " p_direct_wake: %u", hpts->p_direct_wake);
+ KTEST_LOG(ctx, " p_on_min_sleep: %u", hpts->p_on_min_sleep);
+ KTEST_LOG(ctx, " p_hpts_wake_scheduled: %u", hpts->p_hpts_wake_scheduled);
+ KTEST_LOG(ctx, " hit_callout_thresh: %u", hpts->hit_callout_thresh);
+ KTEST_LOG(ctx, " p_hpts_sleep_time: %u", hpts->p_hpts_sleep_time);
+ KTEST_LOG(ctx, " p_delayed_by: %u", hpts->p_delayed_by);
+ KTEST_LOG(ctx, " overidden_sleep: %u", hpts->overidden_sleep);
+ KTEST_LOG(ctx, " saved_curslot: %u", hpts->saved_curslot);
+ KTEST_LOG(ctx, " saved_prev_slot: %u", hpts->saved_prev_slot);
+ KTEST_LOG(ctx, " syscall_cnt: %lu", hpts->syscall_cnt);
+ KTEST_LOG(ctx, " sleeping: %lu", hpts->sleeping);
+ KTEST_LOG(ctx, " p_cpu: %u", hpts->p_cpu);
+ KTEST_LOG(ctx, " ie_cookie: %p", hpts->ie_cookie);
+ KTEST_LOG(ctx, " p_hptsi: %p", hpts->p_hptsi);
+ KTEST_LOG(ctx, " p_mysleep: %ld.%06ld", hpts->p_mysleep.tv_sec, hpts->p_mysleep.tv_usec);
+}
+
+static void
+dump_tcpcb(struct tcpcb *tp)
+{
+ struct ktest_test_context *ctx = tp->t_fb_ptr;
+ struct inpcb *inp = &tp->t_inpcb;
+
+ KTEST_LOG(ctx, "tcp_control_block(%p)", tp);
+
+ /* HPTS-specific fields */
+ KTEST_LOG(ctx, " t_in_hpts: %d", tp->t_in_hpts);
+ KTEST_LOG(ctx, " t_hpts_cpu: %u", tp->t_hpts_cpu);
+ KTEST_LOG(ctx, " t_hpts_slot: %d", tp->t_hpts_slot);
+ KTEST_LOG(ctx, " t_hpts_gencnt: %u", tp->t_hpts_gencnt);
+ KTEST_LOG(ctx, " t_hpts_request: %u", tp->t_hpts_request);
+
+ /* LRO CPU field */
+ KTEST_LOG(ctx, " t_lro_cpu: %u", tp->t_lro_cpu);
+
+ /* TCP flags that affect HPTS */
+ KTEST_LOG(ctx, " t_flags2: 0x%x", tp->t_flags2);
+ KTEST_LOG(ctx, " TF2_HPTS_CPU_SET: %s", (tp->t_flags2 & TF2_HPTS_CPU_SET) ? "YES" : "NO");
+ KTEST_LOG(ctx, " TF2_HPTS_CALLS: %s", (tp->t_flags2 & TF2_HPTS_CALLS) ? "YES" : "NO");
+ KTEST_LOG(ctx, " TF2_SUPPORTS_MBUFQ: %s", (tp->t_flags2 & TF2_SUPPORTS_MBUFQ) ? "YES" : "NO");
+
+ /* Input PCB fields that HPTS uses */
+ KTEST_LOG(ctx, " inp_flags: 0x%x", inp->inp_flags);
+ KTEST_LOG(ctx, " INP_DROPPED: %s", (inp->inp_flags & INP_DROPPED) ? "YES" : "NO");
+ KTEST_LOG(ctx, " inp_flowid: 0x%x", inp->inp_flowid);
+ KTEST_LOG(ctx, " inp_flowtype: %u", inp->inp_flowtype);
+ KTEST_LOG(ctx, " inp_numa_domain: %d", inp->inp_numa_domain);
+}
+
+/* Enum for call counting indices */
+enum test_call_counts {
+ CCNT_MICROUPTIME = 0,
+ CCNT_SWI_ADD,
+ CCNT_SWI_REMOVE,
+ CCNT_SWI_SCHED,
+ CCNT_INTR_EVENT_BIND,
+ CCNT_INTR_EVENT_BIND_CPUSET,
+ CCNT_CALLOUT_INIT,
+ CCNT_CALLOUT_RESET_SBT_ON,
+ CCNT_CALLOUT_STOP_SAFE,
+ CCNT_TCP_OUTPUT,
+ CCNT_TCP_TFB_DO_QUEUED_SEGMENTS,
+ CCNT_MAX
+};
+
+static uint32_t call_counts[CCNT_MAX];
+
+static uint64_t test_time_usec = 0;
+
+/*
+ * Reset all test global variables to a clean state.
+ */
+static void
+test_hpts_init(void)
+{
+ memset(call_counts, 0, sizeof(call_counts));
+ test_time_usec = 0;
+}
+
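+/*
+ * Mock microuptime(): report the test-controlled clock (test_time_usec)
+ * so tests can advance time deterministically.
+ */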
+static void
+test_microuptime(struct timeval *tv)
+{
+ call_counts[CCNT_MICROUPTIME]++;
+ tv->tv_sec = test_time_usec / 1000000;
+ tv->tv_usec = test_time_usec % 1000000;
+}
+
+static int
+test_swi_add(struct intr_event **eventp, const char *name,
+ driver_intr_t handler, void *arg, int pri, enum intr_type flags,
+ void **cookiep)
+{
+ call_counts[CCNT_SWI_ADD]++;
+ /* Simulate successful SWI creation */
+ *eventp = (struct intr_event *)0xfeedface; /* Mock event */
+ *cookiep = (void *)0xdeadbeef; /* Mock cookie */
+ return (0);
+}
+
+static int
+test_swi_remove(void *cookie)
+{
+ call_counts[CCNT_SWI_REMOVE]++;
+ /* Simulate successful removal */
+ return (0);
+}
+
+static void
+test_swi_sched(void *cookie, int flags)
+{
+ call_counts[CCNT_SWI_SCHED]++;
+ /* Simulate successful SWI scheduling */
+}
+
+static int
+test_intr_event_bind(struct intr_event *ie, int cpu)
+{
+ call_counts[CCNT_INTR_EVENT_BIND]++;
+ /* Simulate successful binding */
+ return (0);
+}
+
+static int
+test_intr_event_bind_ithread_cpuset(struct intr_event *ie, struct _cpuset *mask)
+{
+ call_counts[CCNT_INTR_EVENT_BIND_CPUSET]++;
+ /* Simulate successful cpuset binding */
+ return (0);
+}
+
+static void
+test_callout_init(struct callout *c, int mpsafe)
+{
+ call_counts[CCNT_CALLOUT_INIT]++;
+ memset(c, 0, sizeof(*c));
+}
+
+static int
+test_callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
+ void (*func)(void *), void *arg, int cpu, int flags)
+{
+ call_counts[CCNT_CALLOUT_RESET_SBT_ON]++;
+ /* Return 1 to simulate successful timer scheduling */
+ return (1);
+}
+
+static int
+test_callout_stop_safe(struct callout *c, int flags)
+{
+ call_counts[CCNT_CALLOUT_STOP_SAFE]++;
+ /* Return 1 to simulate successful timer stopping */
+ return (1);
+}
+
+static const struct tcp_hptsi_funcs test_funcs = {
+ .microuptime = test_microuptime,
+ .swi_add = test_swi_add,
+ .swi_remove = test_swi_remove,
+ .swi_sched = test_swi_sched,
+ .intr_event_bind = test_intr_event_bind,
+ .intr_event_bind_ithread_cpuset = test_intr_event_bind_ithread_cpuset,
+ .callout_init = test_callout_init,
+ .callout_reset_sbt_on = test_callout_reset_sbt_on,
+ ._callout_stop_safe = test_callout_stop_safe,
+};
+
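+/*
+ * Per-connection test controls stashed in otherwise unused tcpcb fields:
+ * bit 0 of TP_REMOVE_FROM_HPTS makes the mock output path remove the tp
+ * from the wheel, bit 1 makes it return an error (unlocking the inp as
+ * the real paths do on error); TP_LOG_TEST enables verbose dumping.
+ */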
+#define TP_REMOVE_FROM_HPTS(tp) tp->bits_spare
+#define TP_LOG_TEST(tp) tp->t_log_state_set
+
+static int
+test_tcp_output(struct tcpcb *tp)
+{
+ struct ktest_test_context *ctx = tp->t_fb_ptr;
+ struct tcp_hptsi *pace = (struct tcp_hptsi*)tp->t_tfo_pending;
+ struct tcp_hpts_entry *hpts = pace->rp_ent[tp->t_hpts_cpu];
+
+ call_counts[CCNT_TCP_OUTPUT]++;
+ if (TP_LOG_TEST(tp)) {
+ KTEST_LOG(ctx, "=> tcp_output(%p)", tp);
+ dump_tcpcb(tp);
+ dump_hpts_entry(ctx, hpts);
+ }
+
+ if ((TP_REMOVE_FROM_HPTS(tp) & 1) != 0) {
+ if (TP_LOG_TEST(tp))
+ KTEST_LOG(ctx, "=> tcp_hpts_remove(%p)", tp);
+ tcp_hpts_remove(pace, tp);
+ }
+
+ if ((TP_REMOVE_FROM_HPTS(tp) & 2) != 0) {
+ INP_WUNLOCK(&tp->t_inpcb); /* tcp_output unlocks on error */
+ return (-1); /* Simulate tcp_output error */
+ }
+
+ return (0);
+}
+
+static int
+test_tfb_do_queued_segments(struct tcpcb *tp, int flag)
+{
+ struct ktest_test_context *ctx = tp->t_fb_ptr;
+ struct tcp_hptsi *pace = (struct tcp_hptsi*)tp->t_tfo_pending;
+ struct tcp_hpts_entry *hpts = pace->rp_ent[tp->t_hpts_cpu];
+
+ call_counts[CCNT_TCP_TFB_DO_QUEUED_SEGMENTS]++;
+ KTEST_LOG(ctx, "=> tfb_do_queued_segments(%p, %d)", tp, flag);
+ dump_tcpcb(tp);
+ dump_hpts_entry(ctx, hpts);
+
+ if ((TP_REMOVE_FROM_HPTS(tp) & 1) != 0) {
+ if (TP_LOG_TEST(tp))
+ KTEST_LOG(ctx, "=> tcp_hpts_remove(%p)", tp);
+ tcp_hpts_remove(pace, tp);
+ }
+
+ if ((TP_REMOVE_FROM_HPTS(tp) & 2) != 0) {
+ INP_WUNLOCK(&tp->t_inpcb); /* do_queued_segments unlocks on error */
+ return (-1); /* Simulate do_queued_segments error */
+ }
+
+ return (0);
+}
+
+static struct tcp_function_block test_tcp_fb = {
+ .tfb_tcp_block_name = "hpts_test_tcp",
+ .tfb_tcp_output = test_tcp_output,
+ .tfb_do_queued_segments = test_tfb_do_queued_segments,
+};
+
+/*
+ * Create a minimally initialized tcpcb that can be safely inserted into HPTS.
+ * This function allocates and initializes all the fields that HPTS code
+ * reads or writes.
+ */
+static struct tcpcb *
+test_hpts_create_tcpcb(struct ktest_test_context *ctx, struct tcp_hptsi *pace)
+{
+ struct tcpcb *tp;
+
+ tp = malloc(sizeof(struct tcpcb), M_TCPHPTS, M_WAITOK | M_ZERO);
+ if (tp) {
+ rw_init_flags(&tp->t_inpcb.inp_lock, "test-inp",
+ RW_RECURSE | RW_DUPOK);
+ refcount_init(&tp->t_inpcb.inp_refcount, 1);
+ tp->t_inpcb.inp_pcbinfo = &V_tcbinfo;
+ tp->t_fb = &test_tcp_fb;
+ tp->t_hpts_cpu = HPTS_CPU_NONE;
+ STAILQ_INIT(&tp->t_inqueue);
+ tcp_hpts_init(pace, tp);
+
+ /* Stuff some pointers in the tcb for test purposes. */
+ tp->t_fb_ptr = ctx;
+ tp->t_tfo_pending = (unsigned int*)pace;
+ }
+
+ return (tp);
+}
+
+/*
+ * Free a test tcpcb created by test_hpts_create_tcpcb()
+ */
+static void
+test_hpts_free_tcpcb(struct tcpcb *tp)
+{
+ if (tp == NULL)
+ return;
+
+ INP_LOCK_DESTROY(&tp->t_inpcb);
+ free(tp, M_TCPHPTS);
+}
+
+/*
+ * ***********************************************
+ * * KTEST functions for testing the HPTS module *
+ * ***********************************************
+ */
+
+/*
+ * Validates that the HPTS module is properly loaded and initialized by checking
+ * that the minimum HPTS time is configured.
+ */
+KTEST_FUNC(module_load)
+{
+ test_hpts_init();
+ KTEST_NEQUAL(tcp_min_hptsi_time, 0);
+ KTEST_VERIFY(tcp_bind_threads >= 0 && tcp_bind_threads <= 2);
+ KTEST_NEQUAL(tcp_hptsi_pace, NULL);
+ return (0);
+}
+
+/*
+ * Validates the creation and destruction of tcp_hptsi structures, ensuring
+ * proper initialization of internal fields and clean destruction.
+ */
+KTEST_FUNC(hptsi_create_destroy)
+{
+ struct tcp_hptsi *pace;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ KTEST_NEQUAL(pace->rp_ent, NULL);
+ KTEST_NEQUAL(pace->cts_last_ran, NULL);
+ KTEST_VERIFY(pace->rp_num_hptss > 0);
+ KTEST_VERIFY(pace->rp_num_hptss <= MAXCPU); /* Reasonable upper bound */
+ KTEST_VERIFY(pace->grp_cnt >= 1); /* At least one group */
+ KTEST_EQUAL(pace->funcs, &test_funcs); /* Verify function pointer was set */
+
+ /* Verify individual HPTS entries are properly initialized */
+ for (uint32_t i = 0; i < pace->rp_num_hptss; i++) {
+ KTEST_NEQUAL(pace->rp_ent[i], NULL);
+ KTEST_EQUAL(pace->rp_ent[i]->p_cpu, i);
+ KTEST_EQUAL(pace->rp_ent[i]->p_hptsi, pace);
+ KTEST_EQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0);
+ }
+
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates that tcp_hptsi structures can be started and stopped properly,
+ * including verification that threads are created during start and cleaned up
+ * during stop operations.
+ */
+KTEST_FUNC(hptsi_start_stop)
+{
+ struct tcp_hptsi *pace;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+
+ tcp_hptsi_start(pace);
+
+ /* Verify that entries have threads started */
+ struct tcp_hpts_entry *hpts = pace->rp_ent[0];
+ KTEST_NEQUAL(hpts->ie_cookie, NULL); /* Should have SWI handler */
+ KTEST_EQUAL(hpts->p_hptsi, pace); /* Should point to our pace */
+
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates that multiple tcp_hptsi instances can coexist independently, with
+ * different configurations and CPU assignments without interfering with each
+ * other.
+ */
+KTEST_FUNC(hptsi_independence)
+{
+ struct tcp_hptsi *pace1, *pace2;
+ uint16_t cpu1, cpu2;
+
+ test_hpts_init();
+
+ pace1 = tcp_hptsi_create(&test_funcs, false);
+ pace2 = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace1, NULL);
+ KTEST_NEQUAL(pace2, NULL);
+ KTEST_NEQUAL(pace2->rp_ent, NULL);
+
+ cpu1 = tcp_hptsi_random_cpu(pace1);
+ cpu2 = tcp_hptsi_random_cpu(pace2);
+ KTEST_VERIFY(cpu1 < pace1->rp_num_hptss);
+ KTEST_VERIFY(cpu2 < pace2->rp_num_hptss);
+
+ /* Verify both instances have independent entry arrays */
+ KTEST_NEQUAL(pace1->rp_ent, pace2->rp_ent);
+ /* Verify each instance has a sane CPU count (they need not match) */
+ KTEST_VERIFY(pace1->rp_num_hptss > 0 && pace1->rp_num_hptss <= MAXCPU);
+ KTEST_VERIFY(pace2->rp_num_hptss > 0 && pace2->rp_num_hptss <= MAXCPU);
+
+ tcp_hptsi_destroy(pace1);
+ tcp_hptsi_destroy(pace2);
+
+ return (0);
+}
+
+/*
+ * Validates that custom function injection works correctly, ensuring that
+ * the test-specific implementations of microuptime and the other injected
+ * hooks are properly called by the HPTS system.
+ */
+KTEST_FUNC(function_injection)
+{
+ struct tcp_hptsi *pace;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ KTEST_EQUAL(pace->funcs, &test_funcs);
+ KTEST_VERIFY(call_counts[CCNT_MICROUPTIME] > 0);
+ KTEST_VERIFY(call_counts[CCNT_CALLOUT_INIT] > 0);
+
+ tcp_hptsi_start(pace);
+ KTEST_VERIFY(call_counts[CCNT_SWI_ADD] > 0);
+ KTEST_VERIFY(tcp_bind_threads == 0 ||
+ call_counts[CCNT_INTR_EVENT_BIND] > 0 ||
+ call_counts[CCNT_INTR_EVENT_BIND_CPUSET] > 0);
+ KTEST_VERIFY(call_counts[CCNT_CALLOUT_RESET_SBT_ON] > 0);
+
+ tcp_hptsi_stop(pace);
+ KTEST_VERIFY(call_counts[CCNT_CALLOUT_STOP_SAFE] > 0);
+ KTEST_VERIFY(call_counts[CCNT_SWI_REMOVE] > 0);
+
+ tcp_hptsi_destroy(pace);
+
+ /* Verify we have a reasonable balance of create/destroy calls */
+ KTEST_EQUAL(call_counts[CCNT_SWI_ADD], call_counts[CCNT_SWI_REMOVE]);
+ KTEST_VERIFY(call_counts[CCNT_CALLOUT_RESET_SBT_ON] <= call_counts[CCNT_CALLOUT_STOP_SAFE]);
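+ /*
+ * Stops may exceed resets, presumably because teardown issues a
+ * final callout_stop per entry regardless of pending resets.
+ */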
+
+ return (0);
+}
+
+/*
+ * Validates that a tcpcb can be properly initialized for HPTS compatibility,
+ * ensuring all required fields are set correctly and function pointers are
+ * valid for safe HPTS operations.
+ */
+KTEST_FUNC(tcpcb_initialization)
+{
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ /* Verify the tcpcb is properly initialized for HPTS */
+ tp = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp, NULL);
+ KTEST_NEQUAL(tp->t_fb, NULL);
+ KTEST_NEQUAL(tp->t_fb->tfb_tcp_output, NULL);
+ KTEST_NEQUAL(tp->t_fb->tfb_do_queued_segments, NULL);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE);
+ KTEST_EQUAL((tp->t_flags2 & (TF2_HPTS_CPU_SET | TF2_HPTS_CALLS)), 0);
+
+ /* Verify that HPTS-specific fields are initialized */
+ KTEST_EQUAL(tp->t_hpts_gencnt, 0);
+ KTEST_EQUAL(tp->t_hpts_slot, 0);
+ KTEST_EQUAL(tp->t_hpts_request, 0);
+ KTEST_EQUAL(tp->t_lro_cpu, 0);
+ KTEST_VERIFY(tp->t_hpts_cpu < pace->rp_num_hptss);
+ KTEST_EQUAL(tp->t_inpcb.inp_refcount, 1);
+ KTEST_VERIFY(!(tp->t_inpcb.inp_flags & INP_DROPPED));
+
+ test_hpts_free_tcpcb(tp);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates that tcpcb structures can be successfully inserted into and removed
+ * from the HPTS wheel, with proper state tracking and slot assignment during
+ * the process.
+ */
+KTEST_FUNC(tcpcb_insertion)
+{
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp;
+ struct tcp_hpts_entry *hpts;
+ uint32_t timeout_usecs = 10;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ tp = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp, NULL);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE);
+ KTEST_EQUAL((tp->t_flags2 & TF2_HPTS_CALLS), 0);
+
+ INP_WLOCK(&tp->t_inpcb);
+ tp->t_flags2 |= TF2_HPTS_CALLS;
+ KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 0);
+ tcp_hpts_insert(pace, tp, timeout_usecs, NULL);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE);
+ INP_WUNLOCK(&tp->t_inpcb);
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0);
+ KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 1);
+ KTEST_VERIFY(tcp_in_hpts(tp));
+ KTEST_VERIFY(tp->t_hpts_slot >= 0);
+ KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS);
+
+ hpts = pace->rp_ent[tp->t_hpts_cpu];
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 1);
+ KTEST_EQUAL(tp->t_hpts_request, 0);
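+ /*
+ * A 10 usec timeout fits on the wheel: no residual request and,
+ * assuming 10 usec slots, it maps to slot 1.
+ */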
+ KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(timeout_usecs));
+ //KTEST_EQUAL(tp->t_hpts_gencnt, 1);
+
+ INP_WLOCK(&tp->t_inpcb);
+ tcp_hpts_remove(pace, tp);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE);
+ INP_WUNLOCK(&tp->t_inpcb);
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0);
+ KTEST_VERIFY(!tcp_in_hpts(tp));
+
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 0);
+
+ test_hpts_free_tcpcb(tp);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates the core HPTS timer functionality by verifying that scheduled
+ * tcpcb entries trigger tcp_output calls at appropriate times, simulating
+ * real-world timer-driven TCP processing.
+ */
+KTEST_FUNC(timer_functionality)
+{
+ struct epoch_tracker et;
+ struct tcp_hptsi *pace;
+ struct tcp_hpts_entry *hpts;
+ struct tcpcb *tp;
+ int32_t slots_ran;
+ uint32_t i;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ for (i = 0; i < pace->rp_num_hptss; i++)
+ dump_hpts_entry(ctx, pace->rp_ent[i]);
+
+ /* Create and insert the tcpcb into the HPTS wheel to wait for 500 usec */
+ tp = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp, NULL);
+ dump_tcpcb(tp);
+ TP_LOG_TEST(tp) = 1; /* Enable logging for this tcpcb */
+
+ KTEST_LOG(ctx, "=> tcp_hpts_insert(%p)", tp);
+ INP_WLOCK(&tp->t_inpcb);
+ tp->t_flags2 |= TF2_HPTS_CALLS; /* Mark as needing HPTS processing */
+ tcp_hpts_insert(pace, tp, 500, NULL);
+ INP_WUNLOCK(&tp->t_inpcb);
+
+ dump_tcpcb(tp);
+ for (i = 0; i < pace->rp_num_hptss; i++)
+ dump_hpts_entry(ctx, pace->rp_ent[i]);
+
+ hpts = pace->rp_ent[tp->t_hpts_cpu];
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 1);
+ KTEST_EQUAL(hpts->p_prev_slot, 0);
+ KTEST_EQUAL(hpts->p_cur_slot, 0);
+ KTEST_EQUAL(hpts->p_runningslot, 0);
+ KTEST_EQUAL(hpts->p_nxt_slot, 1);
+ KTEST_EQUAL(hpts->p_hpts_active, 0);
+
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE);
+ KTEST_EQUAL(tp->t_hpts_request, 0);
+ KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(500));
+
+ /*
+ * Set our test flag to indicate the tcpcb should be removed from the
+ * wheel when tcp_output is called.
+ */
+ TP_REMOVE_FROM_HPTS(tp) = 1;
+
+ /* Test early exit condition: advance time by insufficient amount */
+ KTEST_LOG(ctx, "Testing early exit with insufficient time advancement");
+ test_time_usec += 1; /* Very small advancement - should cause early exit */
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ /* Should return 0 slots due to insufficient time advancement */
+ KTEST_EQUAL(slots_ran, 0);
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0); /* No processing should occur */
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE); /* Connection still queued */
+
+ /*
+ * Advance 498 more usec and run the HPTS workers, verifying that
+ * nothing happens yet (total 499 usec).
+ */
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0);
+ test_time_usec += 498;
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ KTEST_LOG(ctx, "=> tcp_hptsi(%p)", pace->rp_ent[i]);
+ HPTS_LOCK(pace->rp_ent[i]);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(pace->rp_ent[i], true);
+ HPTS_UNLOCK(pace->rp_ent[i]);
+ NET_EPOCH_EXIT(et);
+
+ dump_hpts_entry(ctx, pace->rp_ent[i]);
+ KTEST_VERIFY(slots_ran >= 0);
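+ /*
+ * At 499 usec of virtual time the wheel sits on slot 49, one
+ * short of the tcpcb's slot 50 (assuming 10 usec slots).
+ */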
+ KTEST_EQUAL(pace->rp_ent[i]->p_prev_slot, 49);
+ KTEST_EQUAL(pace->rp_ent[i]->p_cur_slot, 49);
+ }
+
+ dump_tcpcb(tp);
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE);
+ KTEST_EQUAL(tp->t_hpts_request, 0);
+ KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(500));
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 1);
+
+ /*
+ * Advance 1 more usec (total 500 usec) and run the HPTS workers,
+ * verifying that tcp_output is triggered this time.
+ */
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 0);
+ test_time_usec += 1;
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ KTEST_LOG(ctx, "=> tcp_hptsi(%p)", pace->rp_ent[i]);
+ HPTS_LOCK(pace->rp_ent[i]);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(pace->rp_ent[i], true);
+ HPTS_UNLOCK(pace->rp_ent[i]);
+ NET_EPOCH_EXIT(et);
+
+ dump_hpts_entry(ctx, pace->rp_ent[i]);
+ KTEST_VERIFY(slots_ran >= 0);
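+ /*
+ * At 500 usec the wheel reaches slot 50, so the tcpcb's slot is
+ * processed on this pass (assuming 10 usec slots).
+ */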
+ KTEST_EQUAL(pace->rp_ent[i]->p_prev_slot, 50);
+ KTEST_EQUAL(pace->rp_ent[i]->p_cur_slot, 50);
+ }
+
+ dump_tcpcb(tp);
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE);
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 0);
+
+ test_hpts_free_tcpcb(tp);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates HPTS scalability by creating and inserting a LOT of tcpcbs into
+ * the HPTS wheel, testing performance under high load conditions.
+ */
+KTEST_FUNC(scalability_tcpcbs)
+{
+ struct tcp_hptsi *pace;
+ struct tcpcb **tcpcbs;
+ uint32_t i, num_tcpcbs = 100000, total_queued = 0;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ /* Allocate array to hold pointers to all tcpcbs */
+ tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO);
+ KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM);
+
+ /* Create a LOT of tcpcbs */
+ KTEST_LOG(ctx, "Creating %u tcpcbs...", num_tcpcbs);
+ for (i = 0; i < num_tcpcbs; i++) {
+ tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace);
+ if (tcpcbs[i] == NULL) {
+ KTEST_ERR(ctx, "FAIL: tcpcbs[i] == NULL");
+ return (EINVAL);
+ }
+ }
+
+ /* Insert all created tcpcbs into HPTS */
+ KTEST_LOG(ctx, "Inserting all tcpcbs into HPTS...");
+ for (i = 0; i < num_tcpcbs; i++) {
+ INP_WLOCK(&tcpcbs[i]->t_inpcb);
+ tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS;
+ /* Insert with varying future timeouts to distribute across slots */
+ tcp_hpts_insert(pace, tcpcbs[i], 100 + (i % 1000), NULL);
+ INP_WUNLOCK(&tcpcbs[i]->t_inpcb);
+ }
+
+ /* Verify total queue counts across all CPUs */
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ total_queued += pace->rp_ent[i]->p_on_queue_cnt;
+ }
+ KTEST_EQUAL(total_queued, num_tcpcbs);
+
+ for (i = 0; i < pace->rp_num_hptss; i++)
+ dump_hpts_entry(ctx, pace->rp_ent[i]);
+
+ /* Remove all tcpcbs from HPTS */
+ KTEST_LOG(ctx, "Removing all tcpcbs from HPTS...");
+ for (i = 0; i < num_tcpcbs; i++) {
+ INP_WLOCK(&tcpcbs[i]->t_inpcb);
+ if (tcpcbs[i]->t_in_hpts != IHPTS_NONE) {
+ tcp_hpts_remove(pace, tcpcbs[i]);
+ }
+ INP_WUNLOCK(&tcpcbs[i]->t_inpcb);
+ }
+
+ /* Verify all queues are now empty */
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ if (pace->rp_ent[i]->p_on_queue_cnt != 0) {
+ KTEST_ERR(ctx, "FAIL: pace->rp_ent[i]->p_on_queue_cnt != 0");
+ return (EINVAL);
+ }
+ }
+
+ for (i = 0; i < num_tcpcbs; i++) {
+ test_hpts_free_tcpcb(tcpcbs[i]);
+ }
+ free(tcpcbs, M_TCPHPTS);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates wheel wrap scenarios where the timer falls significantly behind
+ * and needs to process more than one full wheel revolution worth of slots.
+ */
+KTEST_FUNC(wheel_wrap_recovery)
+{
+ struct epoch_tracker et;
+ struct tcp_hptsi *pace;
+ struct tcpcb **tcpcbs;
+ uint32_t i, timeout_usecs, num_tcpcbs = 500;
+ int32_t slots_ran;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ /* Allocate array to hold pointers to tcpcbs */
+ tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO);
+ KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM);
+
+ /* Create tcpcbs and insert them across many slots */
+ for (i = 0; i < num_tcpcbs; i++) {
+ tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tcpcbs[i], NULL);
+ TP_REMOVE_FROM_HPTS(tcpcbs[i]) = 1;
+
+ timeout_usecs = ((i * NUM_OF_HPTSI_SLOTS) / num_tcpcbs) * HPTS_USECS_PER_SLOT; /* Spread across slots */
+
+ INP_WLOCK(&tcpcbs[i]->t_inpcb);
+ tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tcpcbs[i], timeout_usecs, NULL);
+ INP_WUNLOCK(&tcpcbs[i]->t_inpcb);
+ }
+
+ /* Fast forward time significantly to trigger wheel wrap */
+ test_time_usec += (NUM_OF_HPTSI_SLOTS + 5000) * HPTS_USECS_PER_SLOT;
+
+ for (i = 0; i < pace->rp_num_hptss; i++) {
+ KTEST_LOG(ctx, "=> tcp_hptsi(%u)", i);
+ KTEST_NEQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0);
+
+ HPTS_LOCK(pace->rp_ent[i]);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(pace->rp_ent[i], true);
+ HPTS_UNLOCK(pace->rp_ent[i]);
+ NET_EPOCH_EXIT(et);
+
+ KTEST_EQUAL(slots_ran, NUM_OF_HPTSI_SLOTS-1); /* Should process all slots */
+ KTEST_EQUAL(pace->rp_ent[i]->p_on_queue_cnt, 0);
+ KTEST_NEQUAL(pace->rp_ent[i]->p_cur_slot,
+ pace->rp_ent[i]->p_prev_slot);
+ }
+
+ /* Cleanup */
+ for (i = 0; i < num_tcpcbs; i++) {
+ INP_WLOCK(&tcpcbs[i]->t_inpcb);
+ if (tcpcbs[i]->t_in_hpts != IHPTS_NONE) {
+ tcp_hpts_remove(pace, tcpcbs[i]);
+ }
+ INP_WUNLOCK(&tcpcbs[i]->t_inpcb);
+ test_hpts_free_tcpcb(tcpcbs[i]);
+ }
+ free(tcpcbs, M_TCPHPTS);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates proper handling of tcpcbs in the IHPTS_MOVING state, which occurs
+ * when a tcpcb is being processed by the HPTS thread but gets removed.
+ */
+KTEST_FUNC(tcpcb_moving_state)
+{
+ struct epoch_tracker et;
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp1, *tp2;
+ struct tcp_hpts_entry *hpts;
+ int32_t slots_ran;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ /* Create two tcpcbs on the same CPU/slot */
+ tp1 = test_hpts_create_tcpcb(ctx, pace);
+ tp2 = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp1, NULL);
+ KTEST_NEQUAL(tp2, NULL);
+
+ /* Force them to the same CPU for predictable testing */
+ tp1->t_hpts_cpu = 0;
+ tp2->t_hpts_cpu = 0;
+
+ /* Insert both into the same slot */
+ INP_WLOCK(&tp1->t_inpcb);
+ tp1->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp1, 100, NULL);
+ INP_WUNLOCK(&tp1->t_inpcb);
+
+ INP_WLOCK(&tp2->t_inpcb);
+ tp2->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp2, 100, NULL);
+ INP_WUNLOCK(&tp2->t_inpcb);
+
+ hpts = pace->rp_ent[0];
+
+ /* Manually transition tp1 to MOVING state to simulate race condition */
+ HPTS_LOCK(hpts);
+ tp1->t_in_hpts = IHPTS_MOVING;
+ tp1->t_hpts_slot = -1; /* Mark for removal */
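+ /*
+ * In IHPTS_MOVING with t_hpts_slot == -1, HPTS should simply
+ * release tp1 instead of calling its output hook; only tp2 is
+ * expected to be processed normally.
+ */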
+ HPTS_UNLOCK(hpts);
+
+ /* Set time and run HPTS to process the moving state */
+ test_time_usec += 100;
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ KTEST_VERIFY(slots_ran >= 0);
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1); /* Only tp2 should trigger output */
+
+ /* tp1 should be cleaned up and removed */
+ KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE);
+ /* tp2 should have been processed normally */
+ KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE);
+
+ test_hpts_free_tcpcb(tp1);
+ test_hpts_free_tcpcb(tp2);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates that tcpcbs with deferred requests (t_hpts_request > 0) are
+ * properly handled and re-inserted into appropriate future slots after
+ * the wheel processes enough slots to accommodate the original request.
+ */
+KTEST_FUNC(deferred_requests)
+{
+ struct epoch_tracker et;
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp, *tp2;
+ struct tcp_hpts_entry *hpts;
+ uint32_t large_timeout_usecs = (NUM_OF_HPTSI_SLOTS + 5000) * HPTS_USECS_PER_SLOT; /* Beyond wheel capacity */
+ uint32_t huge_timeout_usecs = (NUM_OF_HPTSI_SLOTS * 3) * HPTS_USECS_PER_SLOT; /* 3x wheel capacity */
+ uint32_t initial_request;
+ int32_t slots_ran;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ tp = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp, NULL);
+
+ /* Insert with a request that exceeds current wheel capacity */
+ INP_WLOCK(&tp->t_inpcb);
+ tp->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp, large_timeout_usecs, NULL);
+ INP_WUNLOCK(&tp->t_inpcb);
+
+ /* Verify it was inserted with a deferred request */
+ dump_tcpcb(tp);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE);
+ KTEST_VERIFY(tp->t_hpts_request > 0);
+ KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS);
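+ /*
+ * t_hpts_slot holds the position within the current wheel
+ * revolution, while t_hpts_request carries the remainder that
+ * exceeds the wheel horizon and is re-queued on later passes.
+ */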
+
+ hpts = pace->rp_ent[tp->t_hpts_cpu];
+
+ /* Advance time to process deferred requests */
+ test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT;
+
+ /* Process the wheel to handle deferred requests */
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ dump_hpts_entry(ctx, hpts);
+ KTEST_GREATER_THAN(slots_ran, 0);
+ dump_tcpcb(tp);
+ KTEST_EQUAL(tp->t_hpts_request, 0);
+
+ /* Test incremental deferred request processing over multiple cycles */
+ KTEST_LOG(ctx, "Testing incremental deferred request processing");
+
+ /* Create a new connection with an even larger request */
+ tp2 = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp2, NULL);
+ tp2->t_hpts_cpu = tp->t_hpts_cpu; /* Same CPU for predictable testing */
+
+ INP_WLOCK(&tp2->t_inpcb);
+ tp2->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp2, huge_timeout_usecs, NULL);
+ INP_WUNLOCK(&tp2->t_inpcb);
+
+ /* Verify initial deferred request */
+ initial_request = tp2->t_hpts_request;
+ KTEST_VERIFY(initial_request > NUM_OF_HPTSI_SLOTS);
+
+ /* Process one wheel cycle - should reduce but not eliminate request */
+ test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT;
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ /* Request should be reduced but not zero */
+ KTEST_GREATER_THAN(initial_request, tp2->t_hpts_request);
+ KTEST_VERIFY(tp2->t_hpts_request > 0);
+ KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE); /* Still queued */
+
+ /*
+ * For huge_timeout_usecs = NUM_OF_HPTSI_SLOTS * 3 * HPTS_USECS_PER_SLOT,
+ * roughly 3 cycles are needed to complete, since each cycle can reduce
+ * the request by at most NUM_OF_HPTSI_SLOTS.
+ */
+ test_time_usec += NUM_OF_HPTSI_SLOTS * HPTS_USECS_PER_SLOT;
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ /* After second cycle, request should be reduced significantly (likely by ~NUM_OF_HPTSI_SLOTS) */
+ KTEST_VERIFY(tp2->t_hpts_request < initial_request);
+ KTEST_VERIFY(tp2->t_hpts_request > 0); /* But not yet zero for such a large request */
+
+ /* Clean up second connection */
+ INP_WLOCK(&tp2->t_inpcb);
+ if (tp2->t_in_hpts != IHPTS_NONE) {
+ tcp_hpts_remove(pace, tp2);
+ }
+ INP_WUNLOCK(&tp2->t_inpcb);
+ test_hpts_free_tcpcb(tp2);
+
+ /* Clean up */
+ INP_WLOCK(&tp->t_inpcb);
+ if (tp->t_in_hpts != IHPTS_NONE) {
+ tcp_hpts_remove(pace, tp);
+ }
+ INP_WUNLOCK(&tp->t_inpcb);
+ test_hpts_free_tcpcb(tp);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates CPU assignment and affinity mechanisms, including flowid-based
+ * assignment, random fallback scenarios, and explicit CPU setting. Tests
+ * the actual cpu assignment logic in hpts_cpuid via tcp_set_hpts.
+ */
+KTEST_FUNC(cpu_assignment)
+{
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp1, *tp2, *tp2_dup, *tp3;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+
+ /* Test random CPU assignment (no flowid) */
+ tp1 = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp1, NULL);
+ tp1->t_inpcb.inp_flowtype = M_HASHTYPE_NONE;
+ INP_WLOCK(&tp1->t_inpcb);
+ tcp_set_hpts(pace, tp1);
+ INP_WUNLOCK(&tp1->t_inpcb);
+ KTEST_VERIFY(tp1->t_hpts_cpu < pace->rp_num_hptss);
+ KTEST_VERIFY(tp1->t_flags2 & TF2_HPTS_CPU_SET);
+
+ /* Test flowid-based assignment */
+ tp2 = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp2, NULL);
+ tp2->t_inpcb.inp_flowtype = M_HASHTYPE_RSS_TCP_IPV4;
+ tp2->t_inpcb.inp_flowid = 12345;
+ INP_WLOCK(&tp2->t_inpcb);
+ tcp_set_hpts(pace, tp2);
+ INP_WUNLOCK(&tp2->t_inpcb);
+ KTEST_VERIFY(tp2->t_hpts_cpu < pace->rp_num_hptss);
+ KTEST_VERIFY(tp2->t_flags2 & TF2_HPTS_CPU_SET);
+
+ /* With the same flowid, should get same CPU assignment */
+ tp2_dup = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp2_dup, NULL);
+ tp2_dup->t_inpcb.inp_flowtype = M_HASHTYPE_RSS_TCP_IPV4;
+ tp2_dup->t_inpcb.inp_flowid = 12345;
+ INP_WLOCK(&tp2_dup->t_inpcb);
+ tcp_set_hpts(pace, tp2_dup);
+ INP_WUNLOCK(&tp2_dup->t_inpcb);
+ KTEST_EQUAL(tp2_dup->t_hpts_cpu, tp2->t_hpts_cpu);
+
+ /* Test explicit CPU setting */
+ tp3 = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp3, NULL);
+ tp3->t_hpts_cpu = 1; /* Assume we have at least 2 CPUs */
+ tp3->t_flags2 |= TF2_HPTS_CPU_SET;
+ INP_WLOCK(&tp3->t_inpcb);
+ tcp_set_hpts(pace, tp3);
+ INP_WUNLOCK(&tp3->t_inpcb);
+ KTEST_EQUAL(tp3->t_hpts_cpu, 1);
+
+ test_hpts_free_tcpcb(tp1);
+ test_hpts_free_tcpcb(tp2);
+ test_hpts_free_tcpcb(tp2_dup);
+ test_hpts_free_tcpcb(tp3);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates edge cases in slot calculation including boundary conditions
+ * around slot 0, maximum slots, and slot wrapping arithmetic.
+ */
+KTEST_FUNC(slot_boundary_conditions)
+{
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ /* Test insertion at slot 0 */
+ tp = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp, NULL);
+ INP_WLOCK(&tp->t_inpcb);
+ tp->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp, 0, NULL); /* Should insert immediately (0 timeout) */
+ INP_WUNLOCK(&tp->t_inpcb);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE);
+ KTEST_VERIFY(tp->t_hpts_slot < NUM_OF_HPTSI_SLOTS);
+
+ INP_WLOCK(&tp->t_inpcb);
+ tcp_hpts_remove(pace, tp);
+ INP_WUNLOCK(&tp->t_inpcb);
+
+ /* Test insertion at maximum slot value */
+ INP_WLOCK(&tp->t_inpcb);
+ tp->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp, (NUM_OF_HPTSI_SLOTS - 1) * HPTS_USECS_PER_SLOT, NULL);
+ INP_WUNLOCK(&tp->t_inpcb);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE);
+
+ INP_WLOCK(&tp->t_inpcb);
+ tcp_hpts_remove(pace, tp);
+ INP_WUNLOCK(&tp->t_inpcb);
+
+ /* Test very small timeout values */
+ INP_WLOCK(&tp->t_inpcb);
+ tp->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp, 1, NULL);
+ INP_WUNLOCK(&tp->t_inpcb);
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_ONQUEUE);
+ KTEST_EQUAL(tp->t_hpts_slot, HPTS_USEC_TO_SLOTS(1)); /* Should convert 1 usec to slot */
+
+ INP_WLOCK(&tp->t_inpcb);
+ tcp_hpts_remove(pace, tp);
+ INP_WUNLOCK(&tp->t_inpcb);
+
+ test_hpts_free_tcpcb(tp);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates HPTS behavior once the connection count exceeds the dynamic
+ * sleep adjustment threshold, including proper processing of many
+ * connections and connection count tracking.
+ */
+KTEST_FUNC(dynamic_sleep_adjustment)
+{
+ struct epoch_tracker et;
+ struct tcp_hptsi *pace;
+ struct tcpcb **tcpcbs;
+ struct tcp_hpts_entry *hpts;
+ uint32_t i, num_tcpcbs = DEFAULT_CONNECTION_THRESHOLD + 50;
+ int32_t slots_ran;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ /* Create many connections to exceed threshold */
+ tcpcbs = malloc(num_tcpcbs * sizeof(struct tcpcb *), M_TCPHPTS, M_WAITOK | M_ZERO);
+ KTEST_VERIFY_RET(tcpcbs != NULL, ENOMEM);
+
+ for (i = 0; i < num_tcpcbs; i++) {
+ tcpcbs[i] = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tcpcbs[i], NULL);
+ tcpcbs[i]->t_hpts_cpu = 0; /* Force all to CPU 0 */
+ INP_WLOCK(&tcpcbs[i]->t_inpcb);
+ tcpcbs[i]->t_flags2 |= TF2_HPTS_CALLS;
+ TP_REMOVE_FROM_HPTS(tcpcbs[i]) = 1; /* Will be removed after output */
+ tcp_hpts_insert(pace, tcpcbs[i], 100, NULL);
+ INP_WUNLOCK(&tcpcbs[i]->t_inpcb);
+ }
+
+ hpts = pace->rp_ent[0];
+ dump_hpts_entry(ctx, hpts);
+
+ /* Verify we're above threshold */
+ KTEST_GREATER_THAN(hpts->p_on_queue_cnt, DEFAULT_CONNECTION_THRESHOLD);
+
+ /* Run HPTS to process many connections */
+ test_time_usec += 100;
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ /* Verify HPTS processed slots and connections correctly */
+ KTEST_GREATER_THAN(slots_ran, 0);
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], num_tcpcbs);
+
+ /* Verify all connections were removed from queue */
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 0);
+
+ /* Cleanup */
+ for (i = 0; i < num_tcpcbs; i++) {
+ test_hpts_free_tcpcb(tcpcbs[i]);
+ }
+ free(tcpcbs, M_TCPHPTS);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates handling of concurrent insert/remove operations and race conditions
+ * between HPTS processing and user operations.
+ */
+KTEST_FUNC(concurrent_operations)
+{
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp1, *tp2;
+ struct tcp_hpts_entry *hpts;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ tp1 = test_hpts_create_tcpcb(ctx, pace);
+ tp2 = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp1, NULL);
+ KTEST_NEQUAL(tp2, NULL);
+
+ /* Force both to CPU 0 */
+ tp1->t_hpts_cpu = 0;
+ tp2->t_hpts_cpu = 0;
+
+ /* Insert tp1 */
+ INP_WLOCK(&tp1->t_inpcb);
+ tp1->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp1, 100, NULL);
+ INP_WUNLOCK(&tp1->t_inpcb);
+
+ /* Insert tp2 into same slot */
+ INP_WLOCK(&tp2->t_inpcb);
+ tp2->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp2, 100, NULL);
+ INP_WUNLOCK(&tp2->t_inpcb);
+
+ /* Verify both are inserted */
+ KTEST_EQUAL(tp1->t_in_hpts, IHPTS_ONQUEUE);
+ KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE);
+
+ /* Verify they're both assigned to the same slot */
+ KTEST_EQUAL(tp1->t_hpts_slot, tp2->t_hpts_slot);
+
+ /* Verify queue count reflects both connections */
+ KTEST_EQUAL(tp1->t_hpts_cpu, tp2->t_hpts_cpu); /* Should be on same CPU */
+ hpts = pace->rp_ent[tp1->t_hpts_cpu];
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 2);
+
+ /* Remove tp1 while tp2 is still there */
+ INP_WLOCK(&tp1->t_inpcb);
+ tcp_hpts_remove(pace, tp1);
+ INP_WUNLOCK(&tp1->t_inpcb);
+
+ /* Verify tp1 removed, tp2 still there */
+ KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE);
+ KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE);
+
+ /* Verify queue count decreased by one */
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 1);
+
+ /* Remove tp2 */
+ INP_WLOCK(&tp2->t_inpcb);
+ tcp_hpts_remove(pace, tp2);
+ INP_WUNLOCK(&tp2->t_inpcb);
+
+ KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE);
+
+ /* Verify queue is now completely empty */
+ KTEST_EQUAL(hpts->p_on_queue_cnt, 0);
+
+ test_hpts_free_tcpcb(tp1);
+ test_hpts_free_tcpcb(tp2);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates the queued segments processing path via tfb_do_queued_segments,
+ * which is an alternative to direct tcp_output calls.
+ */
+KTEST_FUNC(queued_segments_processing)
+{
+ struct epoch_tracker et;
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp;
+ struct tcp_hpts_entry *hpts;
+ struct mbuf *fake_mbuf;
+ int32_t slots_ran;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ tp = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp, NULL);
+
+ /* Create a minimal fake mbuf that has valid STAILQ pointers */
+ fake_mbuf = malloc(sizeof(struct mbuf), M_TCPHPTS, M_WAITOK | M_ZERO);
+ KTEST_NEQUAL(fake_mbuf, NULL);
+
+ /* Set up for queued segments path */
+ tp->t_flags2 |= (TF2_HPTS_CALLS | TF2_SUPPORTS_MBUFQ);
+ STAILQ_INSERT_TAIL(&tp->t_inqueue, fake_mbuf, m_stailqpkt);
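+ /*
+ * With a non-empty input queue and TF2_SUPPORTS_MBUFQ set, HPTS is
+ * expected to call tfb_do_queued_segments instead of tcp_output.
+ */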
+
+ INP_WLOCK(&tp->t_inpcb);
+ tcp_hpts_insert(pace, tp, 100, NULL);
+ INP_WUNLOCK(&tp->t_inpcb);
+
+ hpts = pace->rp_ent[tp->t_hpts_cpu];
+
+ /* Run HPTS and verify queued segments path is taken */
+ test_time_usec += 100;
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ KTEST_VERIFY(slots_ran >= 0);
+ KTEST_EQUAL(call_counts[CCNT_TCP_TFB_DO_QUEUED_SEGMENTS], 1);
+
+ /* Connection should be removed from HPTS after processing */
+ KTEST_EQUAL(tp->t_in_hpts, IHPTS_NONE);
+
+ /* Clean up the fake mbuf if it's still in the queue */
+ if (!STAILQ_EMPTY(&tp->t_inqueue)) {
+ struct mbuf *m = STAILQ_FIRST(&tp->t_inqueue);
+ STAILQ_REMOVE_HEAD(&tp->t_inqueue, m_stailqpkt);
+ free(m, M_TCPHPTS);
+ }
+
+ test_hpts_free_tcpcb(tp);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates the direct wake mechanism and wake inhibition logic when
+ * the connection count exceeds thresholds.
+ */
+KTEST_FUNC(direct_wake_mechanism)
+{
+ struct tcp_hptsi *pace;
+ struct tcpcb *tp;
+ struct tcp_hpts_entry *hpts;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ tp = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp, NULL);
+ hpts = pace->rp_ent[tp->t_hpts_cpu];
+
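+ /*
+ * The queue counts used below straddle the wake threshold: 50
+ * should permit a direct wake while 200 should inhibit it
+ * (assuming the default connection-count threshold of 100).
+ */
+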
+ /* Test direct wake when not over threshold */
+ HPTS_LOCK(hpts);
+ hpts->p_on_queue_cnt = 50; /* Below threshold */
+ hpts->p_hpts_wake_scheduled = 0;
+ tcp_hpts_wake(hpts);
+ KTEST_EQUAL(hpts->p_hpts_wake_scheduled, 1);
+ KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 1);
+ HPTS_UNLOCK(hpts);
+
+ /* Reset for next test */
+ hpts->p_hpts_wake_scheduled = 0;
+ call_counts[CCNT_SWI_SCHED] = 0;
+
+ /* Test wake inhibition when over threshold */
+ HPTS_LOCK(hpts);
+ hpts->p_on_queue_cnt = 200; /* Above threshold */
+ hpts->p_direct_wake = 1; /* Request direct wake */
+ tcp_hpts_wake(hpts);
+ KTEST_EQUAL(hpts->p_hpts_wake_scheduled, 0); /* Should be inhibited */
+ KTEST_EQUAL(hpts->p_direct_wake, 0); /* Should be cleared */
+ KTEST_EQUAL(call_counts[CCNT_SWI_SCHED], 0); /* No SWI scheduled */
+ HPTS_UNLOCK(hpts);
+
+ test_hpts_free_tcpcb(tp);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates HPTS collision detection when attempting to run HPTS while
+ * it's already active.
+ */
+KTEST_FUNC(hpts_collision_detection)
+{
+ struct epoch_tracker et;
+ struct tcp_hptsi *pace;
+ struct tcp_hpts_entry *hpts;
+ int32_t slots_ran;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ hpts = pace->rp_ent[0];
+
+ /* Mark HPTS as active */
+ HPTS_LOCK(hpts);
+ hpts->p_hpts_active = 1;
+ HPTS_UNLOCK(hpts);
+
+ /* Attempt to run HPTS again - should detect collision */
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, false); /* from_callout = false */
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ /* Should return 0 indicating no work done due to collision */
+ KTEST_EQUAL(slots_ran, 0);
+
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+/*
+ * Validates generation count handling for race condition detection between
+ * HPTS processing and connection insertion/removal operations.
+ */
+KTEST_FUNC(generation_count_validation)
+{
+ struct epoch_tracker et;
+ struct tcp_hptsi *pace;
+ struct tcp_hpts_entry *hpts;
+ struct tcpcb *tp1, *tp2;
+ uint32_t initial_gencnt, slot_to_test = 10;
+ uint32_t timeout_usecs = slot_to_test * HPTS_USECS_PER_SLOT;
+ uint32_t tp2_original_gencnt;
+ int32_t slots_ran;
+
+ test_hpts_init();
+
+ pace = tcp_hptsi_create(&test_funcs, false);
+ KTEST_NEQUAL(pace, NULL);
+ tcp_hptsi_start(pace);
+
+ hpts = pace->rp_ent[0];
+
+ /* Record initial generation count for the test slot */
+ initial_gencnt = hpts->p_hptss[slot_to_test].gencnt;
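+ /*
+ * Each wheel slot carries a generation count that is bumped when
+ * the slot is processed; a connection snapshots it at insert time
+ * so HPTS can detect stale slot references.
+ */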
+
+ /* Create and insert first connection */
+ tp1 = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp1, NULL);
+ tp1->t_hpts_cpu = 0; /* Force to CPU 0 */
+
+ INP_WLOCK(&tp1->t_inpcb);
+ tp1->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp1, timeout_usecs, NULL);
+ INP_WUNLOCK(&tp1->t_inpcb);
+
+ /* Verify connection stored the generation count */
+ KTEST_EQUAL(tp1->t_in_hpts, IHPTS_ONQUEUE);
+ KTEST_EQUAL(tp1->t_hpts_slot, slot_to_test);
+ KTEST_EQUAL(tp1->t_hpts_gencnt, initial_gencnt);
+
+ /* Create second connection but don't insert yet */
+ tp2 = test_hpts_create_tcpcb(ctx, pace);
+ KTEST_NEQUAL(tp2, NULL);
+ tp2->t_hpts_cpu = 0; /* Force to CPU 0 */
+
+ /* Force generation count increment by processing the slot */
+ test_time_usec += (slot_to_test + 1) * HPTS_USECS_PER_SLOT;
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ /* Verify processing occurred */
+ KTEST_VERIFY(slots_ran > 0);
+ KTEST_EQUAL(call_counts[CCNT_TCP_OUTPUT], 1);
+
+ /* Verify generation count was incremented */
+ KTEST_EQUAL(hpts->p_hptss[slot_to_test].gencnt, initial_gencnt + 1);
+
+ /* Verify first connection was processed and removed */
+ KTEST_EQUAL(tp1->t_in_hpts, IHPTS_NONE);
+
+ /* Insert second connection and record its generation count */
+ INP_WLOCK(&tp2->t_inpcb);
+ tp2->t_flags2 |= TF2_HPTS_CALLS;
+ tcp_hpts_insert(pace, tp2, timeout_usecs, NULL);
+ INP_WUNLOCK(&tp2->t_inpcb);
+
+ /* Verify connection was inserted successfully */
+ KTEST_EQUAL(tp2->t_in_hpts, IHPTS_ONQUEUE);
+
+ /* Record the generation count that tp2 received */
+ tp2_original_gencnt = tp2->t_hpts_gencnt;
+
+ /* Test generation count mismatch detection during processing */
+ /* Manually set stale generation count to simulate race condition */
+ tp2->t_hpts_gencnt = tp2_original_gencnt + 100; /* Force a mismatch */
+
+ /* Process the slot to trigger generation count validation */
+ test_time_usec += (slot_to_test + 1) * HPTS_USECS_PER_SLOT;
+ HPTS_LOCK(hpts);
+ NET_EPOCH_ENTER(et);
+ slots_ran = tcp_hptsi(hpts, true);
+ HPTS_UNLOCK(hpts);
+ NET_EPOCH_EXIT(et);
+
+ /* Connection should be processed despite generation count mismatch */
+ KTEST_EQUAL(tp2->t_in_hpts, IHPTS_NONE); /* Processed and released */
+
+ /* The key test: HPTS should handle mismatched generation counts gracefully */
+ KTEST_VERIFY(slots_ran > 0); /* Processing should still occur */
+
+ test_hpts_free_tcpcb(tp1);
+ test_hpts_free_tcpcb(tp2);
+ tcp_hptsi_stop(pace);
+ tcp_hptsi_destroy(pace);
+
+ return (0);
+}
+
+static const struct ktest_test_info tests[] = {
+ KTEST_INFO(module_load),
+ KTEST_INFO(hptsi_create_destroy),
+ KTEST_INFO(hptsi_start_stop),
+ KTEST_INFO(hptsi_independence),
+ KTEST_INFO(function_injection),
+ KTEST_INFO(tcpcb_initialization),
+ KTEST_INFO(tcpcb_insertion),
+ KTEST_INFO(timer_functionality),
+ KTEST_INFO(scalability_tcpcbs),
+ KTEST_INFO(wheel_wrap_recovery),
+ KTEST_INFO(tcpcb_moving_state),
+ KTEST_INFO(deferred_requests),
+ KTEST_INFO(cpu_assignment),
+ KTEST_INFO(slot_boundary_conditions),
+ KTEST_INFO(dynamic_sleep_adjustment),
+ KTEST_INFO(concurrent_operations),
+ KTEST_INFO(queued_segments_processing),
+ KTEST_INFO(direct_wake_mechanism),
+ KTEST_INFO(hpts_collision_detection),
+ KTEST_INFO(generation_count_validation),
+};
+
+#else /* TCP_HPTS_KTEST */
+
+/*
+ * Stub to indicate that the TCP HPTS ktest is not enabled.
+ */
+KTEST_FUNC(module_load_without_tests)
+{
+ KTEST_LOG(ctx, "Warning: TCP HPTS ktest is not enabled");
+ return (0);
+}
+
+static const struct ktest_test_info tests[] = {
+ KTEST_INFO(module_load_without_tests),
+};
+
+#endif
+
+KTEST_MODULE_DECLARE(ktest_tcphpts, tests);
+KTEST_MODULE_DEPEND(ktest_tcphpts, tcphpts);
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index dd27ec77c1af..2146b0cac48f 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -219,7 +219,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_autorcvbuf), 0,
"Enable automatic receive buffer sizing");
-VNET_DEFINE(int, tcp_autorcvbuf_max) = 2*1024*1024;
+VNET_DEFINE(int, tcp_autorcvbuf_max) = 8*1024*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autorcvbuf_max), 0,
"Max size of automatic receive buffer");
diff --git a/sys/netinet/tcp_lro_hpts.c b/sys/netinet/tcp_lro_hpts.c
index 43587285fe26..ac1a27a4290a 100644
--- a/sys/netinet/tcp_lro_hpts.c
+++ b/sys/netinet/tcp_lro_hpts.c
@@ -29,6 +29,8 @@
#include "opt_inet6.h"
#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/interrupt.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
@@ -62,6 +64,7 @@
#include <netinet/tcp_lro.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_hpts.h>
+#include <netinet/tcp_hpts_internal.h>
#ifdef TCP_BLACKBOX
#include <netinet/tcp_log_buf.h>
#endif
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 2dfb7faf56e3..208f72c4661c 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -123,7 +123,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_inc), 0,
"Incrementor step size of automatic send buffer");
-VNET_DEFINE(int, tcp_autosndbuf_max) = 2*1024*1024;
+VNET_DEFINE(int, tcp_autosndbuf_max) = 8*1024*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_max), 0,
"Max size of automatic send buffer");
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index f2d7867df9b4..66983edcdd73 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -480,7 +480,7 @@ bbr_find_lowest_rsm(struct tcp_bbr *bbr);
static __inline uint32_t
bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type);
static void
-bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot,
+bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay,
uint8_t which);
static void
bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts,
@@ -489,7 +489,7 @@ bbr_log_timer_var(struct tcp_bbr *bbr, int mode, uint32_t cts,
static void
bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag);
static void
-bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot,
+bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t pacing_delay,
uint32_t del_by, uint32_t cts, uint32_t sloton,
uint32_t prev_delay);
static void
@@ -724,7 +724,7 @@ bbr_minseg(struct tcp_bbr *bbr)
}
static void
-bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_t frm, int32_t slot, uint32_t tot_len)
+bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_t frm, int32_t pacing_delay, uint32_t tot_len)
{
struct inpcb *inp = tptoinpcb(tp);
struct hpts_diag diag;
@@ -751,40 +751,40 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
bbr->r_ctl.rc_timer_exp = 0;
prev_delay = bbr->r_ctl.rc_last_delay_val;
if (bbr->r_ctl.rc_last_delay_val &&
- (slot == 0)) {
+ (pacing_delay == 0)) {
/*
* If a previous pacer delay was in place we
* are not coming from the output side (where
* we calculate a delay, more likely a timer).
*/
- slot = bbr->r_ctl.rc_last_delay_val;
+ pacing_delay = bbr->r_ctl.rc_last_delay_val;
if (TSTMP_GT(cts, bbr->rc_pacer_started)) {
/* Compensate for time passed */
delay_calc = cts - bbr->rc_pacer_started;
- if (delay_calc <= slot)
- slot -= delay_calc;
+ if (delay_calc <= pacing_delay)
+ pacing_delay -= delay_calc;
}
}
/* Do we have early to make up for by pushing out the pacing time? */
if (bbr->r_agg_early_set) {
- bbr_log_pacing_delay_calc(bbr, 0, bbr->r_ctl.rc_agg_early, cts, slot, 0, bbr->r_agg_early_set, 2);
- slot += bbr->r_ctl.rc_agg_early;
+ bbr_log_pacing_delay_calc(bbr, 0, bbr->r_ctl.rc_agg_early, cts, pacing_delay, 0, bbr->r_agg_early_set, 2);
+ pacing_delay += bbr->r_ctl.rc_agg_early;
bbr->r_ctl.rc_agg_early = 0;
bbr->r_agg_early_set = 0;
}
/* Are we running a total debt that needs to be compensated for? */
if (bbr->r_ctl.rc_hptsi_agg_delay) {
- if (slot > bbr->r_ctl.rc_hptsi_agg_delay) {
+ if (pacing_delay > bbr->r_ctl.rc_hptsi_agg_delay) {
/* We nuke the delay */
- slot -= bbr->r_ctl.rc_hptsi_agg_delay;
+ pacing_delay -= bbr->r_ctl.rc_hptsi_agg_delay;
bbr->r_ctl.rc_hptsi_agg_delay = 0;
} else {
/* We nuke some of the delay, put in a minimal 100usecs */
- bbr->r_ctl.rc_hptsi_agg_delay -= slot;
- bbr->r_ctl.rc_last_delay_val = slot = 100;
+ bbr->r_ctl.rc_hptsi_agg_delay -= pacing_delay;
+ bbr->r_ctl.rc_last_delay_val = pacing_delay = 100;
}
}
- bbr->r_ctl.rc_last_delay_val = slot;
+ bbr->r_ctl.rc_last_delay_val = pacing_delay;
hpts_timeout = bbr_timer_start(tp, bbr, cts);
if (tp->t_flags & TF_DELACK) {
if (bbr->rc_in_persist == 0) {
@@ -810,7 +810,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
bbr->r_ctl.rc_hpts_flags = PACE_TMR_DELACK;
hpts_timeout = delayed_ack;
}
- if (slot) {
+ if (pacing_delay) {
/* Mark that we have a pacing timer up */
BBR_STAT_INC(bbr_paced_segments);
bbr->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT;
@@ -820,7 +820,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
* wheel, we resort to a keep-alive timer if its configured.
*/
if ((hpts_timeout == 0) &&
- (slot == 0)) {
+ (pacing_delay == 0)) {
if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
(tp->t_state <= TCPS_CLOSING)) {
/*
@@ -849,7 +849,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
if (left < hpts_timeout)
hpts_timeout = left;
}
- if (bbr->r_ctl.rc_incr_tmrs && slot &&
+ if (bbr->r_ctl.rc_incr_tmrs && pacing_delay &&
(bbr->r_ctl.rc_hpts_flags & (PACE_TMR_TLP|PACE_TMR_RXT))) {
/*
* If configured to do so, and the timer is either
@@ -867,7 +867,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
* this extra delay but this is easier and being more
* conservative is probably better.
*/
- hpts_timeout += slot;
+ hpts_timeout += pacing_delay;
}
if (hpts_timeout) {
/*
@@ -879,10 +879,10 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
bbr->r_ctl.rc_timer_exp = cts + hpts_timeout;
} else
bbr->r_ctl.rc_timer_exp = 0;
- if ((slot) &&
+ if ((pacing_delay) &&
(bbr->rc_use_google ||
bbr->output_error_seen ||
- (slot <= hpts_timeout)) ) {
+ (pacing_delay <= hpts_timeout)) ) {
/*
* Tell LRO that it can queue packets while
* we pace.
@@ -900,17 +900,15 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
tp->t_flags2 &= ~TF2_DONT_SACK_QUEUE;
bbr->rc_pacer_started = cts;
- (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(slot),
- __LINE__, &diag);
+ tcp_hpts_insert(tp, pacing_delay, &diag);
bbr->rc_timer_first = 0;
bbr->bbr_timer_src = frm;
- bbr_log_to_start(bbr, cts, hpts_timeout, slot, 1);
+ bbr_log_to_start(bbr, cts, hpts_timeout, pacing_delay, 1);
bbr_log_hpts_diag(bbr, cts, &diag);
} else if (hpts_timeout) {
- (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout),
- __LINE__, &diag);
+ tcp_hpts_insert(tp, hpts_timeout, &diag);
/*
- * We add the flag here as well if the slot is set,
+ * We add the flag here as well if the pacing delay is set,
* since hpts will call in to clear the queue first before
* calling the output routine (which does our timers).
* We don't want to set the flag if its just a timer
@@ -919,7 +917,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
* on a keep-alive timer and a request comes in for
* more data.
*/
- if (slot)
+ if (pacing_delay)
bbr->rc_pacer_started = cts;
if ((bbr->r_ctl.rc_hpts_flags & PACE_TMR_RACK) &&
(bbr->rc_cwnd_limited == 0)) {
@@ -936,12 +934,12 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts, int32_
TF2_DONT_SACK_QUEUE);
}
bbr->bbr_timer_src = frm;
- bbr_log_to_start(bbr, cts, hpts_timeout, slot, 0);
+ bbr_log_to_start(bbr, cts, hpts_timeout, pacing_delay, 0);
bbr_log_hpts_diag(bbr, cts, &diag);
bbr->rc_timer_first = 1;
}
bbr->rc_tmr_stopped = 0;
- bbr_log_type_bbrsnd(bbr, tot_len, slot, delay_calc, cts, frm, prev_delay);
+ bbr_log_type_bbrsnd(bbr, tot_len, pacing_delay, delay_calc, cts, frm, prev_delay);
}
static void
@@ -1033,8 +1031,8 @@ bbr_timer_audit(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, struct sock
}
/*
* Ok the timer originally started is not what we want now. We will
- * force the hpts to be stopped if any, and restart with the slot
- * set to what was in the saved slot.
+ * force the hpts to be stopped if any, and restart with the pacing
+ * delay set to what was in the saved delay.
*/
wrong_timer:
if ((bbr->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) {
@@ -2397,7 +2395,7 @@ bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag)
log.u_bbr.flex2 = diag->p_cur_slot;
log.u_bbr.flex3 = diag->slot_req;
log.u_bbr.flex4 = diag->inp_hptsslot;
- log.u_bbr.flex5 = diag->slot_remaining;
+ log.u_bbr.flex5 = diag->time_remaining;
log.u_bbr.flex6 = diag->need_new_to;
log.u_bbr.flex7 = diag->p_hpts_active;
log.u_bbr.flex8 = diag->p_on_min_sleep;
@@ -2411,9 +2409,6 @@ bbr_log_hpts_diag(struct tcp_bbr *bbr, uint32_t cts, struct hpts_diag *diag)
log.u_bbr.bw_inuse = diag->wheel_slot;
log.u_bbr.rttProp = diag->wheel_cts;
log.u_bbr.delRate = diag->maxslots;
- log.u_bbr.cur_del_rate = diag->p_curtick;
- log.u_bbr.cur_del_rate <<= 32;
- log.u_bbr.cur_del_rate |= diag->p_lasttick;
TCP_LOG_EVENTP(bbr->rc_tp, NULL,
&bbr->rc_inp->inp_socket->so_rcv,
&bbr->rc_inp->inp_socket->so_snd,
@@ -2473,7 +2468,7 @@ bbr_log_pacing_delay_calc(struct tcp_bbr *bbr, uint16_t gain, uint32_t len,
}
static void
-bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, uint8_t which)
+bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, uint8_t which)
{
if (tcp_bblogging_on(bbr->rc_tp)) {
union tcp_log_stackspecific log;
@@ -2483,7 +2478,7 @@ bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t slot, u
log.u_bbr.flex1 = bbr->bbr_timer_src;
log.u_bbr.flex2 = to;
log.u_bbr.flex3 = bbr->r_ctl.rc_hpts_flags;
- log.u_bbr.flex4 = slot;
+ log.u_bbr.flex4 = pacing_delay;
log.u_bbr.flex5 = bbr->rc_tp->t_hpts_slot;
log.u_bbr.flex6 = TICKS_2_USEC(bbr->rc_tp->t_rxtcur);
log.u_bbr.pkts_out = bbr->rc_tp->t_flags2;
@@ -2733,13 +2728,13 @@ bbr_type_log_hdwr_pacing(struct tcp_bbr *bbr, const struct ifnet *ifp,
}
static void
-bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t slot, uint32_t del_by, uint32_t cts, uint32_t line, uint32_t prev_delay)
+bbr_log_type_bbrsnd(struct tcp_bbr *bbr, uint32_t len, uint32_t pacing_delay, uint32_t del_by, uint32_t cts, uint32_t line, uint32_t prev_delay)
{
if (tcp_bblogging_on(bbr->rc_tp)) {
union tcp_log_stackspecific log;
bbr_fill_in_logging_data(bbr, &log.u_bbr, cts);
- log.u_bbr.flex1 = slot;
+ log.u_bbr.flex1 = pacing_delay;
log.u_bbr.flex2 = del_by;
log.u_bbr.flex3 = prev_delay;
log.u_bbr.flex4 = line;
@@ -5205,7 +5200,7 @@ bbr_process_timers(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts, uint8_t
left = bbr->r_ctl.rc_timer_exp - cts;
ret = -3;
bbr_log_to_processing(bbr, cts, ret, left, hpts_calling);
- tcp_hpts_insert(tp, HPTS_USEC_TO_SLOTS(left));
+ tcp_hpts_insert(tp, left, NULL);
return (1);
}
bbr->rc_tmr_stopped = 0;
@@ -5254,7 +5249,7 @@ bbr_timer_cancel(struct tcp_bbr *bbr, int32_t line, uint32_t cts)
else
time_since_send = 0;
if (bbr->r_ctl.rc_last_delay_val > time_since_send) {
- /* Cut down our slot time */
+ /* Cut down our pacing delay */
bbr->r_ctl.rc_last_delay_val -= time_since_send;
} else {
bbr->r_ctl.rc_last_delay_val = 0;
@@ -5888,7 +5883,7 @@ bbr_log_output(struct tcp_bbr *bbr, struct tcpcb *tp, struct tcpopt *to, int32_t
* sequence 1 for 10 bytes. In such an example the r_start would be
* 1 (starting sequence) but the r_end would be r_start+len i.e. 11.
* This means that r_end is actually the first sequence for the next
- * slot (11).
+ * block (11).
*
*/
INP_WLOCK_ASSERT(tptoinpcb(tp));
@@ -11856,7 +11851,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
struct bbr_sendmap *rsm = NULL;
int32_t tso, mtu;
struct tcpopt to;
- int32_t slot = 0;
+ int32_t pacing_delay = 0;
struct inpcb *inp;
struct sockbuf *sb;
bool hpts_calling;
@@ -11986,8 +11981,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
delay_calc -= bbr->r_ctl.rc_last_delay_val;
else {
/*
- * We are early setup to adjust
- * our slot time.
+ * We are early setup to adjust our pacing delay.
*/
uint64_t merged_val;
@@ -12104,7 +12098,7 @@ again:
#endif
error = 0;
tso = 0;
- slot = 0;
+ pacing_delay = 0;
mtu = 0;
sendwin = min(tp->snd_wnd, tp->snd_cwnd);
sb_offset = tp->snd_max - tp->snd_una;
@@ -12126,7 +12120,7 @@ recheck_resend:
tot_len = tp->t_maxseg;
if (hpts_calling)
/* Retry in a ms */
- slot = 1001;
+ pacing_delay = 1001;
goto just_return_nolock;
}
TAILQ_INSERT_TAIL(&bbr->r_ctl.rc_free, rsm, r_next);
@@ -12699,9 +12693,9 @@ just_return:
SOCK_SENDBUF_UNLOCK(so);
just_return_nolock:
if (tot_len)
- slot = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0);
+ pacing_delay = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0);
if (bbr->rc_no_pacing)
- slot = 0;
+ pacing_delay = 0;
if (tot_len == 0) {
if ((ctf_outstanding(tp) + min((bbr->r_ctl.rc_high_rwnd/2), bbr_minseg(bbr))) >=
tp->snd_wnd) {
@@ -12751,7 +12745,7 @@ just_return_nolock:
/* Dont update the time if we did not send */
bbr->r_ctl.rc_last_delay_val = 0;
bbr->rc_output_starts_timer = 1;
- bbr_start_hpts_timer(bbr, tp, cts, 9, slot, tot_len);
+ bbr_start_hpts_timer(bbr, tp, cts, 9, pacing_delay, tot_len);
bbr_log_type_just_return(bbr, cts, tot_len, hpts_calling, app_limited, p_maxseg, len);
if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
/* Make sure snd_nxt is drug up */
@@ -12787,7 +12781,7 @@ send:
flags &= ~TH_FIN;
if ((len == 0) && ((tp->t_flags & TF_ACKNOW) == 0)) {
/* Lets not send this */
- slot = 0;
+ pacing_delay = 0;
goto just_return;
}
}
@@ -13053,7 +13047,7 @@ send:
/*
* We have outstanding data, don't send a fin by itself!.
*/
- slot = 0;
+ pacing_delay = 0;
goto just_return;
}
/*
@@ -13763,7 +13757,7 @@ nomore:
if (tp->snd_cwnd < maxseg)
tp->snd_cwnd = maxseg;
}
- slot = (bbr_error_base_paceout + 1) << bbr->oerror_cnt;
+ pacing_delay = (bbr_error_base_paceout + 1) << bbr->oerror_cnt;
BBR_STAT_INC(bbr_saw_enobuf);
if (bbr->bbr_hdrw_pacing)
counter_u64_add(bbr_hdwr_pacing_enobuf, 1);
@@ -13812,18 +13806,18 @@ nomore:
}
/*
* Nuke all other things that can interfere
- * with slot
+ * with the pacing delay
*/
if ((tot_len + len) && (len >= tp->t_maxseg)) {
- slot = bbr_get_pacing_delay(bbr,
+ pacing_delay = bbr_get_pacing_delay(bbr,
bbr->r_ctl.rc_bbr_hptsi_gain,
(tot_len + len), cts, 0);
- if (slot < bbr_error_base_paceout)
- slot = (bbr_error_base_paceout + 2) << bbr->oerror_cnt;
+ if (pacing_delay < bbr_error_base_paceout)
+ pacing_delay = (bbr_error_base_paceout + 2) << bbr->oerror_cnt;
} else
- slot = (bbr_error_base_paceout + 2) << bbr->oerror_cnt;
+ pacing_delay = (bbr_error_base_paceout + 2) << bbr->oerror_cnt;
bbr->rc_output_starts_timer = 1;
- bbr_start_hpts_timer(bbr, tp, cts, 10, slot,
+ bbr_start_hpts_timer(bbr, tp, cts, 10, pacing_delay,
tot_len);
return (error);
}
@@ -13841,9 +13835,9 @@ nomore:
}
/* FALLTHROUGH */
default:
- slot = (bbr_error_base_paceout + 3) << bbr->oerror_cnt;
+ pacing_delay = (bbr_error_base_paceout + 3) << bbr->oerror_cnt;
bbr->rc_output_starts_timer = 1;
- bbr_start_hpts_timer(bbr, tp, cts, 11, slot, 0);
+ bbr_start_hpts_timer(bbr, tp, cts, 11, pacing_delay, 0);
return (error);
}
#ifdef STATS
@@ -13981,12 +13975,12 @@ skip_again:
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST);
if (((flags & (TH_RST | TH_SYN | TH_FIN)) == 0) && tot_len) {
/*
- * Calculate/Re-Calculate the hptsi slot in usecs based on
+ * Calculate/Re-Calculate the hptsi timeout in usecs based on
* what we have sent so far
*/
- slot = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0);
+ pacing_delay = bbr_get_pacing_delay(bbr, bbr->r_ctl.rc_bbr_hptsi_gain, tot_len, cts, 0);
if (bbr->rc_no_pacing)
- slot = 0;
+ pacing_delay = 0;
}
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
enobufs:
@@ -13999,8 +13993,8 @@ enobufs:
(more_to_rxt ||
((bbr->r_ctl.rc_resend = bbr_check_recovery_mode(tp, bbr, cts)) != NULL))) {
/* Rack cheats and shotguns out all rxt's 1ms apart */
- if (slot > 1000)
- slot = 1000;
+ if (pacing_delay > 1000)
+ pacing_delay = 1000;
}
if (bbr->bbr_hdrw_pacing && (bbr->hw_pacing_set == 0)) {
/*
@@ -14014,7 +14008,7 @@ enobufs:
tcp_bbr_tso_size_check(bbr, cts);
}
}
- bbr_start_hpts_timer(bbr, tp, cts, 12, slot, tot_len);
+ bbr_start_hpts_timer(bbr, tp, cts, 12, pacing_delay, tot_len);
if (SEQ_LT(tp->snd_nxt, tp->snd_max)) {
/* Make sure snd_nxt is drug up */
tp->snd_nxt = tp->snd_max;
@@ -14132,8 +14126,7 @@ bbr_switch_failed(struct tcpcb *tp)
}
} else
toval = HPTS_USECS_PER_SLOT;
- (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval),
- __LINE__, &diag);
+ tcp_hpts_insert(tp, toval, &diag);
bbr_log_hpts_diag(bbr, cts, &diag);
}
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 11ef5ba706c5..c7962b57a69e 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -250,11 +250,11 @@ static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the co
static int32_t rack_persist_min = 250000; /* 250usec */
static int32_t rack_persist_max = 2000000; /* 2 Second in usec's */
static int32_t rack_honors_hpts_min_to = 1; /* Do we honor the hpts minimum time out for pacing timers */
-static uint32_t rack_max_reduce = 10; /* Percent we can reduce slot by */
+static uint32_t rack_max_reduce = 10; /* Percent we can reduce pacing delay by */
static int32_t rack_sack_not_required = 1; /* set to one to allow non-sack to use rack */
static int32_t rack_limit_time_with_srtt = 0;
static int32_t rack_autosndbuf_inc = 20; /* In percentage form */
-static int32_t rack_enobuf_hw_boost_mult = 0; /* How many times the hw rate we boost slot using time_between */
+static int32_t rack_enobuf_hw_boost_mult = 0; /* How many times the hw rate we boost pacing delay using time_between */
static int32_t rack_enobuf_hw_max = 12000; /* 12 ms in usecs */
static int32_t rack_enobuf_hw_min = 10000; /* 10 ms in usecs */
static int32_t rack_hw_rwnd_factor = 2; /* How many max_segs the rwnd must be before we hold off sending */
@@ -278,7 +278,7 @@ static int32_t rack_hptsi_segments = 40;
static int32_t rack_rate_sample_method = USE_RTT_LOW;
static int32_t rack_pace_every_seg = 0;
static int32_t rack_delayed_ack_time = 40000; /* 40ms in usecs */
-static int32_t rack_slot_reduction = 4;
+static int32_t rack_pacing_delay_reduction = 4;
static int32_t rack_wma_divisor = 8; /* For WMA calculation */
static int32_t rack_cwnd_block_ends_measure = 0;
static int32_t rack_rwnd_block_ends_measure = 0;
@@ -478,7 +478,7 @@ rack_log_alt_to_to_cancel(struct tcp_rack *rack,
uint16_t flex7, uint8_t mod);
static void
-rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t slot,
+rack_log_pacing_delay_calc(struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay,
uint64_t bw_est, uint64_t bw, uint64_t len_time, int method, int line,
struct rack_sendmap *rsm, uint8_t quality);
static struct rack_sendmap *
@@ -1107,7 +1107,7 @@ rack_init_sysctls(void)
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_pacing),
OID_AUTO, "burst_reduces", CTLFLAG_RW,
- &rack_slot_reduction, 4,
+ &rack_pacing_delay_reduction, 4,
"When doing only burst mitigation what is the reduce divisor");
SYSCTL_ADD_S32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_sysctl_root),
@@ -1399,7 +1399,7 @@ rack_init_sysctls(void)
SYSCTL_CHILDREN(rack_timers),
OID_AUTO, "hpts_max_reduce", CTLFLAG_RW,
&rack_max_reduce, 10,
- "Max percentage we will reduce slot by for pacing when we are behind");
+ "Max percentage we will reduce pacing delay by for pacing when we are behind");
SYSCTL_ADD_U32(&rack_sysctl_ctx,
SYSCTL_CHILDREN(rack_timers),
OID_AUTO, "persmin", CTLFLAG_RW,
@@ -2700,7 +2700,7 @@ rack_log_retran_reason(struct tcp_rack *rack, struct rack_sendmap *rsm, uint32_t
}
static void
-rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot, uint8_t which)
+rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t pacing_delay, uint8_t which)
{
if (tcp_bblogging_on(rack->rc_tp)) {
union tcp_log_stackspecific log;
@@ -2710,7 +2710,7 @@ rack_log_to_start(struct tcp_rack *rack, uint32_t cts, uint32_t to, int32_t slot
log.u_bbr.flex1 = rack->rc_tp->t_srtt;
log.u_bbr.flex2 = to;
log.u_bbr.flex3 = rack->r_ctl.rc_hpts_flags;
- log.u_bbr.flex4 = slot;
+ log.u_bbr.flex4 = pacing_delay;
log.u_bbr.flex5 = rack->rc_tp->t_hpts_slot;
log.u_bbr.flex6 = rack->rc_tp->t_rxtcur;
log.u_bbr.flex7 = rack->rc_in_persist;
@@ -3034,14 +3034,14 @@ rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick,
}
static void
-rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, uint32_t cts, struct timeval *tv, int line)
+rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay, uint32_t cts, struct timeval *tv, int line)
{
if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
union tcp_log_stackspecific log;
memset(&log, 0, sizeof(log));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
- log.u_bbr.flex1 = slot;
+ log.u_bbr.flex1 = pacing_delay;
if (rack->rack_no_prr)
log.u_bbr.flex2 = 0;
else
@@ -3139,7 +3139,7 @@ rack_log_type_pacing_sizes(struct tcpcb *tp, struct tcp_rack *rack, uint32_t arg
}
static void
-rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t slot,
+rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, uint32_t pacing_delay,
uint8_t hpts_calling, int reason, uint32_t cwnd_to_use)
{
if (tcp_bblogging_on(rack->rc_tp)) {
@@ -3148,7 +3148,7 @@ rack_log_type_just_return(struct tcp_rack *rack, uint32_t cts, uint32_t tlen, ui
memset(&log, 0, sizeof(log));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_tp);
- log.u_bbr.flex1 = slot;
+ log.u_bbr.flex1 = pacing_delay;
log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags;
log.u_bbr.flex4 = reason;
if (rack->rack_no_prr)
@@ -6482,7 +6482,7 @@ rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts,
log.u_bbr.flex2 = diag->p_cur_slot;
log.u_bbr.flex3 = diag->slot_req;
log.u_bbr.flex4 = diag->inp_hptsslot;
- log.u_bbr.flex5 = diag->slot_remaining;
+ log.u_bbr.flex5 = diag->time_remaining;
log.u_bbr.flex6 = diag->need_new_to;
log.u_bbr.flex7 = diag->p_hpts_active;
log.u_bbr.flex8 = diag->p_on_min_sleep;
@@ -6497,9 +6497,6 @@ rack_log_hpts_diag(struct tcp_rack *rack, uint32_t cts,
log.u_bbr.rttProp = diag->wheel_cts;
log.u_bbr.timeStamp = cts;
log.u_bbr.delRate = diag->maxslots;
- log.u_bbr.cur_del_rate = diag->p_curtick;
- log.u_bbr.cur_del_rate <<= 32;
- log.u_bbr.cur_del_rate |= diag->p_lasttick;
TCP_LOG_EVENTP(rack->rc_tp, NULL,
&rack->rc_inp->inp_socket->so_rcv,
&rack->rc_inp->inp_socket->so_snd,
@@ -6532,14 +6529,14 @@ rack_log_wakeup(struct tcpcb *tp, struct tcp_rack *rack, struct sockbuf *sb, uin
static void
rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
- int32_t slot, uint32_t tot_len_this_send, int sup_rack)
+ int32_t usecs, uint32_t tot_len_this_send, int sup_rack)
{
struct hpts_diag diag;
struct inpcb *inp = tptoinpcb(tp);
struct timeval tv;
uint32_t delayed_ack = 0;
uint32_t hpts_timeout;
- uint32_t entry_slot = slot;
+ uint32_t entry_usecs = usecs;
uint8_t stopped;
uint32_t left = 0;
uint32_t us_cts;
@@ -6560,7 +6557,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
rack->r_ctl.rc_hpts_flags = 0;
us_cts = tcp_get_usecs(&tv);
/* Now early/late accounting */
- rack_log_pacing_delay_calc(rack, entry_slot, slot, 0, 0, 0, 26, __LINE__, NULL, 0);
+ rack_log_pacing_delay_calc(rack, entry_usecs, usecs, 0, 0, 0, 26, __LINE__, NULL, 0);
if (rack->r_early && (rack->rc_ack_can_sendout_data == 0)) {
/*
* We have a early carry over set,
@@ -6571,7 +6568,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
* penalize the next timer for being awoke
* by an ack aka the rc_agg_early (non-paced mode).
*/
- slot += rack->r_ctl.rc_agg_early;
+ usecs += rack->r_ctl.rc_agg_early;
rack->r_early = 0;
rack->r_ctl.rc_agg_early = 0;
}
@@ -6583,29 +6580,29 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
* really depends on what
* the current pacing time is.
*/
- if (rack->r_ctl.rc_agg_delayed >= slot) {
+ if (rack->r_ctl.rc_agg_delayed >= usecs) {
/*
* We can't compensate for it all.
* And we have to have some time
* on the clock. We always have a min
- * 10 slots (10 x 10 i.e. 100 usecs).
+ * 10 HPTS timer units (10 x 10 i.e. 100 usecs).
*/
- if (slot <= HPTS_USECS_PER_SLOT) {
+ if (usecs <= HPTS_USECS_PER_SLOT) {
/* We gain delay */
- rack->r_ctl.rc_agg_delayed += (HPTS_USECS_PER_SLOT - slot);
- slot = HPTS_USECS_PER_SLOT;
+ rack->r_ctl.rc_agg_delayed += (HPTS_USECS_PER_SLOT - usecs);
+ usecs = HPTS_USECS_PER_SLOT;
} else {
/* We take off some */
- rack->r_ctl.rc_agg_delayed -= (slot - HPTS_USECS_PER_SLOT);
- slot = HPTS_USECS_PER_SLOT;
+ rack->r_ctl.rc_agg_delayed -= (usecs - HPTS_USECS_PER_SLOT);
+ usecs = HPTS_USECS_PER_SLOT;
}
} else {
- slot -= rack->r_ctl.rc_agg_delayed;
+ usecs -= rack->r_ctl.rc_agg_delayed;
rack->r_ctl.rc_agg_delayed = 0;
/* Make sure we have 100 useconds at minimum */
- if (slot < HPTS_USECS_PER_SLOT) {
- rack->r_ctl.rc_agg_delayed = HPTS_USECS_PER_SLOT - slot;
- slot = HPTS_USECS_PER_SLOT;
+ if (usecs < HPTS_USECS_PER_SLOT) {
+ rack->r_ctl.rc_agg_delayed = HPTS_USECS_PER_SLOT - usecs;
+ usecs = HPTS_USECS_PER_SLOT;
}
if (rack->r_ctl.rc_agg_delayed == 0)
rack->r_late = 0;
@@ -6614,17 +6611,17 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
/* r_use_hpts_min is on and so is DGP */
uint32_t max_red;
- max_red = (slot * rack->r_ctl.max_reduction) / 100;
+ max_red = (usecs * rack->r_ctl.max_reduction) / 100;
if (max_red >= rack->r_ctl.rc_agg_delayed) {
- slot -= rack->r_ctl.rc_agg_delayed;
+ usecs -= rack->r_ctl.rc_agg_delayed;
rack->r_ctl.rc_agg_delayed = 0;
} else {
- slot -= max_red;
+ usecs -= max_red;
rack->r_ctl.rc_agg_delayed -= max_red;
}
}
if ((rack->r_use_hpts_min == 1) &&
- (slot > 0) &&
+ (usecs > 0) &&
(rack->dgp_on == 1)) {
/*
* We are enforcing a min pacing timer
@@ -6633,8 +6630,8 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
uint32_t min;
min = get_hpts_min_sleep_time();
- if (min > slot) {
- slot = min;
+ if (min > usecs) {
+ usecs = min;
}
}
hpts_timeout = rack_timer_start(tp, rack, cts, sup_rack);
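
Before the next hunk, it may help to see the late-timer compensation the renamed code implements: rc_agg_delayed accumulates how late previous timers fired, and that debt is shaved off the next pacing delay without ever going below one HPTS slot. A userland model under those assumptions, with illustrative input values:

#include <stdint.h>
#include <stdio.h>

#define HPTS_USECS_PER_SLOT 10

/* Shorten the next delay by the accumulated lateness, floor at one slot. */
static uint32_t
compensate_late(uint32_t usecs, uint32_t *agg_delayed)
{
    if (*agg_delayed >= usecs) {
        /* Cannot absorb the whole debt; clamp to the floor. */
        if (usecs <= HPTS_USECS_PER_SLOT)
            *agg_delayed += HPTS_USECS_PER_SLOT - usecs;
        else
            *agg_delayed -= usecs - HPTS_USECS_PER_SLOT;
        return (HPTS_USECS_PER_SLOT);
    }
    usecs -= *agg_delayed;
    *agg_delayed = 0;
    if (usecs < HPTS_USECS_PER_SLOT) {
        *agg_delayed = HPTS_USECS_PER_SLOT - usecs;
        usecs = HPTS_USECS_PER_SLOT;
    }
    return (usecs);
}

int
main(void)
{
    uint32_t debt = 150;

    /* 400 usec requested, 150 usec late: next delay 250, debt cleared. */
    printf("next delay %u usec, remaining debt %u usec\n",
        compensate_late(400, &debt), debt);
    return (0);
}
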
@@ -6652,7 +6649,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
* wheel, we resort to a keep-alive timer if its configured.
*/
if ((hpts_timeout == 0) &&
- (slot == 0)) {
+ (usecs == 0)) {
if ((V_tcp_always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
(tp->t_state <= TCPS_CLOSING)) {
/*
@@ -6709,10 +6706,10 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
hpts_timeout = 0x7ffffffe;
rack->r_ctl.rc_timer_exp = cts + hpts_timeout;
}
- rack_log_pacing_delay_calc(rack, entry_slot, slot, hpts_timeout, 0, 0, 27, __LINE__, NULL, 0);
+ rack_log_pacing_delay_calc(rack, entry_usecs, usecs, hpts_timeout, 0, 0, 27, __LINE__, NULL, 0);
if ((rack->gp_ready == 0) &&
(rack->use_fixed_rate == 0) &&
- (hpts_timeout < slot) &&
+ (hpts_timeout < usecs) &&
(rack->r_ctl.rc_hpts_flags & (PACE_TMR_TLP|PACE_TMR_RXT))) {
/*
* We have no good estimate yet for the
@@ -6722,7 +6719,7 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
* pace that long since we know the calculation
* so far is not accurate.
*/
- slot = hpts_timeout;
+ usecs = hpts_timeout;
}
/**
* Turn off all the flags for queuing by default. The
@@ -6754,11 +6751,11 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
* so LRO can call into us.
*/
tp->t_flags2 &= ~(TF2_DONT_SACK_QUEUE|TF2_MBUF_QUEUE_READY);
- if (slot) {
+ if (usecs) {
rack->r_ctl.rc_hpts_flags |= PACE_PKT_OUTPUT;
- rack->r_ctl.rc_last_output_to = us_cts + slot;
+ rack->r_ctl.rc_last_output_to = us_cts + usecs;
/*
- * A pacing timer (slot) is being set, in
+ * A pacing timer of 'usecs' microseconds is being set, in

* such a case we cannot send (we are blocked by
* the timer). So lets tell LRO that it should not
* wake us unless there is a SACK. Note this only
@@ -6799,20 +6796,18 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
}
if ((rack->use_rack_rr) &&
(rack->r_rr_config < 2) &&
- ((hpts_timeout) && (hpts_timeout < slot))) {
+ ((hpts_timeout) && (hpts_timeout < usecs))) {
/*
* Arrange for the hpts to kick back in after the
* t-o if the t-o does not cause a send.
*/
- (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout),
- __LINE__, &diag);
+ tcp_hpts_insert(tp, hpts_timeout, &diag);
rack_log_hpts_diag(rack, us_cts, &diag, &tv);
- rack_log_to_start(rack, cts, hpts_timeout, slot, 0);
+ rack_log_to_start(rack, cts, hpts_timeout, usecs, 0);
} else {
- (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(slot),
- __LINE__, &diag);
+ tcp_hpts_insert(tp, usecs, &diag);
rack_log_hpts_diag(rack, us_cts, &diag, &tv);
- rack_log_to_start(rack, cts, hpts_timeout, slot, 1);
+ rack_log_to_start(rack, cts, hpts_timeout, usecs, 1);
}
} else if (hpts_timeout) {
/*
@@ -6824,22 +6819,21 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
* at the start of this block) are good enough.
*/
rack->r_ctl.rc_hpts_flags &= ~PACE_PKT_OUTPUT;
- (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(hpts_timeout),
- __LINE__, &diag);
+ tcp_hpts_insert(tp, hpts_timeout, &diag);
rack_log_hpts_diag(rack, us_cts, &diag, &tv);
- rack_log_to_start(rack, cts, hpts_timeout, slot, 0);
+ rack_log_to_start(rack, cts, hpts_timeout, usecs, 0);
} else {
/* No timer starting */
#ifdef INVARIANTS
if (SEQ_GT(tp->snd_max, tp->snd_una)) {
- panic("tp:%p rack:%p tlts:%d cts:%u slot:%u pto:%u -- no timer started?",
- tp, rack, tot_len_this_send, cts, slot, hpts_timeout);
+ panic("tp:%p rack:%p tlts:%d cts:%u usecs:%u pto:%u -- no timer started?",
+ tp, rack, tot_len_this_send, cts, usecs, hpts_timeout);
}
#endif
}
rack->rc_tmr_stopped = 0;
- if (slot)
- rack_log_type_bbrsnd(rack, tot_len_this_send, slot, us_cts, &tv, __LINE__);
+ if (usecs)
+ rack_log_type_bbrsnd(rack, tot_len_this_send, usecs, us_cts, &tv, __LINE__);
}
static void
@@ -8016,7 +8010,7 @@ rack_process_timers(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, uint8
rack->rc_tp->t_flags2 &= ~TF2_DONT_SACK_QUEUE;
ret = -3;
left = rack->r_ctl.rc_timer_exp - cts;
- tcp_hpts_insert(tp, HPTS_MS_TO_SLOTS(left));
+ tcp_hpts_insert(tp, left, NULL);
rack_log_to_processing(rack, cts, ret, left);
return (1);
}
@@ -14377,8 +14371,7 @@ rack_switch_failed(struct tcpcb *tp)
}
} else
toval = HPTS_USECS_PER_SLOT;
- (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval),
- __LINE__, &diag);
+ tcp_hpts_insert(tp, toval, &diag);
rack_log_hpts_diag(rack, cts, &diag, &tv);
}
@@ -14973,8 +14966,7 @@ rack_init(struct tcpcb *tp, void **ptr)
if (tov) {
struct hpts_diag diag;
- (void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(tov),
- __LINE__, &diag);
+ tcp_hpts_insert(tp, tov, &diag);
rack_log_hpts_diag(rack, us_cts, &diag, &rack->r_ctl.act_rcv_time);
}
}
@@ -16367,7 +16359,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
struct rack_sendmap *rsm;
int32_t prev_state = 0;
int no_output = 0;
- int slot_remaining = 0;
+ int time_remaining = 0;
#ifdef TCP_ACCOUNTING
int ack_val_set = 0xf;
#endif
@@ -16416,7 +16408,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
* could be, if a sack is present, we want to be awoken and
* so should process the packets.
*/
- slot_remaining = rack->r_ctl.rc_last_output_to - us_cts;
+ time_remaining = rack->r_ctl.rc_last_output_to - us_cts;
if (rack->rc_tp->t_flags2 & TF2_DONT_SACK_QUEUE) {
no_output = 1;
} else {
@@ -16436,7 +16428,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
(*ts_ptr == TCP_LRO_TS_OPTION)))
no_output = 1;
}
- if ((no_output == 1) && (slot_remaining < tcp_min_hptsi_time)) {
+ if ((no_output == 1) && (time_remaining < tcp_min_hptsi_time)) {
/*
* It is unrealistic to think we can pace in less than
* the minimum granularity of the pacer (def:250usec). So
@@ -16919,10 +16911,10 @@ do_output_now:
(tcp_in_hpts(rack->rc_tp) == 0)) {
/*
* We are not in hpts and we had a pacing timer up. Use
- * the remaining time (slot_remaining) to restart the timer.
+ * the remaining time (time_remaining) to restart the timer.
*/
- KASSERT ((slot_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp));
- rack_start_hpts_timer(rack, tp, cts, slot_remaining, 0, 0);
+ KASSERT ((time_remaining != 0), ("time remaining is zero for rack:%p tp:%p", rack, tp));
+ rack_start_hpts_timer(rack, tp, cts, time_remaining, 0, 0);
rack_free_trim(rack);
}
/* Clear the flag, it may have been cleared by output but we may not have */
@@ -17102,7 +17094,7 @@ check_it:
}
static void
-rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot,
+rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t pacing_delay,
uint64_t bw_est, uint64_t bw, uint64_t len_time, int method,
int line, struct rack_sendmap *rsm, uint8_t quality)
{
@@ -17125,7 +17117,7 @@ rack_log_pacing_delay_calc (struct tcp_rack *rack, uint32_t len, uint32_t slot,
}
}
memset(&log, 0, sizeof(log));
- log.u_bbr.flex1 = slot;
+ log.u_bbr.flex1 = pacing_delay;
log.u_bbr.flex2 = len;
log.u_bbr.flex3 = rack->r_ctl.rc_pace_min_segs;
log.u_bbr.flex4 = rack->r_ctl.rc_pace_max_segs;
@@ -17284,25 +17276,25 @@ rack_arrive_at_discounted_rate(struct tcp_rack *rack, uint64_t window_input, uin
}
static int32_t
-pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, uint32_t len, uint32_t segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced)
+pace_to_fill_cwnd(struct tcp_rack *rack, int32_t pacing_delay, uint32_t len, uint32_t segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced)
{
uint64_t lentim, fill_bw;
rack->r_via_fill_cw = 0;
if (ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked) > rack->r_ctl.cwnd_to_use)
- return (slot);
+ return (pacing_delay);
if ((ctf_outstanding(rack->rc_tp) + (segsiz-1)) > rack->rc_tp->snd_wnd)
- return (slot);
+ return (pacing_delay);
if (rack->r_ctl.rc_last_us_rtt == 0)
- return (slot);
+ return (pacing_delay);
if (rack->rc_pace_fill_if_rttin_range &&
(rack->r_ctl.rc_last_us_rtt >=
(get_filter_value_small(&rack->r_ctl.rc_gp_min_rtt) * rack->rtt_limit_mul))) {
/* The rtt is huge, N * smallest, lets not fill */
- return (slot);
+ return (pacing_delay);
}
if (rack->r_ctl.fillcw_cap && *rate_wanted >= rack->r_ctl.fillcw_cap)
- return (slot);
+ return (pacing_delay);
/*
* first let's calculate the b/w based on the last us-rtt
* and the smallest send window.
@@ -17368,7 +17360,7 @@ at_lt_bw:
if (non_paced)
*rate_wanted = fill_bw;
if ((fill_bw < RACK_MIN_BW) || (fill_bw < *rate_wanted))
- return (slot);
+ return (pacing_delay);
rack->r_via_fill_cw = 1;
if (rack->r_rack_hw_rate_caps &&
(rack->r_ctl.crte != NULL)) {
@@ -17423,19 +17415,19 @@ at_lt_bw:
lentim = (uint64_t)(len) * (uint64_t)HPTS_USEC_IN_SEC;
lentim /= fill_bw;
*rate_wanted = fill_bw;
- if (non_paced || (lentim < slot)) {
- rack_log_pacing_delay_calc(rack, len, slot, fill_bw,
+ if (non_paced || (lentim < pacing_delay)) {
+ rack_log_pacing_delay_calc(rack, len, pacing_delay, fill_bw,
0, lentim, 12, __LINE__, NULL, 0);
return ((int32_t)lentim);
} else
- return (slot);
+ return (pacing_delay);
}
static int32_t
rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, struct rack_sendmap *rsm, uint32_t segsiz, int line)
{
uint64_t srtt;
- int32_t slot = 0;
+ int32_t pacing_delay = 0;
int can_start_hw_pacing = 1;
int err;
int pace_one;
@@ -17483,25 +17475,25 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
* cwnd. Which in that case we are just waiting for
* a ACK.
*/
- slot = len / tr_perms;
+ pacing_delay = len / tr_perms;
/* Now do we reduce the time so we don't run dry? */
- if (slot && rack_slot_reduction) {
- reduce = (slot / rack_slot_reduction);
- if (reduce < slot) {
- slot -= reduce;
+ if (pacing_delay && rack_pacing_delay_reduction) {
+ reduce = (pacing_delay / rack_pacing_delay_reduction);
+ if (reduce < pacing_delay) {
+ pacing_delay -= reduce;
} else
- slot = 0;
+ pacing_delay = 0;
} else
reduce = 0;
- slot *= HPTS_USEC_IN_MSEC;
+ pacing_delay *= HPTS_USEC_IN_MSEC;
if (rack->rc_pace_to_cwnd) {
uint64_t rate_wanted = 0;
- slot = pace_to_fill_cwnd(rack, slot, len, segsiz, NULL, &rate_wanted, 1);
+ pacing_delay = pace_to_fill_cwnd(rack, pacing_delay, len, segsiz, NULL, &rate_wanted, 1);
rack->rc_ack_can_sendout_data = 1;
- rack_log_pacing_delay_calc(rack, len, slot, rate_wanted, 0, 0, 14, __LINE__, NULL, 0);
+ rack_log_pacing_delay_calc(rack, len, pacing_delay, rate_wanted, 0, 0, 14, __LINE__, NULL, 0);
} else
- rack_log_pacing_delay_calc(rack, len, slot, tr_perms, reduce, 0, 7, __LINE__, NULL, 0);
+ rack_log_pacing_delay_calc(rack, len, pacing_delay, tr_perms, reduce, 0, 7, __LINE__, NULL, 0);
/*******************************************************/
/* RRS: We insert non-paced call to stats here for len */
/*******************************************************/
@@ -17575,7 +17567,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
segs *= oh;
lentim = (uint64_t)(len + segs) * (uint64_t)HPTS_USEC_IN_SEC;
res = lentim / rate_wanted;
- slot = (uint32_t)res;
+ pacing_delay = (uint32_t)res;
if (rack_hw_rate_min &&
(rate_wanted < rack_hw_rate_min)) {
can_start_hw_pacing = 0;
@@ -17635,7 +17627,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
* We want to pace at our rate *or* faster to
* fill the cwnd to the max if its not full.
*/
- slot = pace_to_fill_cwnd(rack, slot, (len+segs), segsiz, &capped, &rate_wanted, 0);
+ pacing_delay = pace_to_fill_cwnd(rack, pacing_delay, (len+segs), segsiz, &capped, &rate_wanted, 0);
/* Re-check to make sure we are not exceeding our max b/w */
if ((rack->r_ctl.crte != NULL) &&
(tcp_hw_highest_rate(rack->r_ctl.crte) < rate_wanted)) {
@@ -17786,15 +17778,15 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
srtt = rack->rc_tp->t_srtt;
else
srtt = RACK_INITIAL_RTO * HPTS_USEC_IN_MSEC; /* its in ms convert */
- if (srtt < (uint64_t)slot) {
- rack_log_pacing_delay_calc(rack, srtt, slot, rate_wanted, bw_est, lentim, 99, __LINE__, NULL, 0);
- slot = srtt;
+ if (srtt < (uint64_t)pacing_delay) {
+ rack_log_pacing_delay_calc(rack, srtt, pacing_delay, rate_wanted, bw_est, lentim, 99, __LINE__, NULL, 0);
+ pacing_delay = srtt;
}
}
/*******************************************************************/
/* RRS: We insert paced call to stats here for len and rate_wanted */
/*******************************************************************/
- rack_log_pacing_delay_calc(rack, len, slot, rate_wanted, bw_est, lentim, 2, __LINE__, rsm, 0);
+ rack_log_pacing_delay_calc(rack, len, pacing_delay, rate_wanted, bw_est, lentim, 2, __LINE__, rsm, 0);
}
if (rack->r_ctl.crte && (rack->r_ctl.crte->rs_num_enobufs > 0)) {
/*
@@ -17811,9 +17803,9 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct tcpcb *tp, uint32_t len, str
hw_boost_delay = rack_enobuf_hw_max;
else if (hw_boost_delay < rack_enobuf_hw_min)
hw_boost_delay = rack_enobuf_hw_min;
- slot += hw_boost_delay;
+ pacing_delay += hw_boost_delay;
}
- return (slot);
+ return (pacing_delay);
}
static void
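
The non-paced branch of rack_get_pacing_delay() shown above computes the delay as len / tr_perms milliseconds, trims it by 1/rack_pacing_delay_reduction so the pipe does not run dry, and scales to microseconds. A hedged userland model of just that arithmetic; tr_perms (bytes per millisecond) and the inputs are made-up values:

#include <stdint.h>
#include <stdio.h>

#define HPTS_USEC_IN_MSEC 1000

static int32_t
model_pacing_delay(uint32_t len, uint32_t tr_perms, int32_t reduction)
{
    int32_t delay_ms, reduce;

    delay_ms = len / tr_perms;
    /* Reduce by 1/N so we wake up slightly before the data drains. */
    if (delay_ms && reduction) {
        reduce = delay_ms / reduction;
        if (reduce < delay_ms)
            delay_ms -= reduce;
        else
            delay_ms = 0;
    }
    return (delay_ms * HPTS_USEC_IN_MSEC);
}

int
main(void)
{
    /* 64 kB at 16000 bytes/ms with divisor 4: 4 ms - 1 ms = 3000 usec. */
    printf("%d usec\n", model_pacing_delay(65536, 16000, 4));
    return (0);
}
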
@@ -18482,7 +18474,7 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
struct tcpopt to;
u_char opt[TCP_MAXOLEN];
uint32_t hdrlen, optlen;
- int32_t slot, segsiz, max_val, tso = 0, error = 0, ulen = 0;
+ int32_t pacing_delay, segsiz, max_val, tso = 0, error = 0, ulen = 0;
uint16_t flags;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
uint32_t if_hw_tsomaxsegsize;
@@ -18688,9 +18680,9 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
}
if (rack->r_ctl.crte != NULL) {
/* See if we can send via the hw queue */
- slot = rack_check_queue_level(rack, tp, tv, cts, len, segsiz);
+ pacing_delay = rack_check_queue_level(rack, tp, tv, cts, len, segsiz);
/* If there is nothing in queue (no pacing time) we can send via the hw queue */
- if (slot == 0)
+ if (pacing_delay == 0)
ip_sendflag = 0;
}
tcp_set_flags(th, flags);
@@ -18955,20 +18947,20 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma
rack_log_queue_level(tp, rack, len, tv, cts);
} else
tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF);
- slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);
+ pacing_delay = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);
if (rack->rc_enobuf < 0x7f)
rack->rc_enobuf++;
- if (slot < (10 * HPTS_USEC_IN_MSEC))
- slot = 10 * HPTS_USEC_IN_MSEC;
+ if (pacing_delay < (10 * HPTS_USEC_IN_MSEC))
+ pacing_delay = 10 * HPTS_USEC_IN_MSEC;
if (rack->r_ctl.crte != NULL) {
counter_u64_add(rack_saw_enobuf_hw, 1);
tcp_rl_log_enobuf(rack->r_ctl.crte);
}
counter_u64_add(rack_saw_enobuf, 1);
} else {
- slot = rack_get_pacing_delay(rack, tp, len, NULL, segsiz, __LINE__);
+ pacing_delay = rack_get_pacing_delay(rack, tp, len, NULL, segsiz, __LINE__);
}
- rack_start_hpts_timer(rack, tp, cts, slot, len, 0);
+ rack_start_hpts_timer(rack, tp, cts, pacing_delay, len, 0);
#ifdef TCP_ACCOUNTING
crtsc = get_cyclecount();
if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
@@ -19071,7 +19063,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val,
#ifdef TCP_ACCOUNTING
int cnt_thru = 1;
#endif
- int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
+ int32_t pacing_delay, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0;
uint16_t flags;
uint32_t s_soff;
uint32_t if_hw_tsomaxsegcount = 0, startseq;
@@ -19519,8 +19511,8 @@ again:
}
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
counter_u64_add(rack_fto_send, 1);
- slot = rack_get_pacing_delay(rack, tp, *tot_len, NULL, segsiz, __LINE__);
- rack_start_hpts_timer(rack, tp, cts, slot, *tot_len, 0);
+ pacing_delay = rack_get_pacing_delay(rack, tp, *tot_len, NULL, segsiz, __LINE__);
+ rack_start_hpts_timer(rack, tp, cts, pacing_delay, *tot_len, 0);
#ifdef TCP_ACCOUNTING
crtsc = get_cyclecount();
if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
@@ -19707,7 +19699,7 @@ rack_output(struct tcpcb *tp)
struct rack_sendmap *rsm = NULL;
int32_t tso, mtu;
struct tcpopt to;
- int32_t slot = 0;
+ int32_t pacing_delay = 0;
int32_t sup_rack = 0;
uint32_t cts, ms_cts, delayed, early;
uint32_t add_flag = RACK_SENT_SP;
@@ -20070,7 +20062,7 @@ again:
if (rsm == NULL) {
if (hpts_calling)
/* Retry in a ms */
- slot = (1 * HPTS_USEC_IN_MSEC);
+ pacing_delay = (1 * HPTS_USEC_IN_MSEC);
so = inp->inp_socket;
sb = &so->so_snd;
goto just_return_nolock;
@@ -20877,7 +20869,7 @@ just_return_nolock:
}
if (tot_len_this_send > 0) {
rack->r_ctl.fsb.recwin = recwin;
- slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__);
+ pacing_delay = rack_get_pacing_delay(rack, tp, tot_len_this_send, NULL, segsiz, __LINE__);
if ((error == 0) &&
rack_use_rfo &&
((flags & (TH_SYN|TH_FIN)) == 0) &&
@@ -21060,8 +21052,8 @@ just_return_nolock:
/* Yes lets make sure to move to persist before timer-start */
rack_enter_persist(tp, rack, rack->r_ctl.rc_rcvtime, tp->snd_una);
}
- rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, sup_rack);
- rack_log_type_just_return(rack, cts, tot_len_this_send, slot, hpts_calling, app_limited, cwnd_to_use);
+ rack_start_hpts_timer(rack, tp, cts, pacing_delay, tot_len_this_send, sup_rack);
+ rack_log_type_just_return(rack, cts, tot_len_this_send, pacing_delay, hpts_calling, app_limited, cwnd_to_use);
}
#ifdef NETFLIX_SHARED_CWND
if ((sbavail(sb) == 0) &&
@@ -21100,8 +21092,8 @@ send:
* we come around to again, the flag will be clear.
*/
check_done = 1;
- slot = rack_check_queue_level(rack, tp, &tv, cts, len, segsiz);
- if (slot) {
+ pacing_delay = rack_check_queue_level(rack, tp, &tv, cts, len, segsiz);
+ if (pacing_delay) {
rack->r_ctl.rc_agg_delayed = 0;
rack->r_ctl.rc_agg_early = 0;
rack->r_early = 0;
@@ -22358,11 +22350,11 @@ nomore:
rack_log_queue_level(tp, rack, len, &tv, cts);
} else
tcp_trace_point(rack->rc_tp, TCP_TP_ENOBUF);
- slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);
+ pacing_delay = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC);
if (rack->rc_enobuf < 0x7f)
rack->rc_enobuf++;
- if (slot < (10 * HPTS_USEC_IN_MSEC))
- slot = 10 * HPTS_USEC_IN_MSEC;
+ if (pacing_delay < (10 * HPTS_USEC_IN_MSEC))
+ pacing_delay = 10 * HPTS_USEC_IN_MSEC;
if (rack->r_ctl.crte != NULL) {
counter_u64_add(rack_saw_enobuf_hw, 1);
tcp_rl_log_enobuf(rack->r_ctl.crte);
@@ -22389,8 +22381,8 @@ nomore:
goto again;
}
}
- slot = 10 * HPTS_USEC_IN_MSEC;
- rack_start_hpts_timer(rack, tp, cts, slot, 0, 0);
+ pacing_delay = 10 * HPTS_USEC_IN_MSEC;
+ rack_start_hpts_timer(rack, tp, cts, pacing_delay, 0, 0);
#ifdef TCP_ACCOUNTING
crtsc = get_cyclecount();
if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
@@ -22412,8 +22404,8 @@ nomore:
}
/* FALLTHROUGH */
default:
- slot = 10 * HPTS_USEC_IN_MSEC;
- rack_start_hpts_timer(rack, tp, cts, slot, 0, 0);
+ pacing_delay = 10 * HPTS_USEC_IN_MSEC;
+ rack_start_hpts_timer(rack, tp, cts, pacing_delay, 0, 0);
#ifdef TCP_ACCOUNTING
crtsc = get_cyclecount();
if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
@@ -22456,18 +22448,18 @@ enobufs:
/*
* We don't send again after sending a RST.
*/
- slot = 0;
+ pacing_delay = 0;
sendalot = 0;
if (error == 0)
tcp_log_end_status(tp, TCP_EI_STATUS_SERVER_RST);
- } else if ((slot == 0) && (sendalot == 0) && tot_len_this_send) {
+ } else if ((pacing_delay == 0) && (sendalot == 0) && tot_len_this_send) {
/*
* Get our pacing rate, if an error
* occurred in sending (ENOBUF) we would
* hit the else if with slot preset. Other
* errors return.
*/
- slot = rack_get_pacing_delay(rack, tp, tot_len_this_send, rsm, segsiz, __LINE__);
+ pacing_delay = rack_get_pacing_delay(rack, tp, tot_len_this_send, rsm, segsiz, __LINE__);
}
/* We have sent clear the flag */
rack->r_ent_rec_ns = 0;
@@ -22499,7 +22491,7 @@ enobufs:
*/
tp->t_flags &= ~(TF_WASCRECOVERY|TF_WASFRECOVERY);
}
- if (slot) {
+ if (pacing_delay) {
/* set the rack tcb into the slot N */
if ((error == 0) &&
rack_use_rfo &&
@@ -22564,7 +22556,7 @@ skip_all_send:
/* Assure when we leave that snd_nxt will point to top */
if (SEQ_GT(tp->snd_max, tp->snd_nxt))
tp->snd_nxt = tp->snd_max;
- rack_start_hpts_timer(rack, tp, cts, slot, tot_len_this_send, 0);
+ rack_start_hpts_timer(rack, tp, cts, pacing_delay, tot_len_this_send, 0);
#ifdef TCP_ACCOUNTING
crtsc = get_cyclecount() - ts_val;
if (tot_len_this_send) {
diff --git a/sys/netinet6/in6_fib_algo.c b/sys/netinet6/in6_fib_algo.c
index 10ffe7ab0265..ef5cfc6d5ef6 100644
--- a/sys/netinet6/in6_fib_algo.c
+++ b/sys/netinet6/in6_fib_algo.c
@@ -351,7 +351,7 @@ struct fib_lookup_module flm_radix6 = {
};
static void
-fib6_algo_init(void)
+fib6_algo_init(void *dummy __unused)
{
fib_module_register(&flm_radix6_lockless);
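
This is the first of many hunks in the patch converting SYSINIT handlers from void (*)(void) to take a void * argument, so the function-pointer casts (see the sys/sys/sysent.h hunk further down) can go away. A compilable sketch of the convention; the typedef here is a simplified stand-in for the kernel's sysinit_cfunc_t:

#include <stdio.h>

#ifndef __unused
#define __unused __attribute__((__unused__))
#endif

/* Simplified stand-in for the kernel's SYSINIT callback type. */
typedef void (*sysinit_cfunc_t)(void *udata);

static void
example_init(void *dummy __unused)
{
    printf("one-time subsystem setup\n");
}

int
main(void)
{
    /* Old: registration needed (sysinit_cfunc_t)example_init. */
    /* New: the handler signature matches, so no cast is required. */
    sysinit_cfunc_t fn = example_init;

    fn(NULL);
    return (0);
}
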
diff --git a/sys/netipsec/xform_ipcomp.c b/sys/netipsec/xform_ipcomp.c
index 737d4a50098a..05a01b75e0bb 100644
--- a/sys/netipsec/xform_ipcomp.c
+++ b/sys/netipsec/xform_ipcomp.c
@@ -750,7 +750,7 @@ static struct xformsw ipcomp_xformsw = {
};
static void
-ipcomp_attach(void)
+ipcomp_attach(void *dummy __unused)
{
#ifdef INET
@@ -763,7 +763,7 @@ ipcomp_attach(void)
}
static void
-ipcomp_detach(void)
+ipcomp_detach(void *dummy __unused)
{
#ifdef INET
diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index b59d8d08bf80..d15d7760d7f1 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -3578,11 +3578,9 @@ sysctl_ipfw_tables_sets(SYSCTL_HANDLER_ARGS)
/*
* Stuff that must be initialised only on boot or module load
*/
-static int
-ipfw_init(void)
+static void
+ipfw_init(void *dummy __unused)
{
- int error = 0;
-
/*
* Only print out this stuff the first time around,
* when called from the sysinit code.
@@ -3627,14 +3625,13 @@ ipfw_init(void)
ipfw_init_sopt_handler();
ipfw_init_obj_rewriter();
ipfw_iface_init();
- return (error);
}
/*
* Called for the removal of the last instance only on module unload.
*/
static void
-ipfw_destroy(void)
+ipfw_destroy(void *dummy __unused)
{
ipfw_iface_destroy();
diff --git a/sys/netpfil/ipfw/ip_fw_nat.c b/sys/netpfil/ipfw/ip_fw_nat.c
index 1e2ff1bca290..8bd27f6885ab 100644
--- a/sys/netpfil/ipfw/ip_fw_nat.c
+++ b/sys/netpfil/ipfw/ip_fw_nat.c
@@ -999,9 +999,11 @@ ipfw_nat_del(struct sockopt *sopt)
{
struct cfg_nat *ptr;
struct ip_fw_chain *chain = &V_layer3_chain;
- int i;
+ int error, i;
- sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
+ if (error != 0)
+ return (error);
/* XXX validate i */
IPFW_UH_WLOCK(chain);
ptr = lookup_nat(&chain->nat, i);
@@ -1104,7 +1106,7 @@ ipfw_nat_get_log(struct sockopt *sopt)
{
uint8_t *data;
struct cfg_nat *ptr;
- int i, size;
+ int error, i, size;
struct ip_fw_chain *chain;
IPFW_RLOCK_TRACKER;
@@ -1134,9 +1136,9 @@ ipfw_nat_get_log(struct sockopt *sopt)
i += LIBALIAS_BUF_SIZE;
}
IPFW_RUNLOCK(chain);
- sooptcopyout(sopt, data, size);
+ error = sooptcopyout(sopt, data, size);
free(data, M_IPFW);
- return(0);
+ return (error);
}
static int
@@ -1166,7 +1168,7 @@ vnet_ipfw_nat_uninit(const void *arg __unused)
}
static void
-ipfw_nat_init(void)
+ipfw_nat_init(void *dummy __unused)
{
/* init ipfw hooks */
@@ -1183,7 +1185,7 @@ ipfw_nat_init(void)
}
static void
-ipfw_nat_destroy(void)
+ipfw_nat_destroy(void *dummy __unused)
{
EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_event_tag);
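
The ipfw_nat changes above follow one pattern: sooptcopyin()/sooptcopyout() results are now checked and propagated instead of being ignored, which also satisfies the __result_use_check annotation added to sys/sys/sockopt.h later in this patch. A userland model of why that matters, with a hypothetical stand-in for sooptcopyin():

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Stand-in: copy in a user-supplied option, failing on short input. */
static int
sooptcopyin_model(const void *src, size_t srclen, void *dst,
    size_t len, size_t minlen)
{
    if (srclen < minlen)
        return (EINVAL);
    memcpy(dst, src, len < srclen ? len : srclen);
    return (0);
}

int
main(void)
{
    int i, error, in = 7;

    error = sooptcopyin_model(&in, sizeof(in), &i, sizeof(i), sizeof(i));
    if (error != 0)
        return (error);  /* propagate; never use a stale 'i' */
    printf("nat instance id %d\n", i);
    return (0);
}
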
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index d58af6e5ec4d..a4557f139ae5 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -259,7 +259,7 @@ static void dehook_pf_eth(void);
static void dehook_pf(void);
static int shutdown_pf(void);
static int pf_load(void);
-static void pf_unload(void);
+static void pf_unload(void *);
static struct cdevsw pf_cdevsw = {
.d_ioctl = pfioctl,
@@ -7082,7 +7082,7 @@ pf_unload_vnet(void)
}
static void
-pf_unload(void)
+pf_unload(void *dummy __unused)
{
sx_xlock(&pf_end_lock);
diff --git a/sys/nfs/nfs_diskless.c b/sys/nfs/nfs_diskless.c
index 42cfee63d184..0f0cf80feeec 100644
--- a/sys/nfs/nfs_diskless.c
+++ b/sys/nfs/nfs_diskless.c
@@ -428,7 +428,7 @@ decode_nfshandle(char *ev, u_char *fh, int maxfh)
#if !defined(BOOTP_NFSROOT)
static void
-nfs_rootconf(void)
+nfs_rootconf(void *dummy __unused)
{
nfs_setup_diskless();
diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c
index 796b1719b8ba..01bf4c7e90a8 100644
--- a/sys/powerpc/aim/mmu_oea64.c
+++ b/sys/powerpc/aim/mmu_oea64.c
@@ -297,7 +297,7 @@ static u_int moea64_clear_bit(vm_page_t, uint64_t);
static void moea64_kremove(vm_offset_t);
static void moea64_syncicache(pmap_t pmap, vm_offset_t va,
vm_paddr_t pa, vm_size_t sz);
-static void moea64_pmap_init_qpages(void);
+static void moea64_pmap_init_qpages(void *);
static void moea64_remove_locked(pmap_t, vm_offset_t,
vm_offset_t, struct pvo_dlist *);
@@ -1284,7 +1284,7 @@ moea64_late_bootstrap(vm_offset_t kernelstart, vm_offset_t kernelend)
}
static void
-moea64_pmap_init_qpages(void)
+moea64_pmap_init_qpages(void *dummy __unused)
{
struct pcpu *pc;
int i;
diff --git a/sys/powerpc/cpufreq/pmcr.c b/sys/powerpc/cpufreq/pmcr.c
index dd489b607606..6ae0777a8ac7 100644
--- a/sys/powerpc/cpufreq/pmcr.c
+++ b/sys/powerpc/cpufreq/pmcr.c
@@ -40,7 +40,8 @@ static int pstate_ids[256];
static int pstate_freqs[256];
static int npstates;
-static void parse_pstates(void)
+static void
+parse_pstates(void *dummy __unused)
{
phandle_t node;
diff --git a/sys/rpc/auth.h b/sys/rpc/auth.h
index 33c33ffd594d..648fb99a3a27 100644
--- a/sys/rpc/auth.h
+++ b/sys/rpc/auth.h
@@ -354,6 +354,10 @@ __END_DECLS
#define RPCSEC_GSS 6 /* RPCSEC_GSS */
#define AUTH_TLS 7 /* Initiate RPC-over-TLS */
+/* RFC 5531's prescribed limits for variable-length arrays. */
+#define AUTH_SYS_MAX_HOSTNAME 255
+#define AUTH_SYS_MAX_GROUPS 16 /* Supplementary groups. */
+
/*
* Pseudo auth flavors for RPCSEC_GSS.
*/
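
For reference, the new constants bound the AUTH_SYS credential body whose XDR layout, per RFC 5531, is: stamp, machinename (length word plus padded bytes), uid, gid, and the gids<> array (count word plus entries). A small sketch that computes the body size under those rules; the helper name is illustrative:

#include <stdint.h>
#include <stdio.h>

#define BYTES_PER_XDR_UNIT 4
#define RNDUP(x) ((((x) + BYTES_PER_XDR_UNIT - 1) / BYTES_PER_XDR_UNIT) \
    * BYTES_PER_XDR_UNIT)

/* stamp + namelen + uid + gid + gids-count = 5 XDR units, plus payloads. */
static uint32_t
authsys_body_len(uint32_t name_len, uint32_t supp_ngroups)
{
    return (5 * BYTES_PER_XDR_UNIT + RNDUP(name_len) +
        supp_ngroups * BYTES_PER_XDR_UNIT);
}

int
main(void)
{
    /* Largest legal body: 255-byte hostname, 16 gids -> 340 bytes. */
    printf("%u bytes\n", authsys_body_len(255, 16));
    return (0);
}
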
diff --git a/sys/rpc/authunix_prot.c b/sys/rpc/authunix_prot.c
index b107d5541c50..ff4c12c3f52e 100644
--- a/sys/rpc/authunix_prot.c
+++ b/sys/rpc/authunix_prot.c
@@ -30,7 +30,6 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
/*
* authunix_prot.c
* XDR for UNIX style authentication parameters for RPC
@@ -40,8 +39,7 @@
#include <sys/param.h>
#include <sys/jail.h>
-#include <sys/kernel.h>
-#include <sys/systm.h>
+#include <sys/libkern.h>
#include <sys/ucred.h>
#include <rpc/types.h>
@@ -50,9 +48,6 @@
#include <rpc/rpc_com.h>
-/* gids compose part of a credential; there may not be more than 16 of them */
-#define NGRPS 16
-
/*
* XDR for unix authentication parameters.
*/
@@ -60,25 +55,23 @@ bool_t
xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred)
{
uint32_t namelen;
- uint32_t ngroups, i;
+ uint32_t supp_ngroups, i;
uint32_t junk;
char hostbuf[MAXHOSTNAMELEN];
+ if (xdrs->x_op == XDR_FREE)
+ /* This function does not allocate auxiliary memory. */
+ return (TRUE);
+
if (xdrs->x_op == XDR_ENCODE) {
- /*
- * Restrict name length to 255 according to RFC 1057.
- */
getcredhostname(NULL, hostbuf, sizeof(hostbuf));
namelen = strlen(hostbuf);
- if (namelen > 255)
- namelen = 255;
- } else {
+ if (namelen > AUTH_SYS_MAX_HOSTNAME)
+ namelen = AUTH_SYS_MAX_HOSTNAME;
+ } else
namelen = 0;
- }
- junk = 0;
- if (!xdr_uint32_t(xdrs, time)
- || !xdr_uint32_t(xdrs, &namelen))
+ if (!xdr_uint32_t(xdrs, time) || !xdr_uint32_t(xdrs, &namelen))
return (FALSE);
/*
@@ -88,43 +81,65 @@ xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred)
if (!xdr_opaque(xdrs, hostbuf, namelen))
return (FALSE);
} else {
+ if (namelen > AUTH_SYS_MAX_HOSTNAME)
+ return (FALSE);
xdr_setpos(xdrs, xdr_getpos(xdrs) + RNDUP(namelen));
}
if (!xdr_uint32_t(xdrs, &cred->cr_uid))
return (FALSE);
+
+ /*
+ * Safety check: The protocol needs at least one group (access to
+ * 'cr_gid', decrement of 'cr_ngroups' below).
+ */
+ if (xdrs->x_op == XDR_ENCODE && cred->cr_ngroups == 0)
+ return (FALSE);
if (!xdr_uint32_t(xdrs, &cred->cr_gid))
return (FALSE);
if (xdrs->x_op == XDR_ENCODE) {
/*
- * Note that this is a `struct xucred`, which maintains its
- * historical layout of preserving the egid in cr_ngroups and
- * cr_groups[0] == egid.
+ * Note that this is a 'struct xucred', which still has the
+ * historical layout where the effective GID is in cr_groups[0]
+ * and is accounted in 'cr_ngroups'. We subtract 1 to obtain
+ * the number of "supplementary" groups, passed in the AUTH_SYS
+ * credentials variable-length array called gids[] in RFC 5531.
*/
- ngroups = cred->cr_ngroups - 1;
- if (ngroups > NGRPS)
- ngroups = NGRPS;
+ MPASS(cred->cr_ngroups <= XU_NGROUPS);
+ supp_ngroups = cred->cr_ngroups - 1;
+ if (supp_ngroups > AUTH_SYS_MAX_GROUPS)
+ /* With current values, this should never execute. */
+ supp_ngroups = AUTH_SYS_MAX_GROUPS;
}
- if (!xdr_uint32_t(xdrs, &ngroups))
+ if (!xdr_uint32_t(xdrs, &supp_ngroups))
return (FALSE);
- for (i = 0; i < ngroups; i++) {
- if (i < ngroups_max) {
- if (!xdr_uint32_t(xdrs, &cred->cr_groups[i + 1]))
- return (FALSE);
- } else {
- if (!xdr_uint32_t(xdrs, &junk))
- return (FALSE);
- }
- }
- if (xdrs->x_op == XDR_DECODE) {
- if (ngroups > ngroups_max)
- cred->cr_ngroups = ngroups_max + 1;
- else
- cred->cr_ngroups = ngroups + 1;
- }
+ /*
+ * Because we cannot store more than XU_NGROUPS in total (16 at time of
+ * this writing), for now we choose to be strict with respect to RFC
+ * 5531's maximum number of supplementary groups (AUTH_SYS_MAX_GROUPS).
+ * That would also be an accidental DoS prevention measure if the
+ * request handling code didn't try to reassemble it in full without any
+ * size limits. Although AUTH_SYS_MAX_GROUPS and XU_NGROUPS are equal,
+ * since the latter includes the "effective" GID, we cannot store the
+ * last group of a message with exactly AUTH_SYS_MAX_GROUPS
+ * supplementary groups. We accept such messages so as not to violate
+ * the protocol, silently dropping the last group on the floor.
+ */
+
+ if (xdrs->x_op != XDR_ENCODE && supp_ngroups > AUTH_SYS_MAX_GROUPS)
+ return (FALSE);
+
+ junk = 0;
+ for (i = 0; i < supp_ngroups; ++i)
+ if (!xdr_uint32_t(xdrs, i < XU_NGROUPS - 1 ?
+ &cred->cr_sgroups[i] : &junk))
+ return (FALSE);
+
+ if (xdrs->x_op != XDR_ENCODE)
+ cred->cr_ngroups = MIN(supp_ngroups + 1, XU_NGROUPS);
return (TRUE);
}
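
The group accounting described in the rewritten comments can be condensed: struct xucred counts the effective GID in cr_ngroups, so the wire carries one fewer entry, and decode clamps back into the struct. A userland model using the constants quoted in the diff:

#include <stdint.h>
#include <stdio.h>

#define XU_NGROUPS          16
#define AUTH_SYS_MAX_GROUPS 16
#define MIN(a, b)           ((a) < (b) ? (a) : (b))

int
main(void)
{
    uint32_t cr_ngroups = 16;   /* egid + 15 supplementary */
    uint32_t on_wire, decoded;

    /* Encode: drop the egid slot, cap at the protocol maximum. */
    on_wire = cr_ngroups - 1;
    if (on_wire > AUTH_SYS_MAX_GROUPS)
        on_wire = AUTH_SYS_MAX_GROUPS;

    /* Decode: egid slot + wire groups, clamped to the struct. */
    decoded = MIN(on_wire + 1, XU_NGROUPS);
    printf("wire gids %u -> cr_ngroups %u\n", on_wire, decoded);
    return (0);
}
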
diff --git a/sys/rpc/svc_auth_unix.c b/sys/rpc/svc_auth_unix.c
index 963f4f272964..aa0fc585865f 100644
--- a/sys/rpc/svc_auth_unix.c
+++ b/sys/rpc/svc_auth_unix.c
@@ -41,18 +41,12 @@
*/
#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
-#include <sys/systm.h>
#include <sys/ucred.h>
#include <rpc/rpc.h>
#include <rpc/rpc_com.h>
-#define MAX_MACHINE_NAME 255
-#define NGRPS 16
-
/*
* Unix longhand authenticator
*/
@@ -62,11 +56,8 @@ _svcauth_unix(struct svc_req *rqst, struct rpc_msg *msg)
enum auth_stat stat;
XDR xdrs;
int32_t *buf;
- uint32_t time;
struct xucred *xcr;
- u_int auth_len;
- size_t str_len, gid_len;
- u_int i;
+ uint32_t auth_len, time;
xcr = rqst->rq_clntcred;
auth_len = (u_int)msg->rm_call.cb_cred.oa_length;
@@ -74,51 +65,58 @@ _svcauth_unix(struct svc_req *rqst, struct rpc_msg *msg)
XDR_DECODE);
buf = XDR_INLINE(&xdrs, auth_len);
if (buf != NULL) {
+ /* 'time', 'str_len', UID, GID and 'supp_ngroups'. */
+ const uint32_t min_len = 5 * BYTES_PER_XDR_UNIT;
+ uint32_t str_len, supp_ngroups;
+
+ if (auth_len < min_len)
+ goto badcred;
time = IXDR_GET_UINT32(buf);
- str_len = (size_t)IXDR_GET_UINT32(buf);
- if (str_len > MAX_MACHINE_NAME) {
- stat = AUTH_BADCRED;
- goto done;
- }
+ str_len = IXDR_GET_UINT32(buf);
+ if (str_len > AUTH_SYS_MAX_HOSTNAME)
+ goto badcred;
str_len = RNDUP(str_len);
+ /*
+ * Recheck message length now that we know the value of
+ * 'str_len' (and that it won't cause an overflow in additions
+ * below) to protect access to the credentials part.
+ */
+ if (auth_len < min_len + str_len)
+ goto badcred;
buf += str_len / sizeof (int32_t);
xcr->cr_uid = IXDR_GET_UINT32(buf);
xcr->cr_gid = IXDR_GET_UINT32(buf);
- gid_len = (size_t)IXDR_GET_UINT32(buf);
- if (gid_len > NGRPS) {
- stat = AUTH_BADCRED;
- goto done;
- }
- for (i = 0; i < gid_len; i++) {
- /*
- * Note that this is a `struct xucred`, which maintains
- * its historical layout of preserving the egid in
- * cr_ngroups and cr_groups[0] == egid.
- */
- if (i + 1 < XU_NGROUPS)
- xcr->cr_groups[i + 1] = IXDR_GET_INT32(buf);
- else
- buf++;
- }
- if (gid_len + 1 > XU_NGROUPS)
- xcr->cr_ngroups = XU_NGROUPS;
- else
- xcr->cr_ngroups = gid_len + 1;
+ supp_ngroups = IXDR_GET_UINT32(buf);
+ /*
+ * See the explanatory comment before the similar test at the end of
+ * xdr_authunix_parms() for why we strictly respect RFC 5531 and
+ * why we may have to drop the last supplementary group when
+ * there are AUTH_SYS_MAX_GROUPS of them.
+ */
+ if (supp_ngroups > AUTH_SYS_MAX_GROUPS)
+ goto badcred;
+ /*
+ * Final message length check, as we now know how much we will
+ * read in total.
+ */
+ if (auth_len < min_len + str_len +
+ supp_ngroups * BYTES_PER_XDR_UNIT)
+ goto badcred;
/*
- * five is the smallest unix credentials structure -
- * timestamp, hostname len (0), uid, gid, and gids len (0).
+ * Note that 'xcr' is a 'struct xucred', which still has the
+ * historical layout where the effective GID is in cr_groups[0]
+ * and is accounted in 'cr_ngroups'.
*/
- if ((5 + gid_len) * BYTES_PER_XDR_UNIT + str_len > auth_len) {
- (void) printf("bad auth_len gid %ld str %ld auth %u\n",
- (long)gid_len, (long)str_len, auth_len);
- stat = AUTH_BADCRED;
- goto done;
+ for (uint32_t i = 0; i < supp_ngroups; ++i) {
+ if (i < XU_NGROUPS - 1)
+ xcr->cr_sgroups[i] = IXDR_GET_INT32(buf);
+ else
+ buf++;
}
- } else if (! xdr_authunix_parms(&xdrs, &time, xcr)) {
- stat = AUTH_BADCRED;
- goto done;
- }
+ xcr->cr_ngroups = MIN(supp_ngroups + 1, XU_NGROUPS);
+ } else if (!xdr_authunix_parms(&xdrs, &time, xcr))
+ goto badcred;
rqst->rq_verf = _null_auth;
stat = AUTH_OK;
@@ -126,6 +124,10 @@ done:
XDR_DESTROY(&xdrs);
return (stat);
+
+badcred:
+ stat = AUTH_BADCRED;
+ goto done;
}
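
The rewritten fast path above validates auth_len in three stages, once per piece of newly learned length information, so every inline read stays inside the buffer. A compilable model of those checks, using the constants from the diff; the function name is made up:

#include <stdint.h>
#include <stdio.h>

#define BYTES_PER_XDR_UNIT    4
#define AUTH_SYS_MAX_HOSTNAME 255
#define AUTH_SYS_MAX_GROUPS   16
#define RNDUP(x) ((((x) + BYTES_PER_XDR_UNIT - 1) / BYTES_PER_XDR_UNIT) \
    * BYTES_PER_XDR_UNIT)

static int
cred_len_ok(uint32_t auth_len, uint32_t str_len, uint32_t supp_ngroups)
{
    /* 'time', 'str_len', UID, GID and 'supp_ngroups'. */
    const uint32_t min_len = 5 * BYTES_PER_XDR_UNIT;

    if (auth_len < min_len)                 /* stage 1: fixed header */
        return (0);
    if (str_len > AUTH_SYS_MAX_HOSTNAME)
        return (0);
    if (auth_len < min_len + RNDUP(str_len)) /* stage 2: + hostname */
        return (0);
    if (supp_ngroups > AUTH_SYS_MAX_GROUPS)
        return (0);
    return (auth_len >= min_len + RNDUP(str_len) +  /* stage 3: + gids */
        supp_ngroups * BYTES_PER_XDR_UNIT);
}

int
main(void)
{
    printf("%d %d\n", cred_len_ok(340, 255, 16),
        cred_len_ok(24, 255, 16));  /* prints "1 0" */
    return (0);
}
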
diff --git a/sys/security/audit/audit.c b/sys/security/audit/audit.c
index 7ec50d990d4e..876776e5f62e 100644
--- a/sys/security/audit/audit.c
+++ b/sys/security/audit/audit.c
@@ -329,7 +329,7 @@ audit_record_dtor(void *mem, int size, void *arg)
* call into the BSM assembly code to initialize it.
*/
static void
-audit_init(void)
+audit_init(void *dummy __unused)
{
audit_trail_enabled = 0;
diff --git a/sys/security/mac/mac_framework.c b/sys/security/mac/mac_framework.c
index d742b5dcbc3a..b0776160cc74 100644
--- a/sys/security/mac/mac_framework.c
+++ b/sys/security/mac/mac_framework.c
@@ -320,7 +320,7 @@ mac_policy_xlock_assert(void)
* Initialize the MAC subsystem, including appropriate SMP locks.
*/
static void
-mac_init(void)
+mac_init(void *dummy __unused)
{
LIST_INIT(&mac_static_policy_list);
@@ -340,7 +340,7 @@ mac_init(void)
* kernel, or loaded before the kernel startup.
*/
static void
-mac_late_init(void)
+mac_late_init(void *dummy __unused)
{
mac_late = 1;
diff --git a/sys/sys/imgact_elf.h b/sys/sys/imgact_elf.h
index 2845a9dbc1e2..9e2a233248b4 100644
--- a/sys/sys/imgact_elf.h
+++ b/sys/sys/imgact_elf.h
@@ -86,7 +86,7 @@ typedef struct {
struct sysentvec *sysvec;
const char *interp_newpath;
int flags;
- Elf_Brandnote *brand_note;
+ const Elf_Brandnote *brand_note;
bool (*header_supported)(const struct image_params *,
const int32_t *, const uint32_t *);
/* High 8 bits of flags is private to the ABI */
@@ -111,9 +111,9 @@ struct sseg_closure {
size_t size; /* Total size of all writable segments. */
};
-bool __elfN(brand_inuse)(Elf_Brandinfo *entry);
-int __elfN(insert_brand_entry)(Elf_Brandinfo *entry);
-int __elfN(remove_brand_entry)(Elf_Brandinfo *entry);
+bool __elfN(brand_inuse)(const Elf_Brandinfo *entry);
+int __elfN(insert_brand_entry)(const Elf_Brandinfo *entry);
+int __elfN(remove_brand_entry)(const Elf_Brandinfo *entry);
int __elfN(freebsd_fixup)(uintptr_t *, struct image_params *);
int __elfN(coredump)(struct thread *, struct coredump_writer *, off_t, int);
size_t __elfN(populate_note)(int, void *, void *, size_t, void **);
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 9140cee56885..8c0729d3ec66 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -741,7 +741,7 @@ struct proc {
reaper which spawned
our subtree. */
uint64_t p_elf_flags; /* (x) ELF flags */
- void *p_elf_brandinfo; /* (x) Elf_Brandinfo, NULL for
+ const void *p_elf_brandinfo; /* (x) Elf_Brandinfo, NULL for
non ELF binaries. */
sbintime_t p_umtx_min_timeout;
/* End area that is copied on creation. */
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
index b4593f38f592..739723754b7d 100644
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -62,7 +62,7 @@
#include <sys/_sx.h>
#include <sys/_task.h>
-#define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */
+#define SB_MAX (8*1024*1024) /* default for max chars in sockbuf */
struct ktls_session;
struct mbuf;
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
index cdd4fa3b4b89..cf1d95da6168 100644
--- a/sys/sys/socket.h
+++ b/sys/sys/socket.h
@@ -396,6 +396,7 @@ struct sockproto {
#define PF_NETLINK AF_NETLINK
#define PF_INET_SDP AF_INET_SDP
#define PF_INET6_SDP AF_INET6_SDP
+#define PF_HYPERV AF_HYPERV
#define PF_DIVERT AF_DIVERT
#define PF_IPFWLOG AF_IPFWLOG
diff --git a/sys/sys/sockopt.h b/sys/sys/sockopt.h
index bfe12d8510d7..d2b0ff5ed2c8 100644
--- a/sys/sys/sockopt.h
+++ b/sys/sys/sockopt.h
@@ -57,8 +57,10 @@ struct sockopt {
int sosetopt(struct socket *so, struct sockopt *sopt);
int sogetopt(struct socket *so, struct sockopt *sopt);
-int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen);
-int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len);
+int __result_use_check sooptcopyin(struct sockopt *sopt, void *buf, size_t len,
+ size_t minlen);
+int __result_use_check sooptcopyout(struct sockopt *sopt, const void *buf,
+ size_t len);
int soopt_getm(struct sockopt *sopt, struct mbuf **mp);
int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m);
int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m);
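
__result_use_check is FreeBSD's spelling of the warn-unused-result attribute, so silently ignoring a sooptcopyin()/sooptcopyout() return (as the ipfw_nat code used to) now draws a compiler diagnostic. A tiny userland demonstration, assuming a GCC/Clang-style attribute and a hypothetical helper:

#include <stdio.h>

#define __result_use_check __attribute__((__warn_unused_result__))

static int __result_use_check
copyin_model(int *out)
{
    *out = 42;
    return (0);     /* 0 on success, an errno value on failure */
}

int
main(void)
{
    int v, error;

    error = copyin_model(&v);   /* result consumed: no warning */
    if (error != 0)
        return (1);
    /* A bare copyin_model(&v); would now trigger -Wunused-result. */
    printf("%d\n", v);
    return (0);
}
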
diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h
index 1714fa5a7416..6de391dcc03e 100644
--- a/sys/sys/sysent.h
+++ b/sys/sys/sysent.h
@@ -343,8 +343,7 @@ void exec_free_abi_mappings(struct proc *p);
void exec_onexec_old(struct thread *td);
#define INIT_SYSENTVEC(name, sv) \
- SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, \
- (sysinit_cfunc_t)exec_sysvec_init, sv);
+ SYSINIT(name, SI_SUB_EXEC, SI_ORDER_ANY, exec_sysvec_init, sv)
#endif /* _KERNEL */
diff --git a/sys/sys/tree.h b/sys/sys/tree.h
index c11bccfb387c..194ad505b038 100644
--- a/sys/sys/tree.h
+++ b/sys/sys/tree.h
@@ -334,10 +334,13 @@ struct { \
#define _RB_L ((__uintptr_t)1)
#define _RB_R ((__uintptr_t)2)
#define _RB_LR ((__uintptr_t)3)
-#define _RB_BITS(elm) (*(__uintptr_t *)&elm)
+#define _RB_BITS(elm) ((__uintptr_t)elm)
#define _RB_BITSUP(elm, field) _RB_BITS(_RB_UP(elm, field))
-#define _RB_PTR(elm) (__typeof(elm)) \
- ((__uintptr_t)elm & ~_RB_LR)
+#define _RB_PTR_OP(elm, op, dir) ((__typeof(elm)) \
+ ((__uintptr_t)(elm) op (dir)))
+#define _RB_PTR(elm) _RB_PTR_OP((elm), &, ~_RB_LR)
+#define _RB_MOD_OR(elm, dir) ((elm) = _RB_PTR_OP((elm), |, (dir)))
+#define _RB_MOD_XOR(elm, dir) ((elm) = _RB_PTR_OP((elm), ^, (dir)))
#define RB_PARENT(elm, field) _RB_PTR(_RB_UP(elm, field))
#define RB_LEFT(elm, field) _RB_LINK(elm, _RB_L, field)
@@ -346,8 +349,8 @@ struct { \
#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
#define RB_SET_PARENT(dst, src, field) do { \
- _RB_BITSUP(dst, field) = (__uintptr_t)src | \
- (_RB_BITSUP(dst, field) & _RB_LR); \
+ _RB_UP(dst, field) = (__typeof(src))((__uintptr_t)src | \
+ (_RB_BITSUP(dst, field) & _RB_LR)); \
} while (/*CONSTCOND*/ 0)
#define RB_SET(elm, parent, field) do { \
@@ -546,12 +549,12 @@ name##_RB_INSERT_COLOR(struct name *head, \
elmdir = RB_RIGHT(parent, field) == elm ? _RB_R : _RB_L; \
if (_RB_BITS(gpar) & elmdir) { \
/* shorten the parent-elm edge to rebalance */ \
- _RB_BITSUP(parent, field) ^= elmdir; \
+ _RB_MOD_XOR(_RB_UP(parent, field), elmdir); \
return (NULL); \
} \
sibdir = elmdir ^ _RB_LR; \
/* the other edge must change length */ \
- _RB_BITSUP(parent, field) ^= sibdir; \
+ _RB_MOD_XOR(_RB_UP(parent, field), sibdir); \
if ((_RB_BITS(gpar) & _RB_LR) == 0) { \
/* both edges now short, retry from parent */ \
child = elm; \
@@ -583,11 +586,14 @@ name##_RB_INSERT_COLOR(struct name *head, \
RB_ROTATE(elm, child, elmdir, field); \
child_up = _RB_UP(child, field); \
if (_RB_BITS(child_up) & sibdir) \
- _RB_BITSUP(parent, field) ^= elmdir; \
+ _RB_MOD_XOR(_RB_UP(parent, field), \
+ elmdir); \
if (_RB_BITS(child_up) & elmdir) \
- _RB_BITSUP(elm, field) ^= _RB_LR; \
+ _RB_MOD_XOR(_RB_UP(elm, field), \
+ _RB_LR); \
else \
- _RB_BITSUP(elm, field) ^= elmdir; \
+ _RB_MOD_XOR(_RB_UP(elm, field), \
+ elmdir); \
/* if child is a leaf, don't augment elm, \
* since it is restored to be a leaf again. */ \
if ((_RB_BITS(child_up) & _RB_LR) == 0) \
@@ -656,7 +662,7 @@ name##_RB_REMOVE_COLOR(struct name *head, \
/* the rank of the tree rooted at elm shrank */ \
gpar = _RB_UP(parent, field); \
elmdir = RB_RIGHT(parent, field) == elm ? _RB_R : _RB_L; \
- _RB_BITS(gpar) ^= elmdir; \
+ _RB_MOD_XOR(gpar, elmdir); \
if (_RB_BITS(gpar) & elmdir) { \
/* lengthen the parent-elm edge to rebalance */ \
_RB_UP(parent, field) = gpar; \
@@ -664,7 +670,7 @@ name##_RB_REMOVE_COLOR(struct name *head, \
} \
if (_RB_BITS(gpar) & _RB_LR) { \
/* shorten other edge, retry from parent */ \
- _RB_BITS(gpar) ^= _RB_LR; \
+ _RB_MOD_XOR(gpar, _RB_LR); \
_RB_UP(parent, field) = gpar; \
gpar = _RB_PTR(gpar); \
continue; \
@@ -672,7 +678,7 @@ name##_RB_REMOVE_COLOR(struct name *head, \
sibdir = elmdir ^ _RB_LR; \
sib = _RB_LINK(parent, sibdir, field); \
up = _RB_UP(sib, field); \
- _RB_BITS(up) ^= _RB_LR; \
+ _RB_MOD_XOR(up, _RB_LR); \
if ((_RB_BITS(up) & _RB_LR) == 0) { \
/* shorten edges descending from sib, retry */ \
_RB_UP(sib, field) = up; \
@@ -703,24 +709,29 @@ name##_RB_REMOVE_COLOR(struct name *head, \
/* elm is a 1-child. First rotate at elm. */ \
RB_ROTATE(sib, elm, sibdir, field); \
up = _RB_UP(elm, field); \
- _RB_BITSUP(parent, field) ^= \
- (_RB_BITS(up) & elmdir) ? _RB_LR : elmdir; \
- _RB_BITSUP(sib, field) ^= \
- (_RB_BITS(up) & sibdir) ? _RB_LR : sibdir; \
- _RB_BITSUP(elm, field) |= _RB_LR; \
+ _RB_MOD_XOR(_RB_UP(parent, field), \
+ (_RB_BITS(up) & elmdir) ? _RB_LR : elmdir); \
+ _RB_MOD_XOR(_RB_UP(sib, field), \
+ (_RB_BITS(up) & sibdir) ? _RB_LR : sibdir); \
+ _RB_MOD_OR(_RB_UP(elm, field), _RB_LR); \
} else { \
if ((_RB_BITS(up) & elmdir) == 0 && \
RB_STRICT_HST && elm != NULL) { \
/* if parent does not become a leaf, \
do not demote parent yet. */ \
- _RB_BITSUP(parent, field) ^= sibdir; \
- _RB_BITSUP(sib, field) ^= _RB_LR; \
+ _RB_MOD_XOR(_RB_UP(parent, field), \
+ sibdir); \
+ _RB_MOD_XOR(_RB_UP(sib, field), \
+ _RB_LR); \
} else if ((_RB_BITS(up) & elmdir) == 0) { \
/* demote parent. */ \
- _RB_BITSUP(parent, field) ^= elmdir; \
- _RB_BITSUP(sib, field) ^= sibdir; \
+ _RB_MOD_XOR(_RB_UP(parent, field), \
+ elmdir); \
+ _RB_MOD_XOR(_RB_UP(sib, field), \
+ sibdir); \
} else \
- _RB_BITSUP(sib, field) ^= sibdir; \
+ _RB_MOD_XOR(_RB_UP(sib, field), \
+ sibdir); \
elm = sib; \
} \
\
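
The tree.h rework drops the old *(__uintptr_t *)&elm lvalue pun, which violated strict aliasing, in favor of reading tag bits by value and writing whole pointers back through _RB_MOD_OR()/_RB_MOD_XOR(). A self-contained model of the tagging scheme, with names shortened for illustration:

#include <stdint.h>
#include <stdio.h>

struct node {
    struct node *up;    /* parent pointer with flags in the low bits */
};

#define RB_L  ((uintptr_t)1)
#define RB_R  ((uintptr_t)2)
#define RB_LR ((uintptr_t)3)

/* Read bits by value; write back a whole, freshly computed pointer. */
#define RB_BITS(p)       ((uintptr_t)(p))
#define RB_PTR(p)        ((struct node *)(RB_BITS(p) & ~RB_LR))
#define RB_MOD_XOR(p, d) ((p) = (struct node *)(RB_BITS(p) ^ (d)))

int
main(void)
{
    struct node parent, elm = { .up = &parent };

    RB_MOD_XOR(elm.up, RB_L);   /* toggle the left-edge flag */
    printf("flags %#lx, parent intact %d\n",
        (unsigned long)(RB_BITS(elm.up) & RB_LR),
        RB_PTR(elm.up) == &parent);
    return (0);
}
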
diff --git a/sys/tests/ktest.h b/sys/tests/ktest.h
index c767aa31e8e5..75d7a75e2fff 100644
--- a/sys/tests/ktest.h
+++ b/sys/tests/ktest.h
@@ -57,6 +57,8 @@ struct ktest_test_info {
ktest_parse_t parse;
};
+#define KTEST_FUNC(X) static int __ktest_##X(struct ktest_test_context *ctx)
+
struct ktest_module_info {
const char *name;
const struct ktest_test_info *tests;
@@ -64,6 +66,8 @@ struct ktest_module_info {
void *module_ptr;
};
+#define KTEST_INFO(X) { "test_" #X, "Test " #X, __ktest_##X, NULL }
+
int ktest_default_modevent(module_t mod, int type, void *arg);
bool ktest_start_msg(struct ktest_test_context *ctx);
@@ -84,6 +88,9 @@ void ktest_end_msg(struct ktest_test_context *ctx);
#define KTEST_LOG(_ctx, _fmt, ...) \
KTEST_LOG_LEVEL(_ctx, LOG_DEBUG, _fmt, ## __VA_ARGS__)
+#define KTEST_ERR(_ctx, _fmt, ...) \
+ KTEST_LOG_LEVEL(_ctx, LOG_ERR, _fmt, ## __VA_ARGS__)
+
#define KTEST_MAX_BUF 512
#define KTEST_MODULE_DECLARE(_n, _t) \
@@ -104,6 +111,9 @@ MODULE_VERSION(ktest_##_n, 1); \
MODULE_DEPEND(ktest_##_n, ktestmod, 1, 1, 1); \
MODULE_DEPEND(ktest_##_n, netlink, 1, 1, 1); \
+#define KTEST_MODULE_DEPEND(_n, _d) \
+MODULE_DEPEND(ktest_##_n, _d, 1, 1, 1); \
+
#endif /* _KERNEL */
/* genetlink definitions */
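
The new KTEST_FUNC()/KTEST_INFO() pair cuts the boilerplate of declaring a test function and its table entry. A userland sketch of how they compose, with the macro bodies copied from the diff and the context structure stubbed out; the field layout is inferred from the KTEST_INFO initializer:

#include <stdio.h>

struct ktest_test_context;      /* opaque in this sketch */

#define KTEST_FUNC(X) \
    static int __ktest_##X(struct ktest_test_context *ctx)
#define KTEST_INFO(X) { "test_" #X, "Test " #X, __ktest_##X, NULL }

struct ktest_test_info {
    const char *name;
    const char *desc;
    int (*func)(struct ktest_test_context *);
    void *parse;
};

KTEST_FUNC(mbuf_basic)
{
    (void)ctx;
    return (0);                 /* 0 == pass */
}

static const struct ktest_test_info tests[] = {
    KTEST_INFO(mbuf_basic),     /* expands to a "test_mbuf_basic" entry */
};

int
main(void)
{
    printf("%s: %d\n", tests[0].name, tests[0].func(NULL));
    return (0);
}
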
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 970536a13aa5..f47cfd08f75a 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -653,8 +653,8 @@ done:
for (i = 0; i < UFS_NDADDR; i++)
if (newblks[i] != DIP(ip, i_db[i]))
panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd",
- i, (intmax_t)newblks[UFS_NDADDR + level],
- (intmax_t)DIP(ip, i_ib[level]));
+ i, (intmax_t)newblks[i],
+ (intmax_t)DIP(ip, i_db[i]));
BO_LOCK(bo);
if (length == 0 &&
(fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index fef28bb883e4..fee50f49c844 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -96,7 +96,7 @@ struct vmmeter __read_mostly vm_cnt = {
u_long __exclusive_cache_line vm_user_wire_count;
static void
-vmcounter_startup(void)
+vmcounter_startup(void *dummy __unused)
{
counter_u64_t *cnt = (counter_u64_t *)&vm_cnt;
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 3f1be78342c9..418a9cff8abf 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -120,7 +120,7 @@
/* the kernel process "vm_pageout"*/
static void vm_pageout(void);
-static void vm_pageout_init(void);
+static void vm_pageout_init(void *);
static int vm_pageout_clean(vm_page_t m, int *numpagedout);
static int vm_pageout_cluster(vm_page_t m);
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
@@ -2333,7 +2333,7 @@ vm_pageout_init_domain(int domain)
}
static void
-vm_pageout_init(void)
+vm_pageout_init(void *dummy __unused)
{
u_long freecount;
int i;
diff --git a/sys/x86/x86/tsc.c b/sys/x86/x86/tsc.c
index a1a5d8140b14..3b873d9dae73 100644
--- a/sys/x86/x86/tsc.c
+++ b/sys/x86/x86/tsc.c
@@ -650,7 +650,7 @@ retry:
#endif /* SMP */
static void
-init_TSC_tc(void)
+init_TSC_tc(void *dummy __unused)
{
uint64_t max_freq;
int shift;
diff --git a/sys/x86/xen/xen_apic.c b/sys/x86/xen/xen_apic.c
index 994dc3e0804c..43a253cc2860 100644
--- a/sys/x86/xen/xen_apic.c
+++ b/sys/x86/xen/xen_apic.c
@@ -330,7 +330,7 @@ xen_cpu_ipi_init(int cpu)
}
static void
-xen_setup_cpus(void)
+xen_setup_cpus(void *dummy __unused)
{
uint32_t regs[4];
int i;
diff --git a/tests/atf_python/ktest.py b/tests/atf_python/ktest.py
index a18f47d1dd06..a671aaa1fd4c 100644
--- a/tests/atf_python/ktest.py
+++ b/tests/atf_python/ktest.py
@@ -67,6 +67,10 @@ class KtestLoader(object):
def __init__(self, module_name: str, autoload: bool):
self.module_name = module_name
self.autoload = autoload
+ # Ensure the base ktest.ko module is loaded
+ result = libc.kldload("ktest")
+ if result != 0 and result != 17: # 17 is EEXIST (already loaded)
+ logger.debug(f"Failed to load base ktest module (error {result})")
self.helper = NlHelper()
self.nlsock = Nlsock(NlConst.NETLINK_GENERIC, self.helper)
self.family_id = self._get_family_id()
@@ -76,7 +80,9 @@ class KtestLoader(object):
family_id = self.nlsock.get_genl_family_id(NETLINK_FAMILY)
except ValueError:
if self.autoload:
- libc.kldload(self.module_name)
+ result = libc.kldload(self.module_name)
+ if result != 0 and result != 17: # 17 is EEXIST (already loaded)
+ raise RuntimeError(f"Failed to load kernel module '{self.module_name}' (error {result})")
family_id = self.nlsock.get_genl_family_id(NETLINK_FAMILY)
else:
raise
@@ -103,7 +109,9 @@ class KtestLoader(object):
def load_ktests(self):
ret = self._load_ktests()
if not ret and self.autoload:
- libc.kldload(self.module_name)
+ result = libc.kldload(self.module_name)
+ if result != 0 and result != 17: # 17 is EEXIST (already loaded)
+ raise RuntimeError(f"Failed to load kernel module '{self.module_name}' (error {result})")
ret = self._load_ktests()
return ret
diff --git a/tests/sys/fs/fusefs/bad_server.cc b/tests/sys/fs/fusefs/bad_server.cc
index af2ca146e431..c3d195735446 100644
--- a/tests/sys/fs/fusefs/bad_server.cc
+++ b/tests/sys/fs/fusefs/bad_server.cc
@@ -65,6 +65,11 @@ TEST_F(BadServer, ShortWrite)
out.header.unique = 0; // Asynchronous notification
out.expected_errno = EINVAL;
m_mock->write_response(out);
+ /*
+ * Tell the event loop to quit. The kernel has already disconnected us
+ * because of the short write.
+ */
+ m_mock->m_quit = true;
}
/*
diff --git a/tests/sys/kern/unix_stream.c b/tests/sys/kern/unix_stream.c
index 49d621dc5b0a..442b766ac885 100644
--- a/tests/sys/kern/unix_stream.c
+++ b/tests/sys/kern/unix_stream.c
@@ -1,6 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
+ * Copyright (c) 2025 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2018 Alan Somers
*
* Redistribution and use in source and binary forms, with or without
@@ -30,6 +31,7 @@
#include <sys/event.h>
#include <sys/select.h>
#include <sys/sysctl.h>
+#include <sys/time.h>
#include <sys/un.h>
#include <errno.h>
#include <fcntl.h>
@@ -490,6 +492,30 @@ ATF_TC_BODY(ourshutdown_kevent, tc)
close(sv[1]);
}
+ATF_TC_WITHOUT_HEAD(SO_SNDTIMEO);
+ATF_TC_BODY(SO_SNDTIMEO, tc)
+{
+ struct timespec tp1, tp2, rtp, sleep = { .tv_nsec = 100000000 };
+ int sv[2];
+ char buf[10];
+
+ full_socketpair(sv);
+ ATF_REQUIRE_EQ(0, setsockopt(sv[0], SOL_SOCKET, SO_SNDTIMEO,
+ &(struct timeval){ .tv_usec = sleep.tv_nsec / 1000 },
+ sizeof(struct timeval)));
+ ATF_REQUIRE_EQ(0, clock_gettime(CLOCK_MONOTONIC_PRECISE, &tp1));
+ ATF_REQUIRE_EQ(-1, send(sv[0], buf, sizeof(buf), 0));
+ ATF_REQUIRE(errno == EAGAIN);
+ ATF_REQUIRE_EQ(0, clock_gettime(CLOCK_MONOTONIC_PRECISE, &tp2));
+ timespecsub(&tp2, &tp1, &rtp);
+ ATF_REQUIRE(timespeccmp(&rtp, &sleep, >=));
+ ATF_REQUIRE_EQ(sizeof(buf), recv(sv[1], buf, sizeof(buf), 0));
+ ATF_REQUIRE_EQ(sizeof(buf), send(sv[0], buf, sizeof(buf), 0));
+
+ close(sv[0]);
+ close(sv[1]);
+}
+
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, getpeereid);
@@ -506,6 +532,7 @@ ATF_TP_ADD_TCS(tp)
ATF_TP_ADD_TC(tp, peershutdown_wakeup_poll);
ATF_TP_ADD_TC(tp, peershutdown_wakeup_kevent);
ATF_TP_ADD_TC(tp, ourshutdown_kevent);
+ ATF_TP_ADD_TC(tp, SO_SNDTIMEO);
return atf_no_error();
}
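
The new SO_SNDTIMEO test sets the timeout through a compound literal. A more explicit equivalent (a sketch, not part of the test; set_send_timeout() is a hypothetical helper): SO_SNDTIMEO takes a struct timeval, so the 100 ms timespec is expressed as 100000 microseconds.

#include <sys/socket.h>
#include <sys/time.h>
#include <err.h>

/* Set a send timeout of "usec" microseconds on socket "s". */
static void
set_send_timeout(int s, long usec)
{
	struct timeval tv = {
		.tv_sec = usec / 1000000,
		.tv_usec = usec % 1000000,
	};

	if (setsockopt(s, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) != 0)
		err(1, "setsockopt(SO_SNDTIMEO)");
}
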
diff --git a/tests/sys/netinet/Makefile b/tests/sys/netinet/Makefile
index b742342beecb..9739221676ce 100644
--- a/tests/sys/netinet/Makefile
+++ b/tests/sys/netinet/Makefile
@@ -30,6 +30,7 @@ ATF_TESTS_SH= arp \
ATF_TESTS_PYTEST+= carp.py
ATF_TESTS_PYTEST+= igmp.py
+ATF_TESTS_PYTEST+= tcp_hpts_test.py
LIBADD.so_reuseport_lb_test= pthread
LIBADD.udp_bindings= pthread
diff --git a/tests/sys/netinet/carp.sh b/tests/sys/netinet/carp.sh
index 2aae2854826e..568d2beaf914 100755
--- a/tests/sys/netinet/carp.sh
+++ b/tests/sys/netinet/carp.sh
@@ -215,6 +215,9 @@ unicast_v4_body()
unicast_v4_cleanup()
{
+ jexec carp_uni_v4_one killall routed
+ jexec carp_uni_v4_two killall routed
+ jexec carp_uni_v4_three killall routed
vnet_cleanup
}
diff --git a/tests/sys/netinet/multicast-receive.c b/tests/sys/netinet/multicast-receive.c
index 81d0f10f5cfe..62fc68200dd6 100644
--- a/tests/sys/netinet/multicast-receive.c
+++ b/tests/sys/netinet/multicast-receive.c
@@ -36,6 +36,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sysexits.h>
#include <limits.h>
#include <err.h>
@@ -93,8 +94,9 @@ usage:
.imr_multiaddr = maddr,
.imr_interface = ifaddr,
};
- assert(setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
- sizeof(mreq)) == 0);
+ if (setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
+ sizeof(mreq)) != 0)
+ err(EX_OSERR, "setsockopt");
} else if (strcmp(argv[1], "ip_mreqn") == 0) {
/*
* ip_mreqn shall be used with index, but for testing
@@ -105,8 +107,9 @@ usage:
.imr_address = index ? (struct in_addr){ 0 } : ifaddr,
.imr_ifindex = index ? ifindex : 0,
};
- assert(setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreqn,
- sizeof(mreqn)) == 0);
+ if (setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreqn,
+ sizeof(mreqn)) != 0)
+ err(EX_OSERR, "setsockopt");
} else if (strcmp(argv[1], "group_req") == 0) {
if (!index)
errx(1, "group_req expects index");
@@ -116,8 +119,9 @@ usage:
gsa->sin_family = AF_INET;
gsa->sin_len = sizeof(struct sockaddr_in);
gsa->sin_addr = maddr;
- assert(setsockopt(s, IPPROTO_IP, MCAST_JOIN_GROUP, &greq,
- sizeof(greq)) == 0);
+ if (setsockopt(s, IPPROTO_IP, MCAST_JOIN_GROUP, &greq,
+ sizeof(greq)) != 0)
+ err(EX_OSERR, "setsockopt");
} else
goto usage;
diff --git a/tests/sys/netinet/multicast.sh b/tests/sys/netinet/multicast.sh
index a3854fd2fd20..34094ff08705 100755
--- a/tests/sys/netinet/multicast.sh
+++ b/tests/sys/netinet/multicast.sh
@@ -45,6 +45,15 @@ multicast_vnet_init()
jexec mjail2 ifconfig ${epair2}b 192.0.3.2/24
}
+multicast_join()
+{
+ jexec mjail2 $(atf_get_srcdir)/multicast-receive \
+ $1 233.252.0.1 6676 $2 > out & pid=$!
+ while ! jexec mjail2 ifmcstat | grep -q 233\.252\.0\.1; do
+ sleep 0.01
+ done
+}
+
atf_test_case "IP_ADD_MEMBERSHIP_ip_mreq" "cleanup"
IP_ADD_MEMBERSHIP_ip_mreq_head()
{
@@ -56,8 +65,7 @@ IP_ADD_MEMBERSHIP_ip_mreq_body()
multicast_vnet_init
# join group on interface with IP address 192.0.2.2
- jexec mjail2 $(atf_get_srcdir)/multicast-receive \
- ip_mreq 233.252.0.1 6676 192.0.2.2 > out & pid=$!
+ multicast_join ip_mreq 192.0.2.2
atf_check -s exit:0 -o empty \
jexec mjail1 $(atf_get_srcdir)/multicast-send \
0.0.0.0 6676 233.252.0.1 6676 192.0.2.1 hello
@@ -65,8 +73,28 @@ IP_ADD_MEMBERSHIP_ip_mreq_body()
atf_check -s exit:0 -o inline:"192.0.2.1:6676 hello\n" cat out
# join group on interface with IP address 192.0.3.2
- jexec mjail2 $(atf_get_srcdir)/multicast-receive \
- ip_mreq 233.252.0.1 6676 192.0.3.2 > out & pid=$!
+ multicast_join ip_mreq 192.0.3.2
+ atf_check -s exit:0 -o empty \
+ jexec mjail1 $(atf_get_srcdir)/multicast-send \
+ 0.0.0.0 6676 233.252.0.1 6676 192.0.3.1 hello
+ atf_check -s exit:0 sh -c "wait $pid; exit $?"
+ atf_check -s exit:0 -o inline:"192.0.3.1:6676 hello\n" cat out
+
+	# join group on the first multicast-capable interface (epair1a)
+ multicast_join ip_mreq 0.0.0.0
+ atf_check -s exit:0 -o empty \
+ jexec mjail1 $(atf_get_srcdir)/multicast-send \
+ 0.0.0.0 6676 233.252.0.1 6676 192.0.2.1 hello
+ atf_check -s exit:0 sh -c "wait $pid; exit $?"
+ atf_check -s exit:0 -o inline:"192.0.2.1:6676 hello\n" cat out
+
+	# Set up the receiving jail so that the first multicast-capable interface
+	# is epair1a and the default route points into epair2a. This lets us
+	# exercise both branches of inp_lookup_mcast_ifp().
+ jexec mjail2 route add default 192.0.3.254
+
+ # join group on the interface determined by the route lookup
+ multicast_join ip_mreq 0.0.0.0
atf_check -s exit:0 -o empty \
jexec mjail1 $(atf_get_srcdir)/multicast-send \
0.0.0.0 6676 233.252.0.1 6676 192.0.3.1 hello
@@ -90,8 +118,7 @@ IP_ADD_MEMBERSHIP_ip_mreqn_body()
multicast_vnet_init
# join group on interface epair2
- jexec mjail2 $(atf_get_srcdir)/multicast-receive \
- ip_mreqn 233.252.0.1 6676 ${epair1}b > out & pid=$!
+ multicast_join ip_mreqn ${epair1}b
atf_check -s exit:0 -o empty \
jexec mjail1 $(atf_get_srcdir)/multicast-send \
0.0.0.0 6676 233.252.0.1 6676 ${epair1}a hello
@@ -99,13 +126,25 @@ IP_ADD_MEMBERSHIP_ip_mreqn_body()
atf_check -s exit:0 -o inline:"192.0.2.1:6676 hello\n" cat out
# join group on interface epair2
- jexec mjail2 $(atf_get_srcdir)/multicast-receive \
- ip_mreqn 233.252.0.1 6676 ${epair2}b > out & pid=$!
+ multicast_join ip_mreqn ${epair2}b
atf_check -s exit:0 -o empty \
jexec mjail1 $(atf_get_srcdir)/multicast-send \
0.0.0.0 6676 233.252.0.1 6676 ${epair2}a hello
atf_check -s exit:0 sh -c "wait $pid; exit $?"
atf_check -s exit:0 -o inline:"192.0.3.1:6676 hello\n" cat out
+
+ # try to join group on the interface determined by the route lookup
+ atf_check -s exit:71 -e inline:"multicast-receive: setsockopt: Can't assign requested address\n" \
+ jexec mjail2 $(atf_get_srcdir)/multicast-receive \
+ ip_mreqn 233.252.0.1 6676 0
+ # add route and try again
+ jexec mjail2 route add default 192.0.3.254
+ multicast_join ip_mreqn 0
+ atf_check -s exit:0 -o empty \
+ jexec mjail1 $(atf_get_srcdir)/multicast-send \
+ 0.0.0.0 6676 233.252.0.1 6676 192.0.3.1 hello
+ atf_check -s exit:0 sh -c "wait $pid; exit $?"
+ atf_check -s exit:0 -o inline:"192.0.3.1:6676 hello\n" cat out
}
IP_ADD_MEMBERSHIP_ip_mreqn_cleanup()
{
@@ -123,9 +162,8 @@ MCAST_JOIN_GROUP_body()
{
multicast_vnet_init
- # join group on interface epair2
- jexec mjail2 $(atf_get_srcdir)/multicast-receive \
- group_req 233.252.0.1 6676 ${epair1}b > out & pid=$!
+ # join group on interface epair1
+ multicast_join group_req ${epair1}b
atf_check -s exit:0 -o empty \
jexec mjail1 $(atf_get_srcdir)/multicast-send \
0.0.0.0 6676 233.252.0.1 6676 ${epair1}a hello
@@ -133,13 +171,25 @@ MCAST_JOIN_GROUP_body()
atf_check -s exit:0 -o inline:"192.0.2.1:6676 hello\n" cat out
# join group on interface epair2
- jexec mjail2 $(atf_get_srcdir)/multicast-receive \
- group_req 233.252.0.1 6676 ${epair2}b > out & pid=$!
+ multicast_join group_req ${epair2}b
atf_check -s exit:0 -o empty \
jexec mjail1 $(atf_get_srcdir)/multicast-send \
0.0.0.0 6676 233.252.0.1 6676 ${epair2}a hello
atf_check -s exit:0 sh -c "wait $pid; exit $?"
atf_check -s exit:0 -o inline:"192.0.3.1:6676 hello\n" cat out
+
+ # try to join group on the interface determined by the route lookup
+ atf_check -s exit:71 -e inline:"multicast-receive: setsockopt: Can't assign requested address\n" \
+ jexec mjail2 $(atf_get_srcdir)/multicast-receive \
+ group_req 233.252.0.1 6676 0
+ # add route and try again
+ jexec mjail2 route add default 192.0.3.254
+ multicast_join group_req 0
+ atf_check -s exit:0 -o empty \
+ jexec mjail1 $(atf_get_srcdir)/multicast-send \
+ 0.0.0.0 6676 233.252.0.1 6676 192.0.3.1 hello
+ atf_check -s exit:0 sh -c "wait $pid; exit $?"
+ atf_check -s exit:0 -o inline:"192.0.3.1:6676 hello\n" cat out
}
MCAST_JOIN_GROUP_cleanup()
{
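
The new test cases pass interface 0 so that the kernel resolves the outgoing interface itself (via inp_lookup_mcast_ifp(), per the comments above). A sketch, assuming the standard group_req usage that multicast-receive exercises; join_group() is a hypothetical helper.

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <err.h>

/*
 * Join an IPv4 group with MCAST_JOIN_GROUP; gr_interface == 0 leaves
 * interface selection to the kernel's route lookup.
 */
static void
join_group(int s, const char *group, unsigned int ifindex)
{
	struct group_req greq;
	struct sockaddr_in *gsa = (struct sockaddr_in *)&greq.gr_group;

	memset(&greq, 0, sizeof(greq));
	greq.gr_interface = ifindex;
	gsa->sin_family = AF_INET;
	gsa->sin_len = sizeof(*gsa);
	if (inet_pton(AF_INET, group, &gsa->sin_addr) != 1)
		errx(1, "bad group address: %s", group);
	if (setsockopt(s, IPPROTO_IP, MCAST_JOIN_GROUP, &greq,
	    sizeof(greq)) != 0)
		err(1, "MCAST_JOIN_GROUP");
}
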
diff --git a/tests/sys/netinet/tcp_hpts_test.py b/tests/sys/netinet/tcp_hpts_test.py
new file mode 100644
index 000000000000..c56383fb310f
--- /dev/null
+++ b/tests/sys/netinet/tcp_hpts_test.py
@@ -0,0 +1,4 @@
+from atf_python.ktest import BaseKernelTest
+
+class TestTcpHpts(BaseKernelTest):
+ KTEST_MODULE_NAME = "ktest_tcphpts"
diff --git a/tests/sys/vm/mmap_test.c b/tests/sys/vm/mmap_test.c
index 6bc30f73ca95..27d02ae667fb 100644
--- a/tests/sys/vm/mmap_test.c
+++ b/tests/sys/vm/mmap_test.c
@@ -36,21 +36,6 @@
#include <stdio.h>
#include <stdlib.h>
-static const struct {
- void *addr;
- int ok[2]; /* Depending on security.bsd.map_at_zero {0, !=0}. */
-} map_at_zero_tests[] = {
- { (void *)0, { 0, 1 } }, /* Test sysctl. */
- { (void *)1, { 0, 0 } },
- { (void *)(PAGE_SIZE - 1), { 0, 0 } },
- { (void *)PAGE_SIZE, { 1, 1 } },
- { (void *)-1, { 0, 0 } },
- { (void *)(-PAGE_SIZE), { 0, 0 } },
- { (void *)(-1 - PAGE_SIZE), { 0, 0 } },
- { (void *)(-1 - PAGE_SIZE - 1), { 0, 0 } },
- { (void *)(0x1000 * PAGE_SIZE), { 1, 1 } },
-};
-
#define MAP_AT_ZERO "security.bsd.map_at_zero"
#ifdef __LP64__
@@ -68,6 +53,22 @@ ATF_TC_BODY(mmap__map_at_zero, tc)
int map_at_zero;
bool allow_wx;
int prot_flags;
+ size_t pgsz = getpagesize();
+
+ const struct {
+ void *addr;
+ int ok[2]; /* Depending on security.bsd.map_at_zero {0, !=0}. */
+ } map_at_zero_tests[] = {
+ { (void *)0, { 0, 1 } }, /* Test sysctl. */
+ { (void *)1, { 0, 0 } },
+ { (void *)(pgsz - 1), { 0, 0 } },
+ { (void *)pgsz, { 1, 1 } },
+ { (void *)-1, { 0, 0 } },
+ { (void *)(-pgsz), { 0, 0 } },
+ { (void *)(-1 - pgsz), { 0, 0 } },
+ { (void *)(-1 - pgsz - 1), { 0, 0 } },
+ { (void *)(0x1000 * pgsz), { 1, 1 } },
+ };
len = sizeof(map_at_zero);
if (sysctlbyname(MAP_AT_ZERO, &map_at_zero, &len, NULL, 0) == -1) {
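
The hunk above moves the table into the test body so the page size is taken at run time via getpagesize(3) rather than from the compile-time PAGE_SIZE macro, which need not match the running kernel on all architectures. A trivial standalone sketch of the run-time query:

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* The run-time page size, as seen by the running kernel. */
	printf("page size: %ld bytes\n", sysconf(_SC_PAGESIZE));
	return (0);
}
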
diff --git a/usr.bin/login/login.conf b/usr.bin/login/login.conf
index 1069da17b4db..c65a83caa565 100644
--- a/usr.bin/login/login.conf
+++ b/usr.bin/login/login.conf
@@ -46,7 +46,6 @@ default:\
:umtxp=unlimited:\
:pipebuf=unlimited:\
:priority=0:\
- :ignoretime@:\
:umask=022:\
:charset=UTF-8:\
:lang=C.UTF-8:
@@ -149,7 +148,6 @@ russian|Russian Users Accounts:\
# :requirehome:\
# :passwordtime=90d:\
# :umask=002:\
-# :ignoretime@:\
# :tc=default:
#
#
@@ -174,7 +172,6 @@ russian|Russian Users Accounts:\
##
#staff:\
# :ignorenologin:\
-# :ignoretime:\
# :requirehome@:\
# :accounted@:\
# :path=~/bin /bin /sbin /usr/bin /usr/sbin /usr/local/bin /usr/local/sbin:\
@@ -265,7 +262,6 @@ russian|Russian Users Accounts:\
## - no time accounting, restricted to access via dialin lines
##
#site:\
-# :ignoretime:\
# :passwordtime@:\
# :refreshtime@:\
# :refreshperiod@:\
diff --git a/usr.bin/sockstat/main.c b/usr.bin/sockstat/main.c
index 7fedfd5b8724..d1ea6b1bc958 100644
--- a/usr.bin/sockstat/main.c
+++ b/usr.bin/sockstat/main.c
@@ -1789,9 +1789,11 @@ main(int argc, char *argv[])
argc = xo_parse_args(argc, argv);
if (argc < 0)
exit(1);
- if (xo_get_style(NULL) != XO_STYLE_TEXT &&
- xo_get_style(NULL) != XO_STYLE_HTML)
- is_xo_style_encoding = true;
+ if (xo_get_style(NULL) != XO_STYLE_TEXT) {
+ show_path_state = true;
+ if (xo_get_style(NULL) != XO_STYLE_HTML)
+ is_xo_style_encoding = true;
+ }
opt_j = -1;
while ((o = getopt(argc, argv, "46AbCcfIij:Llnp:P:qSsUuvw")) != -1)
switch (o) {
diff --git a/usr.bin/sockstat/sockstat.1 b/usr.bin/sockstat/sockstat.1
index d14eb967ad0f..1498fb1d88f7 100644
--- a/usr.bin/sockstat/sockstat.1
+++ b/usr.bin/sockstat/sockstat.1
@@ -25,7 +25,7 @@
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd October 9, 2025
+.Dd October 14, 2025
.Dt SOCKSTAT 1
.Os
.Sh NAME
@@ -205,7 +205,8 @@ is specified (only for SCTP or TCP).
The path state if
.Fl s
is specified (only for SCTP).
-This column is only shown when there is at least one path state shown.
+When using traditional text output, this column is only shown when there is at
+least one path state to show.
.It Li CONN STATE
The connection state if
.Fl s
diff --git a/usr.sbin/blacklistctl/Makefile b/usr.sbin/blacklistctl/Makefile
index 8a01f52926a7..41c5f44b072b 100644
--- a/usr.sbin/blacklistctl/Makefile
+++ b/usr.sbin/blacklistctl/Makefile
@@ -6,8 +6,7 @@ PACKAGE= blocklist
PROG= blacklistctl
SRCS= blacklistctl.c conf.c state.c support.c old_internal.c \
sockaddr_snprintf.c pidfile.c strtoi.c popenve.c
-MAN= blocklistctl.8
-MLINKS= blocklistctl.8 blacklistctl.8
+MAN= blacklistctl.8
LDFLAGS+=-L${LIBBLACKLISTDIR}
LIBADD+= blocklist util
diff --git a/usr.sbin/blacklistd/Makefile b/usr.sbin/blacklistd/Makefile
index b4ba4ca2f9ad..490b12d46968 100644
--- a/usr.sbin/blacklistd/Makefile
+++ b/usr.sbin/blacklistd/Makefile
@@ -7,9 +7,7 @@ CONFS= blacklistd.conf
PROG= blacklistd
SRCS= blacklistd.c conf.c run.c state.c support.c old_internal.c \
sockaddr_snprintf.c pidfile.c strtoi.c popenve.c vsyslog_r.c
-MAN= blocklistd.8 blocklistd.conf.5
-MLINKS= blocklistd.8 blacklistd.8 \
- blocklistd.conf.5 blacklistd.conf.5
+MAN= blacklistd.8 blacklistd.conf.5
LDFLAGS+=-L${LIBBLACKLISTDIR}
LIBADD+= blocklist util
diff --git a/usr.sbin/bsdinstall/partedit/part_wizard.c b/usr.sbin/bsdinstall/partedit/part_wizard.c
index 90a8da1c3c9b..9146a2af782f 100644
--- a/usr.sbin/bsdinstall/partedit/part_wizard.c
+++ b/usr.sbin/bsdinstall/partedit/part_wizard.c
@@ -27,6 +27,7 @@
*/
#include <sys/param.h>
+#include <sys/sysctl.h>
#include <errno.h>
#include <inttypes.h>
@@ -34,6 +35,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
#include <libgeom.h>
#include <bsddialog.h>
@@ -41,10 +43,29 @@
#include "partedit.h"
#define MIN_FREE_SPACE (1023*1024*1024) /* Just under 1 GB */
-#define SWAP_SIZE(available) MIN(available/20, 4*1024*1024*1024LL)
static char *wizard_partition(struct gmesh *mesh, const char *disk);
+/*
+ * Determine default swap (partition) size in bytes for a given amount of free
+ * disk space in bytes. The algorithm should likely be revisited in light of
+ * contemporary memory and disk sizes.
+ */
+static intmax_t
+swap_size(intmax_t available)
+{
+ intmax_t swapsize;
+ unsigned long swap_maxpages;
+ size_t sz;
+
+ swapsize = MIN(available/20, 4*1024*1024*1024LL);
+ sz = sizeof(swap_maxpages);
+ if (sysctlbyname("vm.swap_maxpages", &swap_maxpages, &sz, NULL, 0) == 0)
+ swapsize = MIN(swapsize, (intmax_t)swap_maxpages * getpagesize());
+
+ return (swapsize);
+}
+
int
part_wizard(const char *fsreq)
{
@@ -383,7 +404,7 @@ wizard_makeparts(struct gmesh *mesh, const char *disk, const char *fstype,
return (!retval); /* Editor -> return 0 */
}
- swapsize = SWAP_SIZE(available);
+ swapsize = swap_size(available);
humanize_number(swapsizestr, 7, swapsize, "B", HN_AUTOSCALE,
HN_NOSPACE | HN_DECIMAL);
humanize_number(rootsizestr, 7, available - swapsize - 1024*1024,
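
A standalone sketch (assuming the same sysctl contract the new swap_size() relies on) showing how the vm.swap_maxpages clamp can be inspected from userland:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	unsigned long swap_maxpages;
	size_t sz = sizeof(swap_maxpages);

	/* vm.swap_maxpages bounds how much swap the kernel can track. */
	if (sysctlbyname("vm.swap_maxpages", &swap_maxpages, &sz,
	    NULL, 0) != 0) {
		perror("sysctlbyname(vm.swap_maxpages)");
		return (1);
	}
	printf("kernel can track at most %ju bytes of swap\n",
	    (uintmax_t)swap_maxpages * (uintmax_t)getpagesize());
	return (0);
}
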
diff --git a/usr.sbin/certctl/certctl.8 b/usr.sbin/certctl/certctl.8
index edf993e1361a..e58da8e7ff84 100644
--- a/usr.sbin/certctl/certctl.8
+++ b/usr.sbin/certctl/certctl.8
@@ -24,7 +24,7 @@
.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd August 18, 2025
+.Dd October 9, 2025
.Dt CERTCTL 8
.Os
.Sh NAME
@@ -110,7 +110,7 @@ A copy of each trusted certificate is placed in
and each untrusted certificate in
.Ev UNTRUSTDESTDIR .
In addition, a bundle containing the trusted certificates is placed in
-.Ev BUNDLEFILE .
+.Ev BUNDLE .
.It Ic untrust
Add the specified file to the untrusted list.
.It Ic trust
@@ -151,6 +151,8 @@ Default:
.Pa ${DESTDIR}${DISTBASE}/etc/ssl/untrusted
.It Ev BUNDLE
File name of bundle to produce.
+Default:
+.Pa ${DESTDIR}${DISTBASE}/etc/ssl/cert.pem
.El
.Sh SEE ALSO
.Xr openssl 1
diff --git a/usr.sbin/fwget/pci/pci_network_mediatek b/usr.sbin/fwget/pci/pci_network_mediatek
index 653c87c410eb..e1e15dcfa2e5 100644
--- a/usr.sbin/fwget/pci/pci_network_mediatek
+++ b/usr.sbin/fwget/pci/pci_network_mediatek
@@ -38,24 +38,24 @@ pci_network_mediatek_mt76()
# { sys/contrib/dev/mediatek/mt76/zzz_fw_ports_fwget.sh }
### >>>
- 0x0608) addpkg "wifi-firmware-mediatek-kmod-mt792x"; return 1 ;;
- 0x0616) addpkg "wifi-firmware-mediatek-kmod-mt792x"; return 1 ;;
- 0x0717) addpkg "wifi-firmware-mediatek-kmod-mt792x"; return 1 ;;
- 0x7611) addpkg "wifi-firmware-mediatek-kmod-mt7615"; return 1 ;;
- 0x7615) addpkg "wifi-firmware-mediatek-kmod-mt7615"; return 1 ;;
- 0x7663) addpkg "wifi-firmware-mediatek-kmod-mt7615"; return 1 ;;
- 0x7906) addpkg "wifi-firmware-mediatek-kmod-mt7915"; return 1 ;;
- 0x790a) addpkg "wifi-firmware-mediatek-kmod-mt7915"; return 1 ;;
- 0x7915) addpkg "wifi-firmware-mediatek-kmod-mt7915"; return 1 ;;
- 0x7916) addpkg "wifi-firmware-mediatek-kmod-mt7915"; return 1 ;;
- 0x7920) addpkg "wifi-firmware-mediatek-kmod-mt792x"; return 1 ;;
- 0x7922) addpkg "wifi-firmware-mediatek-kmod-mt792x"; return 1 ;;
- 0x7925) addpkg "wifi-firmware-mediatek-kmod-mt792x"; return 1 ;;
- 0x7961) addpkg "wifi-firmware-mediatek-kmod-mt792x"; return 1 ;;
- 0x7990) addpkg "wifi-firmware-mediatek-kmod-mt7996"; return 1 ;;
- 0x7991) addpkg "wifi-firmware-mediatek-kmod-mt7996"; return 1 ;;
- 0x7992) addpkg "wifi-firmware-mediatek-kmod-mt7996"; return 1 ;;
- 0x799a) addpkg "wifi-firmware-mediatek-kmod-mt7996"; return 1 ;;
+ 0x0608) addpkg "wifi-firmware-mt76-kmod-mt792x"; return 1 ;;
+ 0x0616) addpkg "wifi-firmware-mt76-kmod-mt792x"; return 1 ;;
+ 0x0717) addpkg "wifi-firmware-mt76-kmod-mt792x"; return 1 ;;
+ 0x7611) addpkg "wifi-firmware-mt76-kmod-mt7615"; return 1 ;;
+ 0x7615) addpkg "wifi-firmware-mt76-kmod-mt7615"; return 1 ;;
+ 0x7663) addpkg "wifi-firmware-mt76-kmod-mt7615"; return 1 ;;
+ 0x7906) addpkg "wifi-firmware-mt76-kmod-mt7915"; return 1 ;;
+ 0x790a) addpkg "wifi-firmware-mt76-kmod-mt7915"; return 1 ;;
+ 0x7915) addpkg "wifi-firmware-mt76-kmod-mt7915"; return 1 ;;
+ 0x7916) addpkg "wifi-firmware-mt76-kmod-mt7915"; return 1 ;;
+ 0x7920) addpkg "wifi-firmware-mt76-kmod-mt792x"; return 1 ;;
+ 0x7922) addpkg "wifi-firmware-mt76-kmod-mt792x"; return 1 ;;
+ 0x7925) addpkg "wifi-firmware-mt76-kmod-mt792x"; return 1 ;;
+ 0x7961) addpkg "wifi-firmware-mt76-kmod-mt792x"; return 1 ;;
+ 0x7990) addpkg "wifi-firmware-mt76-kmod-mt7996"; return 1 ;;
+ 0x7991) addpkg "wifi-firmware-mt76-kmod-mt7996"; return 1 ;;
+ 0x7992) addpkg "wifi-firmware-mt76-kmod-mt7996"; return 1 ;;
+ 0x799a) addpkg "wifi-firmware-mt76-kmod-mt7996"; return 1 ;;
### <<<
esac
diff --git a/usr.sbin/unbound/Makefile.inc b/usr.sbin/unbound/Makefile.inc
index a28e388e47c0..84dfdbf736bd 100644
--- a/usr.sbin/unbound/Makefile.inc
+++ b/usr.sbin/unbound/Makefile.inc
@@ -1,6 +1,6 @@
MK_WERROR= no
NO_WTHREAD_SAFETY= true
-PACKAGE= unbound
+PACKAGE= local-unbound
.for man in ${MAN}
${man}: ${UNBOUNDDIR}/doc/${man:S/local-//}