aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.github/workflows/checklist.yml2
-rw-r--r--Makefile.inc12
-rw-r--r--ObsoleteFiles.inc37
-rw-r--r--UPDATING18
-rw-r--r--bin/cpuset/Makefile2
-rw-r--r--bin/cpuset/cpuset.c153
-rw-r--r--crypto/krb5/src/util/ss/listen.c4
-rw-r--r--crypto/openssh/sshd_config7
-rw-r--r--etc/gss-krb5/mech16
-rw-r--r--krb5/include/autoconf.h5
-rw-r--r--krb5/lib/Makefile.inc2
-rw-r--r--krb5/plugins/Makefile.inc2
-rw-r--r--krb5/usr.bin/kadmin/Makefile4
-rw-r--r--krb5/usr.bin/ktutil/Makefile2
-rw-r--r--krb5/usr.sbin/kadmin.local/Makefile4
-rw-r--r--krb5/util/Makefile.inc2
-rw-r--r--krb5/util/ss/Makefile5
-rw-r--r--lib/libc/db/hash/hash.c21
-rw-r--r--lib/libc/db/man/dbm.35
-rw-r--r--lib/libc/db/man/dbopen.35
-rw-r--r--lib/libc/tests/db/Makefile2
-rw-r--r--lib/libc/tests/db/dbm_nextkey_test.c53
-rw-r--r--lib/libc/tests/db/dbm_open_test.c23
-rw-r--r--lib/libc/tests/db/dbm_perm_test.c98
-rw-r--r--lib/libnvmf/libnvmf.h7
-rw-r--r--lib/libnvmf/nvmf_controller.c50
-rw-r--r--lib/libsys/getsockopt.29
-rw-r--r--lib/libthr/thread/thr_getthreadid_np.c2
-rw-r--r--lib/libutil/Makefile1
-rw-r--r--lib/libutil/cpuset.351
-rw-r--r--lib/libutil/cpuset.c98
-rw-r--r--lib/libutil/libutil.h8
-rw-r--r--lib/libvmmapi/Makefile2
-rw-r--r--lib/libvmmapi/internal.h11
-rw-r--r--lib/libvmmapi/vmmapi.c181
-rw-r--r--lib/libvmmapi/vmmapi.h22
-rw-r--r--libexec/comsat/comsat.c40
-rw-r--r--release/packages/ucl/bmake-all.ucl5
-rw-r--r--release/packages/ucl/sendmail.ucl7
-rw-r--r--release/packages/ucl/yp.ucl7
-rwxr-xr-xrelease/scripts/pkgbase-stage.lua4
-rw-r--r--release/tools/vmimage.subr1
-rw-r--r--sbin/ifconfig/af_inet6.c2
-rw-r--r--sbin/ifconfig/ifbridge.c42
-rw-r--r--sbin/ifconfig/tests/inet6.sh30
-rw-r--r--sbin/ping/Makefile2
-rw-r--r--sbin/recoverdisk/recoverdisk.c2
-rwxr-xr-xsbin/swapon/tests/swapon_test.sh60
-rw-r--r--sbin/zfsbootcfg/zfsbootcfg.88
-rw-r--r--share/man/man4/usbhid.44
-rw-r--r--share/man/man4/vtnet.423
-rw-r--r--share/man/man5/core.548
-rw-r--r--share/man/man9/Makefile2
-rw-r--r--share/man/man9/coredumper_register.9168
-rw-r--r--share/man/man9/domainset.916
-rw-r--r--share/man/man9/mbuf.962
-rw-r--r--share/man/man9/style.9160
-rw-r--r--share/man/man9/ucred.924
-rw-r--r--share/misc/committers-ports.dot3
-rw-r--r--share/misc/organization.dot2
-rw-r--r--share/mk/Makefile1
-rw-r--r--share/mk/src.libnames.mk1
-rw-r--r--share/vt/fonts/INDEX.fonts8
-rw-r--r--share/vt/keymaps/INDEX.keymaps8
-rw-r--r--stand/defaults/loader.conf1
-rw-r--r--stand/i386/Makefile2
-rw-r--r--stand/i386/common/bootargs.h2
-rw-r--r--stand/i386/gptboot/Makefile6
-rw-r--r--stand/i386/gptzfsboot/Makefile7
-rw-r--r--stand/i386/gptzfsboot/zfsboot.c (renamed from stand/i386/zfsboot/zfsboot.c)0
-rw-r--r--stand/i386/isoboot/Makefile6
-rw-r--r--stand/i386/loader/main.c2
-rw-r--r--stand/i386/zfsboot/Makefile92
-rw-r--r--stand/i386/zfsboot/Makefile.depend17
-rw-r--r--stand/i386/zfsboot/zfsboot.8130
-rw-r--r--stand/i386/zfsboot/zfsldr.S281
-rw-r--r--stand/libsa/ip.c6
-rw-r--r--sys/amd64/include/vmm_dev.h7
-rw-r--r--sys/arm64/include/vmm_dev.h5
-rw-r--r--sys/cddl/dev/sdt/sdt.c23
-rw-r--r--sys/compat/linprocfs/linprocfs.c8
-rw-r--r--sys/compat/linux/linux_file.c2
-rw-r--r--sys/compat/linux/linux_misc.c36
-rw-r--r--sys/compat/linux/linux_uid16.c35
-rw-r--r--sys/compat/linuxkpi/common/include/acpi/acpi_bus.h4
-rw-r--r--sys/compat/linuxkpi/common/include/linux/pci.h60
-rw-r--r--sys/compat/linuxkpi/common/src/linux_acpi.c24
-rw-r--r--sys/compat/linuxkpi/common/src/linux_pci.c20
-rw-r--r--sys/conf/files2
-rw-r--r--sys/conf/files.arm644
-rw-r--r--sys/conf/files.x861
-rw-r--r--sys/dev/amdsmu/amdsmu.c466
-rw-r--r--sys/dev/amdsmu/amdsmu.h95
-rw-r--r--sys/dev/amdsmu/amdsmu_reg.h84
-rw-r--r--sys/dev/iicbus/iichid.c74
-rw-r--r--sys/dev/mmc/host/dwmmc.c83
-rw-r--r--sys/dev/nvmf/controller/nvmft_subr.c40
-rw-r--r--sys/dev/pci/pci_iov.c25
-rw-r--r--sys/dev/pci/pci_iov_private.h2
-rw-r--r--sys/dev/usb/input/usbhid.c2
-rw-r--r--sys/dev/vmm/vmm_dev.c135
-rw-r--r--sys/dev/vmm/vmm_mem.c15
-rw-r--r--sys/dev/vmm/vmm_mem.h27
-rw-r--r--sys/fs/nfs/nfs_commonport.c5
-rw-r--r--sys/fs/nfs/nfs_commonsubs.c2
-rw-r--r--sys/fs/nfsclient/nfs_clrpcops.c3
-rw-r--r--sys/fs/nfsserver/nfs_nfsdport.c3
-rw-r--r--sys/fs/nfsserver/nfs_nfsdsocket.c2
-rw-r--r--sys/kern/coredump_vnode.c562
-rw-r--r--sys/kern/imgact_elf.c15
-rw-r--r--sys/kern/kern_cpuset.c98
-rw-r--r--sys/kern/kern_exec.c21
-rw-r--r--sys/kern/kern_jail.c17
-rw-r--r--sys/kern/kern_prot.c216
-rw-r--r--sys/kern/kern_sig.c594
-rw-r--r--sys/kern/kern_sysctl.c2
-rw-r--r--sys/kern/kern_thread.c2
-rw-r--r--sys/kern/kern_ucoredump.c299
-rw-r--r--sys/kern/subr_compressor.c6
-rw-r--r--sys/kern/sys_generic.c2
-rw-r--r--sys/kern/uipc_shm.c50
-rw-r--r--sys/kern/vfs_aio.c33
-rw-r--r--sys/kern/vfs_export.c4
-rw-r--r--sys/modules/Makefile2
-rw-r--r--sys/modules/amdsmu/Makefile14
-rw-r--r--sys/net/if_bridge.c197
-rw-r--r--sys/net/if_bridgevar.h4
-rw-r--r--sys/net/if_ovpn.c241
-rw-r--r--sys/net/if_ovpn.h1
-rw-r--r--sys/net/if_tuntap.c76
-rw-r--r--sys/net/if_vlan.c12
-rw-r--r--sys/netinet/ip_fastfwd.c22
-rw-r--r--sys/netinet/sctp_input.c6
-rw-r--r--sys/netinet/sctp_timer.c1
-rw-r--r--sys/netinet/tcp_hpts.c87
-rw-r--r--sys/netinet/tcp_hpts.h128
-rw-r--r--sys/netinet/tcp_input.c12
-rw-r--r--sys/netinet/tcp_lro_hpts.c2
-rw-r--r--sys/netinet/tcp_stacks/bbr.c50
-rw-r--r--sys/netinet/tcp_stacks/rack.c106
-rw-r--r--sys/netinet/tcp_stacks/rack_pcm.c6
-rw-r--r--sys/netinet/udp_usrreq.c6
-rw-r--r--sys/netinet6/ip6_fastfwd.c28
-rw-r--r--sys/netinet6/ip6_forward.c27
-rw-r--r--sys/netinet6/scope6.c17
-rw-r--r--sys/netinet6/sctp6_usrreq.c6
-rw-r--r--sys/netinet6/udp6_usrreq.c6
-rw-r--r--sys/netlink/netlink_io.c1
-rw-r--r--sys/netpfil/pf/pf.c2
-rw-r--r--sys/netpfil/pf/pf_ioctl.c16
-rw-r--r--sys/riscv/include/vmm_dev.h5
-rw-r--r--sys/rpc/authunix_prot.c6
-rw-r--r--sys/rpc/rpcsec_gss/svc_rpcsec_gss.c2
-rw-r--r--sys/rpc/svc_auth.c5
-rw-r--r--sys/rpc/svc_auth_unix.c6
-rw-r--r--sys/sys/compressor.h1
-rw-r--r--sys/sys/domainset.h14
-rw-r--r--sys/sys/exec.h20
-rw-r--r--sys/sys/exterr_cat.h2
-rw-r--r--sys/sys/imgact_elf.h3
-rw-r--r--sys/sys/jail.h2
-rw-r--r--sys/sys/mbuf.h18
-rw-r--r--sys/sys/param.h2
-rw-r--r--sys/sys/signalvar.h1
-rw-r--r--sys/sys/syscallsubr.h3
-rw-r--r--sys/sys/sysent.h4
-rw-r--r--sys/sys/ucoredump.h99
-rw-r--r--sys/sys/ucred.h31
-rw-r--r--sys/sys/unistd.h2
-rw-r--r--sys/ufs/ffs/ffs_softdep.c20
-rw-r--r--sys/ufs/ufs/ufs_extern.h8
-rw-r--r--sys/ufs/ufs/ufs_lookup.c7
-rw-r--r--sys/ufs/ufs/ufs_vnops.c47
-rw-r--r--sys/vm/swap_pager.c32
-rw-r--r--sys/vm/vm_page.c2
-rw-r--r--targets/pseudo/userland/misc/Makefile.depend1
-rwxr-xr-xtests/sys/net/if_bridge_test.sh24
-rw-r--r--tests/sys/net/if_ovpn/if_ovpn.sh91
-rwxr-xr-xtests/sys/net/if_vlan.sh27
-rw-r--r--tests/sys/netpfil/pf/nat64.py41
-rwxr-xr-xtools/boot/install-boot.sh23
-rwxr-xr-xtools/boot/rootgen.sh65
-rw-r--r--tools/build/cross-build/include/mac/endian.h7
-rw-r--r--tools/build/mk/OptionalObsoleteFiles.inc52
-rw-r--r--usr.bin/bmake/Makefile.inc2
-rw-r--r--usr.bin/clang/clang-scan-deps/Makefile7
-rw-r--r--usr.bin/clang/clang.prog.mk2
-rw-r--r--usr.bin/clang/llvm-ar/Makefile1
-rw-r--r--usr.bin/clang/llvm-nm/Makefile1
-rw-r--r--usr.bin/clang/llvm-size/Makefile1
-rw-r--r--usr.bin/clang/llvm.prog.mk2
-rw-r--r--usr.bin/find/find.174
-rw-r--r--usr.bin/find/function.c5
-rw-r--r--usr.bin/pom/pom.c1
-rw-r--r--usr.bin/sockstat/Makefile2
-rw-r--r--usr.bin/sockstat/sockstat.117
-rw-r--r--usr.bin/sockstat/sockstat.c467
-rw-r--r--usr.bin/strings/Makefile2
-rw-r--r--usr.bin/xargs/tests/Makefile3
-rw-r--r--usr.bin/xargs/tests/legacy_test.sh5
-rw-r--r--usr.bin/xargs/tests/regress.sh32
-rwxr-xr-xusr.bin/xargs/tests/xargs_test.sh193
-rw-r--r--usr.sbin/bhyve/acpi.c124
-rw-r--r--usr.sbin/bhyve/acpi.h3
-rw-r--r--usr.sbin/bhyve/amd64/bhyverun_machdep.c14
-rw-r--r--usr.sbin/bhyve/amd64/xmsr.c9
-rw-r--r--usr.sbin/bhyve/bhyve.880
-rw-r--r--usr.sbin/bhyve/bhyverun.c174
-rw-r--r--usr.sbin/bhyve/bhyverun.h1
-rw-r--r--usr.sbin/bhyve/bootrom.c1
-rw-r--r--usr.sbin/bhyve/pci_emul.c1
-rw-r--r--usr.sbin/bhyve/pci_fbuf.c1
-rw-r--r--usr.sbin/bhyve/pci_passthru.c1
-rw-r--r--usr.sbin/bhyve/tpm_ppi_qemu.c2
-rw-r--r--usr.sbin/bsdinstall/bsdinstall.82
-rwxr-xr-xusr.sbin/bsdinstall/scripts/bootconfig2
-rwxr-xr-xusr.sbin/bsdinstall/scripts/pkgbase.in4
-rwxr-xr-xusr.sbin/bsdinstall/scripts/zfsboot2
-rw-r--r--usr.sbin/bsnmpd/modules/snmp_wlan/wlan_sys.c2
-rw-r--r--usr.sbin/chroot/chroot.814
-rw-r--r--usr.sbin/makefs/zfs/dsl.c25
-rw-r--r--usr.sbin/makefs/zfs/fs.c45
-rw-r--r--usr.sbin/makefs/zfs/objset.c1
-rw-r--r--usr.sbin/makefs/zfs/vdev.c1
-rw-r--r--usr.sbin/makefs/zfs/zap.c13
-rw-r--r--usr.sbin/syslogd/syslogd.c2
226 files changed, 5702 insertions, 2811 deletions
diff --git a/.github/workflows/checklist.yml b/.github/workflows/checklist.yml
index 7f7b0d51f46e..ecc3939f34b8 100644
--- a/.github/workflows/checklist.yml
+++ b/.github/workflows/checklist.yml
@@ -89,7 +89,7 @@ jobs:
/* Loop for each key in "checklist". */
for (const c in checklist)
msg += "- " + c + "<sup>" + checklist[c].join(", ") + "</sup>\n";
- msg += "\nPlease review CONTRIBUTING.md, then update and push your branch again.\n"
+ msg += "\nPlease review [CONTRIBUTING.md](https://github.com/freebsd/freebsd-src/blob/main/CONTRIBUTING.md), then update and push your branch again.\n"
comment_func({
owner: context.repo.owner,
diff --git a/Makefile.inc1 b/Makefile.inc1
index 010f5ac2bb55..b66743e154eb 100644
--- a/Makefile.inc1
+++ b/Makefile.inc1
@@ -2627,8 +2627,6 @@ _kerberos5_bootstrap_tools= \
krb5/util/compile_et \
krb5/util/support \
krb5/util/et \
- lib/ncurses/tinfo \
- lib/libedit \
krb5/util/ss \
krb5/util/profile \
krb5/util/verto
diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc
index 00f889804013..83fb2d3f3a2c 100644
--- a/ObsoleteFiles.inc
+++ b/ObsoleteFiles.inc
@@ -51,8 +51,43 @@
# xargs -n1 | sort | uniq -d;
# done
-# 20250726: Test case renamed
+# 20250728: zfsboot (MBR) removed
+OLD_FILES+=boot/zfsboot
+OLD_FILES+=usr/share/man/man8/zfsboot.8.gz
+
+# 20250728: Machine versions of 'runq.h' do not exist anymore
+OLD_FILES+=usr/include/machine/runq.h
+
+# 20250726: MIT KRB5 DSO bump
+OLD_LIBS+=usr/lib/libcom_err.so.121
+OLD_LIBS+=usr/lib/libgssapi_krb5.so.121
+OLD_LIBS+=usr/lib/libgssrpc.so.121
+OLD_LIBS+=usr/lib/libk5crypto.so.121
+OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.121
+OLD_LIBS+=usr/lib/libkadm5srv_mit.so.121
+OLD_LIBS+=usr/lib/libkdb5.so.121
+OLD_LIBS+=usr/lib/libkrad.so.121
+OLD_LIBS+=usr/lib/libkrb5.so.121
+OLD_LIBS+=usr/lib/libkrb5profile.so.121
+OLD_LIBS+=usr/lib/libkrb5support.so.121
+OLD_LIBS+=usr/lib/libverto.so.121
+OLD_LIBS+=usr/lib/libcom_err.so.121
+OLD_LIBS+=usr/lib/libgssapi_krb5.so.121
+OLD_LIBS+=usr/lib/libgssrpc.so.121
+OLD_LIBS+=usr/lib/libk5crypto.so.121
+OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.121
+OLD_LIBS+=usr/lib/libkadm5srv_mit.so.121
+OLD_LIBS+=usr/lib/libkdb5.so.121
+OLD_LIBS+=usr/lib/libkrad.so.121
+OLD_LIBS+=usr/lib/libkrb5.so.121
+OLD_LIBS+=usr/lib/libkrb5profile.so.121
+OLD_LIBS+=usr/lib/libkrb5support.so.121
+OLD_LIBS+=usr/lib/libverto.so.121
+
+# 20250726: xargs tests rewritten
+OLD_FILES+=usr/tests/usr.bin/xargs/legacy_test
OLD_FILES+=usr/tests/usr.bin/xargs/regress.n2147483647.out
+OLD_FILES+=usr/tests/usr.bin/xargs/regress.sh
# 20250726: This file is now installed in /etc/dma
OLD_FILES+=usr/share/examples/dma/auth.conf
diff --git a/UPDATING b/UPDATING
index a2843aa95127..28ff08e65ef5 100644
--- a/UPDATING
+++ b/UPDATING
@@ -27,6 +27,24 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 15.x IS SLOW:
world, or to merely disable the most expensive debugging functionality
at runtime, run "ln -s 'abort:false,junk:false' /etc/malloc.conf".)
+20250730:
+ The usbhid(4) USB HID driver is now enabled by default, and will be
+ used in preference to other USB HID drivers like ukbd(4), ums(4), and
+ uhid(4). Work on a FIDO/U2F driver and moused(8) is in progress.
+ The default is being switched now so that we can find and fix any
+ additional issues prior to FreeBSD 15.0.
+
+ To revert to the previous USB HID driver behavior, set the loader
+ tunable hw.usb.usbhid_enable=0.
+
+20250727:
+ bmake (i.e., /usr/bin/make and /usr/share/mk) has moved to a new
+ package, FreeBSD-bmake. If you use pkgbase and you need make, you
+ should install this package.
+
+20250726:
+ amd64 kernel configurations must contain "options SMP".
+
20250725:
gssd(8) has been moved to a new package, FreeBSD-gssd. If you use
pkgbase and you need gssd, you should install this package.
diff --git a/bin/cpuset/Makefile b/bin/cpuset/Makefile
index d6f58db62901..639dd9812171 100644
--- a/bin/cpuset/Makefile
+++ b/bin/cpuset/Makefile
@@ -1,6 +1,6 @@
PROG= cpuset
-LIBADD= jail
+LIBADD= jail util
SYMLINKS+= ../..${BINDIR}/cpuset /usr/bin/cpuset
diff --git a/bin/cpuset/cpuset.c b/bin/cpuset/cpuset.c
index 82ffcaeec252..7416e100a3c6 100644
--- a/bin/cpuset/cpuset.c
+++ b/bin/cpuset/cpuset.c
@@ -43,6 +43,7 @@
#include <err.h>
#include <errno.h>
#include <jail.h>
+#include <libutil.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
@@ -69,154 +70,6 @@ static cpuwhich_t which;
static void usage(void) __dead2;
-struct numa_policy {
- const char *name;
- int policy;
-};
-
-static struct numa_policy policies[] = {
- { "round-robin", DOMAINSET_POLICY_ROUNDROBIN },
- { "rr", DOMAINSET_POLICY_ROUNDROBIN },
- { "first-touch", DOMAINSET_POLICY_FIRSTTOUCH },
- { "ft", DOMAINSET_POLICY_FIRSTTOUCH },
- { "prefer", DOMAINSET_POLICY_PREFER },
- { "interleave", DOMAINSET_POLICY_INTERLEAVE},
- { "il", DOMAINSET_POLICY_INTERLEAVE},
- { NULL, DOMAINSET_POLICY_INVALID }
-};
-
-static void printset(struct bitset *mask, int size);
-
-static void
-parselist(char *list, struct bitset *mask, int size)
-{
- enum { NONE, NUM, DASH } state;
- int lastnum;
- int curnum;
- char *l;
-
- state = NONE;
- curnum = lastnum = 0;
- for (l = list; *l != '\0';) {
- if (isdigit(*l)) {
- curnum = atoi(l);
- if (curnum >= size)
- errx(EXIT_FAILURE,
- "List entry %d exceeds maximum of %d",
- curnum, size - 1);
- while (isdigit(*l))
- l++;
- switch (state) {
- case NONE:
- lastnum = curnum;
- state = NUM;
- break;
- case DASH:
- for (; lastnum <= curnum; lastnum++)
- BIT_SET(size, lastnum, mask);
- state = NONE;
- break;
- case NUM:
- default:
- goto parserr;
- }
- continue;
- }
- switch (*l) {
- case ',':
- switch (state) {
- case NONE:
- break;
- case NUM:
- BIT_SET(size, curnum, mask);
- state = NONE;
- break;
- case DASH:
- goto parserr;
- break;
- }
- break;
- case '-':
- if (state != NUM)
- goto parserr;
- state = DASH;
- break;
- default:
- goto parserr;
- }
- l++;
- }
- switch (state) {
- case NONE:
- break;
- case NUM:
- BIT_SET(size, curnum, mask);
- break;
- case DASH:
- goto parserr;
- }
- return;
-parserr:
- errx(EXIT_FAILURE, "Malformed list %s", list);
-}
-
-static void
-parsecpulist(char *list, cpuset_t *mask)
-{
-
- if (strcasecmp(list, "all") == 0) {
- if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
- sizeof(*mask), mask) != 0)
- err(EXIT_FAILURE, "getaffinity");
- return;
- }
- parselist(list, (struct bitset *)mask, CPU_SETSIZE);
-}
-
-/*
- * permissively parse policy:domain list
- * allow:
- * round-robin:0-4 explicit
- * round-robin:all explicit root domains
- * 0-4 implicit root policy
- * round-robin implicit root domains
- * all explicit root domains and implicit policy
- */
-static void
-parsedomainlist(char *list, domainset_t *mask, int *policyp)
-{
- domainset_t rootmask;
- struct numa_policy *policy;
- char *l;
- int p;
-
- /*
- * Use the rootset's policy as the default for unspecified policies.
- */
- if (cpuset_getdomain(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
- sizeof(rootmask), &rootmask, &p) != 0)
- err(EXIT_FAILURE, "getdomain");
-
- l = list;
- for (policy = &policies[0]; policy->name != NULL; policy++) {
- if (strncasecmp(l, policy->name, strlen(policy->name)) == 0) {
- p = policy->policy;
- l += strlen(policy->name);
- if (*l != ':' && *l != '\0')
- errx(EXIT_FAILURE, "Malformed list %s", list);
- if (*l == ':')
- l++;
- break;
- }
- }
- *policyp = p;
- if (strcasecmp(l, "all") == 0 || *l == '\0') {
- DOMAINSET_COPY(&rootmask, mask);
- return;
- }
- parselist(l, (struct bitset *)mask, DOMAINSET_SETSIZE);
-}
-
static void
printset(struct bitset *mask, int size)
{
@@ -327,11 +180,11 @@ main(int argc, char *argv[])
break;
case 'l':
lflag = 1;
- parsecpulist(optarg, &mask);
+ cpuset_parselist(optarg, &mask);
break;
case 'n':
nflag = 1;
- parsedomainlist(optarg, &domains, &policy);
+ domainset_parselist(optarg, &domains, &policy);
break;
case 'p':
pflag = 1;
diff --git a/crypto/krb5/src/util/ss/listen.c b/crypto/krb5/src/util/ss/listen.c
index 08427df1e5c7..fe18475447be 100644
--- a/crypto/krb5/src/util/ss/listen.c
+++ b/crypto/krb5/src/util/ss/listen.c
@@ -14,9 +14,6 @@
#include <termios.h>
#include <sys/param.h>
-#ifdef __FreeBSD__
-#include <edit/readline/readline.h>
-#else
#if defined(HAVE_LIBEDIT)
#include <editline/readline.h>
#elif defined(HAVE_READLINE)
@@ -25,7 +22,6 @@
#else
#define NO_READLINE
#endif
-#endif
static ss_data *current_info;
static jmp_buf listen_jmpb;
diff --git a/crypto/openssh/sshd_config b/crypto/openssh/sshd_config
index a17484b1da2d..88c93386db65 100644
--- a/crypto/openssh/sshd_config
+++ b/crypto/openssh/sshd_config
@@ -56,12 +56,15 @@ AuthorizedKeysFile .ssh/authorized_keys
# Don't read the user's ~/.rhosts and ~/.shosts files
#IgnoreRhosts yes
-# Change to yes to enable built-in password authentication.
+# Change to "yes" to enable built-in password authentication.
# Note that passwords may also be accepted via KbdInteractiveAuthentication.
#PasswordAuthentication no
#PermitEmptyPasswords no
-# Change to no to disable PAM authentication
+# Change to "no" to disable keyboard-interactive authentication. Depending on
+# the system's configuration, this may involve passwords, challenge-response,
+# one-time passwords or some combination of these and other methods.
+# Keyboard interactive authentication is also used for PAM authentication.
#KbdInteractiveAuthentication yes
# Kerberos options
diff --git a/etc/gss-krb5/mech b/etc/gss-krb5/mech
index 94fed68a24eb..b13f665705c5 100644
--- a/etc/gss-krb5/mech
+++ b/etc/gss-krb5/mech
@@ -1,10 +1,10 @@
#
# Name OID Library name Kernel module
-kerberosv5 1.2.840.113554.1.2.2 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5
-kerberosv5 1.2.840.113554.1.2.3 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5
-kerberosv5 1.3.6.1.5.5.2 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5
-kerberosv5 1.2.840.48018.1.2.2.1 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5
-kerberosv5 1.2.840.48018.1.2.2.2 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5
-kerberosv5 1.2.840.48018.1.2.2.4 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5
-kerberosv5 1.2.840.48018.1.2.2.5 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5
-kerberosv5 1.3.5.1.5.2 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5
+kerberosv5 1.2.840.113554.1.2.2 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5
+kerberosv5 1.2.840.113554.1.2.3 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5
+kerberosv5 1.3.6.1.5.5.2 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5
+kerberosv5 1.2.840.48018.1.2.2.1 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5
+kerberosv5 1.2.840.48018.1.2.2.2 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5
+kerberosv5 1.2.840.48018.1.2.2.4 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5
+kerberosv5 1.2.840.48018.1.2.2.5 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5
+kerberosv5 1.3.5.1.5.2 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5
diff --git a/krb5/include/autoconf.h b/krb5/include/autoconf.h
index 24039611c7e7..fe281d136954 100644
--- a/krb5/include/autoconf.h
+++ b/krb5/include/autoconf.h
@@ -5,9 +5,6 @@
#ifndef KRB5_AUTOCONF_H
#define KRB5_AUTOCONF_H
-#include <sys/types.h>
-#include <machine/param.h>
-
/* Define if AES-NI support is enabled */
/* #undef AESNI */
@@ -263,7 +260,7 @@
#define HAVE_LIBCRYPTO 1
/* Define if building with libedit. */
-#define HAVE_LIBEDIT 1
+/* #undef HAVE_LIBEDIT */
/* Define to 1 if you have the `nsl' library (-lnsl). */
/* #undef HAVE_LIBNSL */
diff --git a/krb5/lib/Makefile.inc b/krb5/lib/Makefile.inc
index 6d86e7a35146..b6e5f6275039 100644
--- a/krb5/lib/Makefile.inc
+++ b/krb5/lib/Makefile.inc
@@ -15,4 +15,4 @@ KRB5_KRB5LIBDIR= ${KRB5_SRCLIBDIR}/krb5
KRB5_K5CRYPTODIR= ${KRB5_SRCLIBDIR}/crypto
SHLIBDIR?= /usr/lib
-SHLIB_MAJOR?= 121
+SHLIB_MAJOR?= 122
diff --git a/krb5/plugins/Makefile.inc b/krb5/plugins/Makefile.inc
index e888f79acd49..d98ed1d3887b 100644
--- a/krb5/plugins/Makefile.inc
+++ b/krb5/plugins/Makefile.inc
@@ -12,6 +12,6 @@ MK_INSTALLLIB= no
SHLIB_NAME?= ${LIB}.so.${SHLIB_MAJOR}
PLUGINSDIR= ${LIBDIR_BASE}/krb5/plugins
SHLIBDIR= ${LIBDIR}
-SHLIB_MAJOR= 121
+SHLIB_MAJOR= 122
.include "../Makefile.inc"
diff --git a/krb5/usr.bin/kadmin/Makefile b/krb5/usr.bin/kadmin/Makefile
index b2a094795d48..182cabb8f9f6 100644
--- a/krb5/usr.bin/kadmin/Makefile
+++ b/krb5/usr.bin/kadmin/Makefile
@@ -9,8 +9,8 @@
PROG= kadmin
-LIBADD= kadmin_common edit kadm5clnt_mit gssrpc gssapi_krb5 krb5 k5crypto \
- com_err krb5ss krb5profile krb5support tinfow sys
+LIBADD= kadmin_common kadm5clnt_mit gssrpc gssapi_krb5 krb5 k5crypto \
+ com_err krb5ss krb5profile krb5support sys
SRCS= keytab.c
diff --git a/krb5/usr.bin/ktutil/Makefile b/krb5/usr.bin/ktutil/Makefile
index 6bcb4877ed6f..15991cb49bce 100644
--- a/krb5/usr.bin/ktutil/Makefile
+++ b/krb5/usr.bin/ktutil/Makefile
@@ -9,7 +9,7 @@
PROG= ktutil
-LIBADD= edit krb5 k5crypto com_err krb5profile krb5support krb5ss tinfow sys
+LIBADD= krb5 k5crypto com_err krb5profile krb5support krb5ss sys
SRCS= ktutil.c \
ktutil_ct.c \
diff --git a/krb5/usr.sbin/kadmin.local/Makefile b/krb5/usr.sbin/kadmin.local/Makefile
index 4b99f490bd7b..3930c0fc4694 100644
--- a/krb5/usr.sbin/kadmin.local/Makefile
+++ b/krb5/usr.sbin/kadmin.local/Makefile
@@ -11,8 +11,8 @@ PACKAGE= kerberos-kdc
PROG= kadmin.local
-LIBADD= kadmin_common edit kadm5srv_mit kdb5 gssrpc gssapi_krb5 krb5 \
- k5crypto com_err krb5profile krb5support krb5ss tinfow sys
+LIBADD= kadmin_common kadm5srv_mit kdb5 gssrpc gssapi_krb5 krb5 \
+ k5crypto com_err krb5profile krb5support krb5ss sys
SRCS= keytab_local.c
diff --git a/krb5/util/Makefile.inc b/krb5/util/Makefile.inc
index 858c1eb48dd9..95b93a793d77 100644
--- a/krb5/util/Makefile.inc
+++ b/krb5/util/Makefile.inc
@@ -10,4 +10,4 @@
.include "../Makefile.inc"
SHLIBDIR?= /usr/lib
-SHLIB_MAJOR?= 121
+SHLIB_MAJOR?= 122
diff --git a/krb5/util/ss/Makefile b/krb5/util/ss/Makefile
index e7e025184284..2c43f2b5934f 100644
--- a/krb5/util/ss/Makefile
+++ b/krb5/util/ss/Makefile
@@ -47,6 +47,11 @@ CFLAGS+=-I${KRB5_DIR}/util/ss \
-I${.OBJDIR:H} \
-I${.OBJDIR}
+.if !defined(BOOTSTRAPPING)
+CFLAGS+= -DHAVE_READLINE=1 \
+ -I${SYSROOT:U${DESTDIR}}/${INCLUDEDIR}/edit
+.endif
+
GEN= std_rqs.c ${GEN_SS_ERR_C} ${GEN_SS_ERR_H}
GEN_SCRIPTS= ct_c.awk ct_c.sed mk_cmds
GEN_SS_ERR_C= ${GEN_SS_ERR:S/.et$/.c/}
diff --git a/lib/libc/db/hash/hash.c b/lib/libc/db/hash/hash.c
index cc96fb5ce326..b1655fe63d55 100644
--- a/lib/libc/db/hash/hash.c
+++ b/lib/libc/db/hash/hash.c
@@ -99,11 +99,6 @@ __hash_open(const char *file, int flags, int mode,
DB *dbp;
int bpages, hdrsize, new_table, nsegs, save_errno;
- if ((flags & O_ACCMODE) == O_WRONLY) {
- errno = EINVAL;
- return (NULL);
- }
-
if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB))))
return (NULL);
hashp->fp = -1;
@@ -115,6 +110,10 @@ __hash_open(const char *file, int flags, int mode,
* we can check accesses.
*/
hashp->flags = flags;
+ if ((flags & O_ACCMODE) == O_WRONLY) {
+ flags &= ~O_WRONLY;
+ flags |= O_RDWR;
+ }
if (file) {
if ((hashp->fp = _open(file, flags | O_CLOEXEC, mode)) == -1)
@@ -180,7 +179,7 @@ __hash_open(const char *file, int flags, int mode,
__buf_init(hashp, DEF_BUFSIZE);
hashp->new_file = new_table;
- hashp->save_file = file && (hashp->flags & O_RDWR);
+ hashp->save_file = file && (flags & O_RDWR);
hashp->cbucket = -1;
if (!(dbp = (DB *)malloc(sizeof(DB)))) {
save_errno = errno;
@@ -524,6 +523,10 @@ hash_get(const DB *dbp, const DBT *key, DBT *data, u_int32_t flag)
hashp->error = errno = EINVAL;
return (ERROR);
}
+ if ((hashp->flags & O_ACCMODE) == O_WRONLY) {
+ hashp->error = errno = EPERM;
+ return (ERROR);
+ }
return (hash_access(hashp, HASH_GET, (DBT *)key, data));
}
@@ -701,17 +704,19 @@ hash_seq(const DB *dbp, DBT *key, DBT *data, u_int32_t flag)
u_int16_t *bp, ndx;
hashp = (HTAB *)dbp->internal;
- if (flag && flag != R_FIRST && flag != R_NEXT) {
+ if (flag != R_FIRST && flag != R_NEXT) { /* reject anything but R_FIRST/R_NEXT */
hashp->error = errno = EINVAL;
return (ERROR);
}
#ifdef HASH_STATISTICS
hash_accesses++;
#endif
- if ((hashp->cbucket < 0) || (flag == R_FIRST)) {
+ if (flag == R_FIRST) {
hashp->cbucket = 0;
hashp->cndx = 1;
hashp->cpage = NULL;
+ } else if (hashp->cbucket < 0) { /* R_NEXT */
+ return (ABNORMAL);
}
next_bucket:
for (bp = NULL; !bp || !bp[0]; ) {
diff --git a/lib/libc/db/man/dbm.3 b/lib/libc/db/man/dbm.3
index c5a83c7acef4..30787600ad2d 100644
--- a/lib/libc/db/man/dbm.3
+++ b/lib/libc/db/man/dbm.3
@@ -13,7 +13,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd April 2, 2022
+.Dd July 25, 2025
.Dt DBM 3
.Os
.Sh NAME
@@ -99,9 +99,6 @@ is a typical value for
.Li 0660
is a typical value for
.Fa mode .
-.Dv O_WRONLY
-is not allowed in
-.Fa flags .
The pointer returned by
.Fn dbm_open
identifies the database and is the
diff --git a/lib/libc/db/man/dbopen.3 b/lib/libc/db/man/dbopen.3
index 64cef88506d8..7fe515f17849 100644
--- a/lib/libc/db/man/dbopen.3
+++ b/lib/libc/db/man/dbopen.3
@@ -76,13 +76,10 @@ are as specified to the
.Xr open 2
routine, however, only the
.Dv O_CREAT , O_EXCL , O_EXLOCK , O_NOFOLLOW , O_NONBLOCK ,
-.Dv O_RDONLY , O_RDWR , O_SHLOCK , O_SYNC
+.Dv O_RDONLY , O_RDWR , O_SHLOCK , O_SYNC , O_WRONLY ,
and
.Dv O_TRUNC
flags are meaningful.
-(Note, opening a database file
-.Dv O_WRONLY
-is not possible.)
.\"Three additional options may be specified by
.\".Em or Ns 'ing
.\"them into the
diff --git a/lib/libc/tests/db/Makefile b/lib/libc/tests/db/Makefile
index 54b38b94a581..771569183584 100644
--- a/lib/libc/tests/db/Makefile
+++ b/lib/libc/tests/db/Makefile
@@ -8,6 +8,8 @@ PROGS+= h_lfsr
${PACKAGE}FILES+= README
ATF_TESTS_C+= dbm_open_test
+ATF_TESTS_C+= dbm_perm_test
+ATF_TESTS_C+= dbm_nextkey_test
NETBSD_ATF_TESTS_C+= db_hash_seq_test
NETBSD_ATF_TESTS_SH+= db_test
diff --git a/lib/libc/tests/db/dbm_nextkey_test.c b/lib/libc/tests/db/dbm_nextkey_test.c
new file mode 100644
index 000000000000..67b745efb196
--- /dev/null
+++ b/lib/libc/tests/db/dbm_nextkey_test.c
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 2025 Klara, Inc.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <fcntl.h>
+#include <ndbm.h>
+#include <stdio.h>
+
+#include <atf-c.h>
+
+static const char *path = "tmp";
+static const char *dbname = "tmp.db";
+
+ATF_TC(dbm_nextkey_test);
+ATF_TC_HEAD(dbm_nextkey_test, tc)
+{
+ atf_tc_set_md_var(tc, "descr",
+ "Check that dbm_nextkey always returns NULL after reaching the end of the database");
+}
+
+ATF_TC_BODY(dbm_nextkey_test, tc)
+{
+ DBM *db;
+ datum key, data;
+
+ data.dptr = "bar";
+ data.dsize = strlen("bar");
+ key.dptr = "foo";
+ key.dsize = strlen("foo");
+
+ db = dbm_open(path, O_RDWR | O_CREAT, 0755);
+ ATF_REQUIRE(db != NULL); /* fatal: db is dereferenced below */
+ ATF_REQUIRE(atf_utils_file_exists(dbname));
+ ATF_REQUIRE(dbm_store(db, key, data, DBM_INSERT) != -1);
+
+ key = dbm_firstkey(db);
+ ATF_REQUIRE(key.dptr != NULL);
+ key = dbm_nextkey(db); /* only one record: scan ends here */
+ ATF_REQUIRE(key.dptr == NULL);
+ key = dbm_nextkey(db); /* and must stay at the end when repeated */
+ ATF_REQUIRE(key.dptr == NULL);
+
+ dbm_close(db);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ ATF_TP_ADD_TC(tp, dbm_nextkey_test);
+
+ return (atf_no_error());
+}
diff --git a/lib/libc/tests/db/dbm_open_test.c b/lib/libc/tests/db/dbm_open_test.c
index 18d398e16b2a..8a3e888bf72c 100644
--- a/lib/libc/tests/db/dbm_open_test.c
+++ b/lib/libc/tests/db/dbm_open_test.c
@@ -4,14 +4,15 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
-#include <sys/mman.h>
-
#include <fcntl.h>
#include <ndbm.h>
#include <stdio.h>
#include <atf-c.h>
+static const char *path = "tmp";
+static const char *dbname = "tmp.db";
+
ATF_TC(dbm_open_missing_test);
ATF_TC_HEAD(dbm_open_missing_test, tc)
{
@@ -21,23 +22,31 @@ ATF_TC_HEAD(dbm_open_missing_test, tc)
ATF_TC_BODY(dbm_open_missing_test, tc)
{
- const char *path = "tmp";
- const char *dbname = "tmp.db";
/*
* POSIX.1 specifies that a missing database file should
* always get created if O_CREAT is present, except when
* O_EXCL is specified as well.
*/
- ATF_CHECK(dbm_open(path, O_RDONLY, _PROT_ALL) == NULL);
+ ATF_CHECK(dbm_open(path, O_RDONLY, 0755) == NULL);
+ ATF_REQUIRE(!atf_utils_file_exists(dbname));
+ ATF_CHECK(dbm_open(path, O_RDONLY | O_CREAT, 0755) != NULL);
+ ATF_REQUIRE(atf_utils_file_exists(dbname));
+ ATF_CHECK(dbm_open(path, O_RDONLY | O_CREAT | O_EXCL, 0755) == NULL);
+}
+
+ATF_TC_WITHOUT_HEAD(dbm_open_wronly_test);
+ATF_TC_BODY(dbm_open_wronly_test, tc)
+{
+ ATF_CHECK(dbm_open(path, O_WRONLY, 0755) == NULL);
ATF_REQUIRE(!atf_utils_file_exists(dbname));
- ATF_CHECK(dbm_open(path, O_RDONLY | O_CREAT, _PROT_ALL) != NULL);
+ ATF_CHECK(dbm_open(path, O_WRONLY | O_CREAT, 0755) != NULL);
ATF_REQUIRE(atf_utils_file_exists(dbname));
- ATF_CHECK(dbm_open(path, O_RDONLY | O_CREAT | O_EXCL, _PROT_ALL) == NULL);
}
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, dbm_open_missing_test);
+ ATF_TP_ADD_TC(tp, dbm_open_wronly_test);
return (atf_no_error());
}
diff --git a/lib/libc/tests/db/dbm_perm_test.c b/lib/libc/tests/db/dbm_perm_test.c
new file mode 100644
index 000000000000..c07210292014
--- /dev/null
+++ b/lib/libc/tests/db/dbm_perm_test.c
@@ -0,0 +1,98 @@
+/*-
+ * Copyright (c) 2025 Klara, Inc.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <ndbm.h>
+#include <stdio.h>
+
+#include <atf-c.h>
+
+static const char *path = "tmp";
+static const char *dbname = "tmp.db";
+
+static void
+create_db(void)
+{
+ DBM *db;
+ datum data, key;
+ /* Seed a fresh database with the single record "foo" -> "bar". */
+ data.dptr = "bar";
+ data.dsize = strlen("bar");
+ key.dptr = "foo";
+ key.dsize = strlen("foo");
+
+ db = dbm_open(path, O_RDWR | O_CREAT, 0755);
+ ATF_REQUIRE(db != NULL); /* fatal: db is dereferenced below */
+ ATF_REQUIRE(atf_utils_file_exists(dbname));
+ ATF_REQUIRE(dbm_store(db, key, data, DBM_INSERT) != -1);
+ dbm_close(db);
+}
+
+ATF_TC_WITHOUT_HEAD(dbm_rdonly_test);
+ATF_TC_BODY(dbm_rdonly_test, tc)
+{
+ DB *db;
+ datum data, key;
+
+ bzero(&data, sizeof(data));
+ key.dptr = "foo";
+ key.dsize = strlen("foo");
+ create_db();
+
+ db = dbm_open(path, O_RDONLY, 0755);
+ data = dbm_fetch(db, key);
+ ATF_REQUIRE(data.dptr != NULL);
+ ATF_REQUIRE(strncmp((const char*)data.dptr, "bar", data.dsize) == 0);
+ ATF_REQUIRE(dbm_store(db, key, data, DBM_REPLACE) == -1);
+ ATF_REQUIRE(errno == EPERM);
+}
+
+ATF_TC_WITHOUT_HEAD(dbm_wronly_test);
+ATF_TC_BODY(dbm_wronly_test, tc)
+{
+ DBM *db;
+ datum data, key;
+
+ key.dptr = "foo";
+ key.dsize = strlen("foo");
+ data.dptr = "baz";
+ data.dsize = strlen("baz");
+ create_db();
+ /* A write-only handle must refuse reads (EPERM) but accept stores. */
+ db = dbm_open(path, O_WRONLY, 0755);
+ ATF_REQUIRE(db != NULL);
+ ATF_REQUIRE(dbm_fetch(db, key).dptr == NULL); /* leave "data" intact */
+ ATF_REQUIRE(errno == EPERM);
+ ATF_REQUIRE(dbm_store(db, key, data, DBM_REPLACE) != -1);
+}
+
+ATF_TC_WITHOUT_HEAD(dbm_rdwr_test);
+ATF_TC_BODY(dbm_rdwr_test, tc)
+{
+ DB *db;
+ datum data, key;
+
+ key.dptr = "foo";
+ key.dsize = strlen("foo");
+ create_db();
+
+ db = dbm_open(path, O_RDWR, 0755);
+ data = dbm_fetch(db, key);
+ ATF_REQUIRE(data.dptr != NULL);
+ data.dptr = "baz";
+ data.dsize = strlen("baz");
+ ATF_REQUIRE(dbm_store(db, key, data, DBM_REPLACE) != -1);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ ATF_TP_ADD_TC(tp, dbm_rdonly_test);
+ ATF_TP_ADD_TC(tp, dbm_wronly_test);
+ ATF_TP_ADD_TC(tp, dbm_rdwr_test);
+
+ return (atf_no_error());
+}
diff --git a/lib/libnvmf/libnvmf.h b/lib/libnvmf/libnvmf.h
index 7cdd7e433455..6b38fd286596 100644
--- a/lib/libnvmf/libnvmf.h
+++ b/lib/libnvmf/libnvmf.h
@@ -111,8 +111,13 @@ const void *nvmf_capsule_cqe(const struct nvmf_capsule *nc);
/* Return a string name for a transport type. */
const char *nvmf_transport_type(uint8_t trtype);
-/* Validate a NVMe Qualified Name. */
+/*
+ * Validate an NVMe Qualified Name. nvmf_nqn_valid_strict() enforces
+ * stricter checks in line with the specification; nvmf_nqn_valid()
+ * enforces only more minimal checks.
+ */
bool nvmf_nqn_valid(const char *nqn);
+bool nvmf_nqn_valid_strict(const char *nqn);
/* Controller-specific APIs. */
diff --git a/lib/libnvmf/nvmf_controller.c b/lib/libnvmf/nvmf_controller.c
index 971dccbe039e..f26f11633e03 100644
--- a/lib/libnvmf/nvmf_controller.c
+++ b/lib/libnvmf/nvmf_controller.c
@@ -7,6 +7,7 @@
#include <sys/utsname.h>
#include <assert.h>
+#include <ctype.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
@@ -15,6 +16,55 @@
#include "internal.h"
#include "nvmft_subr.h"
+/*
+ * Validate an NQN with the checks of nvmf_nqn_valid() plus a stricter
+ * prefix check: the name must start with "nqn.", a four-digit year, '-',
+ * a two-digit month and '.', and must be long enough to hold at least
+ * one further character after that '.'.
+ *
+ * Returns true if all checks pass, false otherwise.
+ */
+bool
+nvmf_nqn_valid_strict(const char *nqn)
+{
+	size_t len;
+
+	if (!nvmf_nqn_valid(nqn))
+		return (false);
+
+	/*
+	 * Stricter checks from the spec.  Linux does not seem to
+	 * require these.
+	 */
+	len = strlen(nqn);
+
+	/*
+	 * NVMF_NQN_MIN_LEN does not include the '.' that follows the
+	 * date, and at least one character of a domain name is required.
+	 */
+	if (len < NVMF_NQN_MIN_LEN + 2)
+		return (false);
+	if (memcmp("nqn.", nqn, strlen("nqn.")) != 0)
+		return (false);
+	nqn += strlen("nqn.");
+
+	/*
+	 * Next 4 digits must be a year.  The cast keeps the argument of
+	 * isdigit() representable as an unsigned char, as <ctype.h>
+	 * requires; plain char may be negative.
+	 */
+	for (u_int i = 0; i < 4; i++) {
+		if (!isdigit((unsigned char)nqn[i]))
+			return (false);
+	}
+	nqn += 4;
+
+	/* '-' between year and month. */
+	if (nqn[0] != '-')
+		return (false);
+	nqn++;
+
+	/* 2 digit month. */
+	for (u_int i = 0; i < 2; i++) {
+		if (!isdigit((unsigned char)nqn[i]))
+			return (false);
+	}
+	nqn += 2;
+
+	/* '.' between month and reverse domain name. */
+	if (nqn[0] != '.')
+		return (false);
+	return (true);
+}
+
void
nvmf_init_cqe(void *cqe, const struct nvmf_capsule *nc, uint16_t status)
{
diff --git a/lib/libsys/getsockopt.2 b/lib/libsys/getsockopt.2
index 8839b61597a2..3867824681d7 100644
--- a/lib/libsys/getsockopt.2
+++ b/lib/libsys/getsockopt.2
@@ -593,6 +593,15 @@ specified amount of time has elapsed since the initial call to
If
.Fa sp_fd
is -1, the socket will be unspliced immediately.
+A successful
+.Xr select 2 ,
+.Xr poll 2 ,
+or
+.Xr kqueue 2
+operation testing the ability to read from the source socket indicates
+that the splicing has terminated and at least one byte is available for
+reading.
+When one of the sockets gets closed, splicing ends.
.Pp
When passed to
.Fn getsockopt ,
diff --git a/lib/libthr/thread/thr_getthreadid_np.c b/lib/libthr/thread/thr_getthreadid_np.c
index ade332519dfb..ffecd0bc7ea9 100644
--- a/lib/libthr/thread/thr_getthreadid_np.c
+++ b/lib/libthr/thread/thr_getthreadid_np.c
@@ -36,7 +36,7 @@ __weak_reference(_thr_getthreadid_np, _pthread_getthreadid_np);
__weak_reference(_thr_getthreadid_np, pthread_getthreadid_np);
/*
- * Provide the equivelant to AIX pthread_getthreadid_np() function.
+ * Provide the equivalent to AIX pthread_getthreadid_np() function.
*/
int
_thr_getthreadid_np(void)
diff --git a/lib/libutil/Makefile b/lib/libutil/Makefile
index 0639745d08fc..2d92c5ba1916 100644
--- a/lib/libutil/Makefile
+++ b/lib/libutil/Makefile
@@ -38,6 +38,7 @@ MAN+= cpuset.3 expand_number.3 flopen.3 fparseln.3 ftime.3 getlocalbase.3 \
property.3 pty.3 quotafile.3 realhostname.3 realhostname_sa.3 \
_secure_path.3 trimdomain.3 uucplock.3 pw_util.3
MAN+= login.conf.5
+MLINKS+=cpuset.3 domainset_parselist.3
MLINKS+=flopen.3 flopenat.3
MLINKS+=kld.3 kld_isloaded.3 kld.3 kld_load.3
MLINKS+=login_auth.3 auth_cat.3 login_auth.3 auth_checknologin.3
diff --git a/lib/libutil/cpuset.3 b/lib/libutil/cpuset.3
index be29d5309ef0..47dffd209ee6 100644
--- a/lib/libutil/cpuset.3
+++ b/lib/libutil/cpuset.3
@@ -22,21 +22,22 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd October 31, 2017
+.Dd June 24, 2025
.Dt CPUSET 3
.Os
.Sh NAME
-.Nm cpuset_parselist
-.Nd utility functions for
-.Xr cpuset 2
-handling
+.Nm cpuset_parselist ,
+.Nm domainset_parselist
+.Nd utility functions for cpuset(2) handling
.Sh LIBRARY
.Lb libutil
.Sh SYNOPSIS
.In sys/cpuset.h
.In libutil.h
.Ft int
-.Fn cpuset_parselist "const char *cpu-list" "cpuset_t *mask"
+.Fn cpuset_parselist "const char *cpu_list" "cpuset_t *mask"
+.Ft int
+.Fn domainset_parselist "const char *domain_policy" "domainset_t *domain_mask" "int *policyp"
.Sh DESCRIPTION
The
.Fn cpuset_parselist
@@ -52,6 +53,27 @@ numbers.
A special list of
.Dq all
may be specified in which case the list includes all CPUs from the root set.
+.Pp
+The
+.Fn domainset_parselist
+function parses a
+.Xr domainset 9
+memory domain allocation policy
+specified by
+.Va domain_policy
+filling the
+.Va domain_mask
+and the
+.Va policyp .
+A valid
+.Va domain_policy
+is formatted as
+.Ar policy:domain-list .
+See the
+.Fl n
+flag in
+.Xr cpuset 1
+for a list of valid domain policies.
.Sh RETURN VALUES
Return values can be the following
.Bl -tag -width Er
@@ -60,19 +82,30 @@ The parsing was successful
.It Dv CPUSET_PARSE_ERROR
The
.Va cpu-list
+or
+.Va domain_policy
format is invalid
.It Dv CPUSET_PARSE_GETAFFINITY
The
.Xr cpuset_getaffinity 2
call has failed
.It Dv CPUSET_PARSE_INVALID_CPU
-The number of supported CPUs has been exceeded.
+The number of supported CPUs or NUMA domains has been exceeded.
The maximum number being
-.Va CPU_SETSIZE .
+.Va CPU_SETSIZE
+and
+.Va DOMAINSET_SETSIZE
+respectively.
+.It Dv CPUSET_PARSE_GETDOMAIN
+The
+.Xr cpuset_getdomain 2
+call has failed
.El
.Sh SEE ALSO
.Xr cpuset 1 ,
.Xr cpuset 2 ,
-.Xr cpuset 9
+.Xr numa 4 ,
+.Xr cpuset 9 ,
+.Xr domainset 9
.Sh AUTHORS
.An Jeffrey Roberson Aq Mt jeff@FreeBSD.org
diff --git a/lib/libutil/cpuset.c b/lib/libutil/cpuset.c
index 3c374bfa6cac..d4840af7e175 100644
--- a/lib/libutil/cpuset.c
+++ b/lib/libutil/cpuset.c
@@ -27,34 +27,48 @@
* SUCH DAMAGE.
*/
+#include <sys/cdefs.h>
+#define _WANT_FREEBSD_BITSET
+
#include <sys/types.h>
#include <sys/cpuset.h>
+#include <sys/domainset.h>
#include <stdlib.h>
#include <string.h>
#include <libutil.h>
#include <ctype.h>
-int
-cpuset_parselist(const char *list, cpuset_t *mask)
+/* Maps a textual policy name (or abbreviation) to its DOMAINSET_POLICY_* value. */
+struct numa_policy {
+	const char *name;
+	int policy;
+};
+
+/*
+ * Recognised policy spellings.  The entry with a NULL name terminates
+ * the table.
+ */
+static const struct numa_policy policies[] = {
+	{ .name = "round-robin", .policy = DOMAINSET_POLICY_ROUNDROBIN },
+	{ .name = "rr",          .policy = DOMAINSET_POLICY_ROUNDROBIN },
+	{ .name = "first-touch", .policy = DOMAINSET_POLICY_FIRSTTOUCH },
+	{ .name = "ft",          .policy = DOMAINSET_POLICY_FIRSTTOUCH },
+	{ .name = "prefer",      .policy = DOMAINSET_POLICY_PREFER },
+	{ .name = "interleave",  .policy = DOMAINSET_POLICY_INTERLEAVE },
+	{ .name = "il",          .policy = DOMAINSET_POLICY_INTERLEAVE },
+	{ .name = NULL,          .policy = DOMAINSET_POLICY_INVALID }
+};
+
+static int
+parselist(const char *list, struct bitset *mask, int size)
{
enum { NONE, NUM, DASH } state;
int lastnum;
int curnum;
const char *l;
- if (strcasecmp(list, "all") == 0) {
- if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
- sizeof(*mask), mask) != 0)
- return (CPUSET_PARSE_GETAFFINITY);
- return (CPUSET_PARSE_OK);
- }
state = NONE;
curnum = lastnum = 0;
for (l = list; *l != '\0';) {
if (isdigit(*l)) {
curnum = atoi(l);
- if (curnum > CPU_SETSIZE)
+ if (curnum >= size)
return (CPUSET_PARSE_INVALID_CPU);
while (isdigit(*l))
l++;
@@ -65,7 +79,7 @@ cpuset_parselist(const char *list, cpuset_t *mask)
break;
case DASH:
for (; lastnum <= curnum; lastnum++)
- CPU_SET(lastnum, mask);
+ BIT_SET(size, lastnum, mask);
state = NONE;
break;
case NUM:
@@ -80,7 +94,7 @@ cpuset_parselist(const char *list, cpuset_t *mask)
case NONE:
break;
case NUM:
- CPU_SET(curnum, mask);
+ BIT_SET(size, curnum, mask);
state = NONE;
break;
case DASH:
@@ -102,7 +116,7 @@ cpuset_parselist(const char *list, cpuset_t *mask)
case NONE:
break;
case NUM:
- CPU_SET(curnum, mask);
+ BIT_SET(size, curnum, mask);
break;
case DASH:
goto parserr;
@@ -111,3 +125,63 @@ cpuset_parselist(const char *list, cpuset_t *mask)
parserr:
return (CPUSET_PARSE_ERROR);
}
+
+/*
+ * Permissively parse a "policy:domain-list" specification.
+ *
+ * Accepted forms:
+ *	round-robin:0-4		explicit policy, explicit domains
+ *	round-robin:all		explicit policy, root domains
+ *	0-4			root policy, explicit domains
+ *	round-robin		explicit policy, root domains
+ *	all			root policy, root domains
+ * A NULL or empty string selects the root policy and root domains.
+ *
+ * On success the policy is stored in *policyp and the domains in *mask.
+ * Returns CPUSET_PARSE_OK, CPUSET_PARSE_GETDOMAIN if the root set cannot
+ * be queried, CPUSET_PARSE_ERROR if a policy name is followed by anything
+ * other than ':' or the end of the string, or whatever parselist()
+ * returns for the domain list.
+ */
+int
+domainset_parselist(const char *list, domainset_t *mask, int *policyp)
+{
+	domainset_t rootmask;
+	const struct numa_policy *policy;
+	const char *l;
+	size_t namelen;
+	int p;
+
+	/*
+	 * Use the rootset's policy as the default for unspecified policies.
+	 */
+	if (cpuset_getdomain(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
+	    sizeof(rootmask), &rootmask, &p) != 0)
+		return (CPUSET_PARSE_GETDOMAIN);
+
+	/* Strip an optional leading policy name. */
+	l = list != NULL ? list : "";
+	for (policy = &policies[0]; policy->name != NULL; policy++) {
+		namelen = strlen(policy->name);
+		if (strncasecmp(l, policy->name, namelen) != 0)
+			continue;
+		p = policy->policy;
+		l += namelen;
+		if (*l != ':' && *l != '\0')
+			return (CPUSET_PARSE_ERROR);
+		if (*l == ':')
+			l++;
+		break;
+	}
+	*policyp = p;
+
+	/*
+	 * A missing domain list or the keyword "all" selects the root
+	 * domains.  This is checked after the policy prefix has been
+	 * removed so that "policy" and "policy:all" work as documented
+	 * above.
+	 */
+	if (*l == '\0' || strcasecmp(l, "all") == 0) {
+		DOMAINSET_COPY(&rootmask, mask);
+		return (CPUSET_PARSE_OK);
+	}
+
+	return (parselist(l, (struct bitset *)mask, DOMAINSET_SETSIZE));
+}
+
+/*
+ * Parse a CPU list into 'mask'.  The keyword "all" (case-insensitive)
+ * fills the mask from cpuset_getaffinity() at CPU_LEVEL_ROOT; any other
+ * string is handed to parselist() with a limit of CPU_SETSIZE.
+ */
+int
+cpuset_parselist(const char *list, cpuset_t *mask)
+{
+	int rc;
+
+	/* Anything other than "all" is an explicit list. */
+	if (strcasecmp(list, "all") != 0)
+		return (parselist(list, (struct bitset *)mask, CPU_SETSIZE));
+
+	rc = cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
+	    sizeof(*mask), mask);
+	if (rc != 0)
+		return (CPUSET_PARSE_GETAFFINITY);
+	return (CPUSET_PARSE_OK);
+}
diff --git a/lib/libutil/libutil.h b/lib/libutil/libutil.h
index 919855184caf..7d8bfdf67fac 100644
--- a/lib/libutil/libutil.h
+++ b/lib/libutil/libutil.h
@@ -213,7 +213,13 @@ int cpuset_parselist(const char *list, cpuset_t *mask);
#define CPUSET_PARSE_OK 0
#define CPUSET_PARSE_GETAFFINITY -1
#define CPUSET_PARSE_ERROR -2
-#define CPUSET_PARSE_INVALID_CPU -3
+#define CPUSET_PARSE_OUT_OF_RANGE -3
+#define CPUSET_PARSE_GETDOMAIN -4
+#define CPUSET_PARSE_INVALID_CPU CPUSET_PARSE_OUT_OF_RANGE /* backwards compat */
+#endif
+
+#ifdef _SYS_DOMAINSET_H_
+int domainset_parselist(const char *list, domainset_t *mask, int *policyp);
#endif
__END_DECLS
diff --git a/lib/libvmmapi/Makefile b/lib/libvmmapi/Makefile
index 1866c8fa5e7c..6dd0deeaa9c0 100644
--- a/lib/libvmmapi/Makefile
+++ b/lib/libvmmapi/Makefile
@@ -1,6 +1,6 @@
PACKAGE=lib${LIB}
LIB= vmmapi
-SHLIB_MAJOR= 6
+SHLIB_MAJOR= 7
SRCS= vmmapi.c
INCS= vmmapi.h
diff --git a/lib/libvmmapi/internal.h b/lib/libvmmapi/internal.h
index aa7b1d8e6a93..4afe1cab3460 100644
--- a/lib/libvmmapi/internal.h
+++ b/lib/libvmmapi/internal.h
@@ -8,12 +8,7 @@
#define __VMMAPI_INTERNAL_H__
#include <sys/types.h>
-
-enum {
- VM_MEMSEG_LOW,
- VM_MEMSEG_HIGH,
- VM_MEMSEG_COUNT,
-};
+#include <dev/vmm/vmm_mem.h>
struct vmctx {
int fd; /* device file descriptor */
@@ -21,7 +16,9 @@ struct vmctx {
struct {
vm_paddr_t base;
vm_size_t size;
- } memsegs[VM_MEMSEG_COUNT];
+ } memsegs[VM_MAX_MEMSEGS];
+ size_t lowmem_size;
+ size_t highmem_size;
int memflags;
char *baseaddr;
char *name;
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index a1a5d56ff8a2..77f0f8f5c581 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -28,13 +28,14 @@
#include <sys/param.h>
#include <sys/capsicum.h>
+#include <sys/cpuset.h>
+#include <sys/domainset.h>
#include <sys/sysctl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/linker.h>
#include <sys/module.h>
#include <sys/_iovec.h>
-#include <sys/cpuset.h>
#include <capsicum_helpers.h>
#include <err.h>
@@ -322,8 +323,8 @@ vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr,
{
*guest_baseaddr = ctx->baseaddr;
- *lowmem_size = ctx->memsegs[VM_MEMSEG_LOW].size;
- *highmem_size = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ *lowmem_size = ctx->lowmem_size;
+ *highmem_size = ctx->highmem_size;
return (0);
}
@@ -379,7 +380,8 @@ cmpseg(size_t len, const char *str, size_t len2, const char *str2)
}
static int
-vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
+vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name,
+ int ds_policy, domainset_t *ds_mask, size_t ds_size)
{
struct vm_memseg memseg;
size_t n;
@@ -407,6 +409,13 @@ vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
bzero(&memseg, sizeof(struct vm_memseg));
memseg.segid = segid;
memseg.len = len;
+ if (ds_mask == NULL) {
+ memseg.ds_policy = DOMAINSET_POLICY_INVALID;
+ } else {
+ memseg.ds_policy = ds_policy;
+ memseg.ds_mask = ds_mask;
+ memseg.ds_mask_size = ds_size;
+ }
if (name != NULL) {
n = strlcpy(memseg.name, name, sizeof(memseg.name));
if (n >= sizeof(memseg.name)) {
@@ -442,13 +451,14 @@ vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
}
static int
-setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
+map_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len,
+ size_t segoff, char *base)
{
char *ptr;
int error, flags;
/* Map 'len' bytes starting at 'gpa' in the guest address space */
- error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
+ error = vm_mmap_memseg(ctx, gpa, segid, segoff, len, PROT_ALL);
if (error)
return (error);
@@ -464,65 +474,136 @@ setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
return (0);
}
+/*
+ * Allocates and maps virtual machine memory segments according
+ * to the NUMA topology specified by the 'doms' array.
+ *
+ * The domains are laid out sequentially in the guest's physical address space.
+ * The [VM_LOWMEM_LIMIT, VM_HIGHMEM_BASE) address range is skipped and
+ * left unmapped.
+ */
int
-vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+vm_setup_memory_domains(struct vmctx *ctx, enum vm_mmap_style vms,
+ struct vm_mem_domain *doms, int ndoms)
{
- size_t objsize, len;
- vm_paddr_t gpa;
+ size_t low_len, len, totalsize;
+ struct vm_mem_domain *dom;
+ struct vm_memseg memseg;
char *baseaddr, *ptr;
- int error;
+ int error, i, segid;
+ vm_paddr_t gpa;
+ /* Sanity checks. */
assert(vms == VM_MMAP_ALL);
-
- /*
- * If 'memsize' cannot fit entirely in the 'lowmem' segment then create
- * another 'highmem' segment above VM_HIGHMEM_BASE for the remainder.
- */
- if (memsize > VM_LOWMEM_LIMIT) {
- ctx->memsegs[VM_MEMSEG_LOW].size = VM_LOWMEM_LIMIT;
- ctx->memsegs[VM_MEMSEG_HIGH].size = memsize - VM_LOWMEM_LIMIT;
- objsize = VM_HIGHMEM_BASE + ctx->memsegs[VM_MEMSEG_HIGH].size;
- } else {
- ctx->memsegs[VM_MEMSEG_LOW].size = memsize;
- ctx->memsegs[VM_MEMSEG_HIGH].size = 0;
- objsize = memsize;
+ if (doms == NULL || ndoms <= 0 || ndoms > VM_MAXMEMDOM) {
+ errno = EINVAL;
+ return (-1);
}
- error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
- if (error)
- return (error);
+ /* Calculate total memory size. */
+ totalsize = 0;
+ for (i = 0; i < ndoms; i++)
+ totalsize += doms[i].size;
+
+ if (totalsize > VM_LOWMEM_LIMIT)
+ totalsize = VM_HIGHMEM_BASE + (totalsize - VM_LOWMEM_LIMIT);
/*
* Stake out a contiguous region covering the guest physical memory
* and the adjoining guard regions.
*/
- len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
+ len = VM_MMAP_GUARD_SIZE + totalsize + VM_MMAP_GUARD_SIZE;
ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0);
if (ptr == MAP_FAILED)
return (-1);
-
baseaddr = ptr + VM_MMAP_GUARD_SIZE;
- if (ctx->memsegs[VM_MEMSEG_HIGH].size > 0) {
- gpa = VM_HIGHMEM_BASE;
- len = ctx->memsegs[VM_MEMSEG_HIGH].size;
- error = setup_memory_segment(ctx, gpa, len, baseaddr);
- if (error)
- return (error);
- }
- if (ctx->memsegs[VM_MEMSEG_LOW].size > 0) {
- gpa = 0;
- len = ctx->memsegs[VM_MEMSEG_LOW].size;
- error = setup_memory_segment(ctx, gpa, len, baseaddr);
- if (error)
- return (error);
- }
+ /*
+ * Allocate and map memory segments for the virtual machine.
+ */
+ gpa = VM_LOWMEM_LIMIT > 0 ? 0 : VM_HIGHMEM_BASE;
+ ctx->lowmem_size = 0;
+ ctx->highmem_size = 0;
+ for (i = 0; i < ndoms; i++) {
+ segid = VM_SYSMEM + i;
+ dom = &doms[i];
+
+ /*
+ * Check if the memory segment already exists.
+ * If 'ndoms' is greater than one, refuse to proceed if the
+ * memseg already exists. If only one domain was requested, use
+ * the existing segment to preserve the behaviour of the previous
+ * implementation.
+ *
+ * Splitting existing memory segments is tedious and
+ * error-prone, which is why we don't support NUMA
+ * domains for bhyveload(8)-loaded VMs.
+ */
+ error = vm_get_memseg(ctx, segid, &len, memseg.name,
+ sizeof(memseg.name));
+ if (error == 0 && len != 0) {
+ if (ndoms != 1) {
+ errno = EEXIST;
+ return (-1);
+ } else
+ doms[0].size = len;
+ } else {
+ error = vm_alloc_memseg(ctx, segid, dom->size, NULL,
+ dom->ds_policy, dom->ds_mask, dom->ds_size);
+ if (error)
+ return (error);
+ }
+ /*
+ * If a domain is split by VM_LOWMEM_LIMIT then break
+ * its segment mapping into two parts, one below VM_LOWMEM_LIMIT
+ * and one above VM_HIGHMEM_BASE.
+ */
+ if (gpa <= VM_LOWMEM_LIMIT &&
+ gpa + dom->size > VM_LOWMEM_LIMIT) {
+ low_len = VM_LOWMEM_LIMIT - gpa;
+ error = map_memory_segment(ctx, segid, gpa, low_len, 0,
+ baseaddr);
+ if (error)
+ return (error);
+ ctx->lowmem_size = VM_LOWMEM_LIMIT;
+ /* Map the remainder. */
+ gpa = VM_HIGHMEM_BASE;
+ len = dom->size - low_len;
+ error = map_memory_segment(ctx, segid, gpa, len,
+ low_len, baseaddr);
+ if (error)
+ return (error);
+ } else {
+ len = dom->size;
+ error = map_memory_segment(ctx, segid, gpa, len, 0,
+ baseaddr);
+ if (error)
+ return (error);
+ }
+ if (gpa <= VM_LOWMEM_LIMIT)
+ ctx->lowmem_size += len;
+ else
+ ctx->highmem_size += len;
+ gpa += len;
+ }
ctx->baseaddr = baseaddr;
return (0);
}
+/*
+ * Set up guest memory as a single domain of 'memsize' bytes with no
+ * domainset policy, by delegating to vm_setup_memory_domains().
+ */
+int
+vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+{
+	struct vm_mem_domain single = {
+		.size = memsize,
+		.ds_policy = DOMAINSET_POLICY_INVALID,
+		.ds_mask = NULL,
+		.ds_size = 0,
+	};
+
+	return (vm_setup_memory_domains(ctx, vms, &single, 1));
+}
+
/*
* Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
* the lowmem or highmem regions.
@@ -535,13 +616,13 @@ vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
{
vm_size_t lowsize, highsize;
- lowsize = ctx->memsegs[VM_MEMSEG_LOW].size;
+ lowsize = ctx->lowmem_size;
if (lowsize > 0) {
if (gaddr < lowsize && len <= lowsize && gaddr + len <= lowsize)
return (ctx->baseaddr + gaddr);
}
- highsize = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ highsize = ctx->highmem_size;
if (highsize > 0 && gaddr >= VM_HIGHMEM_BASE) {
if (gaddr < VM_HIGHMEM_BASE + highsize && len <= highsize &&
gaddr + len <= VM_HIGHMEM_BASE + highsize)
@@ -559,12 +640,12 @@ vm_rev_map_gpa(struct vmctx *ctx, void *addr)
offaddr = (char *)addr - ctx->baseaddr;
- lowsize = ctx->memsegs[VM_MEMSEG_LOW].size;
+ lowsize = ctx->lowmem_size;
if (lowsize > 0)
if (offaddr <= lowsize)
return (offaddr);
- highsize = ctx->memsegs[VM_MEMSEG_HIGH].size;
+ highsize = ctx->highmem_size;
if (highsize > 0)
if (offaddr >= VM_HIGHMEM_BASE &&
offaddr < VM_HIGHMEM_BASE + highsize)
@@ -583,8 +664,7 @@ vm_get_name(struct vmctx *ctx)
size_t
vm_get_lowmem_size(struct vmctx *ctx)
{
-
- return (ctx->memsegs[VM_MEMSEG_LOW].size);
+ return (ctx->lowmem_size);
}
vm_paddr_t
@@ -597,8 +677,7 @@ vm_get_highmem_base(struct vmctx *ctx __unused)
size_t
vm_get_highmem_size(struct vmctx *ctx)
{
-
- return (ctx->memsegs[VM_MEMSEG_HIGH].size);
+ return (ctx->highmem_size);
}
void *
@@ -616,7 +695,7 @@ vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
goto done;
}
- error = vm_alloc_memseg(ctx, segid, len, name);
+ error = vm_alloc_memseg(ctx, segid, len, name, 0, NULL, 0);
if (error)
goto done;
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index 440064ad13cb..b637c45d1eff 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -40,7 +40,7 @@
* API version for out-of-tree consumers like grub-bhyve for making compile
* time decisions.
*/
-#define VMMAPI_VERSION 0200 /* 2 digit major followed by 2 digit minor */
+#define VMMAPI_VERSION 0300 /* 2 digit major followed by 2 digit minor */
struct iovec;
struct vcpu;
@@ -64,16 +64,12 @@ enum vm_mmap_style {
#define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */
#define VM_MEM_F_WIRED 0x02 /* guest memory is wired */
-/*
- * Identifiers for memory segments:
- * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
- * - the remaining identifiers can be used to create devmem segments.
- */
-enum {
- VM_SYSMEM,
- VM_BOOTROM,
- VM_FRAMEBUFFER,
- VM_PCIROM,
+/* Memory size and allocation policy for a single NUMA domain. */
+struct vm_mem_domain {
+ size_t size;
+ int ds_policy;
+ domainset_t *ds_mask;
+ size_t ds_size;
};
__BEGIN_DECLS
@@ -127,7 +123,9 @@ struct vcpu *vm_vcpu_open(struct vmctx *ctx, int vcpuid);
void vm_vcpu_close(struct vcpu *vcpu);
int vcpu_id(struct vcpu *vcpu);
int vm_parse_memsize(const char *optarg, size_t *memsize);
-int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
+int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
+int vm_setup_memory_domains(struct vmctx *ctx, enum vm_mmap_style s,
+ struct vm_mem_domain *doms, int ndoms);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
/* inverse operation to vm_map_gpa - extract guest address from host pointer */
vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr);
diff --git a/libexec/comsat/comsat.c b/libexec/comsat/comsat.c
index d5d1eedeb5f3..cb00ee4a9392 100644
--- a/libexec/comsat/comsat.c
+++ b/libexec/comsat/comsat.c
@@ -113,29 +113,24 @@ mailfor(char *name)
char *file;
off_t offset;
int folder;
- char buf[sizeof(_PATH_MAILDIR) + sizeof(utp->ut_user) + 1];
- char buf2[sizeof(_PATH_MAILDIR) + sizeof(utp->ut_user) + 1];
+ char buf[MAXPATHLEN];
- if (!(cp = strchr(name, '@')))
+ if ((cp = strchr(name, '@')) == NULL)
return;
*cp = '\0';
offset = strtoll(cp + 1, NULL, 10);
- if (!(cp = strchr(cp + 1, ':')))
- file = name;
- else
- file = cp + 1;
- sprintf(buf, "%s/%.*s", _PATH_MAILDIR, (int)sizeof(utp->ut_user),
- name);
- if (*file != '/') {
- sprintf(buf2, "%s/%.*s", _PATH_MAILDIR,
- (int)sizeof(utp->ut_user), file);
- file = buf2;
+ if ((cp = strchr(cp + 1, ':')) != NULL &&
+ strchr((file = cp + 1), '/') == NULL) {
+ snprintf(buf, sizeof(buf), "%s/%s", _PATH_MAILDIR, file);
+ folder = 1;
+ } else {
+ snprintf(buf, sizeof(buf), "%s/%s", _PATH_MAILDIR, name);
+ folder = 0;
}
- folder = strcmp(buf, file);
setutxent();
while ((utp = getutxent()) != NULL)
if (utp->ut_type == USER_PROCESS && !strcmp(utp->ut_user, name))
- notify(utp, file, offset, folder);
+ notify(utp, buf, offset, folder);
endutxent();
}
@@ -159,8 +154,7 @@ notify(struct utmpx *utp, char file[], off_t offset, int folder)
utp->ut_line);
return;
}
- (void)snprintf(tty, sizeof(tty), "%s%.*s",
- _PATH_DEV, (int)sizeof(utp->ut_line), utp->ut_line);
+ (void)snprintf(tty, sizeof(tty), "%s%s", _PATH_DEV, utp->ut_line);
if (stat(tty, &stb) == -1 || !(stb.st_mode & (S_IXUSR | S_IXGRP))) {
dsyslog(LOG_DEBUG, "%s: wrong mode on %s", utp->ut_user, tty);
return;
@@ -187,26 +181,20 @@ notify(struct utmpx *utp, char file[], off_t offset, int folder)
initgroups(p->pw_name, p->pw_gid) == -1 ||
setgid(p->pw_gid) == -1 ||
setuid(p->pw_uid) == -1)
- return;
+ _exit(1);
- switch (stb.st_mode & (S_IXUSR | S_IXGRP)) {
- case S_IXUSR:
- case (S_IXUSR | S_IXGRP):
+ if (stb.st_mode & S_IXUSR) {
(void)fprintf(tp,
"%s\007New mail for %s@%.*s\007 has arrived%s%s%s:%s----%s",
cr, utp->ut_user, (int)sizeof(hostname), hostname,
folder ? cr : "", folder ? "to " : "", folder ? file : "",
cr, cr);
jkfprintf(tp, file, offset);
- break;
- case S_IXGRP:
+ } else if (stb.st_mode & S_IXGRP) {
(void)fprintf(tp, "\007");
(void)fflush(tp);
(void)sleep(1);
(void)fprintf(tp, "\007");
- break;
- default:
- break;
}
(void)fclose(tp);
_exit(0);
diff --git a/release/packages/ucl/bmake-all.ucl b/release/packages/ucl/bmake-all.ucl
new file mode 100644
index 000000000000..ee8175d1dd8a
--- /dev/null
+++ b/release/packages/ucl/bmake-all.ucl
@@ -0,0 +1,5 @@
+comment = "Program maintenance utility"
+desc = <<EOD
+make(1) allows programs to be built from source files based on a specification
+of the program's dependencies called a Makefile.
+EOD
diff --git a/release/packages/ucl/sendmail.ucl b/release/packages/ucl/sendmail.ucl
new file mode 100644
index 000000000000..c79775eb8af4
--- /dev/null
+++ b/release/packages/ucl/sendmail.ucl
@@ -0,0 +1,7 @@
+deps {
+ # sendmail requires make to build its configuration file.
+ "bmake": {
+ version = "${VERSION}"
+ origin = "base"
+ }
+}
diff --git a/release/packages/ucl/yp.ucl b/release/packages/ucl/yp.ucl
new file mode 100644
index 000000000000..14b2327e56d1
--- /dev/null
+++ b/release/packages/ucl/yp.ucl
@@ -0,0 +1,7 @@
+deps {
+ # YP requires bmake to rebuild the database.
+ "bmake": {
+ version = "${VERSION}"
+ origin = "base"
+ }
+}
diff --git a/release/scripts/pkgbase-stage.lua b/release/scripts/pkgbase-stage.lua
index 01eec8c44e49..1b48b4faede3 100755
--- a/release/scripts/pkgbase-stage.lua
+++ b/release/scripts/pkgbase-stage.lua
@@ -46,7 +46,9 @@ local function select_packages(pkg, media, all_libcompats)
table.insert(components["src"], package)
elseif package == "FreeBSD-tests" or package:match("^FreeBSD%-tests%-.*") then
table.insert(components["tests"], package)
- elseif package:match("^FreeBSD%-kernel%-.*") then
+ elseif package:match("^FreeBSD%-kernel%-.*") and
+ package ~= "FreeBSD-kernel-man"
+ then
-- Kernels other than FreeBSD-kernel-generic are ignored
if package == "FreeBSD-kernel-generic" then
table.insert(components["kernel"], package)
diff --git a/release/tools/vmimage.subr b/release/tools/vmimage.subr
index eb816018e9d3..156987e33457 100644
--- a/release/tools/vmimage.subr
+++ b/release/tools/vmimage.subr
@@ -118,7 +118,6 @@ vm_emulation_setup() {
mkdir -p ${DESTDIR}/dev
mount -t devfs devfs ${DESTDIR}/dev
- chroot ${DESTDIR} ${EMULATOR} /usr/bin/newaliases
chroot ${DESTDIR} ${EMULATOR} /bin/sh /etc/rc.d/ldconfig forcestart
cp /etc/resolv.conf ${DESTDIR}/etc/resolv.conf
diff --git a/sbin/ifconfig/af_inet6.c b/sbin/ifconfig/af_inet6.c
index 17dc068ee875..7986edf490b4 100644
--- a/sbin/ifconfig/af_inet6.c
+++ b/sbin/ifconfig/af_inet6.c
@@ -759,7 +759,7 @@ static struct afswtch af_inet6 = {
#else
.af_difaddr = NL_RTM_DELADDR,
.af_aifaddr = NL_RTM_NEWADDR,
- .af_ridreq = &in6_add,
+ .af_ridreq = &in6_del,
.af_addreq = &in6_add,
.af_exec = in6_exec_nl,
#endif
diff --git a/sbin/ifconfig/ifbridge.c b/sbin/ifconfig/ifbridge.c
index 3465dc223ada..a75c37e640a2 100644
--- a/sbin/ifconfig/ifbridge.c
+++ b/sbin/ifconfig/ifbridge.c
@@ -80,6 +80,20 @@ get_val(const char *cp, u_long *valp)
}
+/*
+ * Parse 'cp' as a VLAN identifier.  Returns 0 and stores the value in
+ * *valp on success; returns -1 if get_val() fails or the value lies
+ * outside [DOT1Q_VID_MIN, DOT1Q_VID_MAX].
+ */
static int
+get_vlan_id(const char *cp, ether_vlanid_t *valp)
+{
+	u_long vid;
+
+	/* The range is only evaluated once the parse has succeeded. */
+	if (get_val(cp, &vid) == -1 ||
+	    vid < DOT1Q_VID_MIN || vid > DOT1Q_VID_MAX)
+		return (-1);
+
+	*valp = (ether_vlanid_t)vid;
+	return (0);
+}
+
+static int
do_cmd(if_ctx *ctx, u_long op, void *arg, size_t argsize, int set)
{
struct ifdrv ifd = {};
@@ -242,8 +256,8 @@ bridge_status(if_ctx *ctx)
else
printf(" <unknown state %d>", state);
}
- if (member->ifbr_untagged != 0)
- printf(" untagged %u", (unsigned)member->ifbr_untagged);
+ if (member->ifbr_pvid != 0)
+ printf(" untagged %u", (unsigned)member->ifbr_pvid);
print_vlans(&bridge->member_vlans[i]);
printf("\n");
}
@@ -614,25 +628,15 @@ static void
setbridge_untagged(if_ctx *ctx, const char *ifn, const char *vlanid)
{
struct ifbreq req;
- u_long val;
memset(&req, 0, sizeof(req));
+ strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname));
- if (get_val(vlanid, &val) < 0)
+ if (get_vlan_id(vlanid, &req.ifbr_pvid) < 0)
errx(1, "invalid VLAN identifier: %s", vlanid);
- /*
- * Reject vlan 0, since it's not a valid vlan identifier and has a
- * special meaning in the kernel interface.
- */
- if (val == 0)
- errx(1, "invalid VLAN identifier: %lu", val);
-
- strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname));
- req.ifbr_untagged = val;
-
- if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0)
- err(1, "BRDGSIFUNTAGGED %s", vlanid);
+ if (do_cmd(ctx, BRDGSIFPVID, &req, sizeof(req), 1) < 0)
+ err(1, "BRDGSIFPVID %s", vlanid);
}
static void
@@ -643,10 +647,10 @@ unsetbridge_untagged(if_ctx *ctx, const char *ifn, int dummy __unused)
memset(&req, 0, sizeof(req));
strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname));
- req.ifbr_untagged = 0;
+ req.ifbr_pvid = 0;
- if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0)
- err(1, "BRDGSIFUNTAGGED");
+ if (do_cmd(ctx, BRDGSIFPVID, &req, sizeof(req), 1) < 0)
+ err(1, "BRDGSIFPVID");
}
static void
diff --git a/sbin/ifconfig/tests/inet6.sh b/sbin/ifconfig/tests/inet6.sh
index edfd88d93af7..22399915a64d 100644
--- a/sbin/ifconfig/tests/inet6.sh
+++ b/sbin/ifconfig/tests/inet6.sh
@@ -76,8 +76,38 @@ broadcast_cleanup()
vnet_cleanup
}
+# delete6: add an IPv6 address to one end of an epair, remove it again
+# with -alias, and check that it is no longer listed.
+atf_test_case "delete6" "cleanup"
+delete6_head()
+{
+	atf_set descr 'Test removing IPv6 addresses'
+	atf_set require.user root
+}
+
+delete6_body()
+{
+	vnet_init
+
+	ep=$(vnet_mkepair)
+	ifname="${ep}a"
+	addr="fe80::42"
+
+	# Assign the address and confirm that it shows up.
+	atf_check -s exit:0 \
+	    ifconfig "${ifname}" inet6 "${addr}/64"
+	atf_check -s exit:0 -o match:"${addr}%${ep}" \
+	    ifconfig "${ifname}" inet6
+
+	# Remove the address and confirm that it is gone.
+	atf_check -s exit:0 \
+	    ifconfig "${ifname}" inet6 -alias "${addr}"
+	atf_check -s exit:0 -o not-match:"${addr}%${ep}" \
+	    ifconfig "${ifname}" inet6
+}
+
+delete6_cleanup()
+{
+	vnet_cleanup
+}
+}
+
atf_init_test_cases()
{
atf_add_test_case netmask
atf_add_test_case broadcast
+ atf_add_test_case delete6
}
diff --git a/sbin/ping/Makefile b/sbin/ping/Makefile
index b4e3f115b245..30c68cbaba52 100644
--- a/sbin/ping/Makefile
+++ b/sbin/ping/Makefile
@@ -32,8 +32,6 @@ CFLAGS+=-DWITH_CASPER
CFLAGS+=-DIPSEC
LIBADD+= ipsec
-CFLAGS+= -Wno-error=unused-but-set-variable
-
HAS_TESTS=
SUBDIR.${MK_TESTS}+= tests
diff --git a/sbin/recoverdisk/recoverdisk.c b/sbin/recoverdisk/recoverdisk.c
index e1b283e54a93..f13a1f211863 100644
--- a/sbin/recoverdisk/recoverdisk.c
+++ b/sbin/recoverdisk/recoverdisk.c
@@ -715,7 +715,7 @@ main(int argc, char * const argv[])
int64_t sz;
int error;
time_t t_now, t_report, t_save;
- unsigned snapshot = 60, unsaved;
+ time_t snapshot = 60, unsaved;
setbuf(stdout, NULL);
setbuf(stderr, NULL);
diff --git a/sbin/swapon/tests/swapon_test.sh b/sbin/swapon/tests/swapon_test.sh
index b6d31ecaeed0..a04bb36cc49e 100755
--- a/sbin/swapon/tests/swapon_test.sh
+++ b/sbin/swapon/tests/swapon_test.sh
@@ -31,7 +31,10 @@ attach_mdX_head()
attach_mdX_body()
{
# if the swapfile is too small (like 1k) then mdconfig hangs looking up the md
- atf_check -s exit:0 -x "truncate -s 10k swapfile"
+ # but the swapfile must also be larger than one kernel page
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize * 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
atf_check -s exit:0 -o save:fstab.out -x "echo 'md31 none swap sw,file=swapfile 0 0'"
atf_check -s exit:0 -o match:"swapon: adding /dev/md31 as swap device" -x "swapon -F fstab.out -a"
}
@@ -49,7 +52,10 @@ attach_dev_mdX_head()
attach_dev_mdX_body()
{
# if the swapfile is too small (like 1k) then mdconfig hangs looking up the md
- atf_check -s exit:0 -x "truncate -s 10k swapfile"
+ # but the swapfile must also be larger than one kernel page
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize * 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
atf_check -s exit:0 -o save:fstab.out -x "echo '/dev/md32 none swap sw,file=swapfile 0 0'"
atf_check -s exit:0 -o match:"swapon: adding /dev/md32 as swap device" -x "swapon -F fstab.out -a"
}
@@ -67,7 +73,10 @@ attach_md_head()
attach_md_body()
{
# if the swapfile is too small (like 1k) then mdconfig hangs looking up the md
- atf_check -s exit:0 -x "truncate -s 10k swapfile"
+ # but the swapfile must also be larger than one kernel page
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize * 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
atf_check -s exit:0 -o save:fstab.out -x "echo 'md none swap sw,file=swapfile 0 0'"
atf_check -s exit:0 -o match:"swapon: adding /dev/md[0-9][0-9]* as swap device" -x "swapon -F fstab.out -a"
}
@@ -85,7 +94,10 @@ attach_dev_md_head()
attach_dev_md_body()
{
# if the swapfile is too small (like 1k) then mdconfig hangs looking up the md
- atf_check -s exit:0 -x "truncate -s 10k swapfile"
+ # but the swapfile must also be larger than one kernel page
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize * 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
atf_check -s exit:0 -o save:fstab.out -x "echo '/dev/md none swap sw,file=swapfile 0 0'"
atf_check -s exit:0 -o match:"swapon: adding /dev/md[0-9][0-9]* as swap device" -x "swapon -F fstab.out -a"
}
@@ -103,7 +115,10 @@ attach_mdX_eli_head()
attach_mdX_eli_body()
{
# if the swapfile is too small (like 1k) then mdconfig hangs looking up the md
- atf_check -s exit:0 -x "truncate -s 10k swapfile"
+ # but need a swapfile bigger than one kernel page
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize * 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
atf_check -s exit:0 -o save:fstab.out -x "echo 'md33.eli none swap sw,file=swapfile 0 0'"
atf_check -s exit:0 -o match:"swapon: adding /dev/md33.eli as swap device" -x "swapon -F fstab.out -a"
}
@@ -121,7 +136,10 @@ attach_dev_mdX_eli_head()
attach_dev_mdX_eli_body()
{
# if the swapfile is too small (like 1k) then mdconfig hangs looking up the md
- atf_check -s exit:0 -x "truncate -s 10k swapfile"
+ # but need a swapfile bigger than one kernel page
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize * 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
atf_check -s exit:0 -o save:fstab.out -x "echo '/dev/md34.eli none swap sw,file=swapfile 0 0'"
atf_check -s exit:0 -o match:"swapon: adding /dev/md34.eli as swap device" -x "swapon -F fstab.out -a"
}
@@ -139,7 +157,10 @@ attach_md_eli_head()
attach_md_eli_body()
{
# if the swapfile is too small (like 1k) then mdconfig hangs looking up the md
- atf_check -s exit:0 -x "truncate -s 10k swapfile"
+ # but need a swapfile bigger than one kernel page
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize * 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
atf_check -s exit:0 -o save:fstab.out -x "echo 'md.eli none swap sw,file=swapfile 0 0'"
atf_check -s exit:0 -o match:"swapon: adding /dev/md[0-9][0-9]*.eli as swap device" -x "swapon -F fstab.out -a"
}
@@ -157,7 +178,10 @@ attach_dev_md_eli_head()
attach_dev_md_eli_body()
{
# if the swapfile is too small (like 1k) then mdconfig hangs looking up the md
- atf_check -s exit:0 -x "truncate -s 10k swapfile"
+ # but need a swapfile bigger than one kernel page
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize * 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
atf_check -s exit:0 -o save:fstab.out -x "echo '/dev/md.eli none swap sw,file=swapfile 0 0'"
atf_check -s exit:0 -o match:"swapon: adding /dev/md[0-9][0-9]*.eli as swap device" -x "swapon -F fstab.out -a"
}
@@ -167,6 +191,24 @@ attach_dev_md_eli_cleanup()
}
###
+
+atf_test_case attach_too_small
+attach_too_small_head()
+{
+ atf_set "descr" "should refuse to attach if smaller than one kernel page size"
+}
+attach_too_small_body()
+{
+ # Need a swapfile smaller than the kernel page size
+ pagesize=$(sysctl -n hw.pagesize)
+ minsize=$(( pagesize / 2 ))
+ atf_check -s exit:0 -x "truncate -s $minsize swapfile"
+ atf_check -s exit:0 -o save:fstab.out -x "echo 'md35 none swap sw,file=swapfile 0 0'"
+ atf_check -s exit:1 -e match:"swapon: /dev/md35: NSWAPDEV limit reached" -x "swapon -F fstab.out -a"
+ atf_check -s exit:0 -x "mdconfig -d -u 35"
+}
+
+###
atf_init_test_cases()
{
atf_add_test_case attach_mdX
@@ -178,4 +220,6 @@ atf_init_test_cases()
atf_add_test_case attach_dev_mdX_eli
atf_add_test_case attach_md_eli
atf_add_test_case attach_dev_md_eli
+
+ atf_add_test_case attach_too_small
}
diff --git a/sbin/zfsbootcfg/zfsbootcfg.8 b/sbin/zfsbootcfg/zfsbootcfg.8
index 5e7f02b2578c..3831adfc81bd 100644
--- a/sbin/zfsbootcfg/zfsbootcfg.8
+++ b/sbin/zfsbootcfg/zfsbootcfg.8
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd July 22, 2020
+.Dd July 28, 2025
.Dt ZFSBOOTCFG 8
.Os
.Sh NAME
@@ -44,14 +44,11 @@
is used to set
.Xr boot.config 5 Ns -style
options to be used by
-.Xr zfsboot 8 ,
.Xr gptzfsboot 8
or
.Xr loader 8
the next time the machine is booted.
Once
-.Xr zfsboot 8
-or
.Xr gptzfsboot 8
or
.Xr loader 8
@@ -130,8 +127,7 @@ To clear the boot options:
.Xr boot.config 5 ,
.Xr bectl 8 ,
.Xr gptzfsboot 8 ,
-.Xr loader 8 ,
-.Xr zfsboot 8
+.Xr loader 8
.Sh HISTORY
.Nm
appeared in
diff --git a/share/man/man4/usbhid.4 b/share/man/man4/usbhid.4
index 5109bbe72de6..e5ba370cd025 100644
--- a/share/man/man4/usbhid.4
+++ b/share/man/man4/usbhid.4
@@ -21,7 +21,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd January 12, 2021
+.Dd July 30, 2025
.Dt USBHID 4
.Os
.Sh NAME
@@ -60,7 +60,7 @@ and make its priority greater than other USB HID drivers, such as
.Xr ums 4 ,
and
.Xr uhid 4 .
-Default is 0.
+Default is 1.
.El
.Bl -tag -width indent
.It Va hw.usb.usbhid.debug
diff --git a/share/man/man4/vtnet.4 b/share/man/man4/vtnet.4
index 270366488a98..8b99cd9f17b9 100644
--- a/share/man/man4/vtnet.4
+++ b/share/man/man4/vtnet.4
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd January 22, 2012
+.Dd July 29, 2025
.Dt VTNET 4
.Os
.Sh NAME
@@ -70,6 +70,11 @@ prompt before booting the kernel or stored in
.It Va hw.vtnet. Ns Ar X Ns Va .csum_disable
This tunable disables receive and send checksum offload.
The default value is 0.
+.It Va hw.vtnet.fixup_needs_csum
+.It Va hw.vtnet. Ns Ar X Ns Va .fixup_needs_csum
+This tunable enforces the calculation of a valid checksum for NEEDS_CSUM
+packets.
+The default value is 0.
.It Va hw.vtnet.tso_disable
.It Va hw.vtnet. Ns Ar X Ns Va .tso_disable
This tunable disables TSO.
@@ -91,6 +96,22 @@ The number of queue pairs used is the lesser of the maximum supported by the
driver and the hypervisor, the number of CPUs present in the guest, and this
tunable if not zero.
The default value is 0.
+.It Va hw.vtnet.tso_maxlen
+.It Va hw.vtnet. Ns Ar X Ns Va .tso_maxlen
+This tunable sets the TSO burst limit.
+The default value is 65535.
+.It Va hw.vtnet.rx_process_limit
+.It Va hw.vtnet. Ns Ar X Ns Va .rx_process_limit
+This tunable sets the number of RX segments processed in one pass.
+The default value is 1024.
+.It Va hw.vtnet.lro_entry_count
+.It Va hw.vtnet. Ns Ar X Ns Va .lro_entry_count
+This tunable sets the software LRO entry count.
+The default value is 128, the minimum value is 8.
+.It Va hw.vtnet.lro_mbufq_depth
+.It Va hw.vtnet. Ns Ar X Ns Va .lro_mbufq_depth
+This tunable sets the depth of the software LRO mbuf queue.
+The default value is 0.
.It Va hw.vtnet.altq_disable
This tunable disables ALTQ support, allowing the use of multiqueue instead.
This option applies to all interfaces.
diff --git a/share/man/man5/core.5 b/share/man/man5/core.5
index 8efc8c970014..628fdb7920bb 100644
--- a/share/man/man5/core.5
+++ b/share/man/man5/core.5
@@ -25,7 +25,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd November 12, 2023
+.Dd July 17, 2025
.Dt CORE 5
.Os
.Sh NAME
@@ -48,26 +48,6 @@ a system crash.
(In this event, the decision to save the core file is arbitrary, see
.Xr savecore 8 . )
.Pp
-The maximum size of a core file is limited by the
-.Dv RLIMIT_CORE
-.Xr setrlimit 2
-limit.
-Files which would be larger than the limit are not created.
-.Pp
-With a large limit, a process that had mapped a very large,
-and perhaps sparsely populated, virtual memory region, could take
-a very long time to create core dumps.
-The system ignores all signals sent to a process writing a core file, except
-.Dv SIGKILL
-which terminates the writing and causes immediate exit of the process.
-The behavior of
-.Dv SIGKILL
-can be disabled by setting tunable
-.Xr sysctl 8
-variable
-.Va kern.core_dump_can_intr
-to zero.
-.Pp
The name of the file is controlled via the
.Xr sysctl 8
variable
@@ -107,6 +87,26 @@ yielding the traditional
.Fx
behaviour.
.Pp
+The maximum size of a core file is limited by the
+.Dv RLIMIT_CORE
+.Xr setrlimit 2
+limit.
+Files which would be larger than the limit are not created.
+.Pp
+With a large limit, a process that had mapped a very large,
+and perhaps sparsely populated, virtual memory region, could take
+a very long time to create core dumps.
+The system ignores all signals sent to a process writing a core file, except
+.Dv SIGKILL
+which terminates the writing and causes immediate exit of the process.
+The behavior of
+.Dv SIGKILL
+can be disabled by setting tunable
+.Xr sysctl 8
+variable
+.Va kern.core_dump_can_intr
+to zero.
+.Pp
By default, a process that changes user or group credentials whether
real or effective will not create a corefile.
This behaviour can be
@@ -116,11 +116,13 @@ variable
.Va kern.sugid_coredump
to 1.
.Pp
-Corefiles can be compressed by the kernel if the following item
-is included in the kernel configuration file:
+Corefiles can be compressed by the kernel if one of the following items
+is included in the kernel configuration file:
.Bl -tag -width "1234567890" -compact -offset "12345"
.It options
GZIO
+.It options
+ZSTDIO
.El
.Pp
The following sysctl control core file compression:
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index b73e47b3ef4d..5bcde3030ebc 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -69,6 +69,7 @@ MAN= accept_filter.9 \
config_intrhook.9 \
contigmalloc.9 \
copy.9 \
+ coredumper_register.9 \
counter.9 \
cpu_machdep.9 \
cpuset.9 \
@@ -905,6 +906,7 @@ MLINKS+=copy.9 copyin.9 \
copy.9 copyout.9 \
copy.9 copyout_nofault.9 \
copy.9 copystr.9
+MLINKS+=coredumper_register.9 coredumper_unregister.9
MLINKS+=counter.9 counter_u64_alloc.9 \
counter.9 counter_u64_free.9 \
counter.9 counter_u64_add.9 \
diff --git a/share/man/man9/coredumper_register.9 b/share/man/man9/coredumper_register.9
new file mode 100644
index 000000000000..f4c9eb4a1bf6
--- /dev/null
+++ b/share/man/man9/coredumper_register.9
@@ -0,0 +1,168 @@
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2025 Kyle Evans <kevans@FreeBSD.org>
+.\"
+.Dd July 23, 2025
+.Dt COREDUMPER_REGISTER 9
+.Os
+.Sh NAME
+.Nm coredumper_register ,
+.Nm coredumper_unregister
+.Nd loadable user coredumper support
+.Sh SYNOPSIS
+.In sys/ucoredump.h
+.Ft void
+.Fn coredumper_register "struct coredumper *cd"
+.Ft void
+.Fn coredumper_unregister "struct coredumper *cd"
+.Pp
+.Ft int
+.Fn coredumper_probe_fn "struct thread *td"
+.Ft int
+.Fn coredumper_handle_fn "struct thread *td" "off_t limit"
+.Bd -literal
+/* Incomplete, but the useful members are depicted here. */
+struct coredumper {
+ const char *cd_name;
+ coredumper_probe_fn *cd_probe;
+ coredumper_handle_fn *cd_handle;
+};
+.Ed
+.Pp
+.Ft int
+.Fn coredump_init_fn "const struct coredump_writer *" \
+"const struct coredump_params *"
+.Ft int
+.Fn coredump_write_fn "const struct coredump_writer *" "const void *" "size_t" \
+"off_t" "enum uio_seg" "struct ucred *" "size_t *" "struct thread *"
+.Ft int
+.Fn coredump_extend_fn "const struct coredump_writer *" "off_t" "struct ucred *"
+.Bd -literal
+struct coredump_writer {
+ void *ctx;
+ coredump_init_fn *init_fn;
+ coredump_write_fn *write_fn;
+ coredump_extend_fn *extend_fn;
+};
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+mechanism provides a path for kernel modules to register a new user process core
+dumper.
+The expected use of
+.Nm
+is for a module to define the fields of the struct coredumper listed above, then
+call
+.Fn coredumper_register
+at
+.Dv MOD_LOAD
+time.
+A corresponding
+.Fn coredumper_unregister
+should be called at
+.Dv MOD_UNLOAD
+time.
+Note that
+.Fn coredumper_unregister
+will block until the specified coredumper is no longer processing coredumps.
+.Pp
+When a user process is preparing to start dumping core, the kernel will execute
+the
+.Fn cd_probe
+function for each coredumper currently registered.
+The
+.Fn cd_probe
+function is expected to return either -1 if it would decline to dump the
+process, or a priority level greater than 0.
+The coredumper with the highest priority will handle the coredump.
+The following default priorities are defined:
+.Bl -tag -width indent
+.It Dv COREDUMPER_NOMATCH
+This dumper declines dumping the process.
+.It Dv COREDUMPER_GENERIC
+This dumper will dump the process at the lowest priority.
+This priority is not recommended, as the default vnode dumper will bid at
+.Dv COREDUMPER_GENERIC
+as well.
+.It Dv COREDUMPER_SPECIAL
+This dumper provides special behavior, and will dump the process at a higher
+priority.
+.It Dv COREDUMPER_HIGHPRIORITY
+This dumper would prefer to handle this coredump.
+This may be used by, for instance, a custom or vendor-specific coredump
+mechanism that wishes to preempt others.
+.El
+.Pp
+Note that this system has been designed such that the
+.Fn cd_probe
+function can examine the process in question and make an informed decision.
+Different processes being dumped could probe at different priorities in the
+same coredumper.
+.Pp
+Once the highest priority coredumper has been selected, the
+.Fn cd_handle
+function will be invoked.
+The
+.Fn cd_handle
+will receive both the thread and the
+.Dv RLIMIT_CORE
+.Xr setrlimit 2
+.Fa limit .
+The proc lock will be held on entry, and should be unlocked before the handler
+returns.
+The
+.Fa limit
+is typically passed to the
+.Fn sv_coredump
+that belongs to the process's
+.Va p_sysent .
+.Pp
+The
+.Fn cd_handle
+function should return either 0 if the dump was successful, or an appropriate
+.Xr errno 2
+otherwise.
+.Ss Customized Coredump Writers
+Custom coredumpers can define their own
+.Dv coredump_writer
+to pass to
+.Fn sv_coredump .
+.Pp
+The
+.Va ctx
+member is opaque and only to be used by the coredumper itself.
+.Pp
+The
+.Va init_fn
+function, if it's provided, will be called by the
+.Fn sv_coredump
+implementation before any data is to be written.
+This allows the writer implementation to record any coredump parameters that it
+might need to capture, or set up the object to be written to.
+.Pp
+The
+.Va write_fn
+function will be called by the
+.Fn sv_coredump
+implementation to write out data.
+The
+.Va extend_fn
+function will be called to enlarge the coredump, in the sense that a hole is
+created in any difference between the current size and the new size.
+For convenience, the
+.Fn core_vn_write
+and
+.Fn core_vn_extend
+functions used by the vnode coredumper are exposed in
+.In sys/ucoredump.h ,
+and the
+.Dv coredump_vnode_ctx
+defined there should be populated with the vnode to write to.
+.Sh SEE ALSO
+.Xr setrlimit 2 ,
+.Xr core 5
+.Sh AUTHORS
+This manual page was written by
+.An Kyle Evans Aq Mt kevans@FreeBSD.org .
diff --git a/share/man/man9/domainset.9 b/share/man/man9/domainset.9
index 816ce29f04f7..702c9f83a88b 100644
--- a/share/man/man9/domainset.9
+++ b/share/man/man9/domainset.9
@@ -22,7 +22,7 @@
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd April 14, 2021
+.Dd June 24, 2025
.Dt DOMAINSET 9
.Os
.Sh NAME
@@ -54,6 +54,8 @@ struct domainset {
.Ft struct domainset *
.Fn domainset_create "const struct domainset *key"
.Ft int
+.Fn domainset_populate "struct domainset *domain" "domainset_t *mask" "int policy" "size_t mask_size"
+.Ft int
.Fn sysctl_handle_domainset "SYSCTL_HANDLER_ARGS"
.Sh DESCRIPTION
The
@@ -137,6 +139,7 @@ These policies should be used in preference to
to avoid blocking indefinitely on a
.Dv M_WAITOK
request.
+.Pp
The
.Fn domainset_create
function takes a partially filled in domainset as a key and returns a
@@ -148,6 +151,17 @@ is an immutable type that is shared among all matching keys and must
not be modified after return.
.Pp
The
+.Fn domainset_populate
+function fills a
+.Vt domainset
+struct using a domain mask and policy.
+It is used for validating and
+translating a domain mask and policy into a
+.Vt domainset
+struct when creating a custom domainset using
+.Fn domainset_create .
+.Pp
+The
.Fn sysctl_handle_domainset
function is provided as a convenience for modifying or viewing domainsets
that are not accessible via
diff --git a/share/man/man9/mbuf.9 b/share/man/man9/mbuf.9
index 0262c598ed18..e4f30962ccab 100644
--- a/share/man/man9/mbuf.9
+++ b/share/man/man9/mbuf.9
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd December 28, 2023
+.Dd August 1, 2025
.Dt MBUF 9
.Os
.\"
@@ -1091,7 +1091,7 @@ network code, when data must be encrypted or otherwise
altered prior to transmission.
.El
.Sh HARDWARE-ASSISTED CHECKSUM CALCULATION
-This section currently applies to TCP/IP only.
+This section currently applies to SCTP, TCP, and UDP over IP only.
In order to save the host CPU resources, computing checksums is
offloaded to the network interface hardware if possible.
The
@@ -1102,8 +1102,7 @@ of a packet contains two fields used for that purpose,
.Vt int Va csum_flags
and
.Vt int Va csum_data .
-The meaning of those fields depends on the direction a packet flows in,
-and on whether the packet is fragmented.
+The meaning of those fields depends on whether the packet is fragmented.
Henceforth,
.Va csum_flags
or
@@ -1117,14 +1116,14 @@ in the
.Vt mbuf chain
containing the packet.
.Pp
-On output, checksum offloading is attempted after the outgoing
-interface has been determined for a packet.
+When a packet is sent by SCTP, TCP, or UDP, the computation of the checksum
+is delayed until the outgoing interface has been determined for a packet.
The interface-specific field
.Va ifnet.if_data.ifi_hwassist
(see
.Xr ifnet 9 )
-is consulted for the capabilities of the interface to assist in
-computing checksums.
+is consulted by IP for the capabilities of the network interface selected for
+output to assist in computing checksums.
The
.Va csum_flags
field of the packet header is set to indicate which actions the interface
@@ -1135,12 +1134,15 @@ such actions will never be requested through
.Va csum_flags .
.Pp
The flags demanding a particular action from an interface are as follows:
-.Bl -tag -width ".Dv CSUM_TCP" -offset indent
+.Bl -tag -width ".Dv CSUM_SCTP" -offset indent
.It Dv CSUM_IP
The IP header checksum is to be computed and stored in the
corresponding field of the packet.
The hardware is expected to know the format of an IP header
to determine the offset of the IP checksum field.
+.It Dv CSUM_SCTP
+The SCTP checksum is to be computed.
+(See below.)
.It Dv CSUM_TCP
The TCP checksum is to be computed.
(See below.)
@@ -1149,17 +1151,19 @@ The UDP checksum is to be computed.
(See below.)
.El
.Pp
-Should a TCP or UDP checksum be offloaded to the hardware,
+Should an SCTP, TCP, or UDP checksum be offloaded to the hardware,
the field
.Va csum_data
will contain the byte offset of the checksum field relative to the
end of the IP header.
-In this case, the checksum field will be initially
-set by the TCP/IP module to the checksum of the pseudo header
+In the case of TCP or UDP, the checksum field will be initially
+set by the TCP or UDP implementation to the checksum of the pseudo header
defined by the TCP and UDP specifications.
+In the case of SCTP, the checksum field will be initially
+set by the SCTP implementation to 0.
.Pp
-On input, an interface indicates the actions it has performed
-on a packet by setting one or more of the following flags in
+When a packet is received by an interface, it indicates the actions it has
+performed on a packet by setting one or more of the following flags in
.Va csum_flags
associated with the packet:
.Bl -tag -width ".Dv CSUM_IP_CHECKED" -offset indent
@@ -1187,13 +1191,13 @@ to obtain the final checksum to be used for TCP or UDP validation purposes.
.El
.Pp
If a particular network interface just indicates success or
-failure of TCP or UDP checksum validation without returning
+failure of SCTP, TCP, or UDP checksum validation without returning
the exact value of the checksum to the host CPU, its driver can mark
.Dv CSUM_DATA_VALID
-and
-.Dv CSUM_PSEUDO_HDR
in
-.Va csum_flags ,
+.Va csum_flags
+as well as, for TCP and UDP,
+.Dv CSUM_PSEUDO_HDR
and set
.Va csum_data
to
@@ -1203,6 +1207,28 @@ It is a peculiarity of the algorithm used that the Internet checksum
calculated over any valid packet will be
.Li 0xFFFF
as long as the original checksum field is included.
+Note that for SCTP the value of
+.Va csum_data
+is not relevant and
+.Dv CSUM_PSEUDO_HDR
+in
+.Va csum_flags
+is not set, since SCTP does not use a pseudo header checksum.
+.Pp
+If IP delivers a packet with the flags
+.Dv CSUM_SCTP ,
+.Dv CSUM_TCP ,
+or
+.Dv CSUM_UDP
+set in
+.Va csum_flags
+to a local SCTP, TCP, or UDP stack, the packet will be processed without
+computing or validating the checksum, since the packet has not been on the
+wire.
+This can happen if the packet was handled by a virtual interface such as
+.Xr tap 4
+or
+.Xr epair 4 .
.Sh STRESS TESTING
When running a kernel compiled with the option
.Dv MBUF_STRESS_TEST ,
diff --git a/share/man/man9/style.9 b/share/man/man9/style.9
index 484b4f144b2e..26c7a3b2aa64 100644
--- a/share/man/man9/style.9
+++ b/share/man/man9/style.9
@@ -22,7 +22,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd March 27, 2025
+.Dd July 28, 2025
.Dt STYLE 9
.Os
.Sh NAME
@@ -766,8 +766,7 @@ to any pointer type.
.Pp
Values in
.Ic return
-statements should be enclosed in parentheses where possible.
-For example, parentheses cannot be used if the value is a C++ braced-init-list.
+statements should be enclosed in parentheses.
.Pp
Use
.Xr err 3
@@ -918,6 +917,161 @@ Only use the annotation for the entire if statement,
rather than individual clauses.
Do not add these annotations without empirical evidence of the likelihood of the
branch.
+.Ss C++
+KNF style was originally defined as a style for C.
+C++ introduces several new idioms which do not have an existing counterpart
+in KNF C such as inline function definitions in classes.
+C++ is also not always compatible with some KNF guidelines such as
+enclosing return values in parentheses.
+For C++ code, FreeBSD aims to follow broadly accepted C++ practices while
+also following the general shape of KNF.
+This section enumerates C++ specific guidelines that differ from KNF C.
+.Pp
+The preferred suffixes for C++ source files are
+.Dq .cc
+and
+.Dq .hh .
+Header files should always use a suffix,
+unlike headers from the C++ standard library.
+.Pp
+Return values should not be enclosed in parentheses.
+When converting existing C code to C++,
+existing return values may remain in parentheses.
+.Pp
+The opening curly brace for namespace declarations should be on the first line
+similar to structure and class definitions.
+Nested namespaces should be declared using a single namespace declaration.
+.Bd -literal
+namespace foo::bar {
+}
+.Ed
+.Pp
+Member function declarations should follow the same style used for standalone
+function prototypes except that a space should be used between a function's
+return type and name.
+.Pp
+Function definitions at the top level should use a newline after the function
+type similar to C function definitions.
+.Pp
+Nested member function definitions inside of a class, structure, or union
+should not use a newline after the function type.
+Instead, these should follow the style of member function declarations.
+This is more common C++ style and is more compact for small methods such as
+getters and setters.
+.Pp
+Inline functions whose body consists of a single statement may use a single
+line for the function body.
+Inline functions with an empty body should always use a single line.
+.Bd -literal
+struct widget {
+ int foo() { return 4; }
+ int bar();
+};
+
+int
+widget::bar()
+{
+ return 6;
+}
+.Ed
+.Pp
+Default and deleted methods should be declared as a single line.
+.Bd -literal
+class box {
+ ~box() = default;
+};
+.Ed
+.Pp
+In template declarations, the
+.Ic template
+keyword and list of template parameters should be followed by a newline
+before the templated declaration.
+.Bd -literal
+template <typename T>
+class box {
+ T data;
+};
+.Ed
+.Pp
+The
+.Ic &
+for reference variables should be placed on the variable name rather
+than the type similar to the style used with
+.Ic *
+for pointers.
+.Bd -literal
+ int x;
+ int &xp = x;
+.Ed
+.Pp
+Variables may be declared at any point within a function,
+not just at the start of blocks.
+.Pp
+Standard library containers should be used in preference to
+.Xr queue 3
+or
+.Xr tree 3
+macros.
+.Pp
+.Ic nullptr
+should be used instead of
+.Dv NULL
+or 0.
+.Pp
+Use standard library types for managing strings such as
+.Vt std::string
+and
+.Vt std::string_view
+rather than
+.Vt "char *"
+and
+.Vt "const char *" .
+C types may be used when interfacing with C code.
+.Pp
+The
+.Ic auto
+keyword can be used in various contexts which improve readability.
+Examples include iterators, non-trivial types of ranged-for values,
+and return values of obvious types,
+such as
+.Ic static_cast
+or
+.Fn std::make_unique .
+Place any qualifiers before
+.Ic auto ,
+for example:
+.Ic const auto .
+.Pp
+Use the
+.Vt std::unique_ptr
+and
+.Vt std::shared_ptr
+smart pointers to manage the lifetime of dynamically allocated objects
+instead of
+.Ic new
+and
+.Ic delete .
+Construct smart pointers with
+.Fn std::make_unique
+or
+.Fn std::make_shared .
+Do not use
+.Xr malloc 3
+except when necessary to interface with C code.
+.Pp
+Do not import any namespaces with
+.Ic using
+at global scope in header files.
+Namespaces other than the
+.Ic std
+namespace (for example,
+.Ic std::literals )
+may be imported in source files and in function scope in header files.
+.Pp
+Define type aliases using
+.Ic using
+instead of
+.Ic typedef .
.Sh FILES
.Bl -tag -width indent
.It Pa /usr/src/tools/build/checkstyle9.pl
diff --git a/share/man/man9/ucred.9 b/share/man/man9/ucred.9
index e9fe2e1d02fc..38759bddb5b0 100644
--- a/share/man/man9/ucred.9
+++ b/share/man/man9/ucred.9
@@ -24,7 +24,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
-.Dd January 23, 2019
+.Dd July 29, 2025
.Dt UCRED 9
.Os
.Sh NAME
@@ -54,6 +54,9 @@
.Ft void
.Fn crsetgroups "struct ucred *cr" "int ngrp" "gid_t *groups"
.Ft void
+.Fn crsetgroups_and_egid "struct ucred *cr" "int ngrp" "gid_t *groups" \
+ "gid_t default_egid"
+.Ft void
.Fn cru2x "struct ucred *cr" "struct xucred *xcr"
.Sh DESCRIPTION
The
@@ -110,17 +113,28 @@ The actual copying is performed by
.Pp
The
.Fn crsetgroups
-function sets the
+and
+.Fn crsetgroups_and_egid
+functions set the
.Va cr_groups
and
.Va cr_ngroups
variables and allocates space as needed.
-It also truncates the group list to the current maximum number of
+They also truncate the group list to the current maximum number of
groups.
No other mechanism should be used to modify the
.Va cr_groups
-array except for updating the primary group via assignment to
-.Va cr_groups[0] .
+array.
+Note that
+.Fn crsetgroups_and_egid
+will interpret the first element of
+.Va groups
+as the new effective GID and the rest of the array as the supplementary groups,
+and
+.Va default_egid
+will be used as the new effective GID only if
+.Va groups
+is empty.
.Pp
The
.Fn cru2x
diff --git a/share/misc/committers-ports.dot b/share/misc/committers-ports.dot
index 7bb3d936e5e5..fb6c168f1425 100644
--- a/share/misc/committers-ports.dot
+++ b/share/misc/committers-ports.dot
@@ -153,6 +153,7 @@ ak [label="Alex Kozlov\nak@FreeBSD.org\n2012/02/29"]
ale [label="Alex Dupre\nale@FreeBSD.org\n2004/01/12"]
alepulver [label="Alejandro Pulver\nalepulver@FreeBSD.org\n2006/04/01"]
alexey [label="Alexey Degtyarev\nalexey@FreeBSD.org\n2013/11/09"]
+alven [label="Älven\nalven@FreeBSD.org\n2025/07/28"]
amdmi3 [label="Dmitry Marakasov\namdmi3@FreeBSD.org\n2008/06/19"]
antoine [label="Antoine Brodin\nantoine@FreeBSD.org\n2013/04/03"]
arrowd [label="Gleb Popov\narrowd@FreeBSD.org\n2018/05/18"]
@@ -420,6 +421,7 @@ culot -> marino
culot -> pi
culot -> wg
+db -> alven
db -> tj
db -> shurd
@@ -865,6 +867,7 @@ wxs -> zi
ygy -> yasu
+yuri -> alven
yuri -> rea
zirias -> jbo
diff --git a/share/misc/organization.dot b/share/misc/organization.dot
index 1a88bc71b14e..73e879578dd7 100644
--- a/share/misc/organization.dot
+++ b/share/misc/organization.dot
@@ -30,7 +30,7 @@ doccommitters [label="Doc/www Committers\ndoc-committers@FreeBSD.org"]
doceng [label="Documentation Engineering Team\ndoceng@FreeBSD.org\nbcr, gabor, gjb, hrs,\nblackend, ryusuke, wblock"]
pkgmgr [label="Package Management Team\npkgmgr@FreeBSD.org\nantoine, bdrewery"]
portscommitters [label="Ports Committers\nports-committers@FreeBSD.org"]
-portmgr [label="Port Management Team\nportmgr@FreeBSD.org\nbapt, bofh, mat,\npizzamig, rene, tcberner"]
+portmgr [label="Port Management Team\nportmgr@FreeBSD.org\nbapt, dvl, mat,\npizzamig, rene, tcberner"]
portmgrsecretary [label="Port Management Team Secretary\nportmgr-secretary@FreeBSD.org\nrene"]
re [label="Primary Release Engineering Team\nre@FreeBSD.org\ngjb, kib,\nblackend, delphij, cperciva"]
secteam [label="Security Team\nsecteam@FreeBSD.org\ndelphij,\ndes, markj,\nemaste,\ngjb, gordon,\noshogbo, philip"]
diff --git a/share/mk/Makefile b/share/mk/Makefile
index 837f7da68b4b..4ab5c8cc314b 100644
--- a/share/mk/Makefile
+++ b/share/mk/Makefile
@@ -10,6 +10,7 @@ UPDATE_DEPENDFILE= no
.include <src.opts.mk>
+PACKAGE= bmake
FILES= \
auto.obj.mk \
bsd.README \
diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk
index f21d519160d2..88202aeb78fe 100644
--- a/share/mk/src.libnames.mk
+++ b/share/mk/src.libnames.mk
@@ -487,6 +487,7 @@ _DP_be= zfs spl nvpair zfsbootenv
_DP_netmap=
_DP_ifconfig= m
_DP_pfctl= nv
+_DP_krb5ss= edit
# OFED support
.if ${MK_OFED} != "no"
diff --git a/share/vt/fonts/INDEX.fonts b/share/vt/fonts/INDEX.fonts
index 8a36ccfbf211..dee9e855c42e 100644
--- a/share/vt/fonts/INDEX.fonts
+++ b/share/vt/fonts/INDEX.fonts
@@ -25,14 +25,6 @@ MENU:da:Vælg skrifttypen til din terminal
MENU:de:Wählen Sie Ihre Schrift
MENU:fr:Choisissez votre fonte écran
-#
-# The font definition for "en" is the fall-back font for
-# all languages.
-# Add language specific font definitions only where required!
-#
-FONT:en:vgarom-8x14.fnt
-#
-
gallant.fnt:en:Gallant Character set, 12x22
gallant.fnt:da:Gallant-tegnsæt, 12x22
gallant.fnt:de:Gallant Zeichensatz, 12x22
diff --git a/share/vt/keymaps/INDEX.keymaps b/share/vt/keymaps/INDEX.keymaps
index 2b1db8528e85..fd00d0e71c87 100644
--- a/share/vt/keymaps/INDEX.keymaps
+++ b/share/vt/keymaps/INDEX.keymaps
@@ -33,14 +33,6 @@ MENU:el:Επιλέξτε το πληκτρολόγιο της κονσόλας
MENU:hy:Ընտրեք ստեղնաշարի դասավորությունը
MENU:tr:Klavye düzeninizi seçiniz
-#
-# The font definition for "en" is the fall-back font for
-# all languages.
-# Add language specific font definitions only where required!
-#
-FONT:en:vgarom-8x16.hex
-
-#
am.kbd:en:Armenian phonetic layout
am.kbd:da:Armensk fonetisk layout
am.kbd:de:Armenische phonetische Tastenbelegung
diff --git a/stand/defaults/loader.conf b/stand/defaults/loader.conf
index f0843f3e930b..036479d22285 100644
--- a/stand/defaults/loader.conf
+++ b/stand/defaults/loader.conf
@@ -114,6 +114,7 @@ kernels_autodetect="YES" # Auto-detect kernel directories in /boot
#currdev="disk1s1a" # Set the current device
module_path="/boot/modules;/boot/firmware;/boot/dtb;/boot/dtb/overlays" # Set the module search path
module_blacklist="drm drm2 radeonkms i915kms amdgpu if_iwlwifi if_rtw88 if_rtw89" # Loader module blacklist
+module_blacklist="${module_blacklist} nvidia nvidia-drm nvidia-modeset"
#prompt="\\${interpret}" # Set the command prompt
#root_disk_unit="0" # Force the root disk unit number
#rootdev="disk1s1a" # Set the root filesystem
diff --git a/stand/i386/Makefile b/stand/i386/Makefile
index 768496598575..299e070d8cd5 100644
--- a/stand/i386/Makefile
+++ b/stand/i386/Makefile
@@ -18,7 +18,7 @@ SUBDIR.yes+= loader_simp
# special boot programs, 'self-extracting boot2+loader'
SUBDIR.${MK_LOADER_PXEBOOT}+= pxeldr
-SUBDIR.${MK_LOADER_ZFS}+= zfsboot gptzfsboot
+SUBDIR.${MK_LOADER_ZFS}+= gptzfsboot
.if defined(PXEBOOT_DEFAULT_INTERP)
L=${PXEBOOT_DEFAULT_INTERP}
diff --git a/stand/i386/common/bootargs.h b/stand/i386/common/bootargs.h
index dafcf6a55554..072f7ee505fd 100644
--- a/stand/i386/common/bootargs.h
+++ b/stand/i386/common/bootargs.h
@@ -88,7 +88,7 @@ struct bootargs
/*
* geli_boot_data is embedded in geli_boot_args (passed from gptboot to loader)
- * and in zfs_boot_args (passed from zfsboot and gptzfsboot to loader).
+ * and in zfs_boot_args (passed from gptzfsboot to loader).
*/
struct geli_boot_data
{
diff --git a/stand/i386/gptboot/Makefile b/stand/i386/gptboot/Makefile
index b91875d242f5..a829be6c745d 100644
--- a/stand/i386/gptboot/Makefile
+++ b/stand/i386/gptboot/Makefile
@@ -1,6 +1,6 @@
.include <bsd.init.mk>
-.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${SASRC}
+.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common
FILES= gptboot
MAN= gptboot.8
@@ -53,12 +53,12 @@ gptldr.out: gptldr.o
${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o
CLEANFILES+= gptboot.bin gptboot.out gptboot.o sio.o drv.o \
- cons.o ${OPENCRYPTO_XTS}
+ cons.o
gptboot.bin: gptboot.out
${OBJCOPY} -S -O binary gptboot.out ${.TARGET}
-gptboot.out: ${BTXCRT} gptboot.o sio.o drv.o cons.o ${OPENCRYPTO_XTS}
+gptboot.out: ${BTXCRT} gptboot.o sio.o drv.o cons.o
${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBSA32}
.include <bsd.prog.mk>
diff --git a/stand/i386/gptzfsboot/Makefile b/stand/i386/gptzfsboot/Makefile
index 0d9fa8b043df..0b67ff8cdaf4 100644
--- a/stand/i386/gptzfsboot/Makefile
+++ b/stand/i386/gptzfsboot/Makefile
@@ -1,7 +1,7 @@
.include <bsd.init.mk>
.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/gptboot \
- ${BOOTSRC}/i386/zfsboot ${BOOTSRC}/i386/common \
+ ${BOOTSRC}/i386/common \
${BOOTSRC}/common
FILES= gptzfsboot
@@ -65,7 +65,7 @@ gptldr.out: gptldr.o
${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o
OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o misc.o
-CLEANFILES+= gptzfsboot.bin gptzfsboot.out ${OBJS} ${OPENCRYPTO_XTS}
+CLEANFILES+= gptzfsboot.bin gptzfsboot.out ${OBJS}
# i386 standalone support library
LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a
@@ -73,8 +73,7 @@ LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a
gptzfsboot.bin: gptzfsboot.out
${OBJCOPY} -S -O binary gptzfsboot.out ${.TARGET}
-gptzfsboot.out: ${BTXCRT} ${OBJS} \
- ${OPENCRYPTO_XTS}
+gptzfsboot.out: ${BTXCRT} ${OBJS}
${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32}
zfsboot.o: ${ZFSSRC}/zfsimpl.c
diff --git a/stand/i386/zfsboot/zfsboot.c b/stand/i386/gptzfsboot/zfsboot.c
index 4c8eae9b65e5..4c8eae9b65e5 100644
--- a/stand/i386/zfsboot/zfsboot.c
+++ b/stand/i386/gptzfsboot/zfsboot.c
diff --git a/stand/i386/isoboot/Makefile b/stand/i386/isoboot/Makefile
index 7973f8029aa0..0049e7fd3e0a 100644
--- a/stand/i386/isoboot/Makefile
+++ b/stand/i386/isoboot/Makefile
@@ -1,7 +1,7 @@
.include <bsd.init.mk>
.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/gptboot \
- ${BOOTSRC}/i386/common ${SASRC}
+ ${BOOTSRC}/i386/common
FILES= isoboot
MAN= isoboot.8
@@ -51,12 +51,12 @@ gptldr.out: gptldr.o
${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o
CLEANFILES+= isoboot.bin isoboot.out isoboot.o sio.o drv.o \
- cons.o ${OPENCRYPTO_XTS}
+ cons.o
isoboot.bin: isoboot.out
${OBJCOPY} -S -O binary isoboot.out ${.TARGET}
-isoboot.out: ${BTXCRT} isoboot.o sio.o drv.o cons.o ${OPENCRYPTO_XTS}
+isoboot.out: ${BTXCRT} isoboot.o sio.o drv.o cons.o
${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBSA32}
.include <bsd.prog.mk>
diff --git a/stand/i386/loader/main.c b/stand/i386/loader/main.c
index fd95cf5243cf..a70b3a253b90 100644
--- a/stand/i386/loader/main.c
+++ b/stand/i386/loader/main.c
@@ -198,7 +198,7 @@ main(void)
#ifdef LOADER_ZFS_SUPPORT
/*
- * zfsboot and gptzfsboot have always passed KARGS_FLAGS_ZFS,
+ * gptzfsboot has always passed KARGS_FLAGS_ZFS,
* so if that is set along with KARGS_FLAGS_EXTARG we know we
* can interpret the extarg data as a struct zfs_boot_args.
*/
diff --git a/stand/i386/zfsboot/Makefile b/stand/i386/zfsboot/Makefile
deleted file mode 100644
index b619b84c368e..000000000000
--- a/stand/i386/zfsboot/Makefile
+++ /dev/null
@@ -1,92 +0,0 @@
-.include <bsd.init.mk>
-
-.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${BOOTSRC}/common
-
-FILES= zfsboot
-MAN= zfsboot.8
-
-BOOT_COMCONSOLE_PORT?= 0x3f8
-BOOT_COMCONSOLE_SPEED?= 115200
-B2SIOFMT?= 0x3
-
-REL1= 0x700
-ORG1= 0x7c00
-ORG2= 0x2000
-
-CFLAGS+=-DBOOTPROG=\"zfsboot\" \
- -O1 \
- -DBOOT2 \
- -DLOADER_GPT_SUPPORT \
- -DLOADER_MBR_SUPPORT \
- -DLOADER_ZFS_SUPPORT \
- -DLOADER_UFS_SUPPORT \
- -DSIOPRT=${BOOT_COMCONSOLE_PORT} \
- -DSIOFMT=${B2SIOFMT} \
- -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \
- -I${LDRSRC} \
- -I${BOOTSRC}/i386/common \
- -I${BOOTSRC}/i386/libi386 \
- -I${ZFSSRC} \
- -I${SYSDIR}/crypto/skein \
- -I${SYSDIR}/cddl/boot/zfs \
- -I${SYSDIR}/contrib/openzfs/include \
- -I${SYSDIR}/contrib/openzfs/include/os/freebsd/spl \
- -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs \
- -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \
- -I${BOOTSRC}/i386/boot2 \
- -Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \
- -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \
- -Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings
-
-CFLAGS.part.c+= -DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib
-
-CFLAGS.gcc+= --param max-inline-insns-single=100
-
-LD_FLAGS+=${LD_FLAGS_BIN}
-
-CLEANFILES+= zfsboot
-
-zfsboot: zfsboot1 zfsboot2
- cat zfsboot1 zfsboot2 > zfsboot
-
-CLEANFILES+= zfsboot1 zfsldr.out zfsldr.o
-
-zfsboot1: zfsldr.out
- ${OBJCOPY} -S -O binary zfsldr.out ${.TARGET}
-
-zfsldr.out: zfsldr.o
- ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} zfsldr.o
-
-OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o misc.o
-CLEANFILES+= zfsboot2 zfsboot.ld zfsboot.ldr zfsboot.bin zfsboot.out \
- ${OBJS}
-
-# We currently allow 256k bytes for zfsboot - in practice it could be
-# any size up to 3.5Mb but keeping it fixed size simplifies zfsldr.
-#
-BOOT2SIZE= 262144
-
-# i386 standalone support library
-LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a
-
-zfsboot2: zfsboot.ld
- @set -- `ls -l ${.ALLSRC}`; x=$$((${BOOT2SIZE}-$$5)); \
- echo "$$x bytes available"; test $$x -ge 0
- ${DD} if=${.ALLSRC} of=${.TARGET} bs=${BOOT2SIZE} conv=sync
-
-zfsboot.ld: zfsboot.ldr zfsboot.bin ${BTXKERN}
- btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l zfsboot.ldr \
- -o ${.TARGET} -P 1 zfsboot.bin
-
-zfsboot.ldr:
- :> ${.TARGET}
-
-zfsboot.bin: zfsboot.out
- ${OBJCOPY} -S -O binary zfsboot.out ${.TARGET}
-
-zfsboot.out: ${BTXCRT} ${OBJS}
- ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32}
-
-SRCS= zfsboot.c
-
-.include <bsd.prog.mk>
diff --git a/stand/i386/zfsboot/Makefile.depend b/stand/i386/zfsboot/Makefile.depend
deleted file mode 100644
index 92ab022283fd..000000000000
--- a/stand/i386/zfsboot/Makefile.depend
+++ /dev/null
@@ -1,17 +0,0 @@
-# Autogenerated - do NOT edit!
-
-DIRDEPS = \
- include \
- include/xlocale \
- lib/libmd \
- stand/i386/btx/btx \
- stand/i386/btx/lib \
- stand/libsa32 \
- stand/zfs32 \
-
-
-.include <dirdeps.mk>
-
-.if ${DEP_RELDIR} == ${_DEP_RELDIR}
-# local dependencies - needed for -jN in clean tree
-.endif
diff --git a/stand/i386/zfsboot/zfsboot.8 b/stand/i386/zfsboot/zfsboot.8
deleted file mode 100644
index a8411bc065d0..000000000000
--- a/stand/i386/zfsboot/zfsboot.8
+++ /dev/null
@@ -1,130 +0,0 @@
-.\" Copyright (c) 2014 Andriy Gapon <avg@FreeBSD.org>
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.Dd March 27, 2018
-.Dt ZFSBOOT 8
-.Os
-.Sh NAME
-.Nm zfsboot
-.Nd bootcode for ZFS on BIOS-based computers
-.Sh DESCRIPTION
-.Nm
-is used on BIOS-based computers to boot from a filesystem in
-a ZFS pool.
-.Nm
-is installed in two parts on a disk or a partition used by a ZFS pool.
-The first part, a single-sector starter boot block, is installed
-at the beginning of the disk or partition.
-The second part, a main boot block, is installed at a special offset
-within the disk or partition.
-Both areas are reserved by the ZFS on-disk specification for boot use.
-If
-.Nm
-is installed in a partition, then that partition should be made
-bootable using appropriate configuration and boot blocks described in
-.Xr boot 8 .
-.Sh BOOTING
-The
-.Nm
-boot process is very similar to that of
-.Xr gptzfsboot 8 .
-One significant difference is that
-.Nm
-does not currently support the GPT partitioning scheme.
-Thus only whole disks and MBR partitions, traditionally referred to as
-slices, are probed for ZFS disk labels.
-See the BUGS section in
-.Xr gptzfsboot 8
-for some limitations of the MBR scheme support.
-.Sh USAGE
-.Nm
-supports all the same prompt and configuration file arguments as
-.Xr gptzfsboot 8 .
-.Sh FILES
-.Bl -tag -width /boot/zfsboot -compact
-.It Pa /boot/zfsboot
-boot code binary
-.It Pa /boot.config
-parameters for the boot block
-.Pq optional
-.It Pa /boot/config
-alternative parameters for the boot block
-.Pq optional
-.El
-.Sh EXAMPLES
-.Nm
-is typically installed using
-.Xr dd 1 .
-To install
-.Nm
-on the
-.Pa ada0
-drive:
-.Bd -literal -offset indent
-dd if=/boot/zfsboot of=/dev/ada0 count=1
-dd if=/boot/zfsboot of=/dev/ada0 iseek=1 oseek=1024
-.Ed
-.Pp
-If the drive is currently in use, the GEOM safety will prevent writes
-and must be disabled before running the above commands:
-.Bd -literal -offset indent
-sysctl kern.geom.debugflags=0x10
-.Ed
-.Pp
-.Nm
-can also be installed in an MBR slice:
-.Bd -literal -offset indent
-gpart create -s mbr ada0
-gpart add -t freebsd ada0
-gpart bootcode -b /boot/boot0 ada0
-gpart set -a active -i 1 ada0
-dd if=/dev/zero of=/dev/ada0s1 count=2
-dd if=/boot/zfsboot of=/dev/ada0s1 count=1
-dd if=/boot/zfsboot of=/dev/ada0s1 iseek=1 oseek=1024
-.Ed
-.Pp
-Note that commands to create and populate a pool are not shown
-in the example above.
-.Sh SEE ALSO
-.Xr dd 1 ,
-.Xr boot.config 5 ,
-.Xr boot 8 ,
-.Xr gptzfsboot 8 ,
-.Xr loader 8 ,
-.Xr zpool 8
-.Sh HISTORY
-.Nm
-appeared in FreeBSD 7.3.
-.Sh AUTHORS
-This manual page was written by
-.An Andriy Gapon Aq avg@FreeBSD.org .
-.Sh BUGS
-Installing
-.Nm
-with
-.Xr dd 1
-is a hack.
-ZFS needs a command to properly install
-.Nm
-onto a ZFS-controlled disk or partition.
diff --git a/stand/i386/zfsboot/zfsldr.S b/stand/i386/zfsboot/zfsldr.S
deleted file mode 100644
index cd8289f952fd..000000000000
--- a/stand/i386/zfsboot/zfsldr.S
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Copyright (c) 1998 Robert Nordier
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms are freely
- * permitted provided that the above copyright notice and this
- * paragraph and the following disclaimer are duplicated in all
- * such forms.
- *
- * This software is provided "AS IS" and without any express or
- * implied warranties, including, without limitation, the implied
- * warranties of merchantability and fitness for a particular
- * purpose.
- */
-
-/* Memory Locations */
- .set MEM_ARG,0x900 # Arguments
- .set MEM_ORG,0x7c00 # Origin
- .set MEM_BUF,0x8000 # Load area
- .set MEM_BTX,0x9000 # BTX start
- .set MEM_JMP,0x9010 # BTX entry point
- .set MEM_USR,0xa000 # Client start
- .set BDA_BOOT,0x472 # Boot howto flag
-
-/* Partition Constants */
- .set PRT_OFF,0x1be # Partition offset
- .set PRT_NUM,0x4 # Partitions
- .set PRT_BSD,0xa5 # Partition type
-
-/* Misc. Constants */
- .set SIZ_PAG,0x1000 # Page size
- .set SIZ_SEC,0x200 # Sector size
- .set COPY_BLKS,0x8 # Number of blocks
- # to copy for boot2 (<= 15)
- .set COPY_BLK_SZ,0x8000 # Copy in 32k blocks; must be
- # a multiple of 16 bytes
- .set NSECT,(COPY_BLK_SZ / SIZ_SEC * COPY_BLKS)
- .globl start
- .code16
-
-/*
- * Load the rest of zfsboot2 and BTX up, copy the parts to the right locations,
- * and start it all up.
- */
-
-/*
- * Setup the segment registers to flat addressing (segment 0) and setup the
- * stack to end just below the start of our code.
- */
-start: cld # String ops inc
- xor %cx,%cx # Zero
- mov %cx,%es # Address
- mov %cx,%ds # data
- mov %cx,%ss # Set up
- mov $start,%sp # stack
-/*
- * Load the MBR and look for the first FreeBSD slice. We use the fake
- * partition entry below that points to the MBR when we call read.
- * The first pass looks for the first active FreeBSD slice. The
- * second pass looks for the first non-active FreeBSD slice if the
- * first one fails.
- */
- call check_edd # Make sure EDD works
- mov $part4,%si # Dummy partition
- xor %eax,%eax # Read MBR
- movl $MEM_BUF,%ebx # from first
- call read # sector
- mov $0x1,%cx # Two passes
-main.1: mov $MEM_BUF+PRT_OFF,%si # Partition table
- movb $0x1,%dh # Partition
-main.2: cmpb $PRT_BSD,0x4(%si) # Our partition type?
- jne main.3 # No
- jcxz main.5 # If second pass
- testb $0x80,(%si) # Active?
- jnz main.5 # Yes
-main.3: add $0x10,%si # Next entry
- incb %dh # Partition
- cmpb $0x1+PRT_NUM,%dh # In table?
- jb main.2 # Yes
- dec %cx # Do two
- jcxz main.1 # passes
-/*
- * If we get here, we didn't find any FreeBSD slices at all, so print an
- * error message and die.
- */
- mov $msg_part,%si # Message
- jmp error # Error
-
-/*
- * Ok, we have a slice and drive in %dx now, so use that to locate and
- * load boot2. %si references the start of the slice we are looking
- * for, so go ahead and load up the COPY_BLKS*COPY_BLK_SZ/SIZ_SEC sectors
- * starting at sector 1024 (i.e. after the two vdev labels). We don't
- * have do anything fancy here to allow for an extra copy of boot1 and
- * a partition table (compare to this section of the UFS bootstrap) so we
- * just load it all at 0x9000. The first part of boot2 is BTX, which wants
- * to run at 0x9000. The boot2.bin binary starts right after the end of BTX,
- * so we have to figure out where the start of it is and then move the
- * binary to 0xc000. Normally, BTX clients start at MEM_USR, or 0xa000,
- * but when we use btxld to create zfsboot2, we use an entry point of
- * 0x2000. That entry point is relative to MEM_USR; thus boot2.bin
- * starts at 0xc000.
- *
- * The load area and the target area for the client overlap so we have
- * to use a decrementing string move. We also play segment register
- * games with the destination address for the move so that the client
- * can be larger than 16k (which would overflow the zero segment since
- * the client starts at 0xc000).
- */
-main.5: mov %dx,MEM_ARG # Save args
- mov $NSECT,%cx # Sector count
- movl $1024,%eax # Offset to boot2
- mov $MEM_BTX,%ebx # Destination buffer
-main.6: pushal # Save params
- call read # Read disk
- popal # Restore
- incl %eax # Advance to
- add $SIZ_SEC,%ebx # next sector
- loop main.6 # If not last, read another
-
- mov $MEM_BTX,%bx # BTX
- mov 0xa(%bx),%si # Get BTX length and set
- add %bx,%si # %si to start of boot2
- dec %si # Set %ds:%si to point at the
- mov %si,%ax # last byte we want to copy
- shr $4,%ax # from boot2, with %si made as
- add $(COPY_BLKS*COPY_BLK_SZ/16),%ax # small as possible.
- and $0xf,%si #
- mov %ax,%ds #
- mov $(MEM_USR+2*SIZ_PAG)/16,%ax # Set %es:(-1) to point at
- add $(COPY_BLKS*COPY_BLK_SZ/16),%ax # the last byte we
- mov %ax,%es # want to copy boot2 into.
- mov $COPY_BLKS,%bx # Copy COPY_BLKS 32k blocks
-copyloop:
- add $COPY_BLK_SZ,%si # Adjust %ds:%si to point at
- mov %ds,%ax # the end of the next 32k to
- sub $COPY_BLK_SZ/16,%ax # copy from boot2
- mov %ax,%ds
- mov $COPY_BLK_SZ-1,%di # Adjust %es:%di to point at
- mov %es,%ax # the end of the next 32k into
- sub $COPY_BLK_SZ/16,%ax # which we want boot2 copied
- mov %ax,%es
- mov $COPY_BLK_SZ,%cx # Copy 32k
- std
- rep movsb
- dec %bx
- jnz copyloop
- mov %cx,%ds # Reset %ds and %es
- mov %cx,%es
- cld # Back to increment
-
-/*
- * Enable A20 so we can access memory above 1 meg.
- * Use the zero-valued %cx as a timeout for embedded hardware which do not
- * have a keyboard controller.
- */
-seta20: cli # Disable interrupts
-seta20.1: dec %cx # Timeout?
- jz seta20.3 # Yes
- inb $0x64,%al # Get status
- testb $0x2,%al # Busy?
- jnz seta20.1 # Yes
- movb $0xd1,%al # Command: Write
- outb %al,$0x64 # output port
-seta20.2: inb $0x64,%al # Get status
- testb $0x2,%al # Busy?
- jnz seta20.2 # Yes
- movb $0xdf,%al # Enable
- outb %al,$0x60 # A20
-seta20.3: sti # Enable interrupts
-
- jmp start+MEM_JMP-MEM_ORG # Start BTX
-
-
-/*
- * Read a sector from the disk. Sets up an EDD packet on the stack
- * and passes it to read. We assume that the destination address is
- * always segment-aligned.
- *
- * %eax - int - LBA to read in relative to partition start
- * %ebx - ptr - destination address
- * %dl - byte - drive to read from
- * %si - ptr - MBR partition entry
- */
-read: xor %ecx,%ecx # Get
- addl 0x8(%si),%eax # LBA
- adc $0,%ecx
- pushl %ecx # Starting absolute block
- pushl %eax # block number
- shr $4,%ebx # Convert to segment
- push %bx # Address of
- push $0 # transfer buffer
- push $0x1 # Read 1 sector
- push $0x10 # Size of packet
- mov %sp,%si # Packet pointer
- mov $0x42,%ah # BIOS: Extended
- int $0x13 # read
- jc read.1 # If error, fail
- lea 0x10(%si),%sp # Clear stack
- ret # If success, return
-read.1: mov %ah,%al # Format
- mov $read_err,%di # error
- call hex8 # code
- mov $msg_read,%si # Set the error message and
- # fall through to the error
- # routine
-/*
- * Print out the error message pointed to by %ds:(%si) followed
- * by a prompt, wait for a keypress, and then reboot the machine.
- */
-error: callw putstr # Display message
- mov $prompt,%si # Display
- callw putstr # prompt
- xorb %ah,%ah # BIOS: Get
- int $0x16 # keypress
- movw $0x1234, BDA_BOOT # Do a warm boot
- ljmp $0xffff,$0x0 # reboot the machine
-/*
- * Display a null-terminated string using the BIOS output.
- */
-putstr.0: mov $0x7,%bx # Page:attribute
- movb $0xe,%ah # BIOS: Display
- int $0x10 # character
-putstr: lodsb # Get char
- testb %al,%al # End of string?
- jne putstr.0 # No
- ret # To caller
-/*
- * Check to see if the disk supports EDD. zfsboot requires EDD and does not
- * support older C/H/S disk I/O.
- */
-check_edd: cmpb $0x80,%dl # Hard drive?
- jb check_edd.1 # No, fail to boot
- mov $0x55aa,%bx # Magic
- push %dx # Save
- movb $0x41,%ah # BIOS: Check
- int $0x13 # extensions present
- pop %dx # Restore
- jc check_edd.1 # If error, fail
- cmp $0xaa55,%bx # Magic?
- jne check_edd.1 # No, so fail
- testb $0x1,%cl # Packet interface?
- jz check_edd.1 # No, so fail
- ret # EDD ok, keep booting
-check_edd.1: mov $msg_chs,%si # Warn that CHS is
- jmp error # unsupported and fail
-/*
- * AL to hex, saving the result to [EDI].
- */
-hex8: push %ax # Save
- shrb $0x4,%al # Do upper
- call hex8.1 # 4
- pop %ax # Restore
-hex8.1: andb $0xf,%al # Get lower 4
- cmpb $0xa,%al # Convert
- sbbb $0x69,%al # to hex
- das # digit
- orb $0x20,%al # To lower case
- stosb # Save char
- ret # (Recursive)
-
-/* Messages */
-
-msg_chs: .asciz "CHS not supported"
-msg_read: .ascii "Read error: "
-read_err: .asciz "XX"
-msg_part: .asciz "Boot error"
-
-prompt: .asciz "\r\n"
-
- .org PRT_OFF,0x90
-
-/* Partition table */
-
- .fill 0x30,0x1,0x0
-part4: .byte 0x80, 0x00, 0x01, 0x00
- .byte 0xa5, 0xfe, 0xff, 0xff
- .byte 0x00, 0x00, 0x00, 0x00
- .byte 0x50, 0xc3, 0x00, 0x00 # 50000 sectors long, bleh
-
- .word 0xaa55 # Magic number
diff --git a/stand/libsa/ip.c b/stand/libsa/ip.c
index 2c2acf2eda16..6c7b0844b14d 100644
--- a/stand/libsa/ip.c
+++ b/stand/libsa/ip.c
@@ -181,6 +181,7 @@ readipv4(struct iodesc *d, void **pkt, void **payload, time_t tleft,
ssize_t n;
size_t hlen;
struct ether_header *eh;
+ void *buf;
struct ip *ip;
struct udphdr *uh;
uint16_t etype; /* host order */
@@ -195,7 +196,7 @@ readipv4(struct iodesc *d, void **pkt, void **payload, time_t tleft,
ip = NULL;
ptr = NULL;
- n = readether(d, (void **)&ptr, (void **)&ip, tleft, &etype);
+ n = readether(d, (void **)&ptr, (void **)&buf, tleft, &etype);
if (n == -1 || n < sizeof(*ip) + sizeof(*uh)) {
free(ptr);
return (-1);
@@ -205,7 +206,7 @@ readipv4(struct iodesc *d, void **pkt, void **payload, time_t tleft,
/* Need to respond to ARP requests. */
if (etype == ETHERTYPE_ARP) {
- struct arphdr *ah = (void *)ip;
+ struct arphdr *ah = buf;
if (ah->ar_op == htons(ARPOP_REQUEST)) {
/* Send ARP reply */
arp_reply(d, ah);
@@ -224,6 +225,7 @@ readipv4(struct iodesc *d, void **pkt, void **payload, time_t tleft,
return (-1);
}
+ ip = buf;
/* Check ip header */
if (ip->ip_v != IPVERSION || /* half char */
ip->ip_p != proto) {
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index 1f86538ce5f3..441330fd57b8 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -29,6 +29,8 @@
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
+#include <sys/domainset.h>
+
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
@@ -52,7 +54,10 @@ struct vm_munmap {
struct vm_memseg {
int segid;
size_t len;
- char name[VM_MAX_SUFFIXLEN + 1];
+ char name[VM_MAX_SUFFIXLEN + 1];
+ domainset_t *ds_mask;
+ size_t ds_mask_size;
+ int ds_policy;
};
struct vm_register {
diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h
index 938bea47c7f8..219f1116c728 100644
--- a/sys/arm64/include/vmm_dev.h
+++ b/sys/arm64/include/vmm_dev.h
@@ -27,6 +27,8 @@
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
+#include <sys/domainset.h>
+
#include <machine/vmm.h>
struct vm_memmap {
@@ -49,6 +51,9 @@ struct vm_memseg {
int segid;
size_t len;
char name[VM_MAX_SUFFIXLEN + 1];
+ domainset_t *ds_mask;
+ size_t ds_mask_size;
+ int ds_policy;
};
struct vm_register {
diff --git a/sys/cddl/dev/sdt/sdt.c b/sys/cddl/dev/sdt/sdt.c
index a8da618204af..0a9059104671 100644
--- a/sys/cddl/dev/sdt/sdt.c
+++ b/sys/cddl/dev/sdt/sdt.c
@@ -72,6 +72,7 @@ static void sdt_load(void);
static int sdt_unload(void);
static void sdt_create_provider(struct sdt_provider *);
static void sdt_create_probe(struct sdt_probe *);
+static void sdt_init_probe(struct sdt_probe *, linker_file_t);
static void sdt_kld_load(void *, struct linker_file *);
static void sdt_kld_unload_try(void *, struct linker_file *, int *);
@@ -204,6 +205,14 @@ sdt_create_probe(struct sdt_probe *probe)
(void)dtrace_probe_create(prov->id, mod, func, name, aframes, probe);
}
+static void
+sdt_init_probe(struct sdt_probe *probe, linker_file_t lf)
+{
+ probe->sdtp_lf = lf;
+ TAILQ_INIT(&probe->argtype_list);
+ STAILQ_INIT(&probe->tracepoint_list);
+}
+
/*
* Probes are created through the SDT module load/unload hook, so this function
* has nothing to do. It only exists because the DTrace provider framework
@@ -361,12 +370,19 @@ static void
sdt_kld_load_providers(struct linker_file *lf)
{
struct sdt_provider **prov, **begin, **end;
+ struct sdt_probe **p_begin, **p_end;
if (linker_file_lookup_set(lf, "sdt_providers_set", &begin, &end,
NULL) == 0) {
for (prov = begin; prov < end; prov++)
sdt_create_provider(*prov);
}
+
+ if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end,
+ NULL) == 0) {
+ for (struct sdt_probe **probe = p_begin; probe < p_end; probe++)
+ sdt_init_probe(*probe, lf);
+ }
}
static void
@@ -378,13 +394,8 @@ sdt_kld_load_probes(struct linker_file *lf)
if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end,
NULL) == 0) {
- for (struct sdt_probe **probe = p_begin; probe < p_end;
- probe++) {
- (*probe)->sdtp_lf = lf;
+ for (struct sdt_probe **probe = p_begin; probe < p_end; probe++)
sdt_create_probe(*probe);
- TAILQ_INIT(&(*probe)->argtype_list);
- STAILQ_INIT(&(*probe)->tracepoint_list);
- }
}
if (linker_file_lookup_set(lf, "sdt_argtypes_set", &a_begin, &a_end,
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index cfb054235489..1c6d64d6b8bc 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -1911,7 +1911,7 @@ linprocfs_doproclimits(PFS_FILL_ARGS)
"kern.sigqueue.max_pending_per_proc",
&res, &size, 0, 0, 0, 0);
if (error != 0)
- goto out;
+ continue;
rl.rlim_cur = res;
rl.rlim_max = res;
break;
@@ -1919,7 +1919,7 @@ linprocfs_doproclimits(PFS_FILL_ARGS)
error = kernel_sysctlbyname(td,
"kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
if (error != 0)
- goto out;
+ continue;
rl.rlim_cur = res;
rl.rlim_max = res;
break;
@@ -1941,9 +1941,9 @@ linprocfs_doproclimits(PFS_FILL_ARGS)
li->desc, (unsigned long long)rl.rlim_cur,
(unsigned long long)rl.rlim_max, li->unit);
}
-out:
+
lim_free(limp);
- return (error);
+ return (0);
}
/*
diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c
index 86834a7ecea8..a4be5313aa96 100644
--- a/sys/compat/linux/linux_file.c
+++ b/sys/compat/linux/linux_file.c
@@ -1792,7 +1792,7 @@ linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args)
if ((flags & MFD_ALLOW_SEALING) != 0)
shmflags |= SHM_ALLOW_SEALING;
return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL,
- memfd_name));
+ memfd_name, NULL));
}
int
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index b88f1451f1a2..5e32353c6b8e 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -1030,47 +1030,33 @@ linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
struct ucred *newcred, *oldcred;
l_gid_t *linux_gidset;
- gid_t *bsd_gidset;
int ngrp, error;
struct proc *p;
ngrp = args->gidsetsize;
- if (ngrp < 0 || ngrp >= ngroups_max + 1)
+ if (ngrp < 0 || ngrp >= ngroups_max)
return (EINVAL);
linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
if (error)
goto out;
newcred = crget();
- crextend(newcred, ngrp + 1);
+ crextend(newcred, ngrp);
p = td->td_proc;
PROC_LOCK(p);
oldcred = p->p_ucred;
crcopy(newcred, oldcred);
- /*
- * cr_groups[0] holds egid. Setting the whole set from
- * the supplied set will cause egid to be changed too.
- * Keep cr_groups[0] unchanged to prevent that.
- */
-
if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
PROC_UNLOCK(p);
crfree(newcred);
goto out;
}
- if (ngrp > 0) {
- newcred->cr_ngroups = ngrp + 1;
-
- bsd_gidset = newcred->cr_groups;
- ngrp--;
- while (ngrp >= 0) {
- bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
- ngrp--;
- }
- } else
- newcred->cr_ngroups = 1;
+ newcred->cr_ngroups = ngrp;
+ for (int i = 0; i < ngrp; i++)
+ newcred->cr_groups[i] = linux_gidset[i];
+ newcred->cr_flags |= CRED_FLAG_GROUPSET;
setsugid(p);
proc_set_cred(p, newcred);
@@ -1092,13 +1078,7 @@ linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
cred = td->td_ucred;
bsd_gidset = cred->cr_groups;
- bsd_gidsetsz = cred->cr_ngroups - 1;
-
- /*
- * cr_groups[0] holds egid. Returning the whole set
- * here will cause a duplicate. Exclude cr_groups[0]
- * to prevent that.
- */
+ bsd_gidsetsz = cred->cr_ngroups;
if ((ngrp = args->gidsetsize) == 0) {
td->td_retval[0] = bsd_gidsetsz;
@@ -1112,7 +1092,7 @@ linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
M_LINUX, M_WAITOK);
while (ngrp < bsd_gidsetsz) {
- linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
+ linux_gidset[ngrp] = bsd_gidset[ngrp];
ngrp++;
}
diff --git a/sys/compat/linux/linux_uid16.c b/sys/compat/linux/linux_uid16.c
index a0c9f1c39198..1d9a19916412 100644
--- a/sys/compat/linux/linux_uid16.c
+++ b/sys/compat/linux/linux_uid16.c
@@ -87,12 +87,11 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args)
{
struct ucred *newcred, *oldcred;
l_gid16_t *linux_gidset;
- gid_t *bsd_gidset;
int ngrp, error;
struct proc *p;
ngrp = args->gidsetsize;
- if (ngrp < 0 || ngrp >= ngroups_max + 1)
+ if (ngrp < 0 || ngrp >= ngroups_max)
return (EINVAL);
linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
error = copyin(args->gidset, linux_gidset, ngrp * sizeof(l_gid16_t));
@@ -106,12 +105,6 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args)
PROC_LOCK(p);
oldcred = crcopysafe(p, newcred);
- /*
- * cr_groups[0] holds egid. Setting the whole set from
- * the supplied set will cause egid to be changed too.
- * Keep cr_groups[0] unchanged to prevent that.
- */
-
if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
PROC_UNLOCK(p);
crfree(newcred);
@@ -121,18 +114,10 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args)
goto out;
}
- if (ngrp > 0) {
- newcred->cr_ngroups = ngrp + 1;
-
- bsd_gidset = newcred->cr_groups;
- ngrp--;
- while (ngrp >= 0) {
- bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
- ngrp--;
- }
- }
- else
- newcred->cr_ngroups = 1;
+ newcred->cr_ngroups = ngrp;
+ for (int i = 0; i < ngrp; i++)
+ newcred->cr_groups[i] = linux_gidset[i];
+ newcred->cr_flags |= CRED_FLAG_GROUPSET;
setsugid(td->td_proc);
proc_set_cred(p, newcred);
@@ -155,13 +140,7 @@ linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args)
cred = td->td_ucred;
bsd_gidset = cred->cr_groups;
- bsd_gidsetsz = cred->cr_ngroups - 1;
-
- /*
- * cr_groups[0] holds egid. Returning the whole set
- * here will cause a duplicate. Exclude cr_groups[0]
- * to prevent that.
- */
+ bsd_gidsetsz = cred->cr_ngroups;
if ((ngrp = args->gidsetsize) == 0) {
td->td_retval[0] = bsd_gidsetsz;
@@ -175,7 +154,7 @@ linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args)
linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
M_LINUX, M_WAITOK);
while (ngrp < bsd_gidsetsz) {
- linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
+ linux_gidset[ngrp] = bsd_gidset[ngrp];
ngrp++;
}
diff --git a/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h b/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h
index 47195e7d66a6..da50d25a63bb 100644
--- a/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h
+++ b/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h
@@ -45,9 +45,9 @@ struct acpi_bus_event {
lkpi_acpi_dev_get_first_match_dev(__VA_ARGS__)
ACPI_HANDLE bsd_acpi_get_handle(device_t bsddev);
-bool acpi_check_dsm(ACPI_HANDLE handle, const char *uuid, int rev,
+bool acpi_check_dsm(ACPI_HANDLE handle, const guid_t *uuid, int rev,
uint64_t funcs);
-ACPI_OBJECT * acpi_evaluate_dsm_typed(ACPI_HANDLE handle, const char *uuid,
+ACPI_OBJECT * acpi_evaluate_dsm_typed(ACPI_HANDLE handle, const guid_t *uuid,
int rev, int func, ACPI_OBJECT *argv4,
ACPI_OBJECT_TYPE type);
int register_acpi_notifier(struct notifier_block *nb);
diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h
index af19829f1cbb..ba1c0d2ac99e 100644
--- a/sys/compat/linuxkpi/common/include/linux/pci.h
+++ b/sys/compat/linuxkpi/common/include/linux/pci.h
@@ -4,7 +4,7 @@
* Copyright (c) 2010 Panasas, Inc.
* Copyright (c) 2013-2016 Mellanox Technologies, Ltd.
* All rights reserved.
- * Copyright (c) 2020-2022 The FreeBSD Foundation
+ * Copyright (c) 2020-2025 The FreeBSD Foundation
*
* Portions of this software were developed by Björn Zeeb
* under sponsorship from the FreeBSD Foundation.
@@ -362,9 +362,9 @@ bool pci_device_is_present(struct pci_dev *pdev);
int linuxkpi_pcim_enable_device(struct pci_dev *pdev);
void __iomem **linuxkpi_pcim_iomap_table(struct pci_dev *pdev);
-void *linuxkpi_pci_iomap_range(struct pci_dev *pdev, int mmio_bar,
- unsigned long mmio_off, unsigned long mmio_size);
-void *linuxkpi_pci_iomap(struct pci_dev *pdev, int mmio_bar, int mmio_size);
+void *linuxkpi_pci_iomap_range(struct pci_dev *, int,
+ unsigned long, unsigned long);
+void *linuxkpi_pci_iomap(struct pci_dev *, int, unsigned long);
void linuxkpi_pci_iounmap(struct pci_dev *pdev, void *res);
int linuxkpi_pcim_iomap_regions(struct pci_dev *pdev, uint32_t mask,
const char *name);
@@ -377,7 +377,7 @@ int linuxkpi_pci_enable_msix(struct pci_dev *pdev, struct msix_entry *entries,
/* Internal helper function(s). */
struct pci_dev *lkpinew_pci_dev(device_t);
void lkpi_pci_devres_release(struct device *, void *);
-struct pci_dev *lkpi_pci_get_device(uint16_t, uint16_t, struct pci_dev *);
+struct pci_dev *lkpi_pci_get_device(uint32_t, uint32_t, struct pci_dev *);
struct msi_desc *lkpi_pci_msi_desc_alloc(int);
struct device *lkpi_pci_find_irq_dev(unsigned int irq);
int _lkpi_pci_enable_msi_range(struct pci_dev *pdev, int minvec, int maxvec);
@@ -561,10 +561,12 @@ done:
return (pdev->bus->self);
}
-#define pci_release_region(pdev, bar) linuxkpi_pci_release_region(pdev, bar)
-#define pci_release_regions(pdev) linuxkpi_pci_release_regions(pdev)
-#define pci_request_regions(pdev, res_name) \
- linuxkpi_pci_request_regions(pdev, res_name)
+#define pci_release_region(pdev, bar) \
+ linuxkpi_pci_release_region(pdev, bar)
+#define pci_release_regions(pdev) \
+ linuxkpi_pci_release_regions(pdev)
+#define pci_request_regions(pdev, res_name) \
+ linuxkpi_pci_request_regions(pdev, res_name)
static inline void
lkpi_pci_disable_msix(struct pci_dev *pdev)
@@ -730,8 +732,10 @@ int linux_pci_register_drm_driver(struct pci_driver *pdrv);
void linux_pci_unregister_driver(struct pci_driver *pdrv);
void linux_pci_unregister_drm_driver(struct pci_driver *pdrv);
-#define pci_register_driver(pdrv) linux_pci_register_driver(pdrv)
-#define pci_unregister_driver(pdrv) linux_pci_unregister_driver(pdrv)
+#define pci_register_driver(pdrv) \
+ linux_pci_register_driver(pdrv)
+#define pci_unregister_driver(pdrv) \
+ linux_pci_unregister_driver(pdrv)
/*
* Enable msix, positive errors indicate actual number of available
@@ -740,10 +744,11 @@ void linux_pci_unregister_drm_driver(struct pci_driver *pdrv);
* NB: define added to prevent this definition of pci_enable_msix from
* clashing with the native FreeBSD version.
*/
-#define pci_enable_msix(...) linuxkpi_pci_enable_msix(__VA_ARGS__)
+#define pci_enable_msix(...) \
+ linuxkpi_pci_enable_msix(__VA_ARGS__)
-#define pci_enable_msix_range(...) \
- linux_pci_enable_msix_range(__VA_ARGS__)
+#define pci_enable_msix_range(...) \
+ linux_pci_enable_msix_range(__VA_ARGS__)
static inline int
pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
@@ -768,8 +773,8 @@ pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
return (nvec);
}
-#define pci_enable_msi(pdev) \
- linux_pci_enable_msi(pdev)
+#define pci_enable_msi(pdev) \
+ linux_pci_enable_msi(pdev)
static inline int
pci_enable_msi(struct pci_dev *pdev)
@@ -794,11 +799,12 @@ static inline void pci_disable_sriov(struct pci_dev *dev)
{
}
-#define pci_iomap_range(pdev, mmio_bar, mmio_off, mmio_size) \
- linuxkpi_pci_iomap_range(pdev, mmio_bar, mmio_off, mmio_size)
-#define pci_iomap(pdev, mmio_bar, mmio_size) \
- linuxkpi_pci_iomap(pdev, mmio_bar, mmio_size)
-#define pci_iounmap(pdev, res) linuxkpi_pci_iounmap(pdev, res)
+#define pci_iomap_range(pdev, mmio_bar, mmio_off, mmio_size) \
+ linuxkpi_pci_iomap_range(pdev, mmio_bar, mmio_off, mmio_size)
+#define pci_iomap(pdev, mmio_bar, mmio_size) \
+ linuxkpi_pci_iomap(pdev, mmio_bar, mmio_size)
+#define pci_iounmap(pdev, res) \
+ linuxkpi_pci_iounmap(pdev, res)
static inline void
lkpi_pci_save_state(struct pci_dev *pdev)
@@ -1387,10 +1393,12 @@ struct pci_dev *lkpi_pci_get_base_class(unsigned int class,
/* -------------------------------------------------------------------------- */
-#define pcim_enable_device(pdev) linuxkpi_pcim_enable_device(pdev)
-#define pcim_iomap_table(pdev) linuxkpi_pcim_iomap_table(pdev)
-#define pcim_iomap_regions(pdev, mask, name) \
- linuxkpi_pcim_iomap_regions(pdev, mask, name)
+#define pcim_enable_device(pdev) \
+ linuxkpi_pcim_enable_device(pdev)
+#define pcim_iomap_table(pdev) \
+ linuxkpi_pcim_iomap_table(pdev)
+#define pcim_iomap_regions(pdev, mask, name) \
+ linuxkpi_pcim_iomap_regions(pdev, mask, name)
static inline int
pcim_iomap_regions_request_all(struct pci_dev *pdev, uint32_t mask, char *name)
@@ -1431,7 +1439,7 @@ err:
* using pci_get_device() need to be changed to call linuxkpi_pci_get_device().
*/
static inline struct pci_dev *
-linuxkpi_pci_get_device(uint16_t vendor, uint16_t device, struct pci_dev *odev)
+linuxkpi_pci_get_device(uint32_t vendor, uint32_t device, struct pci_dev *odev)
{
return (lkpi_pci_get_device(vendor, device, odev));
diff --git a/sys/compat/linuxkpi/common/src/linux_acpi.c b/sys/compat/linuxkpi/common/src/linux_acpi.c
index d18c69d9210d..43783bb8727b 100644
--- a/sys/compat/linuxkpi/common/src/linux_acpi.c
+++ b/sys/compat/linuxkpi/common/src/linux_acpi.c
@@ -72,8 +72,9 @@ bsd_acpi_get_handle(device_t bsddev)
}
bool
-acpi_check_dsm(ACPI_HANDLE handle, const char *uuid, int rev, uint64_t funcs)
+acpi_check_dsm(ACPI_HANDLE handle, const guid_t *uuid, int rev, uint64_t funcs)
{
+ UINT64 ret;
if (funcs == 0)
return (false);
@@ -87,17 +88,20 @@ acpi_check_dsm(ACPI_HANDLE handle, const char *uuid, int rev, uint64_t funcs)
*/
funcs |= 1 << 0;
- return ((acpi_DSMQuery(handle, uuid, rev) & funcs) == funcs);
+ ret = acpi_DSMQuery(handle, (const uint8_t *)uuid, rev);
+ return ((ret & funcs) == funcs);
}
ACPI_OBJECT *
-acpi_evaluate_dsm_typed(ACPI_HANDLE handle, const char *uuid, int rev,
+acpi_evaluate_dsm_typed(ACPI_HANDLE handle, const guid_t *uuid, int rev,
int func, ACPI_OBJECT *argv4, ACPI_OBJECT_TYPE type)
{
ACPI_BUFFER buf;
+ ACPI_STATUS status;
- return (ACPI_SUCCESS(acpi_EvaluateDSMTyped(handle, uuid, rev, func,
- argv4, &buf, type)) ? (ACPI_OBJECT *)buf.Pointer : NULL);
+ status = acpi_EvaluateDSMTyped(handle, (const uint8_t *)uuid, rev, func,
+ argv4, &buf, type);
+ return (ACPI_SUCCESS(status) ? (ACPI_OBJECT *)buf.Pointer : NULL);
}
union linuxkpi_acpi_object *
@@ -105,9 +109,11 @@ acpi_evaluate_dsm(ACPI_HANDLE ObjHandle, const guid_t *guid,
UINT64 rev, UINT64 func, union linuxkpi_acpi_object *pkg)
{
ACPI_BUFFER buf;
+ ACPI_STATUS status;
- return (ACPI_SUCCESS(acpi_EvaluateDSM(ObjHandle, (const uint8_t *)guid,
- rev, func, (ACPI_OBJECT *)pkg, &buf)) ?
+ status = acpi_EvaluateDSM(ObjHandle, (const uint8_t *)guid, rev, func,
+ (ACPI_OBJECT *)pkg, &buf);
+ return (ACPI_SUCCESS(status) ?
(union linuxkpi_acpi_object *)buf.Pointer : NULL);
}
@@ -323,13 +329,13 @@ bsd_acpi_get_handle(device_t bsddev)
}
bool
-acpi_check_dsm(ACPI_HANDLE handle, const char *uuid, int rev, uint64_t funcs)
+acpi_check_dsm(ACPI_HANDLE handle, const guid_t *uuid, int rev, uint64_t funcs)
{
return (false);
}
ACPI_OBJECT *
-acpi_evaluate_dsm_typed(ACPI_HANDLE handle, const char *uuid, int rev,
+acpi_evaluate_dsm_typed(ACPI_HANDLE handle, const guid_t *uuid, int rev,
int func, ACPI_OBJECT *argv4, ACPI_OBJECT_TYPE type)
{
return (NULL);
diff --git a/sys/compat/linuxkpi/common/src/linux_pci.c b/sys/compat/linuxkpi/common/src/linux_pci.c
index 55202da00440..d5bbbea1eb2c 100644
--- a/sys/compat/linuxkpi/common/src/linux_pci.c
+++ b/sys/compat/linuxkpi/common/src/linux_pci.c
@@ -1,7 +1,7 @@
/*-
* Copyright (c) 2015-2016 Mellanox Technologies, Ltd.
* All rights reserved.
- * Copyright (c) 2020-2022 The FreeBSD Foundation
+ * Copyright (c) 2020-2025 The FreeBSD Foundation
*
* Portions of this software were developed by Björn Zeeb
* under sponsorship from the FreeBSD Foundation.
@@ -285,7 +285,7 @@ linux_pci_find(device_t dev, const struct pci_device_id **idp)
}
struct pci_dev *
-lkpi_pci_get_device(uint16_t vendor, uint16_t device, struct pci_dev *odev)
+lkpi_pci_get_device(uint32_t vendor, uint32_t device, struct pci_dev *odev)
{
struct pci_dev *pdev, *found;
@@ -752,7 +752,7 @@ linuxkpi_pcim_iomap_table(struct pci_dev *pdev)
}
static struct resource *
-_lkpi_pci_iomap(struct pci_dev *pdev, int bar, int mmio_size __unused)
+_lkpi_pci_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen __unused)
{
struct pci_mmio_region *mmio, *p;
int type;
@@ -792,25 +792,25 @@ _lkpi_pci_iomap(struct pci_dev *pdev, int bar, int mmio_size __unused)
}
void *
-linuxkpi_pci_iomap_range(struct pci_dev *pdev, int mmio_bar,
- unsigned long mmio_off, unsigned long mmio_size)
+linuxkpi_pci_iomap_range(struct pci_dev *pdev, int bar,
+ unsigned long off, unsigned long maxlen)
{
struct resource *res;
- res = _lkpi_pci_iomap(pdev, mmio_bar, mmio_size);
+ res = _lkpi_pci_iomap(pdev, bar, maxlen);
if (res == NULL)
return (NULL);
/* This is a FreeBSD extension so we can use bus_*(). */
if (pdev->want_iomap_res)
return (res);
- MPASS(mmio_off < rman_get_size(res));
- return ((void *)(rman_get_bushandle(res) + mmio_off));
+ MPASS(off < rman_get_size(res));
+ return ((void *)(rman_get_bushandle(res) + off));
}
void *
-linuxkpi_pci_iomap(struct pci_dev *pdev, int mmio_bar, int mmio_size)
+linuxkpi_pci_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
{
- return (linuxkpi_pci_iomap_range(pdev, mmio_bar, 0, mmio_size));
+ return (linuxkpi_pci_iomap_range(pdev, bar, 0, maxlen));
}
void
diff --git a/sys/conf/files b/sys/conf/files
index dd0d390962f2..b7c19fae0b8e 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3768,6 +3768,7 @@ gnu/gcov/gcov_subr.c optional gcov
kern/bus_if.m standard
kern/clock_if.m standard
+kern/coredump_vnode.c standard
kern/cpufreq_if.m standard
kern/device_if.m standard
kern/imgact_binmisc.c optional imgact_binmisc
@@ -3856,6 +3857,7 @@ kern/kern_time.c standard
kern/kern_timeout.c standard
kern/kern_tslog.c optional tslog
kern/kern_ubsan.c optional kubsan
+kern/kern_ucoredump.c standard
kern/kern_umtx.c standard
kern/kern_uuid.c standard
kern/kern_vnodedumper.c standard
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index 901da27e63f2..641001efab5e 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -368,6 +368,10 @@ dev/ice/irdma_di_if.m optional ice pci \
compile-with "${NORMAL_M} -I$S/dev/ice"
dev/ice/ice_ddp_common.c optional ice pci \
compile-with "${NORMAL_C} -I$S/dev/ice"
+dev/ice/ice_iov.c optional ice pci pci_iov \
+ compile-with "${NORMAL_C} -I$S/dev/ice"
+dev/ice/ice_vf_mbx.c optional ice pci pci_iov \
+ compile-with "${NORMAL_C} -I$S/dev/ice"
ice_ddp.c optional ice_ddp \
compile-with "${AWK} -f $S/tools/fw_stub.awk ice_ddp.fw:ice_ddp:0x01032900 -mice_ddp -c${.TARGET}" \
no-ctfconvert no-implicit-rule before-depend local \
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
index df206b314b38..9976e9cfec5d 100644
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -62,6 +62,7 @@ dev/acpi_support/acpi_wmi_if.m standard
dev/agp/agp_amd64.c optional agp
dev/agp/agp_i810.c optional agp
dev/agp/agp_via.c optional agp
+dev/amdsmu/amdsmu.c optional amdsmu pci
dev/amdsbwd/amdsbwd.c optional amdsbwd
dev/amdsmn/amdsmn.c optional amdsmn | amdtemp
dev/amdtemp/amdtemp.c optional amdtemp
diff --git a/sys/dev/amdsmu/amdsmu.c b/sys/dev/amdsmu/amdsmu.c
new file mode 100644
index 000000000000..416f875c6176
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.c
@@ -0,0 +1,466 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/amdsmu/amdsmu.h>
+
+/*
+ * Look up dev's PCI vendor/device IDs in amdsmu_products.  Returns true on a
+ * match and, if product_out is not NULL, stores the matching entry there.
+ */
+static bool
+amdsmu_match(device_t dev, const struct amdsmu_product **product_out)
+{
+	const uint16_t vid = pci_get_vendor(dev);
+	const uint16_t did = pci_get_device(dev);
+	const struct amdsmu_product *entry;
+
+	for (size_t idx = 0; idx < nitems(amdsmu_products); idx++) {
+		entry = &amdsmu_products[idx];
+		if (entry->amdsmu_vendorid != vid ||
+		    entry->amdsmu_deviceid != did)
+			continue;
+		if (product_out != NULL)
+			*product_out = entry;
+		return (true);
+	}
+	return (false);
+}
+
+/*
+ * Add an "amdsmu" child to parent when parent matches amdsmu_products and no
+ * "amdsmu" child exists yet.  A failed add is only reported.
+ */
+static void
+amdsmu_identify(driver_t *driver, device_t parent)
+{
+	if (device_find_child(parent, "amdsmu", -1) != NULL ||
+	    !amdsmu_match(parent, NULL))
+		return;
+	if (device_add_child(parent, "amdsmu", -1) != NULL)
+		return;
+	device_printf(parent, "add amdsmu child failed\n");
+}
+
+/*
+ * Probe: return ENXIO when resource_disabled() reports unit 0 of "amdsmu" as
+ * disabled or when the parent device is not in amdsmu_products; otherwise set
+ * the description and return BUS_PROBE_GENERIC.
+ */
+static int
+amdsmu_probe(device_t dev)
+{
+	const bool disabled = resource_disabled("amdsmu", 0);
+
+	if (disabled || !amdsmu_match(device_get_parent(dev), NULL))
+		return (ENXIO);
+	device_set_descf(dev, "AMD System Management Unit");
+	return (BUS_PROBE_GENERIC);
+}
+
+/*
+ * Poll SMU_REG_RESPONSE until it reads something other than SMU_RES_WAIT and
+ * return that value.  At most SMU_RES_READ_MAX reads are made, pausing
+ * SMU_RES_READ_PERIOD_US after each unsuccessful one; on timeout a message is
+ * printed and SMU_RES_WAIT is returned.
+ */
+static enum amdsmu_res
+amdsmu_wait_res(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	enum amdsmu_res status;
+	size_t attempts = SMU_RES_READ_MAX;
+
+	while (attempts-- > 0) {
+		status = amdsmu_read4(sc, SMU_REG_RESPONSE);
+		if (status != SMU_RES_WAIT)
+			return (status);
+		pause_sbt("amdsmu", ustosbt(SMU_RES_READ_PERIOD_US), 0,
+		    C_HARDCLOCK);
+	}
+
+	device_printf(dev, "timed out waiting for response from SMU\n");
+	return (SMU_RES_WAIT);
+}
+
+/*
+ * Send message msg with argument arg to the SMU and wait for its response.
+ * On SMU_RES_OK the argument register is read back into *ret (when ret is not
+ * NULL) and 0 is returned.  Otherwise returns ETIMEDOUT, EBUSY or EIO, or
+ * EINVAL for a response value not listed in enum amdsmu_res.
+ */
+static int
+amdsmu_cmd(device_t dev, enum amdsmu_msg msg, uint32_t arg, uint32_t *ret)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	enum amdsmu_res res;
+
+	/* Wait for SMU to be ready. */
+	if (amdsmu_wait_res(dev) == SMU_RES_WAIT)
+		return (ETIMEDOUT);
+
+	/* Clear previous response. */
+	amdsmu_write4(sc, SMU_REG_RESPONSE, SMU_RES_WAIT);
+
+	/*
+	 * Write out command to registers.  The argument goes first so that it
+	 * is already in place if writing the message register is what makes
+	 * the SMU start processing the command.
+	 */
+	amdsmu_write4(sc, SMU_REG_ARGUMENT, arg);
+	amdsmu_write4(sc, SMU_REG_MESSAGE, msg);
+
+	/* Wait for SMU response and handle it. */
+	res = amdsmu_wait_res(dev);
+
+	switch (res) {
+	case SMU_RES_WAIT:
+		return (ETIMEDOUT);
+	case SMU_RES_OK:
+		if (ret != NULL)
+			*ret = amdsmu_read4(sc, SMU_REG_ARGUMENT);
+		return (0);
+	case SMU_RES_REJECT_BUSY:
+		device_printf(dev, "SMU is busy\n");
+		return (EBUSY);
+	case SMU_RES_REJECT_PREREQ:
+	case SMU_RES_UNKNOWN:
+	case SMU_RES_FAILED:
+		device_printf(dev, "SMU error: %02x\n", res);
+		return (EIO);
+	}
+
+	/* Response value that is not part of enum amdsmu_res. */
+	return (EINVAL);
+}
+
+/*
+ * Query the SMU version word, split it into its four byte fields (program,
+ * major, minor, revision from most to least significant byte), store them in
+ * the softc and print them.  Returns 0 or the error from amdsmu_cmd().
+ */
+static int
+amdsmu_get_vers(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	uint32_t raw;
+	int error;
+
+	error = amdsmu_cmd(dev, SMU_MSG_GETSMUVERSION, 0, &raw);
+	if (error != 0) {
+		device_printf(dev, "failed to get SMU version\n");
+		return (error);
+	}
+
+	sc->smu_rev = raw & 0xFF;
+	sc->smu_min = (raw >> 8) & 0xFF;
+	sc->smu_maj = (raw >> 16) & 0xFF;
+	sc->smu_program = (raw >> 24) & 0xFF;
+
+	device_printf(dev, "SMU version: %d.%d.%d (program %d)\n",
+	    sc->smu_maj, sc->smu_min, sc->smu_rev, sc->smu_program);
+	return (0);
+}
+
+/*
+ * Ask the SMU which IP blocks are active, print their names, and create an
+ * "ip_blocks" sysctl node with one child node per IP block.  Returns 0, the
+ * error from amdsmu_cmd(), or ENOMEM if the "ip_blocks" node cannot be added.
+ * Failure to add an individual IP block node is reported and skipped.
+ */
+static int
+amdsmu_get_ip_blocks(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	const uint16_t deviceid = pci_get_device(dev);
+	int err;
+	struct amdsmu_metrics *m = &sc->metrics;
+	bool active;
+	/* Large enough for the description format plus any IP block name. */
+	char sysctl_descr[64];
+
+	/* Get IP block count. */
+	switch (deviceid) {
+	case PCI_DEVICEID_AMD_REMBRANDT_ROOT:
+		sc->ip_block_count = 12;
+		break;
+	case PCI_DEVICEID_AMD_PHOENIX_ROOT:
+		sc->ip_block_count = 21;
+		break;
+	/* TODO How many IP blocks does Strix Point (and the others) have? */
+	case PCI_DEVICEID_AMD_STRIX_POINT_ROOT:
+	default:
+		sc->ip_block_count = nitems(amdsmu_ip_blocks_names);
+	}
+	KASSERT(sc->ip_block_count <= nitems(amdsmu_ip_blocks_names),
+	    ("too many IP blocks for array"));
+
+	/* Get and print out IP blocks. */
+	err = amdsmu_cmd(dev, SMU_MSG_GET_SUP_CONSTRAINTS, 0,
+	    &sc->active_ip_blocks);
+	if (err != 0) {
+		device_printf(dev, "failed to get IP blocks\n");
+		return (err);
+	}
+	device_printf(dev, "Active IP blocks:");
+	for (size_t i = 0; i < sc->ip_block_count; i++) {
+		/* Unsigned one so that testing bit 31 is well defined. */
+		active = (sc->active_ip_blocks & ((uint32_t)1 << i)) != 0;
+		sc->ip_blocks_active[i] = active;
+		if (active)
+			printf(" %s", amdsmu_ip_blocks_names[i]);
+	}
+	/* Always terminate the line, even if the last IP block is inactive. */
+	printf("\n");
+
+	/* Create a sysctl node for IP blocks. */
+	sc->ip_blocks_sysctlnode = SYSCTL_ADD_NODE(sc->sysctlctx,
+	    SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, "ip_blocks",
+	    CTLFLAG_RD, NULL, "SMU IP blocks");
+	if (sc->ip_blocks_sysctlnode == NULL) {
+		device_printf(dev, "could not add sysctl node for IP blocks\n");
+		return (ENOMEM);
+	}
+
+	/* Create a sysctl node for each IP block. */
+	for (size_t i = 0; i < sc->ip_block_count; i++) {
+		/* Create the sysctl node itself for the IP block. */
+		snprintf(sysctl_descr, sizeof sysctl_descr,
+		    "Metrics about the %s AMD IP block",
+		    amdsmu_ip_blocks_names[i]);
+		sc->ip_block_sysctlnodes[i] = SYSCTL_ADD_NODE(sc->sysctlctx,
+		    SYSCTL_CHILDREN(sc->ip_blocks_sysctlnode), OID_AUTO,
+		    amdsmu_ip_blocks_names[i], CTLFLAG_RD, NULL, sysctl_descr);
+		if (sc->ip_block_sysctlnodes[i] == NULL) {
+			device_printf(dev,
+			    "could not add sysctl node for \"%s\"\n", sysctl_descr);
+			continue;
+		}
+		/*
+		 * Create sysctls for if the IP block is currently active, last
+		 * active time, and total active time.
+		 */
+		SYSCTL_ADD_BOOL(sc->sysctlctx,
+		    SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+		    "active", CTLFLAG_RD, &sc->ip_blocks_active[i], 0,
+		    "IP block is currently active");
+		SYSCTL_ADD_U64(sc->sysctlctx,
+		    SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+		    "last_time", CTLFLAG_RD, &m->ip_block_last_active_time[i],
+		    0, "How long the IP block was active for during the last"
+		    " sleep (us)");
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+		SYSCTL_ADD_U64(sc->sysctlctx,
+		    SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO,
+		    "total_time", CTLFLAG_RD, &m->ip_block_total_active_time[i],
+		    0, "How long the IP block was active for during sleep in"
+		    " total (us)");
+#endif
+	}
+	return (0);
+}
+
+/*
+ * Fetch the address of the SMU metrics log buffer (as two 32-bit halves),
+ * map it into sc->metrics_space, then send the log reset and log start
+ * commands.  Returns 0, or the error from the address queries or the mapping.
+ */
+static int
+amdsmu_init_metrics(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	uint32_t lo, hi;
+	uint64_t paddr;
+	int error;
+
+	/* Get physical address of logging buffer, low half first. */
+	error = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_LO, 0, &lo);
+	if (error == 0)
+		error = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_HI, 0, &hi);
+	if (error != 0)
+		return (error);
+	paddr = lo | ((uint64_t)hi << 32);
+
+	/* Map memory of logging buffer. */
+	error = bus_space_map(sc->bus_tag, paddr,
+	    sizeof(struct amdsmu_metrics), 0, &sc->metrics_space);
+	if (error != 0) {
+		device_printf(dev, "could not map bus space for SMU metrics\n");
+		return (error);
+	}
+
+	/* Start logging for metrics; these two results are not checked. */
+	amdsmu_cmd(dev, SMU_MSG_LOG_RESET, 0, NULL);
+	amdsmu_cmd(dev, SMU_MSG_LOG_START, 0, NULL);
+	return (0);
+}
+
+/*
+ * Send SMU_MSG_LOG_DUMP_DATA, then copy the mapped metrics buffer into
+ * sc->metrics as a run of 32-bit reads starting at offset 0.  Returns 0 or
+ * the error from amdsmu_cmd().
+ */
+static int
+amdsmu_dump_metrics(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	const size_t nwords = sizeof(sc->metrics) / sizeof(uint32_t);
+	int error;
+
+	error = amdsmu_cmd(dev, SMU_MSG_LOG_DUMP_DATA, 0, NULL);
+	if (error != 0) {
+		device_printf(dev, "failed to dump metrics\n");
+		return (error);
+	}
+
+	bus_space_read_region_4(sc->bus_tag, sc->metrics_space, 0,
+	    (uint32_t *)&sc->metrics, nwords);
+	return (0);
+}
+
+/* Read SMU_REG_IDLEMASK and cache the value in the softc. */
+static void
+amdsmu_fetch_idlemask(device_t dev)
+{
+	struct amdsmu_softc *const softc = device_get_softc(dev);
+	const uint32_t mask = amdsmu_read4(softc, SMU_REG_IDLEMASK);
+
+	softc->idlemask = mask;
+}
+
+/*
+ * Attach: read the SMU base address via the SMU_INDEX_ADDRESS/SMU_INDEX_DATA
+ * config registers, map the SMU and its registers, and publish the version,
+ * metrics, IP block and idlemask sysctls.  On failure every mapping made so
+ * far is undone and an errno value is returned.
+ */
+static int
+amdsmu_attach(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	int err;
+	uint32_t physbase_addr_lo, physbase_addr_hi;
+	uint64_t physbase_addr;
+	int rid = 0;
+	struct sysctl_oid *node;
+
+	/*
+	 * Find physical base address for SMU.
+	 * XXX I am a little confused about the masks here. I'm just copying
+	 * what Linux does in the amd-pmc driver to get the base address.
+	 */
+	pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_LO, 4);
+	physbase_addr_lo = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0xFFF00000;
+
+	pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_HI, 4);
+	physbase_addr_hi = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0x0000FFFF;
+
+	physbase_addr = (uint64_t)physbase_addr_hi << 32 | physbase_addr_lo;
+
+	/* Map memory for SMU and its registers. */
+	sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
+	if (sc->res == NULL) {
+		device_printf(dev, "could not allocate resource\n");
+		return (ENXIO);
+	}
+
+	sc->bus_tag = rman_get_bustag(sc->res);
+
+	if (bus_space_map(sc->bus_tag, physbase_addr,
+	    SMU_MEM_SIZE, 0, &sc->smu_space) != 0) {
+		device_printf(dev, "could not map bus space for SMU\n");
+		err = ENXIO;
+		goto err_smu_space;
+	}
+	if (bus_space_map(sc->bus_tag, physbase_addr + SMU_REG_SPACE_OFF,
+	    SMU_MEM_SIZE, 0, &sc->reg_space) != 0) {
+		device_printf(dev, "could not map bus space for SMU regs\n");
+		err = ENXIO;
+		goto err_reg_space;
+	}
+
+	/* sysctl stuff. */
+	sc->sysctlctx = device_get_sysctl_ctx(dev);
+	sc->sysctlnode = device_get_sysctl_tree(dev);
+
+	/* Get version & add sysctls. */
+	if ((err = amdsmu_get_vers(dev)) != 0)
+		goto err_dump;
+
+	SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "program", CTLFLAG_RD, &sc->smu_program, 0, "SMU program number");
+	SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "version_major", CTLFLAG_RD, &sc->smu_maj, 0,
+	    "SMU firmware major version number");
+	SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "version_minor", CTLFLAG_RD, &sc->smu_min, 0,
+	    "SMU firmware minor version number");
+	SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "version_revision", CTLFLAG_RD, &sc->smu_rev, 0,
+	    "SMU firmware revision number");
+
+	/* Set up for getting metrics & add sysctls. */
+	if ((err = amdsmu_init_metrics(dev)) != 0)
+		goto err_dump;
+	/*
+	 * From here on the metrics buffer is mapped, so later failures must
+	 * go through err_metrics to unmap it again.
+	 */
+	if ((err = amdsmu_dump_metrics(dev)) != 0)
+		goto err_metrics;
+
+	node = SYSCTL_ADD_NODE(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode),
+	    OID_AUTO, "metrics", CTLFLAG_RD, NULL, "SMU metrics");
+	if (node == NULL) {
+		device_printf(dev, "could not add sysctl node for metrics\n");
+		err = ENOMEM;
+		goto err_metrics;
+	}
+
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "table_version", CTLFLAG_RD, &sc->metrics.table_version, 0,
+	    "SMU metrics table version");
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "hint_count", CTLFLAG_RD, &sc->metrics.hint_count, 0,
+	    "How many times the sleep hint was set");
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "s0i3_last_entry_status", CTLFLAG_RD,
+	    &sc->metrics.s0i3_last_entry_status, 0,
+	    "1 if last S0i3 entry was successful");
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_in_s0i2", CTLFLAG_RD, &sc->metrics.time_last_in_s0i2, 0,
+	    "Time spent in S0i2 during last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_entering_s0i3", CTLFLAG_RD,
+	    &sc->metrics.time_last_entering_s0i3, 0,
+	    "Time spent entering S0i3 during last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "total_time_entering_s0i3", CTLFLAG_RD,
+	    &sc->metrics.total_time_entering_s0i3, 0,
+	    "Total time spent entering S0i3 (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_resuming", CTLFLAG_RD, &sc->metrics.time_last_resuming,
+	    0, "Time spent resuming from last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "total_time_resuming", CTLFLAG_RD, &sc->metrics.total_time_resuming,
+	    0, "Total time spent resuming from sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_in_s0i3", CTLFLAG_RD, &sc->metrics.time_last_in_s0i3, 0,
+	    "Time spent in S0i3 during last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "total_time_in_s0i3", CTLFLAG_RD, &sc->metrics.total_time_in_s0i3,
+	    0, "Total time spent in S0i3 (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "time_last_in_sw_drips", CTLFLAG_RD,
+	    &sc->metrics.time_last_in_sw_drips, 0,
+	    "Time spent in awake during last sleep (us)");
+	SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO,
+	    "total_time_in_sw_drips", CTLFLAG_RD,
+	    &sc->metrics.total_time_in_sw_drips, 0,
+	    "Total time spent awake (us)");
+
+	/* Get IP blocks & add sysctls. */
+	err = amdsmu_get_ip_blocks(dev);
+	if (err != 0)
+		goto err_metrics;
+
+	/* Get idlemask & add sysctl. */
+	amdsmu_fetch_idlemask(dev);
+	SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO,
+	    "idlemask", CTLFLAG_RD, &sc->idlemask, 0, "SMU idlemask. This "
+	    "value is not documented - only used to help AMD internally debug "
+	    "issues");
+
+	return (0);
+err_metrics:
+	/* Same size as the mapping made in amdsmu_init_metrics(). */
+	bus_space_unmap(sc->bus_tag, sc->metrics_space,
+	    sizeof(struct amdsmu_metrics));
+err_dump:
+	bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+err_reg_space:
+	bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+err_smu_space:
+	bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+	return (err);
+}
+
+/*
+ * Detach: unmap the metrics buffer, the register window and the SMU window
+ * that a successful amdsmu_attach() mapped, then release the memory resource.
+ */
+static int
+amdsmu_detach(device_t dev)
+{
+	struct amdsmu_softc *sc = device_get_softc(dev);
+	int rid = 0;
+
+	/* Mapped by amdsmu_init_metrics() with this same size. */
+	bus_space_unmap(sc->bus_tag, sc->metrics_space,
+	    sizeof(struct amdsmu_metrics));
+	bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE);
+	bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE);
+
+	bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res);
+	return (0);
+}
+
+static device_method_t amdsmu_methods[] = {	/* device interface methods of this driver */
+ DEVMETHOD(device_identify, amdsmu_identify),
+ DEVMETHOD(device_probe, amdsmu_probe),
+ DEVMETHOD(device_attach, amdsmu_attach),
+ DEVMETHOD(device_detach, amdsmu_detach),
+ DEVMETHOD_END
+};
+
+static driver_t amdsmu_driver = {
+ "amdsmu",	/* same name amdsmu_identify() passes to device_add_child() */
+ amdsmu_methods,
+ sizeof(struct amdsmu_softc),	/* size of the per-device softc */
+};
+
+DRIVER_MODULE(amdsmu, hostb, amdsmu_driver, NULL, NULL);	/* registered under "hostb" */
+MODULE_VERSION(amdsmu, 1);
+MODULE_DEPEND(amdsmu, amdsmn, 1, 1, 1);	/* NOTE(review): nothing in amdsmu.c, amdsmu.h or amdsmu_reg.h uses amdsmn - confirm this is needed */
+MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmu, amdsmu_products,
+ nitems(amdsmu_products));
diff --git a/sys/dev/amdsmu/amdsmu.h b/sys/dev/amdsmu/amdsmu.h
new file mode 100644
index 000000000000..025887f7fe5a
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu.h
@@ -0,0 +1,95 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_H_
+#define _AMDSMU_H_
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <machine/bus.h>
+#include <x86/cputypes.h>
+
+#include <dev/amdsmu/amdsmu_reg.h>
+
+#define SMU_RES_READ_PERIOD_US 50
+#define SMU_RES_READ_MAX 20000
+
+static const struct amdsmu_product {	/* table of supported PCI vendor/device ID pairs */
+ uint16_t amdsmu_vendorid;	/* compared with pci_get_vendor() */
+ uint16_t amdsmu_deviceid;	/* compared with pci_get_device() */
+} amdsmu_products[] = {	/* NOTE(review): CPU_VENDOR_AMD is used as the PCI vendor ID - confirm the values coincide */
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_REMBRANDT_ROOT },
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_PHOENIX_ROOT },
+ { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_STRIX_POINT_ROOT },
+};
+
+static const char *const amdsmu_ip_blocks_names[] = {	/* entry i names bit i of active_ip_blocks */
+ "DISPLAY",
+ "CPU",
+ "GFX",
+ "VDD",
+ "ACP",
+ "VCN",
+ "ISP",
+ "NBIO",
+ "DF",
+ "USB3_0",
+ "USB3_1",
+ "LAPIC",
+ "USB3_2",
+ "USB3_3",
+ "USB3_4",
+ "USB4_0",
+ "USB4_1",
+ "MPM",
+ "JPEG",
+ "IPU",
+ "UMSCH",
+ "VPE",
+};
+
+CTASSERT(nitems(amdsmu_ip_blocks_names) <= 32);	/* active_ip_blocks is a 32-bit mask */
+
+struct amdsmu_softc {	/* per-device driver state */
+ struct sysctl_ctx_list *sysctlctx;	/* from device_get_sysctl_ctx() */
+ struct sysctl_oid *sysctlnode;	/* from device_get_sysctl_tree() */
+
+ struct resource *res;	/* memory resource allocated in attach */
+ bus_space_tag_t bus_tag;	/* bus tag of res, used for all mappings */
+
+ bus_space_handle_t smu_space;	/* SMU_MEM_SIZE bytes at the SMU base address */
+ bus_space_handle_t reg_space;	/* SMU_MEM_SIZE bytes at base + SMU_REG_SPACE_OFF */
+
+ uint8_t smu_program;	/* bits 31..24 of the SMU version word */
+ uint8_t smu_maj, smu_min, smu_rev;	/* bits 23..16, 15..8 and 7..0 */
+
+ uint32_t active_ip_blocks;	/* result of SMU_MSG_GET_SUP_CONSTRAINTS */
+ struct sysctl_oid *ip_blocks_sysctlnode;	/* "ip_blocks" parent node */
+ size_t ip_block_count;	/* number of name entries in use */
+ struct sysctl_oid *ip_block_sysctlnodes[nitems(amdsmu_ip_blocks_names)];
+ bool ip_blocks_active[nitems(amdsmu_ip_blocks_names)];	/* bit i of active_ip_blocks */
+
+ bus_space_handle_t metrics_space;	/* mapping of the SMU metrics buffer */
+ struct amdsmu_metrics metrics;	/* copy filled by amdsmu_dump_metrics() */
+ uint32_t idlemask;	/* last value read from SMU_REG_IDLEMASK */
+};
+
+/* Read the 32-bit value at offset reg within sc->reg_space. */
+static inline uint32_t
+amdsmu_read4(const struct amdsmu_softc *sc, bus_size_t reg)
+{
+	const uint32_t val = bus_space_read_4(sc->bus_tag, sc->reg_space, reg);
+
+	return (val);
+}
+
+/* Write the 32-bit value val at offset reg within sc->reg_space. */
+static inline void
+amdsmu_write4(const struct amdsmu_softc *sc, bus_size_t reg, uint32_t val)
+{
+	const bus_space_tag_t tag = sc->bus_tag;
+	const bus_space_handle_t regs = sc->reg_space;
+
+	bus_space_write_4(tag, regs, reg, val);
+}
+
+#endif /* _AMDSMU_H_ */
diff --git a/sys/dev/amdsmu/amdsmu_reg.h b/sys/dev/amdsmu/amdsmu_reg.h
new file mode 100644
index 000000000000..e685b34e6883
--- /dev/null
+++ b/sys/dev/amdsmu/amdsmu_reg.h
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 The FreeBSD Foundation
+ *
+ * This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+#ifndef _AMDSMU_REG_H_
+#define _AMDSMU_REG_H_
+
+#include <sys/types.h>
+
+/*
+ * TODO These are in common with amdtemp; should we find a way to factor these
+ * out? Also, there are way more of these. I couldn't find a centralized place
+ * which lists them though.
+ */
+#define PCI_DEVICEID_AMD_REMBRANDT_ROOT 0x14B5
+#define PCI_DEVICEID_AMD_PHOENIX_ROOT 0x14E8
+#define PCI_DEVICEID_AMD_STRIX_POINT_ROOT 0x14A4
+
+#define SMU_INDEX_ADDRESS 0xB8	/* PCI config offset the index is written to */
+#define SMU_INDEX_DATA 0xBC	/* PCI config offset the selected value is read from */
+
+#define SMU_PHYSBASE_ADDR_LO 0x13B102E8	/* index of the base address, low half */
+#define SMU_PHYSBASE_ADDR_HI 0x13B102EC	/* index of the base address, high half */
+
+#define SMU_MEM_SIZE 0x1000	/* bytes mapped for each of smu_space and reg_space */
+#define SMU_REG_SPACE_OFF 0x10000	/* offset of reg_space from the base address */
+
+#define SMU_REG_MESSAGE 0x538	/* offsets below are relative to reg_space */
+#define SMU_REG_RESPONSE 0x980
+#define SMU_REG_ARGUMENT 0x9BC	/* command argument in, command result out */
+#define SMU_REG_IDLEMASK 0xD14
+
+enum amdsmu_res {	/* values read from SMU_REG_RESPONSE */
+ SMU_RES_WAIT = 0x00,	/* no response yet; also written to clear the register */
+ SMU_RES_OK = 0x01,	/* amdsmu_cmd() returns 0 */
+ SMU_RES_REJECT_BUSY = 0xFC,	/* amdsmu_cmd() returns EBUSY */
+ SMU_RES_REJECT_PREREQ = 0xFD,	/* this and the two below: EIO */
+ SMU_RES_UNKNOWN = 0xFE,
+ SMU_RES_FAILED = 0xFF,
+};
+
+enum amdsmu_msg {	/* values written to SMU_REG_MESSAGE */
+ SMU_MSG_GETSMUVERSION = 0x02,	/* result: packed version word */
+ SMU_MSG_LOG_GETDRAM_ADDR_HI = 0x04,	/* result: high 32 bits of the metrics buffer address */
+ SMU_MSG_LOG_GETDRAM_ADDR_LO = 0x05,	/* result: low 32 bits of the metrics buffer address */
+ SMU_MSG_LOG_START = 0x06,
+ SMU_MSG_LOG_RESET = 0x07,
+ SMU_MSG_LOG_DUMP_DATA = 0x08,	/* sent before the metrics buffer is read */
+ SMU_MSG_GET_SUP_CONSTRAINTS = 0x09,	/* result: bitmask stored in active_ip_blocks */
+};
+
+/* XXX Copied from Linux struct smu_metrics. */
+struct amdsmu_metrics {	/* read as 32-bit words from the mapped metrics buffer */
+ uint32_t table_version;
+ uint32_t hint_count;	/* times the sleep hint was set, per its sysctl text */
+ uint32_t s0i3_last_entry_status;	/* 1 if the last S0i3 entry succeeded, per its sysctl text */
+ uint32_t time_last_in_s0i2;	/* the time fields are exported as microseconds */
+ uint64_t time_last_entering_s0i3;
+ uint64_t total_time_entering_s0i3;
+ uint64_t time_last_resuming;
+ uint64_t total_time_resuming;
+ uint64_t time_last_in_s0i3;
+ uint64_t total_time_in_s0i3;
+ uint64_t time_last_in_sw_drips;
+ uint64_t total_time_in_sw_drips;
+ /*
+ * This is how long each IP block was active for (us), i.e., blocking
+ * entry to S0i3. In Linux, these are called "timecondition_notmet_*".
+ *
+ * XXX Total active time for IP blocks seems to be buggy and reporting
+ * garbage (at least on Phoenix), so it's disabled for now. The last
+ * active time for the USB4_0 IP block also seems to be buggy.
+ */
+ uint64_t ip_block_last_active_time[32];	/* indexed like amdsmu_ip_blocks_names */
+#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME
+ uint64_t ip_block_total_active_time[32];	/* indexed like amdsmu_ip_blocks_names */
+#endif
+} __attribute__((packed));	/* no padding between fields */
+
+#endif /* _AMDSMU_REG_H_ */
diff --git a/sys/dev/iicbus/iichid.c b/sys/dev/iicbus/iichid.c
index 9c0324a24685..3f1d7a0cefba 100644
--- a/sys/dev/iicbus/iichid.c
+++ b/sys/dev/iicbus/iichid.c
@@ -275,62 +275,36 @@ iichid_cmd_read(struct iichid_softc* sc, void *buf, iichid_size_t maxlen,
* 6.1.3 - Retrieval of Input Reports
* DEVICE returns the length (2 Bytes) and the entire Input Report.
*/
- uint8_t actbuf[2] = { 0, 0 };
- /* Read actual input report length. */
+
+ memset(buf, 0xaa, 2); // In case nothing gets read
struct iic_msg msgs[] = {
- { sc->addr, IIC_M_RD | IIC_M_NOSTOP, sizeof(actbuf), actbuf },
+ { sc->addr, IIC_M_RD, maxlen, buf },
};
- uint16_t actlen;
int error;
error = iicbus_transfer(sc->dev, msgs, nitems(msgs));
if (error != 0)
return (error);
- actlen = actbuf[0] | actbuf[1] << 8;
-#ifdef IICHID_SAMPLING
- if ((actlen == 0 && sc->sampling_rate_slow < 0) ||
- (maxlen == 0 && sc->sampling_rate_slow >= 0)) {
-#else
+ DPRINTFN(sc, 5, "%*D\n", msgs[0].len, msgs[0].buf, " ");
+
+ uint16_t actlen = le16dec(buf);
+
if (actlen == 0) {
-#endif
- /* Read and discard reset command response. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART,
- le16toh(sc->desc.wMaxInputLength) - 2, sc->intr_buf };
- actlen = 0;
if (!sc->reset_acked) {
mtx_lock(&sc->mtx);
sc->reset_acked = true;
wakeup(&sc->reset_acked);
mtx_unlock(&sc->mtx);
}
-#ifdef IICHID_SAMPLING
- } else if ((actlen <= 2 || actlen == 0xFFFF) &&
- sc->sampling_rate_slow >= 0) {
- /* Read and discard 1 byte to send I2C STOP condition. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART, 1, actbuf };
- actlen = 0;
-#endif
- } else {
- actlen -= 2;
- if (actlen > maxlen) {
- DPRINTF(sc, "input report too big. requested=%d "
- "received=%d\n", maxlen, actlen);
- actlen = maxlen;
- }
- /* Read input report itself. */
- msgs[0] = (struct iic_msg)
- { sc->addr, IIC_M_RD | IIC_M_NOSTART, actlen, buf };
}
- error = iicbus_transfer(sc->dev, msgs, 1);
- if (error == 0 && actual_len != NULL)
+ /* Length field counts its own two bytes; report the payload size only. */
+ actlen = (actlen <= 2 || actlen > maxlen) ? 0 : actlen - 2;
+
+ if (actual_len != NULL) {
*actual_len = actlen;
-
- DPRINTFN(sc, 5,
- "%*D - %*D\n", 2, actbuf, " ", msgs[0].len, msgs[0].buf, " ");
+ }
return (error);
}
@@ -566,7 +540,7 @@ iichid_sampling_task(void *context, int pending)
error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual);
if (error == 0) {
if (actual > 0) {
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual);
sc->missing_samples = 0;
if (sc->dup_size != actual ||
memcmp(sc->dup_buf, sc->intr_buf, actual) != 0) {
@@ -577,7 +551,7 @@ iichid_sampling_task(void *context, int pending)
++sc->dup_samples;
} else {
if (++sc->missing_samples == 1)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, 0);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, 0);
sc->dup_samples = 0;
}
} else
@@ -632,7 +606,7 @@ iichid_intr(void *context)
if (error == 0) {
if (sc->power_on && sc->open) {
if (actual != 0)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf,
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2,
actual);
else
DPRINTF(sc, "no data received\n");
@@ -842,11 +816,12 @@ iichid_intr_setup(device_t dev, device_t child __unused, hid_intr_t intr,
sc = device_get_softc(dev);
/*
- * Do not rely on wMaxInputLength, as some devices may set it to
- * a wrong length. Find the longest input report in report descriptor.
+ * Do not rely on wMaxInputLength alone, as some devices (which?)
+ * may set it to a wrong length. Also find the longest input report
+ * in the report descriptor, and add two for the length field.
*/
- rdesc->rdsize =
- MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength) - 2);
+ rdesc->rdsize = 2 +
+ MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength));
/* Write and get/set_report sizes are limited by I2C-HID protocol. */
rdesc->grsize = rdesc->srsize = IICHID_SIZE_MAX;
rdesc->wrsize = IICHID_SIZE_MAX;
@@ -919,7 +894,7 @@ iichid_intr_poll(device_t dev, device_t child __unused)
sc = device_get_softc(dev);
error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual);
if (error == 0 && actual != 0)
- sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual);
+ sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual);
}
/*
@@ -946,6 +921,7 @@ iichid_read(device_t dev, device_t child __unused, void *buf,
{
struct iichid_softc *sc;
device_t parent;
+ uint8_t *tmpbuf;
int error;
if (maxlen > IICHID_SIZE_MAX)
@@ -954,8 +930,12 @@ iichid_read(device_t dev, device_t child __unused, void *buf,
parent = device_get_parent(sc->dev);
error = iicbus_request_bus(parent, sc->dev, IIC_WAIT);
if (error == 0) {
- error = iichid_cmd_read(sc, buf, maxlen, actlen);
+ tmpbuf = malloc(maxlen + 2, M_DEVBUF, M_WAITOK | M_ZERO);
+ error = iichid_cmd_read(sc, tmpbuf, maxlen + 2, actlen);
iicbus_release_bus(parent, sc->dev);
+ if (error == 0 && actlen != NULL && *actlen > 0) /* *actlen is set only on success */
+ memcpy(buf, tmpbuf + 2, MIN(*actlen, maxlen)); /* never overrun caller's buf */
+ free(tmpbuf, M_DEVBUF);
}
return (iic2errno(error));
}
diff --git a/sys/dev/mmc/host/dwmmc.c b/sys/dev/mmc/host/dwmmc.c
index 57992571982c..a422d86d6034 100644
--- a/sys/dev/mmc/host/dwmmc.c
+++ b/sys/dev/mmc/host/dwmmc.c
@@ -315,20 +315,11 @@ static void
dwmmc_cmd_done(struct dwmmc_softc *sc)
{
struct mmc_command *cmd;
-#ifdef MMCCAM
- union ccb *ccb;
-#endif
-#ifdef MMCCAM
- ccb = sc->ccb;
- if (ccb == NULL)
- return;
- cmd = &ccb->mmcio.cmd;
-#else
+ DWMMC_ASSERT_LOCKED(sc);
+
cmd = sc->curcmd;
-#endif
- if (cmd == NULL)
- return;
+ KASSERT(cmd != NULL, ("%s: sc %p curcmd %p == NULL", __func__, sc, cmd));
if (cmd->flags & MMC_RSP_PRESENT) {
if (cmd->flags & MMC_RSP_136) {
@@ -350,15 +341,17 @@ dwmmc_tasklet(struct dwmmc_softc *sc)
{
struct mmc_command *cmd;
+ DWMMC_ASSERT_LOCKED(sc);
+
cmd = sc->curcmd;
- if (cmd == NULL)
- return;
+ KASSERT(cmd != NULL, ("%s: sc %p curcmd %p == NULL", __func__, sc, cmd));
if (!sc->cmd_done)
return;
if (cmd->error != MMC_ERR_NONE || !cmd->data) {
dwmmc_next_operation(sc);
+
} else if (cmd->data && sc->dto_rcvd) {
if ((cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK ||
cmd->opcode == MMC_READ_MULTIPLE_BLOCK) &&
@@ -383,6 +376,7 @@ dwmmc_intr(void *arg)
DWMMC_LOCK(sc);
cmd = sc->curcmd;
+ KASSERT(cmd != NULL, ("%s: sc %p curcmd %p == NULL", __func__, sc, cmd));
/* First handle SDMMC controller interrupts */
reg = READ4(sc, SDMMC_MINTSTS);
@@ -1093,6 +1087,9 @@ dwmmc_start_cmd(struct dwmmc_softc *sc, struct mmc_command *cmd)
uint32_t cmdr;
dprintf("%s\n", __func__);
+
+ DWMMC_ASSERT_LOCKED(sc);
+
sc->curcmd = cmd;
data = cmd->data;
@@ -1177,18 +1174,22 @@ dwmmc_start_cmd(struct dwmmc_softc *sc, struct mmc_command *cmd)
static void
dwmmc_next_operation(struct dwmmc_softc *sc)
{
- struct mmc_command *cmd;
- dprintf("%s\n", __func__);
#ifdef MMCCAM
union ccb *ccb;
+#else
+ struct mmc_request *req;
+#endif
+ struct mmc_command *cmd;
+ dprintf("%s\n", __func__);
+ DWMMC_ASSERT_LOCKED(sc);
+
+#ifdef MMCCAM
ccb = sc->ccb;
if (ccb == NULL)
return;
cmd = &ccb->mmcio.cmd;
#else
- struct mmc_request *req;
-
req = sc->req;
if (req == NULL)
return;
@@ -1205,7 +1206,7 @@ dwmmc_next_operation(struct dwmmc_softc *sc)
* mostly caused by multi-block write command
* followed by single-read.
*/
- while(READ4(sc, SDMMC_STATUS) & (SDMMC_STATUS_DATA_BUSY))
+ while (READ4(sc, SDMMC_STATUS) & (SDMMC_STATUS_DATA_BUSY))
continue;
if (sc->flags & PENDING_CMD) {
@@ -1219,50 +1220,44 @@ dwmmc_next_operation(struct dwmmc_softc *sc)
return;
}
-#ifdef MMCCAM
- sc->ccb = NULL;
sc->curcmd = NULL;
+#ifdef MMCCAM
ccb->ccb_h.status =
(ccb->mmcio.cmd.error == 0 ? CAM_REQ_CMP : CAM_REQ_CMP_ERR);
xpt_done(ccb);
+ sc->ccb = NULL;
#else
- sc->req = NULL;
- sc->curcmd = NULL;
req->done(req);
+ sc->req = NULL;
#endif
}
+#ifndef MMCCAM
static int
dwmmc_request(device_t brdev, device_t reqdev, struct mmc_request *req)
{
struct dwmmc_softc *sc;
- sc = device_get_softc(brdev);
-
dprintf("%s\n", __func__);
- DWMMC_LOCK(sc);
+ sc = device_get_softc(brdev);
-#ifdef MMCCAM
- sc->flags |= PENDING_CMD;
-#else
+ DWMMC_LOCK(sc);
if (sc->req != NULL) {
DWMMC_UNLOCK(sc);
return (EBUSY);
}
-
sc->req = req;
sc->flags |= PENDING_CMD;
if (sc->req->stop)
sc->flags |= PENDING_STOP;
-#endif
- dwmmc_next_operation(sc);
+ dwmmc_next_operation(sc);
DWMMC_UNLOCK(sc);
+
return (0);
}
-#ifndef MMCCAM
static int
dwmmc_get_ro(device_t brdev, device_t reqdev)
{
@@ -1505,10 +1500,15 @@ dwmmc_cam_request(device_t dev, union ccb *ccb)
struct ccb_mmcio *mmcio;
sc = device_get_softc(dev);
- mmcio = &ccb->mmcio;
-
DWMMC_LOCK(sc);
+ KASSERT(ccb->ccb_h.pinfo.index == CAM_ACTIVE_INDEX,
+ ("%s: ccb %p index %d != CAM_ACTIVE_INDEX: func=%#x %s status %#x\n",
+ __func__, ccb, ccb->ccb_h.pinfo.index, ccb->ccb_h.func_code,
+ xpt_action_name(ccb->ccb_h.func_code), ccb->ccb_h.status));
+
+ mmcio = &ccb->mmcio;
+
#ifdef DEBUG
if (__predict_false(bootverbose)) {
device_printf(sc->dev, "CMD%u arg %#x flags %#x dlen %u dflags %#x\n",
@@ -1519,16 +1519,21 @@ dwmmc_cam_request(device_t dev, union ccb *ccb)
#endif
if (mmcio->cmd.data != NULL) {
if (mmcio->cmd.data->len == 0 || mmcio->cmd.data->flags == 0)
- panic("data->len = %d, data->flags = %d -- something is b0rked",
- (int)mmcio->cmd.data->len, mmcio->cmd.data->flags);
+ panic("%s: data %p data->len = %d, data->flags = %d -- something is b0rked",
+ __func__, mmcio->cmd.data, (int)mmcio->cmd.data->len, mmcio->cmd.data->flags);
}
+
if (sc->ccb != NULL) {
- device_printf(sc->dev, "Controller still has an active command\n");
+ device_printf(sc->dev, "%s: Controller still has an active command: "
+ "sc->ccb %p new ccb %p\n", __func__, sc->ccb, ccb);
+ DWMMC_UNLOCK(sc);
return (EBUSY);
}
sc->ccb = ccb;
+ sc->flags |= PENDING_CMD;
+
+ dwmmc_next_operation(sc);
DWMMC_UNLOCK(sc);
- dwmmc_request(sc->dev, NULL, NULL);
return (0);
}
diff --git a/sys/dev/nvmf/controller/nvmft_subr.c b/sys/dev/nvmf/controller/nvmft_subr.c
index bb2bc0988e81..245971813854 100644
--- a/sys/dev/nvmf/controller/nvmft_subr.c
+++ b/sys/dev/nvmf/controller/nvmft_subr.c
@@ -26,46 +26,6 @@ nvmf_nqn_valid(const char *nqn)
len = strnlen(nqn, NVME_NQN_FIELD_SIZE);
if (len == 0 || len > NVMF_NQN_MAX_LEN)
return (false);
-
-#ifdef STRICT_CHECKS
- /*
- * Stricter checks from the spec. Linux does not seem to
- * require these.
- */
-
- /*
- * NVMF_NQN_MIN_LEN does not include '.', and require at least
- * one character of a domain name.
- */
- if (len < NVMF_NQN_MIN_LEN + 2)
- return (false);
- if (memcmp("nqn.", nqn, strlen("nqn.")) != 0)
- return (false);
- nqn += strlen("nqn.");
-
- /* Next 4 digits must be a year. */
- for (u_int i = 0; i < 4; i++) {
- if (!isdigit(nqn[i]))
- return (false);
- }
- nqn += 4;
-
- /* '-' between year and month. */
- if (nqn[0] != '-')
- return (false);
- nqn++;
-
- /* 2 digit month. */
- for (u_int i = 0; i < 2; i++) {
- if (!isdigit(nqn[i]))
- return (false);
- }
- nqn += 2;
-
- /* '.' between month and reverse domain name. */
- if (nqn[0] != '.')
- return (false);
-#endif
return (true);
}
diff --git a/sys/dev/pci/pci_iov.c b/sys/dev/pci/pci_iov.c
index 1f72391fb6b4..0efcfeac9eff 100644
--- a/sys/dev/pci/pci_iov.c
+++ b/sys/dev/pci/pci_iov.c
@@ -734,11 +734,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
first_rid = pci_get_rid(dev) + rid_off;
last_rid = first_rid + (num_vfs - 1) * rid_stride;
- /* We don't yet support allocating extra bus numbers for VFs. */
if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
- device_printf(dev, "not enough PCIe bus numbers for VFs\n");
- error = ENOSPC;
- goto out;
+ int rid = 0;
+ uint16_t last_rid_bus = PCI_RID2BUS(last_rid);
+
+ iov->iov_bus_res = bus_alloc_resource(bus, PCI_RES_BUS, &rid,
+ last_rid_bus, last_rid_bus, 1, RF_ACTIVE);
+ if (iov->iov_bus_res == NULL) {
+ device_printf(dev,
+ "failed to allocate PCIe bus number for VFs\n");
+ error = ENOSPC;
+ goto out;
+ }
}
if (!ari_enabled && PCI_RID2SLOT(last_rid) != 0) {
@@ -786,6 +793,11 @@ out:
}
}
+ if (iov->iov_bus_res != NULL) {
+ bus_release_resource(bus, iov->iov_bus_res);
+ iov->iov_bus_res = NULL;
+ }
+
if (iov->iov_flags & IOV_RMAN_INITED) {
rman_fini(&iov->rman);
iov->iov_flags &= ~IOV_RMAN_INITED;
@@ -896,6 +908,11 @@ pci_iov_delete_iov_children(struct pci_devinfo *dinfo)
}
}
+ if (iov->iov_bus_res != NULL) {
+ bus_release_resource(bus, iov->iov_bus_res);
+ iov->iov_bus_res = NULL;
+ }
+
if (iov->iov_flags & IOV_RMAN_INITED) {
rman_fini(&iov->rman);
iov->iov_flags &= ~IOV_RMAN_INITED;
diff --git a/sys/dev/pci/pci_iov_private.h b/sys/dev/pci/pci_iov_private.h
index 7ae2219b936d..ecf0a9b21be5 100644
--- a/sys/dev/pci/pci_iov_private.h
+++ b/sys/dev/pci/pci_iov_private.h
@@ -39,6 +39,8 @@ struct pcicfg_iov {
struct cdev *iov_cdev;
nvlist_t *iov_schema;
+ struct resource *iov_bus_res;
+
struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1];
struct rman rman;
char rman_name[64];
diff --git a/sys/dev/usb/input/usbhid.c b/sys/dev/usb/input/usbhid.c
index 3bb7d5e594e3..df810012b3f8 100644
--- a/sys/dev/usb/input/usbhid.c
+++ b/sys/dev/usb/input/usbhid.c
@@ -76,7 +76,7 @@
#include "hid_if.h"
static SYSCTL_NODE(_hw_usb, OID_AUTO, usbhid, CTLFLAG_RW, 0, "USB usbhid");
-static int usbhid_enable = 0;
+static int usbhid_enable = 1;
SYSCTL_INT(_hw_usb_usbhid, OID_AUTO, enable, CTLFLAG_RWTUN,
&usbhid_enable, 0, "Enable usbhid and prefer it to other USB HID drivers");
#ifdef USB_DEBUG
diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c
index 819debadd1ac..9f2b009d02ec 100644
--- a/sys/dev/vmm/vmm_dev.c
+++ b/sys/dev/vmm/vmm_dev.c
@@ -30,7 +30,8 @@
#include <dev/vmm/vmm_mem.h>
#include <dev/vmm/vmm_stat.h>
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
struct vm_memseg_12 {
int segid;
size_t len;
@@ -42,7 +43,22 @@ _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI");
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12)
#define VM_GET_MEMSEG_12 \
_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12)
-#endif
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+struct vm_memseg_14 {
+ int segid;
+ size_t len;
+ char name[VM_MAX_SUFFIXLEN + 1];
+};
+_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16),
+ "COMPAT_FREEBSD14 ABI");
+
+#define VM_ALLOC_MEMSEG_14 \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14)
+#define VM_GET_MEMSEG_14 \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14)
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
struct devmem_softc {
int segid;
@@ -257,7 +273,8 @@ get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
}
static int
-alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len,
+ struct domainset *domainset)
{
char *name;
int error;
@@ -278,8 +295,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len)
if (error)
goto done;
}
-
- error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset);
if (error)
goto done;
@@ -295,6 +311,20 @@ done:
return (error);
}
+#if defined(__amd64__) && \
+ (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12))
+/*
+ * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts.
+ */
+static void
+adjust_segid(struct vm_memseg *mseg)
+{
+ if (mseg->segid != VM_SYSMEM) {
+ mseg->segid += (VM_BOOTROM - 1);
+ }
+}
+#endif
+
static int
vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum,
uint64_t *regval)
@@ -353,10 +383,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU),
VMMDEV_IOCTL(VM_STAT_DESC, 0),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14,
+ VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_ALLOC_MEMSEG,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
VMMDEV_IOCTL(VM_MMAP_MEMSEG,
@@ -366,9 +402,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = {
VMMDEV_IOCTL(VM_REINIT,
VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS),
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#if defined(COMPAT_FREEBSD12)
VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS),
#endif
+#ifdef COMPAT_FREEBSD14
+ VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS),
+#endif
+#endif /* __amd64__ */
VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS),
VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS),
@@ -388,6 +429,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vmmdev_softc *sc;
struct vcpu *vcpu;
const struct vmmdev_ioctl *ioctl;
+ struct vm_memseg *mseg;
int error, vcpuid;
sc = vmmdev_lookup2(cdev);
@@ -499,20 +541,77 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len);
break;
}
-#if defined(__amd64__) && defined(COMPAT_FREEBSD12)
+#ifdef __amd64__
+#ifdef COMPAT_FREEBSD12
case VM_ALLOC_MEMSEG_12:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg_12 *)0)->name));
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_12 *)0)->name), NULL);
break;
case VM_GET_MEMSEG_12:
- error = get_memseg(sc, (struct vm_memseg *)data,
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
sizeof(((struct vm_memseg_12 *)0)->name));
break;
-#endif
- case VM_ALLOC_MEMSEG:
- error = alloc_memseg(sc, (struct vm_memseg *)data,
- sizeof(((struct vm_memseg *)0)->name));
+#endif /* COMPAT_FREEBSD12 */
+#ifdef COMPAT_FREEBSD14
+ case VM_ALLOC_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = alloc_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name), NULL);
+ break;
+ case VM_GET_MEMSEG_14:
+ mseg = (struct vm_memseg *)data;
+
+ adjust_segid(mseg);
+ error = get_memseg(sc, mseg,
+ sizeof(((struct vm_memseg_14 *)0)->name));
+ break;
+#endif /* COMPAT_FREEBSD14 */
+#endif /* __amd64__ */
+ case VM_ALLOC_MEMSEG: {
+ domainset_t *mask;
+ struct domainset *domainset, domain;
+
+ domainset = NULL;
+ mseg = (struct vm_memseg *)data;
+ if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) {
+ if (mseg->ds_mask_size < sizeof(domainset_t) ||
+ mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) {
+ error = ERANGE;
+ break;
+ }
+ memset(&domain, 0, sizeof(domain));
+ mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK);
+ error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ error = domainset_populate(&domain, mask, mseg->ds_policy,
+ mseg->ds_mask_size);
+ if (error) {
+ free(mask, M_VMMDEV);
+ break;
+ }
+ domainset = domainset_create(&domain);
+ if (domainset == NULL) {
+ error = EINVAL;
+ free(mask, M_VMMDEV);
+ break;
+ }
+ free(mask, M_VMMDEV);
+ }
+ error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset);
+
break;
+ }
case VM_GET_MEMSEG:
error = get_memseg(sc, (struct vm_memseg *)data,
sizeof(((struct vm_memseg *)0)->name));
@@ -820,7 +919,6 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred);
@@ -830,7 +928,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_destroy, "A",
- NULL);
+ "Destroy a vmm(4) instance (legacy interface)");
static struct cdevsw vmmdevsw = {
.d_name = "vmmdev",
@@ -909,7 +1007,6 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
buflen = VM_MAX_NAMELEN + 1;
buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO);
- strlcpy(buf, "beavis", buflen);
error = sysctl_handle_string(oidp, buf, buflen, req);
if (error == 0 && req->newptr != NULL)
error = vmmdev_create(buf, req->td->td_ucred);
@@ -919,7 +1016,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_hw_vmm, OID_AUTO, create,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE,
NULL, 0, sysctl_vmm_create, "A",
- NULL);
+ "Create a vmm(4) instance (legacy interface)");
static int
vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td)
diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c
index c61ae2d44b96..be59e37de33d 100644
--- a/sys/dev/vmm/vmm_mem.c
+++ b/sys/dev/vmm/vmm_mem.c
@@ -7,6 +7,7 @@
#include <sys/types.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/sx.h>
#include <sys/systm.h>
@@ -156,10 +157,11 @@ vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa)
}
int
-vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+ struct domainset *obj_domainset)
{
- struct vm_mem *mem;
struct vm_mem_seg *seg;
+ struct vm_mem *mem;
vm_object_t obj;
mem = vm_mem(vm);
@@ -179,13 +181,22 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
return (EINVAL);
}
+ /*
+ * When given an impossible policy, signal an
+ * error to the user.
+ */
+ if (obj_domainset != NULL && domainset_empty_vm(obj_domainset))
+ return (EINVAL);
obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT);
if (obj == NULL)
return (ENOMEM);
seg->len = len;
seg->object = obj;
+ if (obj_domainset != NULL)
+ seg->object->domain.dr_policy = obj_domainset;
seg->sysmem = sysmem;
+
return (0);
}
diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h
index a4be4c1c57aa..856470cf2590 100644
--- a/sys/dev/vmm/vmm_mem.h
+++ b/sys/dev/vmm/vmm_mem.h
@@ -8,6 +8,27 @@
#ifndef _DEV_VMM_MEM_H_
#define _DEV_VMM_MEM_H_
+/* Maximum number of NUMA domains in a guest. */
+#define VM_MAXMEMDOM 8
+#define VM_MAXSYSMEM VM_MAXMEMDOM
+
+/*
+ * Identifiers for memory segments.
+ * Each guest NUMA domain is represented by a single system
+ * memory segment from [VM_SYSMEM, VM_MAXSYSMEM).
+ * The remaining identifiers can be used to create devmem segments.
+ */
+enum {
+ VM_SYSMEM = 0,
+ VM_BOOTROM = VM_MAXSYSMEM,
+ VM_FRAMEBUFFER,
+ VM_PCIROM,
+ VM_MEMSEG_END
+};
+
+#define VM_MAX_MEMSEGS VM_MEMSEG_END
+#define VM_MAX_MEMMAPS (VM_MAX_MEMSEGS * 2)
+
#ifdef _KERNEL
#include <sys/types.h>
@@ -31,9 +52,6 @@ struct vm_mem_map {
int flags;
};
-#define VM_MAX_MEMSEGS 4
-#define VM_MAX_MEMMAPS 8
-
struct vm_mem {
struct vm_mem_map mem_maps[VM_MAX_MEMMAPS];
struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS];
@@ -55,7 +73,8 @@ void vm_assert_memseg_xlocked(struct vm *vm);
int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
size_t len, int prot, int flags);
int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len);
-int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem,
+ struct domainset *obj_domainset);
void vm_free_memseg(struct vm *vm, int ident);
/*
diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c
index 222cfc03e4b3..e5fdb395c9f7 100644
--- a/sys/fs/nfs/nfs_commonport.c
+++ b/sys/fs/nfs/nfs_commonport.c
@@ -258,7 +258,7 @@ newnfs_copycred(struct nfscred *nfscr, struct ucred *cr)
KASSERT(nfscr->nfsc_ngroups >= 0,
("newnfs_copycred: negative nfsc_ngroups"));
cr->cr_uid = nfscr->nfsc_uid;
- crsetgroups_fallback(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups,
+ crsetgroups_and_egid(cr, nfscr->nfsc_ngroups, nfscr->nfsc_groups,
GID_NOGROUP);
}
@@ -380,8 +380,7 @@ newnfs_setroot(struct ucred *cred)
cred->cr_uid = 0;
cred->cr_gid = 0;
- /* XXXKE Fix this if cr_gid gets separated out. */
- cred->cr_ngroups = 1;
+ cred->cr_ngroups = 0;
}
/*
diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c
index a957315aaa12..4ffc4ce5c29f 100644
--- a/sys/fs/nfs/nfs_commonsubs.c
+++ b/sys/fs/nfs/nfs_commonsubs.c
@@ -4143,7 +4143,7 @@ nfssvc_idname(struct nfsd_idargs *nidp)
*/
cr = crget();
cr->cr_uid = cr->cr_ruid = cr->cr_svuid = nidp->nid_uid;
- crsetgroups_fallback(cr, nidp->nid_ngroup, grps,
+ crsetgroups_and_egid(cr, nidp->nid_ngroup, grps,
GID_NOGROUP);
cr->cr_rgid = cr->cr_svgid = cr->cr_gid;
cr->cr_prison = curthread->td_ucred->cr_prison;
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 36b534be531e..920fcf7b8c61 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -6934,8 +6934,7 @@ nfscl_dofflayoutio(vnode_t vp, struct uio *uiop, int *iomode, int *must_commit,
tcred = NFSNEWCRED(cred);
tcred->cr_uid = flp->nfsfl_ffm[mirror].user;
tcred->cr_gid = flp->nfsfl_ffm[mirror].group;
- /* XXXKE Fix this if cr_gid gets separated out. */
- tcred->cr_ngroups = 1;
+ tcred->cr_ngroups = 0;
} else
tcred = cred;
if (rwflag == NFSV4OPEN_ACCESSREAD)
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 4f0d5946d6b9..8c427c66c156 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -3463,9 +3463,10 @@ nfsd_excred(struct nfsrv_descript *nd, struct nfsexstuff *exp,
NFSVNO_EXPORTANON(exp) ||
(nd->nd_flag & ND_AUTHNONE) != 0) {
nd->nd_cred->cr_uid = credanon->cr_uid;
+ nd->nd_cred->cr_gid = credanon->cr_gid;
/*
* 'credanon' is already a 'struct ucred' that was built
- * internally with calls to crsetgroups_fallback(), so
+ * internally with calls to crsetgroups_and_egid(), so
* we don't need a fallback here.
*/
crsetgroups(nd->nd_cred, credanon->cr_ngroups,
diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c
index d1b6198ba0e1..d6832b4f74be 100644
--- a/sys/fs/nfsserver/nfs_nfsdsocket.c
+++ b/sys/fs/nfsserver/nfs_nfsdsocket.c
@@ -1425,7 +1425,7 @@ nfsrv_createrootcred(void)
cr = crget();
cr->cr_uid = cr->cr_ruid = cr->cr_svuid = UID_ROOT;
- crsetgroups_fallback(cr, 0, NULL, GID_WHEEL);
+ crsetgroups_and_egid(cr, 0, NULL, GID_WHEEL);
cr->cr_rgid = cr->cr_svgid = cr->cr_gid;
cr->cr_prison = curthread->td_ucred->cr_prison;
prison_hold(cr->cr_prison);
diff --git a/sys/kern/coredump_vnode.c b/sys/kern/coredump_vnode.c
new file mode 100644
index 000000000000..8b857e9aa4a2
--- /dev/null
+++ b/sys/kern/coredump_vnode.c
@@ -0,0 +1,562 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause
+ *
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * - kern_sig.c
+ */
+/*
+ * Copyright (c) 1993, David Greenman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * -kern_exec.c
+ */
+
+#include <sys/systm.h>
+#include <sys/acct.h>
+#include <sys/compressor.h>
+#include <sys/devctl.h>
+#include <sys/fcntl.h>
+#include <sys/jail.h>
+#include <sys/limits.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/sbuf.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/syslog.h>
+#include <sys/ucoredump.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+
+#include <security/audit/audit.h>
+
+#define GZIP_SUFFIX ".gz" /* appended to the core name for COMPRESS_GZIP */
+#define ZSTD_SUFFIX ".zst" /* appended to the core name for COMPRESS_ZSTD */
+/*
+ * MAX_NUM_CORE_FILES is the largest accepted count of "%I"-indexed core
+ * files; NUM_CORE_FILES is the build-time default and may be predefined.
+ */
+#define MAX_NUM_CORE_FILES 100000
+#ifndef NUM_CORE_FILES
+#define NUM_CORE_FILES 5
+#endif
+/*
+ * Coredumper descriptor for this file.  The SYSINIT below hands it to
+ * coredumper_register() at SI_SUB_EXEC.
+ */
+static coredumper_handle_fn coredump_vnode;
+static struct coredumper vnode_coredumper = {
+ .cd_name = "vnode_coredumper",
+ .cd_handle = coredump_vnode,
+};
+
+SYSINIT(vnode_coredumper_register, SI_SUB_EXEC, SI_ORDER_ANY,
+ coredumper_register, &vnode_coredumper);
+
+_Static_assert(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES,
+ "NUM_CORE_FILES is out of range (0 to " __STRING(MAX_NUM_CORE_FILES) ")");
+static int num_cores = NUM_CORE_FILES; /* limit for "%I" names; set via debug.ncores */
+
+static int capmode_coredump; /* when set, core opens pass VN_OPEN_NOCAPCHECK */
+SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN,
+ &capmode_coredump, 0, "Allow processes in capability mode to dump core");
+
+static int set_core_nodump_flag = 0; /* when set, UF_NODUMP is applied to the core file */
+SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag,
+ 0, "Enable setting the NODUMP flag on coredump files");
+
+static int coredump_devctl = 0; /* when set, a successful dump sends a devctl event */
+SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl,
+ 0, "Generate a devctl notification when processes coredump");
+
+/*
+ * corefilename[] is protected by the allproc_lock.
+ * It holds the format string expanded by corefile_open(); the default
+ * is "%N.core" and the "kern.corefile" tunable can replace it.
+ */
+static char corefilename[MAXPATHLEN] = { "%N.core" };
+TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename));
+
+/*
+ * Handler for the kern.corefile sysctl: reads or replaces corefilename[].
+ * The string is accessed with allproc_lock held exclusively, which is the
+ * lock documented as protecting corefilename[].
+ */
+static int
+sysctl_kern_corefile(SYSCTL_HANDLER_ARGS)
+{
+	int rc;
+
+	sx_xlock(&allproc_lock);
+	rc = sysctl_handle_string(oidp, corefilename, sizeof(corefilename),
+	    req);
+	sx_xunlock(&allproc_lock);
+	return (rc);
+}
+SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW |
+    CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A",
+    "Process corefile name format string");
+
+/*
+ * Handler for the debug.ncores sysctl.  Reports the current num_cores and,
+ * on a write, stores the new value clamped to [0, MAX_NUM_CORE_FILES].
+ * Returns the error from sysctl_handle_int(), or 0.
+ */
+static int
+sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS)
+{
+	int requested = num_cores;
+	int rc;
+
+	rc = sysctl_handle_int(oidp, &requested, 0, req);
+	if (rc != 0)
+		return (rc);
+	/* Read-only request: nothing to store. */
+	if (req->newptr == NULL)
+		return (0);
+
+	if (requested < 0)
+		requested = 0;
+	else if (requested > MAX_NUM_CORE_FILES)
+		requested = MAX_NUM_CORE_FILES;
+	num_cores = requested;
+	return (0);
+}
+SYSCTL_PROC(_debug, OID_AUTO, ncores,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int),
+    sysctl_debug_num_cores_check, "I",
+    "Maximum number of generated process corefiles while using index format");
+
+static void
+vnode_close_locked(struct thread *td, struct vnode *vp)
+{
+ /*
+ * Release the vnode lock first, then close the FWRITE open using the
+ * thread's credentials.  As the name suggests, callers pass a vnode
+ * they currently hold locked.
+ */
+ VOP_UNLOCK(vp);
+ vn_close(vp, FWRITE, td->td_ucred, td);
+}
+
+/*
+ * Writer callback for vnode-backed core dumps: writes 'len' bytes from
+ * 'base' (in address space 'seg') at 'offset' of the vnode recorded in the
+ * writer context, and returns whatever vn_rdwr_inchunks() returns.
+ * IO_RANGELOCKED is passed because coredump_vnode() already holds the
+ * range lock on the file.
+ */
+int
+core_vn_write(const struct coredump_writer *cdw, const void *base, size_t len,
+    off_t offset, enum uio_seg seg, struct ucred *cred, size_t *resid,
+    struct thread *td)
+{
+	struct coredump_vnode_ctx *vctx;
+	void *src;
+	int ioflags;
+
+	vctx = cdw->ctx;
+	src = __DECONST(void *, base);
+	ioflags = IO_UNIT | IO_DIRECT | IO_RANGELOCKED;
+
+	return (vn_rdwr_inchunks(UIO_WRITE, vctx->vp, src, len, offset, seg,
+	    ioflags, cred, vctx->fcred, resid, td));
+}
+
+/*
+ * Extend callback for vnode-backed core dumps: sets the size of the core
+ * file to 'newsz' with vn_truncate_locked(), bracketed by
+ * vn_start_write()/vn_finished_write() and an exclusive vnode lock.
+ * Returns the vn_start_write() error if that fails, otherwise the result
+ * of the truncate.
+ */
+int
+core_vn_extend(const struct coredump_writer *cdw, off_t newsz,
+    struct ucred *cred)
+{
+	struct coredump_vnode_ctx *vctx = cdw->ctx;
+	struct vnode *corevp = vctx->vp;
+	struct mount *mp;
+	int rc;
+
+	rc = vn_start_write(corevp, &mp, V_WAIT);
+	if (rc == 0) {
+		vn_lock(corevp, LK_EXCLUSIVE | LK_RETRY);
+		rc = vn_truncate_locked(corevp, newsz, false, cred);
+		VOP_UNLOCK(corevp);
+		vn_finished_write(mp);
+	}
+	return (rc);
+}
+
+/*
+ * Overwrite the 'indexlen'-character index field at name[indexpos] with
+ * 'idx', zero-padded to the field width, leaving the rest of the path intact.
+ */
+static void
+corefile_set_index(char *name, int indexpos, int indexlen, int idx)
+{
+	char saved;
+
+	/* snprintf() NUL-terminates; save and restore the byte it clobbers. */
+	saved = name[indexpos + indexlen];
+	(void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen, idx);
+	name[indexpos + indexlen] = saved;
+}
+
+/*
+ * If the core format has a %I in it, then we need to check
+ * for existing corefiles before defining a name.
+ * To do this we iterate over 0..ncores to find a
+ * non-existing core file name to use. If all core files are
+ * already used we choose the oldest one.
+ *
+ * 'name' is the expanded path with an 'indexlen'-character index field at
+ * offset 'indexpos'; the field is rewritten in place for every candidate.
+ * On success 0 is returned, '*vpp' is the opened and locked vnode, and
+ * 'name' holds the path of the file that was actually chosen.
+ *
+ * Errors: EINVAL when 'ncores' allows no core file at all, EFAULT when
+ * every slot exists and the process is P_SUGID (an existing file is never
+ * overwritten for such processes), otherwise the error from
+ * vn_open_cred(), VOP_GETATTR() or vn_lock().  '*vpp' is not written on
+ * error.
+ */
+static int
+corefile_open_last(struct thread *td, char *name, int indexpos,
+    int indexlen, int ncores, struct vnode **vpp)
+{
+	struct vnode *oldvp, *nextvp, *vp;
+	struct vattr vattr;
+	struct nameidata nd;
+	int error, i, flags, oflags, cmode, oldidx;
+	struct timespec lasttime;
+
+	/*
+	 * With no slots the loop below never runs, which used to leave
+	 * 'error' uninitialized and could report success with a NULL vnode.
+	 */
+	if (ncores <= 0)
+		return (EINVAL);
+
+	nextvp = oldvp = NULL;
+	oldidx = 0;
+	cmode = S_IRUSR | S_IWUSR;
+	oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
+	    (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
+
+	for (i = 0; i < ncores; i++) {
+		flags = O_CREAT | FWRITE | O_NOFOLLOW;
+		corefile_set_index(name, indexpos, indexlen, i);
+
+		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
+		error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
+		    NULL);
+		if (error != 0)
+			break;
+
+		vp = nd.ni_vp;
+		NDFREE_PNBUF(&nd);
+		/*
+		 * O_CREAT still set in 'flags' is taken to mean the file was
+		 * newly created (NOTE(review): relies on vn_open_cred()
+		 * clearing it for pre-existing files); use this free slot.
+		 */
+		if ((flags & O_CREAT) == O_CREAT) {
+			nextvp = vp;
+			break;
+		}
+
+		error = VOP_GETATTR(vp, &vattr, td->td_ucred);
+		if (error != 0) {
+			vnode_close_locked(td, vp);
+			break;
+		}
+
+		/*
+		 * Remember the existing file with the oldest mtime; it is
+		 * kept open but unlocked.  'lasttime' is only read once
+		 * 'oldvp' has been set.
+		 */
+		if (oldvp == NULL ||
+		    lasttime.tv_sec > vattr.va_mtime.tv_sec ||
+		    (lasttime.tv_sec == vattr.va_mtime.tv_sec &&
+		    lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) {
+			if (oldvp != NULL)
+				vn_close(oldvp, FWRITE, td->td_ucred, td);
+			oldvp = vp;
+			oldidx = i;
+			VOP_UNLOCK(oldvp);
+			lasttime = vattr.va_mtime;
+		} else {
+			vnode_close_locked(td, vp);
+		}
+	}
+
+	if (oldvp != NULL) {
+		if (nextvp == NULL) {
+			if ((td->td_proc->p_flag & P_SUGID) != 0) {
+				error = EFAULT;
+				vn_close(oldvp, FWRITE, td->td_ucred, td);
+			} else {
+				/*
+				 * Reuse the oldest core.  Note that this
+				 * replaces any error left by the scan loop
+				 * with the vn_lock() result.
+				 */
+				nextvp = oldvp;
+				error = vn_lock(nextvp, LK_EXCLUSIVE);
+				if (error != 0) {
+					vn_close(nextvp, FWRITE, td->td_ucred,
+					    td);
+					nextvp = NULL;
+				} else {
+					/*
+					 * 'name' still carries the last index
+					 * probed; make it name the file that
+					 * is really being overwritten.
+					 */
+					corefile_set_index(name, indexpos,
+					    indexlen, oldidx);
+				}
+			}
+		} else {
+			vn_close(oldvp, FWRITE, td->td_ucred, td);
+		}
+	}
+	if (error != 0) {
+		/*
+		 * Release the vnode we still hold locked.  'oldvp' has
+		 * already been closed or is the same vnode at this point.
+		 */
+		if (nextvp != NULL)
+			vnode_close_locked(td, nextvp);
+	} else {
+		*vpp = nextvp;
+	}
+
+	return (error);
+}
+
+/*
+ * corefile_open(comm, uid, pid, td, compress, signum, vpp, namep)
+ * Expand the name described in corefilename and open/create the core file.
+ * corefilename is a printf-like string with these format specifiers:
+ *	%%	a literal '%'
+ *	%H	host name as returned by getcredhostname()
+ *	%I	index; the first %I selects a free (or the oldest) slot among
+ *		num_cores files, later occurrences are emitted as "%I"
+ *	%N	name of process (comm)
+ *	%P	process id (pid)
+ *	%S	signal number (signum)
+ *	%U	user id (uid)
+ * Unknown specifiers are logged and dropped.
+ * For example, "%N.core" is the default; they can be disabled completely
+ * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
+ * This is controlled by the sysctl variable kern.corefile (see above).
+ *
+ * GZIP_SUFFIX or ZSTD_SUFFIX is appended according to 'compress'.
+ * Returns 0 with the opened vnode in '*vpp' and the expanded path in
+ * '*namep' (MAXPATHLEN bytes of M_TEMP memory that the caller must free).
+ * Returns ENOMEM if the expanded name does not fit, otherwise the error
+ * from opening the file; nothing is handed back to the caller on error.
+ */
+static int
+corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td,
+    int compress, int signum, struct vnode **vpp, char **namep)
+{
+	struct sbuf sb;
+	struct nameidata nd;
+	const char *format;
+	char *hostname, *name;
+	int cmode, error, flags, i, indexpos, indexlen, oflags, ncores;
+
+	hostname = NULL;
+	format = corefilename;
+	name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO);
+	indexlen = 0;
+	indexpos = -1;
+	ncores = num_cores;
+	(void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN);
+	/* corefilename[] is protected by allproc_lock while it is parsed. */
+	sx_slock(&allproc_lock);
+	for (i = 0; format[i] != '\0'; i++) {
+		switch (format[i]) {
+		case '%':	/* Format character */
+			i++;
+			switch (format[i]) {
+			case '\0':
+				/*
+				 * A lone '%' ends the format.  Step back so
+				 * the loop increment lands on the terminator
+				 * again instead of walking past it into
+				 * whatever follows in the buffer.
+				 */
+				log(LOG_ERR,
+				    "Trailing %% in corename `%s'\n", format);
+				i--;
+				break;
+			case '%':
+				sbuf_putc(&sb, '%');
+				break;
+			case 'H':	/* hostname */
+				if (hostname == NULL) {
+					hostname = malloc(MAXHOSTNAMELEN,
+					    M_TEMP, M_WAITOK);
+				}
+				getcredhostname(td->td_ucred, hostname,
+				    MAXHOSTNAMELEN);
+				sbuf_cat(&sb, hostname);
+				break;
+			case 'I':	/* autoincrementing index */
+				if (indexpos != -1) {
+					sbuf_printf(&sb, "%%I");
+					break;
+				}
+
+				/*
+				 * Reserve a field wide enough for the largest
+				 * index; corefile_open_last() fills it in.
+				 */
+				indexpos = sbuf_len(&sb);
+				sbuf_printf(&sb, "%u", ncores - 1);
+				indexlen = sbuf_len(&sb) - indexpos;
+				break;
+			case 'N':	/* process name */
+				sbuf_printf(&sb, "%s", comm);
+				break;
+			case 'P':	/* process id */
+				sbuf_printf(&sb, "%u", pid);
+				break;
+			case 'S':	/* signal number */
+				sbuf_printf(&sb, "%i", signum);
+				break;
+			case 'U':	/* user id */
+				sbuf_printf(&sb, "%u", uid);
+				break;
+			default:
+				log(LOG_ERR,
+				    "Unknown format character %c in "
+				    "corename `%s'\n", format[i], format);
+				break;
+			}
+			break;
+		default:
+			sbuf_putc(&sb, format[i]);
+			break;
+		}
+	}
+	sx_sunlock(&allproc_lock);
+	free(hostname, M_TEMP);
+	if (compress == COMPRESS_GZIP)
+		sbuf_cat(&sb, GZIP_SUFFIX);
+	else if (compress == COMPRESS_ZSTD)
+		sbuf_cat(&sb, ZSTD_SUFFIX);
+	/* The sbuf is fixed-length, so an overlong name shows up as an error. */
+	if (sbuf_error(&sb) != 0) {
+		log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too "
+		    "long\n", (long)pid, comm, (u_long)uid);
+		sbuf_delete(&sb);
+		free(name, M_TEMP);
+		return (ENOMEM);
+	}
+	sbuf_finish(&sb);
+	sbuf_delete(&sb);
+
+	if (indexpos != -1) {
+		error = corefile_open_last(td, name, indexpos, indexlen, ncores,
+		    vpp);
+		if (error != 0) {
+			log(LOG_ERR,
+			    "pid %d (%s), uid (%u): Path `%s' failed "
+			    "on initial open test, error = %d\n",
+			    pid, comm, uid, name, error);
+		}
+	} else {
+		cmode = S_IRUSR | S_IWUSR;
+		oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
+		    (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
+		flags = O_CREAT | FWRITE | O_NOFOLLOW;
+		/* P_SUGID processes must not write into an existing file. */
+		if ((td->td_proc->p_flag & P_SUGID) != 0)
+			flags |= O_EXCL;
+
+		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
+		error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
+		    NULL);
+		if (error == 0) {
+			*vpp = nd.ni_vp;
+			NDFREE_PNBUF(&nd);
+		}
+	}
+
+	if (error != 0) {
+#ifdef AUDIT
+		audit_proc_coredump(td, name, error);
+#endif
+		free(name, M_TEMP);
+		return (error);
+	}
+	*namep = name;
+	return (0);
+}
+
+/*
+ * The vnode dumper is the traditional coredump handler. Our policy and limits
+ * are generally checked already, so it creates the coredump name and passes on
+ * a vnode and a size limit to the process-specific coredump routine if there is
+ * one. If there _is not_ one, it returns ENOSYS; otherwise it returns the
+ * error from the process-specific routine.
+ *
+ * Must be entered with the process lock held (asserted below); the lock is
+ * dropped before any file work and is not held on return.  Other results:
+ * the error from corefile_open(), EFAULT when the opened file is not a
+ * regular, singly-linked, non-system file owned by the effective uid, and
+ * the vn_close() error when the dump itself succeeded.  When the dump
+ * succeeded and kern.coredump_devctl is non-zero a devctl notification
+ * describing the dump is sent as well.
+ */
+static int
+coredump_vnode(struct thread *td, off_t limit)
+{
+ struct proc *p = td->td_proc;
+ struct ucred *cred = td->td_ucred;
+ struct vnode *vp;
+ struct coredump_vnode_ctx wctx;
+ struct coredump_writer cdw = { };
+ struct flock lf;
+ struct vattr vattr;
+ size_t fullpathsize;
+ int error, error1, jid, locked, ppid, sig;
+ char *name; /* name of corefile */
+ void *rl_cookie;
+ char *fullpath, *freepath = NULL;
+ struct sbuf *sb;
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ /* Snapshot these fields while the process lock is still held. */
+ ppid = p->p_oppid;
+ sig = p->p_sig;
+ jid = p->p_ucred->cr_prison->pr_id;
+ PROC_UNLOCK(p);
+
+ error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td,
+ compress_user_cores, sig, &vp, &name);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Don't dump to non-regular files or files with links.
+ * Do not dump into system files. Effective user must own the corefile.
+ */
+ if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 ||
+ vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 ||
+ vattr.va_uid != cred->cr_uid) {
+ VOP_UNLOCK(vp);
+ error = EFAULT;
+ goto out;
+ }
+
+ VOP_UNLOCK(vp);
+
+ /* Postpone other writers, including core dumps of other processes. */
+ rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
+ /*
+ * Try for an advisory write lock starting at offset 0 with l_len == 0.
+ * Failure is tolerated; 'locked' records whether it must be released.
+ */
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ lf.l_type = F_WRLCK;
+ locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0);
+ /*
+ * Truncate the file (va_size = 0) and optionally mark it UF_NODUMP.
+ * The VOP_SETATTR() result is not checked.
+ */
+ VATTR_NULL(&vattr);
+ vattr.va_size = 0;
+ if (set_core_nodump_flag)
+ vattr.va_flags = UF_NODUMP;
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ VOP_SETATTR(vp, &vattr, cred);
+ VOP_UNLOCK(vp);
+ PROC_LOCK(p);
+ p->p_acflag |= ACORE;
+ PROC_UNLOCK(p);
+ /* Describe the target vnode to the dumper via the writer callbacks. */
+ wctx.vp = vp;
+ wctx.fcred = NOCRED;
+
+ cdw.ctx = &wctx;
+ cdw.write_fn = core_vn_write;
+ cdw.extend_fn = core_vn_extend;
+
+ if (p->p_sysent->sv_coredump != NULL) {
+ error = p->p_sysent->sv_coredump(td, &cdw, limit, 0);
+ } else {
+ error = ENOSYS;
+ }
+
+ if (locked) {
+ lf.l_type = F_UNLCK;
+ VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
+ }
+ vn_rangelock_unlock(vp, rl_cookie);
+
+ /*
+ * Notify the userland helper that a process triggered a core dump.
+ * This allows the helper to run an automated debugging session.
+ */
+ if (error != 0 || coredump_devctl == 0)
+ goto out;
+ sb = sbuf_new_auto();
+ if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0)
+ goto out2;
+ sbuf_cat(sb, "comm=\"");
+ devctl_safe_quote_sb(sb, fullpath);
+ free(freepath, M_TEMP);
+ sbuf_cat(sb, "\" core=\"");
+
+ /*
+ * We can't lookup core file vp directly. When we're replacing a core, and
+ * other random times, we flush the name cache, so it will fail. Instead,
+ * if the path of the core is relative, add the current dir in front of it.
+ */
+ if (name[0] != '/') {
+ fullpathsize = MAXPATHLEN;
+ freepath = malloc(fullpathsize, M_TEMP, M_WAITOK);
+ if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) {
+ free(freepath, M_TEMP);
+ goto out2;
+ }
+ devctl_safe_quote_sb(sb, fullpath);
+ free(freepath, M_TEMP);
+ sbuf_putc(sb, '/');
+ }
+ devctl_safe_quote_sb(sb, name);
+ sbuf_putc(sb, '"');
+
+ sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d",
+ jid, p->p_pid, ppid, sig);
+ if (sbuf_finish(sb) == 0)
+ devctl_notify("kernel", "signal", "coredump", sbuf_data(sb));
+out2:
+ sbuf_delete(sb);
+out:
+ error1 = vn_close(vp, FWRITE, cred, td); /* reported only if the dump itself succeeded */
+ if (error == 0)
+ error = error1;
+#ifdef AUDIT
+ audit_proc_coredump(td, name, error);
+#endif
+ free(name, M_TEMP);
+ return (error);
+}
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index b7ffbe68b483..2690ad3b2679 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -64,6 +64,7 @@
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
+#include <sys/ucoredump.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/eventhandler.h>
@@ -1562,9 +1563,6 @@ struct note_info {
TAILQ_HEAD(note_info_list, note_info);
-extern int compress_user_cores;
-extern int compress_user_cores_level;
-
static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_dumpable_segment(struct thread *, segment_callback, void *,
@@ -1595,7 +1593,7 @@ core_compressed_write(void *base, size_t len, off_t offset, void *arg)
}
int
-__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
+__elfN(coredump)(struct thread *td, struct coredump_writer *cdw, off_t limit, int flags)
{
struct ucred *cred = td->td_ucred;
int compm, error = 0;
@@ -1625,9 +1623,8 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
/* Set up core dump parameters. */
params.offset = 0;
params.active_cred = cred;
- params.file_cred = NOCRED;
params.td = td;
- params.vp = vp;
+ params.cdw = cdw;
params.comp = NULL;
#ifdef RACCT
@@ -1662,6 +1659,12 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
}
+ if (cdw->init_fn != NULL) {
+ error = (*cdw->init_fn)(cdw, &params);
+ if (error != 0)
+ goto done;
+ }
+
/*
* Allocate memory for building the header, fill it up,
* and write it out following the notes.
diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c
index 5d9e2f2f326b..d7eb82d5f259 100644
--- a/sys/kern/kern_cpuset.c
+++ b/sys/kern/kern_cpuset.c
@@ -530,7 +530,7 @@ _domainset_create(struct domainset *domain, struct domainlist *freelist)
* remove them and update the domainset accordingly. If only empty
* domains are present, we must return failure.
*/
-static bool
+bool
domainset_empty_vm(struct domainset *domain)
{
domainset_t empty;
@@ -2409,82 +2409,92 @@ sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap)
}
int
-kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
- id_t id, size_t domainsetsize, const domainset_t *maskp, int policy,
- const struct cpuset_copy_cb *cb)
+domainset_populate(struct domainset *domain, const domainset_t *mask, int policy,
+ size_t mask_size)
{
- struct cpuset *nset;
- struct cpuset *set;
- struct thread *ttd;
- struct proc *p;
- struct domainset domain;
- domainset_t *mask;
- int error;
- if (domainsetsize < sizeof(domainset_t) ||
- domainsetsize > DOMAINSET_MAXSIZE / NBBY)
- return (ERANGE);
if (policy <= DOMAINSET_POLICY_INVALID ||
- policy > DOMAINSET_POLICY_MAX)
+ policy > DOMAINSET_POLICY_MAX) {
return (EINVAL);
- error = cpuset_check_capabilities(td, level, which, id);
- if (error != 0)
- return (error);
- memset(&domain, 0, sizeof(domain));
- mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
- error = cb->cpuset_copyin(maskp, mask, domainsetsize);
- if (error)
- goto out;
+ }
+
/*
* Verify that no high bits are set.
*/
- if (domainsetsize > sizeof(domainset_t)) {
- char *end;
- char *cp;
+ if (mask_size > sizeof(domainset_t)) {
+ const char *end;
+ const char *cp;
- end = cp = (char *)&mask->__bits;
- end += domainsetsize;
+ end = cp = (const char *)&mask->__bits;
+ end += mask_size;
cp += sizeof(domainset_t);
- while (cp != end)
+ while (cp != end) {
if (*cp++ != 0) {
- error = EINVAL;
- goto out;
+ return (EINVAL);
}
+ }
}
if (DOMAINSET_EMPTY(mask)) {
- error = EDEADLK;
- goto out;
+ return (EDEADLK);
}
- DOMAINSET_COPY(mask, &domain.ds_mask);
- domain.ds_policy = policy;
+ DOMAINSET_COPY(mask, &domain->ds_mask);
+ domain->ds_policy = policy;
/*
* Sanitize the provided mask.
*/
- if (!DOMAINSET_SUBSET(&all_domains, &domain.ds_mask)) {
- error = EINVAL;
- goto out;
+ if (!DOMAINSET_SUBSET(&all_domains, &domain->ds_mask)) {
+ return (EINVAL);
}
/* Translate preferred policy into a mask and fallback. */
if (policy == DOMAINSET_POLICY_PREFER) {
/* Only support a single preferred domain. */
- if (DOMAINSET_COUNT(&domain.ds_mask) != 1) {
- error = EINVAL;
- goto out;
+ if (DOMAINSET_COUNT(&domain->ds_mask) != 1) {
+ return (EINVAL);
}
- domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1;
+ domain->ds_prefer = DOMAINSET_FFS(&domain->ds_mask) - 1;
/* This will be constrained by domainset_shadow(). */
- DOMAINSET_COPY(&all_domains, &domain.ds_mask);
+ DOMAINSET_COPY(&all_domains, &domain->ds_mask);
}
+ return (0);
+}
+
+int
+kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
+ id_t id, size_t domainsetsize, const domainset_t *maskp, int policy,
+ const struct cpuset_copy_cb *cb)
+{
+ struct cpuset *nset;
+ struct cpuset *set;
+ struct thread *ttd;
+ struct proc *p;
+ struct domainset domain;
+ domainset_t *mask;
+ int error;
+
+ error = cpuset_check_capabilities(td, level, which, id);
+ if (error != 0)
+ return (error);
+ if (domainsetsize < sizeof(domainset_t) ||
+ domainsetsize > DOMAINSET_MAXSIZE / NBBY)
+ return (ERANGE);
+ memset(&domain, 0, sizeof(domain));
+ mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO);
+ error = cb->cpuset_copyin(maskp, mask, domainsetsize);
+ if (error)
+ goto out;
+ error = domainset_populate(&domain, mask, policy, domainsetsize);
+ if (error)
+ goto out;
+
/*
* When given an impossible policy, fall back to interleaving
* across all domains.
*/
if (domainset_empty_vm(&domain))
domainset_copy(domainset2, &domain);
-
switch (level) {
case CPU_LEVEL_ROOT:
case CPU_LEVEL_CPUSET:
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 03268365891e..0fc2d0e7f1bc 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -70,6 +70,7 @@
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/timers.h>
+#include <sys/ucoredump.h>
#include <sys/umtxvar.h>
#include <sys/vnode.h>
#include <sys/wait.h>
@@ -2002,10 +2003,14 @@ int
core_write(struct coredump_params *cp, const void *base, size_t len,
off_t offset, enum uio_seg seg, size_t *resid)
{
+ return ((*cp->cdw->write_fn)(cp->cdw, base, len, offset, seg,
+ cp->active_cred, resid, cp->td));
+}
- return (vn_rdwr_inchunks(UIO_WRITE, cp->vp, __DECONST(void *, base),
- len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED,
- cp->active_cred, cp->file_cred, resid, cp->td));
+static int
+core_extend(struct coredump_params *cp, off_t newsz)
+{
+ return ((*cp->cdw->extend_fn)(cp->cdw, newsz, cp->active_cred));
}
int
@@ -2013,7 +2018,6 @@ core_output(char *base, size_t len, off_t offset, struct coredump_params *cp,
void *tmpbuf)
{
vm_map_t map;
- struct mount *mp;
size_t resid, runlen;
int error;
bool success;
@@ -2068,14 +2072,7 @@ core_output(char *base, size_t len, off_t offset, struct coredump_params *cp,
}
}
if (!success) {
- error = vn_start_write(cp->vp, &mp, V_WAIT);
- if (error != 0)
- break;
- vn_lock(cp->vp, LK_EXCLUSIVE | LK_RETRY);
- error = vn_truncate_locked(cp->vp, offset + runlen,
- false, cp->td->td_ucred);
- VOP_UNLOCK(cp->vp);
- vn_finished_write(mp);
+ error = core_extend(cp, offset + runlen);
if (error != 0)
break;
}
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index d4529e096929..7ef1d19f0ea8 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -3466,7 +3466,7 @@ prison_check_af(struct ucred *cred, int af)
pr = cred->cr_prison;
#ifdef VIMAGE
/* Prisons with their own network stack are not limited. */
- if (prison_owns_vnet(cred))
+ if (prison_owns_vnet(pr))
return (0);
#endif
@@ -3531,7 +3531,7 @@ prison_if(struct ucred *cred, const struct sockaddr *sa)
KASSERT(sa != NULL, ("%s: sa is NULL", __func__));
#ifdef VIMAGE
- if (prison_owns_vnet(cred))
+ if (prison_owns_vnet(cred->cr_prison))
return (0);
#endif
@@ -3648,7 +3648,7 @@ jailed_without_vnet(struct ucred *cred)
if (!jailed(cred))
return (false);
#ifdef VIMAGE
- if (prison_owns_vnet(cred))
+ if (prison_owns_vnet(cred->cr_prison))
return (false);
#endif
@@ -3711,20 +3711,17 @@ getjailname(struct ucred *cred, char *name, size_t len)
#ifdef VIMAGE
/*
- * Determine whether the prison represented by cred owns
- * its vnet rather than having it inherited.
- *
- * Returns true in case the prison owns the vnet, false otherwise.
+ * Determine whether the prison owns its VNET.
*/
bool
-prison_owns_vnet(struct ucred *cred)
+prison_owns_vnet(struct prison *pr)
{
/*
* vnets cannot be added/removed after jail creation,
* so no need to lock here.
*/
- return ((cred->cr_prison->pr_flags & PR_VNET) != 0);
+ return ((pr->pr_flags & PR_VNET) != 0);
}
#endif
@@ -4425,7 +4422,7 @@ sysctl_jail_vnet(SYSCTL_HANDLER_ARGS)
#ifdef VIMAGE
struct ucred *cred = req->td->td_ucred;
- havevnet = jailed(cred) && prison_owns_vnet(cred);
+ havevnet = jailed(cred) && prison_owns_vnet(cred->cr_prison);
#else
havevnet = 0;
#endif
diff --git a/sys/kern/kern_prot.c b/sys/kern/kern_prot.c
index 0f0bc056cafd..6bdef84a34c1 100644
--- a/sys/kern/kern_prot.c
+++ b/sys/kern/kern_prot.c
@@ -99,12 +99,11 @@ static inline void
groups_check_positive_len(int ngrp)
{
MPASS2(ngrp >= 0, "negative number of groups");
- MPASS2(ngrp != 0, "at least one group expected (effective GID)");
}
static inline void
groups_check_max_len(int ngrp)
{
- MPASS2(ngrp <= ngroups_max + 1, "too many groups");
+ MPASS2(ngrp <= ngroups_max, "too many supplementary groups");
}
static void groups_normalize(int *ngrp, gid_t *groups);
@@ -321,10 +320,17 @@ int
sys_getgroups(struct thread *td, struct getgroups_args *uap)
{
struct ucred *cred;
+ gid_t *ugidset;
int ngrp, error;
cred = td->td_ucred;
- ngrp = cred->cr_ngroups;
+
+ /*
+ * cr_gid has been moved out of cr_groups, but we'll continue exporting
+ * the egid as groups[0] for the time being until we audit userland for
+ * any surprises.
+ */
+ ngrp = cred->cr_ngroups + 1;
if (uap->gidsetsize == 0) {
error = 0;
@@ -333,7 +339,14 @@ sys_getgroups(struct thread *td, struct getgroups_args *uap)
if (uap->gidsetsize < ngrp)
return (EINVAL);
- error = copyout(cred->cr_groups, uap->gidset, ngrp * sizeof(gid_t));
+ ugidset = uap->gidset;
+ error = copyout(&cred->cr_gid, ugidset, sizeof(*ugidset));
+ if (error != 0)
+ goto out;
+
+ if (ngrp > 1)
+ error = copyout(cred->cr_groups, ugidset + 1,
+ (ngrp - 1) * sizeof(*ugidset));
out:
td->td_retval[0] = ngrp;
return (error);
@@ -499,8 +512,8 @@ gidp_cmp(const void *p1, const void *p2)
}
/*
- * Final storage for groups (including the effective GID) will be returned via
- * 'groups'. '*groups' must be NULL on input, and if not equal to 'smallgroups'
+ * Final storage for supplementary groups will be returned via 'groups'.
+ * '*groups' must be NULL on input, and if not equal to 'smallgroups'
* on output, must be freed (M_TEMP) *even if* an error is returned.
*/
static int
@@ -525,15 +538,15 @@ kern_setcred_copyin_supp_groups(struct setcred *const wcred,
* now, to avoid having to allocate and copy again the
* supplementary groups.
*/
- *groups = wcred->sc_supp_groups_nb < CRED_SMALLGROUPS_NB ?
- smallgroups : malloc((wcred->sc_supp_groups_nb + 1) *
+ *groups = wcred->sc_supp_groups_nb <= CRED_SMALLGROUPS_NB ?
+ smallgroups : malloc(wcred->sc_supp_groups_nb *
sizeof(*groups), M_TEMP, M_WAITOK);
- error = copyin(wcred->sc_supp_groups, *groups + 1,
+ error = copyin(wcred->sc_supp_groups, *groups,
wcred->sc_supp_groups_nb * sizeof(*groups));
if (error != 0)
return (error);
- wcred->sc_supp_groups = *groups + 1;
+ wcred->sc_supp_groups = *groups;
} else {
wcred->sc_supp_groups_nb = 0;
wcred->sc_supp_groups = NULL;
@@ -652,9 +665,8 @@ sys_setcred(struct thread *td, struct setcred_args *uap)
* CAUTION: This function normalizes groups in 'wcred'.
*
* If 'preallocated_groups' is non-NULL, it must be an already allocated array
- * of size 'wcred->sc_supp_groups_nb + 1', with the supplementary groups
- * starting at index 1, and 'wcred->sc_supp_groups' then must point to the first
- * supplementary group.
+ * of size 'wcred->sc_supp_groups_nb' containing the supplementary groups, and
+ * 'wcred->sc_supp_groups' then must point to it.
*/
int
kern_setcred(struct thread *const td, const u_int flags,
@@ -685,13 +697,14 @@ kern_setcred(struct thread *const td, const u_int flags,
return (EINVAL);
if (preallocated_groups != NULL) {
groups = preallocated_groups;
- MPASS(preallocated_groups + 1 == wcred->sc_supp_groups);
+ MPASS(preallocated_groups == wcred->sc_supp_groups);
} else {
- groups = wcred->sc_supp_groups_nb < CRED_SMALLGROUPS_NB ?
- smallgroups :
- malloc((wcred->sc_supp_groups_nb + 1) *
- sizeof(*groups), M_TEMP, M_WAITOK);
- memcpy(groups + 1, wcred->sc_supp_groups,
+ if (wcred->sc_supp_groups_nb <= CRED_SMALLGROUPS_NB)
+ groups = smallgroups;
+ else
+ groups = malloc(wcred->sc_supp_groups_nb *
+ sizeof(*groups), M_TEMP, M_WAITOK);
+ memcpy(groups, wcred->sc_supp_groups,
wcred->sc_supp_groups_nb * sizeof(*groups));
}
}
@@ -726,16 +739,12 @@ kern_setcred(struct thread *const td, const u_int flags,
if (flags & SETCREDF_SVGID)
AUDIT_ARG_SGID(wcred->sc_svgid);
if (flags & SETCREDF_SUPP_GROUPS) {
- int ngrp = wcred->sc_supp_groups_nb;
-
/*
* Output the raw supplementary groups array for better
* traceability.
*/
- AUDIT_ARG_GROUPSET(groups + 1, ngrp);
- ++ngrp;
- groups_normalize(&ngrp, groups);
- wcred->sc_supp_groups_nb = ngrp - 1;
+ AUDIT_ARG_GROUPSET(groups, wcred->sc_supp_groups_nb);
+ groups_normalize(&wcred->sc_supp_groups_nb, groups);
}
/*
@@ -746,7 +755,7 @@ kern_setcred(struct thread *const td, const u_int flags,
new_cred = crget();
to_free_cred = new_cred;
if (flags & SETCREDF_SUPP_GROUPS)
- crextend(new_cred, wcred->sc_supp_groups_nb + 1);
+ crextend(new_cred, wcred->sc_supp_groups_nb);
#ifdef MAC
mac_cred_setcred_enter();
@@ -773,16 +782,11 @@ kern_setcred(struct thread *const td, const u_int flags,
/*
* Change groups.
- *
- * crsetgroups_internal() changes both the effective and supplementary
- * ones.
*/
- if (flags & SETCREDF_SUPP_GROUPS) {
- groups[0] = flags & SETCREDF_GID ? wcred->sc_gid :
- new_cred->cr_gid;
- crsetgroups_internal(new_cred, wcred->sc_supp_groups_nb + 1,
+ if (flags & SETCREDF_SUPP_GROUPS)
+ crsetgroups_internal(new_cred, wcred->sc_supp_groups_nb,
groups);
- } else if (flags & SETCREDF_GID)
+ if (flags & SETCREDF_GID)
change_egid(new_cred, wcred->sc_gid);
if (flags & SETCREDF_RGID)
change_rgid(new_cred, wcred->sc_rgid);
@@ -1206,6 +1210,7 @@ sys_setgroups(struct thread *td, struct setgroups_args *uap)
* setgroups() differ.
*/
gidsetsize = uap->gidsetsize;
+ /* XXXKE Limit to ngroups_max when we change the userland interface. */
if (gidsetsize > ngroups_max + 1 || gidsetsize < 0)
return (EINVAL);
@@ -1233,29 +1238,49 @@ kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups)
struct proc *p = td->td_proc;
struct ucred *newcred, *oldcred;
int ngrp, error;
+ gid_t egid;
ngrp = *ngrpp;
/* Sanity check size. */
+ /* XXXKE Limit to ngroups_max when we change the userland interface. */
if (ngrp < 0 || ngrp > ngroups_max + 1)
return (EINVAL);
AUDIT_ARG_GROUPSET(groups, ngrp);
+ /*
+ * setgroups(0, NULL) is a legitimate way of clearing the groups vector
+ * on non-BSD systems (which generally do not have the egid in the
+ * groups[0]). We risk security holes when running non-BSD software if
+ * we do not do the same. So we allow and treat 0 for 'ngrp' specially
+ * below (twice).
+ */
if (ngrp != 0) {
- /* We allow and treat 0 specially below. */
- groups_normalize(ngrpp, groups);
- ngrp = *ngrpp;
+ /*
+ * To maintain userland compat for now, we use the first group
+ * as our egid and we'll use the rest as our supplemental
+ * groups.
+ */
+ egid = groups[0];
+ ngrp--;
+ groups++;
+
+ groups_normalize(&ngrp, groups);
+ *ngrpp = ngrp;
}
newcred = crget();
- if (ngrp != 0)
- crextend(newcred, ngrp);
+ crextend(newcred, ngrp);
PROC_LOCK(p);
oldcred = crcopysafe(p, newcred);
#ifdef MAC
- error = ngrp == 0 ?
- /* If 'ngrp' is 0, we'll keep just the current effective GID. */
- mac_cred_check_setgroups(oldcred, 1, oldcred->cr_groups) :
- mac_cred_check_setgroups(oldcred, ngrp, groups);
+ /*
+ * We pass NULL here explicitly if we don't have any supplementary
+ * groups mostly for the sake of normalization, but also to avoid/detect
+ * a situation where a MAC module has some assumption about the layout
+ * of `groups` matching historical behavior.
+ */
+ error = mac_cred_check_setgroups(oldcred, ngrp,
+ ngrp == 0 ? NULL : groups);
if (error)
goto fail;
#endif
@@ -1264,16 +1289,14 @@ kern_setgroups(struct thread *td, int *ngrpp, gid_t *groups)
if (error)
goto fail;
- if (ngrp == 0) {
- /*
- * setgroups(0, NULL) is a legitimate way of clearing the
- * groups vector on non-BSD systems (which generally do not
- * have the egid in the groups[0]). We risk security holes
- * when running non-BSD software if we do not do the same.
- */
- newcred->cr_ngroups = 1;
- } else
- crsetgroups_internal(newcred, ngrp, groups);
+ /*
+ * If some groups were passed, the first one is currently the desired
+ * egid. This code is to be removed (along with the egid compat block
+ * above) when setgroups() is changed to take only supplementary groups.
+ */
+ if (ngrp != 0)
+ newcred->cr_gid = egid;
+ crsetgroups_internal(newcred, ngrp, groups);
setsugid(p);
proc_set_cred(p, newcred);
@@ -1693,11 +1716,11 @@ groups_check_normalized(int ngrp, const gid_t *groups)
groups_check_positive_len(ngrp);
groups_check_max_len(ngrp);
- if (ngrp == 1)
+ if (ngrp <= 1)
return;
- prev_g = groups[1];
- for (int i = 2; i < ngrp; ++i) {
+ prev_g = groups[0];
+ for (int i = 1; i < ngrp; ++i) {
const gid_t g = groups[i];
if (prev_g >= g)
@@ -1723,7 +1746,7 @@ group_is_supplementary(const gid_t gid, const struct ucred *const cred)
* Perform a binary search of the supplementary groups. This is
* possible because we sort the groups in crsetgroups().
*/
- return (bsearch(&gid, cred->cr_groups + 1, cred->cr_ngroups - 1,
+ return (bsearch(&gid, cred->cr_groups, cred->cr_ngroups,
sizeof(gid), gidp_cmp) != NULL);
}
@@ -2588,11 +2611,6 @@ void
crcopy(struct ucred *dest, struct ucred *src)
{
- /*
- * Ideally, 'cr_ngroups' should be moved out of 'struct ucred''s bcopied
- * area, but this would break the ABI, so is deferred until there is
- * a compelling need to change it.
- */
bcopy(&src->cr_startcopy, &dest->cr_startcopy,
(unsigned)((caddr_t)&src->cr_endcopy -
(caddr_t)&src->cr_startcopy));
@@ -2634,11 +2652,17 @@ cru2x(struct ucred *cr, struct xucred *xcr)
bzero(xcr, sizeof(*xcr));
xcr->cr_version = XUCRED_VERSION;
xcr->cr_uid = cr->cr_uid;
+ xcr->cr_gid = cr->cr_gid;
- ngroups = MIN(cr->cr_ngroups, XU_NGROUPS);
+ /*
+ * We use a union to alias cr_gid to cr_groups[0] in the xucred, so
+ * this is kind of ugly; cr_ngroups still includes the egid for our
+ * purposes to avoid bumping the xucred version.
+ */
+ ngroups = MIN(cr->cr_ngroups + 1, nitems(xcr->cr_groups));
xcr->cr_ngroups = ngroups;
- bcopy(cr->cr_groups, xcr->cr_groups,
- ngroups * sizeof(*cr->cr_groups));
+ bcopy(cr->cr_groups, xcr->cr_sgroups,
+ (ngroups - 1) * sizeof(*cr->cr_groups));
}
void
@@ -2772,7 +2796,8 @@ crextend(struct ucred *cr, int n)
size_t nbytes;
MPASS2(cr->cr_ref == 1, "'cr_ref' must be 1 (referenced, unshared)");
- MPASS2(cr->cr_ngroups == 0, "groups on 'cr' already set!");
+ MPASS2((cr->cr_flags & CRED_FLAG_GROUPSET) == 0,
+ "groups on 'cr' already set!");
groups_check_positive_len(n);
groups_check_max_len(n);
@@ -2809,12 +2834,8 @@ crextend(struct ucred *cr, int n)
/*
* Normalizes a set of groups to be applied to a 'struct ucred'.
*
- * The set of groups is an array that must comprise the effective GID as its
- * first element (so its length cannot be 0).
- *
- * Normalization ensures that elements after the first, which stand for the
- * supplementary groups, are sorted in ascending order and do not contain
- * duplicates.
+ * Normalization ensures that the supplementary groups are sorted in ascending
+ * order and do not contain duplicates.
*/
static void
groups_normalize(int *ngrp, gid_t *groups)
@@ -2825,15 +2846,15 @@ groups_normalize(int *ngrp, gid_t *groups)
groups_check_positive_len(*ngrp);
groups_check_max_len(*ngrp);
- if (*ngrp == 1)
+ if (*ngrp <= 1)
return;
- qsort(groups + 1, *ngrp - 1, sizeof(*groups), gidp_cmp);
+ qsort(groups, *ngrp, sizeof(*groups), gidp_cmp);
/* Remove duplicates. */
- prev_g = groups[1];
- ins_idx = 2;
- for (int i = 2; i < *ngrp; ++i) {
+ prev_g = groups[0];
+ ins_idx = 1;
+ for (int i = ins_idx; i < *ngrp; ++i) {
const gid_t g = groups[i];
if (g != prev_g) {
@@ -2870,13 +2891,14 @@ crsetgroups_internal(struct ucred *cr, int ngrp, const gid_t *groups)
bcopy(groups, cr->cr_groups, ngrp * sizeof(gid_t));
cr->cr_ngroups = ngrp;
+ cr->cr_flags |= CRED_FLAG_GROUPSET;
}
/*
* Copy groups in to a credential after expanding it if required.
*
* May sleep in order to allocate memory (except if, e.g., crextend() was called
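- * before with 'ngrp' or greater). Truncates the list to (ngroups_max + 1) if
- * it is too large. Array 'groups' doesn't need to be sorted. 'ngrp' must be
- * strictly positive.
+ * before with 'ngrp' or greater). Truncates the list to ngroups_max if
+ * it is too large. Array 'groups' doesn't need to be sorted. 'ngrp' may
+ * be 0, in which case the credential gets no supplementary groups.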
*/
@@ -2884,36 +2906,48 @@ void
crsetgroups(struct ucred *cr, int ngrp, const gid_t *groups)
{
- if (ngrp > ngroups_max + 1)
- ngrp = ngroups_max + 1;
+ if (ngrp > ngroups_max)
+ ngrp = ngroups_max;
+ cr->cr_ngroups = 0;
+ if (ngrp == 0) {
+ cr->cr_flags |= CRED_FLAG_GROUPSET;
+ return;
+ }
+
/*
* crextend() asserts that groups are not set, as it may allocate a new
* backing storage without copying the content of the old one. Since we
* are going to install a completely new set anyway, signal that we
* consider the old ones thrown away.
*/
- cr->cr_ngroups = 0;
+ cr->cr_flags &= ~CRED_FLAG_GROUPSET;
+
crextend(cr, ngrp);
crsetgroups_internal(cr, ngrp, groups);
groups_normalize(&cr->cr_ngroups, cr->cr_groups);
}
/*
- * Same as crsetgroups() but accepts an empty groups array.
+ * Same as crsetgroups() but sets the effective GID as well.
*
* This function ensures that an effective GID is always present in credentials.
- * An empty array is treated as a one-size one holding the passed effective GID
- * fallback.
+ * An empty array will only set the effective GID to the default_egid, while a
+ * non-empty array will peel off groups[0] to set as the effective GID and use
+ * the remainder, if any, as supplementary groups.
*/
void
-crsetgroups_fallback(struct ucred *cr, int ngrp, const gid_t *groups,
- const gid_t fallback)
+crsetgroups_and_egid(struct ucred *cr, int ngrp, const gid_t *groups,
+ const gid_t default_egid)
{
- if (ngrp == 0)
- /* Shortcut. */
- crsetgroups_internal(cr, 1, &fallback);
- else
- crsetgroups(cr, ngrp, groups);
+ if (ngrp == 0) {
+ cr->cr_gid = default_egid;
+ cr->cr_ngroups = 0;
+ cr->cr_flags |= CRED_FLAG_GROUPSET;
+ return;
+ }
+
+ crsetgroups(cr, ngrp - 1, groups + 1);
+ cr->cr_gid = groups[0];
}
/*
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 5d51aa675cb7..da0efac0598d 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -45,10 +45,10 @@
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/capsicum.h>
-#include <sys/compressor.h>
#include <sys/condvar.h>
#include <sys/devctl.h>
#include <sys/event.h>
+#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/jail.h>
@@ -80,6 +80,7 @@
#include <sys/syslog.h>
#include <sys/sysproto.h>
#include <sys/timers.h>
+#include <sys/ucoredump.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>
#include <sys/wait.h>
@@ -101,7 +102,6 @@ SDT_PROBE_DEFINE2(proc, , , signal__clear,
SDT_PROBE_DEFINE3(proc, , , signal__discard,
"struct thread *", "struct proc *", "int");
-static int coredump(struct thread *);
static int killpg1(struct thread *td, int sig, int pgid, int all,
ksiginfo_t *ksi);
static int issignal(struct thread *td);
@@ -126,11 +126,6 @@ const struct filterops sig_filtops = {
.f_event = filt_signal,
};
-static int kern_logsigexit = 1;
-SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW,
- &kern_logsigexit, 0,
- "Log processes quitting on abnormal signals to syslog(3)");
-
static int kern_forcesigexit = 1;
SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW,
&kern_forcesigexit, 0, "Force trap signal to be handled");
@@ -193,26 +188,6 @@ SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL);
(cr1)->cr_ruid == (cr2)->cr_uid || \
(cr1)->cr_uid == (cr2)->cr_uid)
-static int sugid_coredump;
-SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN,
- &sugid_coredump, 0, "Allow setuid and setgid processes to dump core");
-
-static int capmode_coredump;
-SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN,
- &capmode_coredump, 0, "Allow processes in capability mode to dump core");
-
-static int do_coredump = 1;
-SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW,
- &do_coredump, 0, "Enable/Disable coredumps");
-
-static int set_core_nodump_flag = 0;
-SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag,
- 0, "Enable setting the NODUMP flag on coredump files");
-
-static int coredump_devctl = 0;
-SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl,
- 0, "Generate a devctl notification when processes coredump");
-
/*
* Signal properties and actions.
* The array below categorizes the signals and their default actions
@@ -784,6 +759,13 @@ sigprop(int sig)
return (0);
}
+bool
+sig_do_core(int sig)
+{
+ /* True iff sigprop() flags 'sig' with SIGPROP_CORE (core-dump signal). */
+ return ((sigprop(sig) & SIGPROP_CORE) != 0);
+}
+
static bool
sigact_flag_test(const struct sigaction *act, int flag)
{
@@ -2665,6 +2647,8 @@ static void
ptrace_coredumpreq(struct thread *td, struct proc *p,
struct thr_coredump_req *tcq)
{
+ struct coredump_vnode_ctx wctx;
+ struct coredump_writer cdw;
void *rl_cookie;
if (p->p_sysent->sv_coredump == NULL) {
@@ -2672,8 +2656,15 @@ ptrace_coredumpreq(struct thread *td, struct proc *p,
return;
}
+ wctx.vp = tcq->tc_vp;
+ wctx.fcred = NOCRED;
+
+ cdw.ctx = &wctx;
+ cdw.write_fn = core_vn_write;
+ cdw.extend_fn = core_vn_extend;
+
rl_cookie = vn_rangelock_wlock(tcq->tc_vp, 0, OFF_MAX);
- tcq->tc_error = p->p_sysent->sv_coredump(td, tcq->tc_vp,
+ tcq->tc_error = p->p_sysent->sv_coredump(td, &cdw,
tcq->tc_limit, tcq->tc_flags);
vn_rangelock_unlock(tcq->tc_vp, rl_cookie);
}
@@ -3635,82 +3626,6 @@ killproc(struct proc *p, const char *why)
}
/*
- * Force the current process to exit with the specified signal, dumping core
- * if appropriate. We bypass the normal tests for masked and caught signals,
- * allowing unrecoverable failures to terminate the process without changing
- * signal state. Mark the accounting record with the signal termination.
- * If dumping core, save the signal number for the debugger. Calls exit and
- * does not return.
- */
-void
-sigexit(struct thread *td, int sig)
-{
- struct proc *p = td->td_proc;
- const char *coreinfo;
- int rv;
- bool logexit;
-
- PROC_LOCK_ASSERT(p, MA_OWNED);
- proc_set_p2_wexit(p);
-
- p->p_acflag |= AXSIG;
- if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0)
- logexit = kern_logsigexit != 0;
- else
- logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0;
-
- /*
- * We must be single-threading to generate a core dump. This
- * ensures that the registers in the core file are up-to-date.
- * Also, the ELF dump handler assumes that the thread list doesn't
- * change out from under it.
- *
- * XXX If another thread attempts to single-thread before us
- * (e.g. via fork()), we won't get a dump at all.
- */
- if ((sigprop(sig) & SIGPROP_CORE) &&
- thread_single(p, SINGLE_NO_EXIT) == 0) {
- p->p_sig = sig;
- /*
- * Log signals which would cause core dumps
- * (Log as LOG_INFO to appease those who don't want
- * these messages.)
- * XXX : Todo, as well as euid, write out ruid too
- * Note that coredump() drops proc lock.
- */
- rv = coredump(td);
- switch (rv) {
- case 0:
- sig |= WCOREFLAG;
- coreinfo = " (core dumped)";
- break;
- case EFAULT:
- coreinfo = " (no core dump - bad address)";
- break;
- case EINVAL:
- coreinfo = " (no core dump - invalid argument)";
- break;
- case EFBIG:
- coreinfo = " (no core dump - too large)";
- break;
- default:
- coreinfo = " (no core dump - other error)";
- break;
- }
- if (logexit)
- log(LOG_INFO,
- "pid %d (%s), jid %d, uid %d: exited on "
- "signal %d%s\n", p->p_pid, p->p_comm,
- p->p_ucred->cr_prison->pr_id,
- td->td_ucred->cr_uid,
- sig &~ WCOREFLAG, coreinfo);
- } else
- PROC_UNLOCK(p);
- exit1(td, 0, sig);
- /* NOTREACHED */
-}
-
-/*
* Send queued SIGCHLD to parent when child process's state
* is changed.
*/
@@ -3803,477 +3718,6 @@ childproc_exited(struct proc *p)
sigparent(p, reason, status);
}
-#define MAX_NUM_CORE_FILES 100000
-#ifndef NUM_CORE_FILES
-#define NUM_CORE_FILES 5
-#endif
-CTASSERT(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES);
-static int num_cores = NUM_CORE_FILES;
-
-static int
-sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS)
-{
- int error;
- int new_val;
-
- new_val = num_cores;
- error = sysctl_handle_int(oidp, &new_val, 0, req);
- if (error != 0 || req->newptr == NULL)
- return (error);
- if (new_val > MAX_NUM_CORE_FILES)
- new_val = MAX_NUM_CORE_FILES;
- if (new_val < 0)
- new_val = 0;
- num_cores = new_val;
- return (0);
-}
-SYSCTL_PROC(_debug, OID_AUTO, ncores,
- CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int),
- sysctl_debug_num_cores_check, "I",
- "Maximum number of generated process corefiles while using index format");
-
-#define GZIP_SUFFIX ".gz"
-#define ZSTD_SUFFIX ".zst"
-
-int compress_user_cores = 0;
-
-static int
-sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS)
-{
- int error, val;
-
- val = compress_user_cores;
- error = sysctl_handle_int(oidp, &val, 0, req);
- if (error != 0 || req->newptr == NULL)
- return (error);
- if (val != 0 && !compressor_avail(val))
- return (EINVAL);
- compress_user_cores = val;
- return (error);
-}
-SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores,
- CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int),
- sysctl_compress_user_cores, "I",
- "Enable compression of user corefiles ("
- __XSTRING(COMPRESS_GZIP) " = gzip, "
- __XSTRING(COMPRESS_ZSTD) " = zstd)");
-
-int compress_user_cores_level = 6;
-SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN,
- &compress_user_cores_level, 0,
- "Corefile compression level");
-
-/*
- * Protect the access to corefilename[] by allproc_lock.
- */
-#define corefilename_lock allproc_lock
-
-static char corefilename[MAXPATHLEN] = {"%N.core"};
-TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename));
-
-static int
-sysctl_kern_corefile(SYSCTL_HANDLER_ARGS)
-{
- int error;
-
- sx_xlock(&corefilename_lock);
- error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename),
- req);
- sx_xunlock(&corefilename_lock);
-
- return (error);
-}
-SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW |
- CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A",
- "Process corefile name format string");
-
-static void
-vnode_close_locked(struct thread *td, struct vnode *vp)
-{
-
- VOP_UNLOCK(vp);
- vn_close(vp, FWRITE, td->td_ucred, td);
-}
-
-/*
- * If the core format has a %I in it, then we need to check
- * for existing corefiles before defining a name.
- * To do this we iterate over 0..ncores to find a
- * non-existing core file name to use. If all core files are
- * already used we choose the oldest one.
- */
-static int
-corefile_open_last(struct thread *td, char *name, int indexpos,
- int indexlen, int ncores, struct vnode **vpp)
-{
- struct vnode *oldvp, *nextvp, *vp;
- struct vattr vattr;
- struct nameidata nd;
- int error, i, flags, oflags, cmode;
- char ch;
- struct timespec lasttime;
-
- nextvp = oldvp = NULL;
- cmode = S_IRUSR | S_IWUSR;
- oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
- (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
-
- for (i = 0; i < ncores; i++) {
- flags = O_CREAT | FWRITE | O_NOFOLLOW;
-
- ch = name[indexpos + indexlen];
- (void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen,
- i);
- name[indexpos + indexlen] = ch;
-
- NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
- error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
- NULL);
- if (error != 0)
- break;
-
- vp = nd.ni_vp;
- NDFREE_PNBUF(&nd);
- if ((flags & O_CREAT) == O_CREAT) {
- nextvp = vp;
- break;
- }
-
- error = VOP_GETATTR(vp, &vattr, td->td_ucred);
- if (error != 0) {
- vnode_close_locked(td, vp);
- break;
- }
-
- if (oldvp == NULL ||
- lasttime.tv_sec > vattr.va_mtime.tv_sec ||
- (lasttime.tv_sec == vattr.va_mtime.tv_sec &&
- lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) {
- if (oldvp != NULL)
- vn_close(oldvp, FWRITE, td->td_ucred, td);
- oldvp = vp;
- VOP_UNLOCK(oldvp);
- lasttime = vattr.va_mtime;
- } else {
- vnode_close_locked(td, vp);
- }
- }
-
- if (oldvp != NULL) {
- if (nextvp == NULL) {
- if ((td->td_proc->p_flag & P_SUGID) != 0) {
- error = EFAULT;
- vn_close(oldvp, FWRITE, td->td_ucred, td);
- } else {
- nextvp = oldvp;
- error = vn_lock(nextvp, LK_EXCLUSIVE);
- if (error != 0) {
- vn_close(nextvp, FWRITE, td->td_ucred,
- td);
- nextvp = NULL;
- }
- }
- } else {
- vn_close(oldvp, FWRITE, td->td_ucred, td);
- }
- }
- if (error != 0) {
- if (nextvp != NULL)
- vnode_close_locked(td, oldvp);
- } else {
- *vpp = nextvp;
- }
-
- return (error);
-}
-
-/*
- * corefile_open(comm, uid, pid, td, compress, vpp, namep)
- * Expand the name described in corefilename, using name, uid, and pid
- * and open/create core file.
- * corefilename is a printf-like string, with three format specifiers:
- * %N name of process ("name")
- * %P process id (pid)
- * %U user id (uid)
- * For example, "%N.core" is the default; they can be disabled completely
- * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P".
- * This is controlled by the sysctl variable kern.corefile (see above).
- */
-static int
-corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td,
- int compress, int signum, struct vnode **vpp, char **namep)
-{
- struct sbuf sb;
- struct nameidata nd;
- const char *format;
- char *hostname, *name;
- int cmode, error, flags, i, indexpos, indexlen, oflags, ncores;
-
- hostname = NULL;
- format = corefilename;
- name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO);
- indexlen = 0;
- indexpos = -1;
- ncores = num_cores;
- (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN);
- sx_slock(&corefilename_lock);
- for (i = 0; format[i] != '\0'; i++) {
- switch (format[i]) {
- case '%': /* Format character */
- i++;
- switch (format[i]) {
- case '%':
- sbuf_putc(&sb, '%');
- break;
- case 'H': /* hostname */
- if (hostname == NULL) {
- hostname = malloc(MAXHOSTNAMELEN,
- M_TEMP, M_WAITOK);
- }
- getcredhostname(td->td_ucred, hostname,
- MAXHOSTNAMELEN);
- sbuf_cat(&sb, hostname);
- break;
- case 'I': /* autoincrementing index */
- if (indexpos != -1) {
- sbuf_printf(&sb, "%%I");
- break;
- }
-
- indexpos = sbuf_len(&sb);
- sbuf_printf(&sb, "%u", ncores - 1);
- indexlen = sbuf_len(&sb) - indexpos;
- break;
- case 'N': /* process name */
- sbuf_printf(&sb, "%s", comm);
- break;
- case 'P': /* process id */
- sbuf_printf(&sb, "%u", pid);
- break;
- case 'S': /* signal number */
- sbuf_printf(&sb, "%i", signum);
- break;
- case 'U': /* user id */
- sbuf_printf(&sb, "%u", uid);
- break;
- default:
- log(LOG_ERR,
- "Unknown format character %c in "
- "corename `%s'\n", format[i], format);
- break;
- }
- break;
- default:
- sbuf_putc(&sb, format[i]);
- break;
- }
- }
- sx_sunlock(&corefilename_lock);
- free(hostname, M_TEMP);
- if (compress == COMPRESS_GZIP)
- sbuf_cat(&sb, GZIP_SUFFIX);
- else if (compress == COMPRESS_ZSTD)
- sbuf_cat(&sb, ZSTD_SUFFIX);
- if (sbuf_error(&sb) != 0) {
- log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too "
- "long\n", (long)pid, comm, (u_long)uid);
- sbuf_delete(&sb);
- free(name, M_TEMP);
- return (ENOMEM);
- }
- sbuf_finish(&sb);
- sbuf_delete(&sb);
-
- if (indexpos != -1) {
- error = corefile_open_last(td, name, indexpos, indexlen, ncores,
- vpp);
- if (error != 0) {
- log(LOG_ERR,
- "pid %d (%s), uid (%u): Path `%s' failed "
- "on initial open test, error = %d\n",
- pid, comm, uid, name, error);
- }
- } else {
- cmode = S_IRUSR | S_IWUSR;
- oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE |
- (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0);
- flags = O_CREAT | FWRITE | O_NOFOLLOW;
- if ((td->td_proc->p_flag & P_SUGID) != 0)
- flags |= O_EXCL;
-
- NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name);
- error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred,
- NULL);
- if (error == 0) {
- *vpp = nd.ni_vp;
- NDFREE_PNBUF(&nd);
- }
- }
-
- if (error != 0) {
-#ifdef AUDIT
- audit_proc_coredump(td, name, error);
-#endif
- free(name, M_TEMP);
- return (error);
- }
- *namep = name;
- return (0);
-}
-
-/*
- * Dump a process' core. The main routine does some
- * policy checking, and creates the name of the coredump;
- * then it passes on a vnode and a size limit to the process-specific
- * coredump routine if there is one; if there _is not_ one, it returns
- * ENOSYS; otherwise it returns the error from the process-specific routine.
- */
-
-static int
-coredump(struct thread *td)
-{
- struct proc *p = td->td_proc;
- struct ucred *cred = td->td_ucred;
- struct vnode *vp;
- struct flock lf;
- struct vattr vattr;
- size_t fullpathsize;
- int error, error1, jid, locked, ppid, sig;
- char *name; /* name of corefile */
- void *rl_cookie;
- off_t limit;
- char *fullpath, *freepath = NULL;
- struct sbuf *sb;
-
- PROC_LOCK_ASSERT(p, MA_OWNED);
- MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td);
-
- if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) ||
- (p->p_flag2 & P2_NOTRACE) != 0) {
- PROC_UNLOCK(p);
- return (EFAULT);
- }
-
- /*
- * Note that the bulk of limit checking is done after
- * the corefile is created. The exception is if the limit
- * for corefiles is 0, in which case we don't bother
- * creating the corefile at all. This layout means that
- * a corefile is truncated instead of not being created,
- * if it is larger than the limit.
- */
- limit = (off_t)lim_cur(td, RLIMIT_CORE);
- if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) {
- PROC_UNLOCK(p);
- return (EFBIG);
- }
-
- ppid = p->p_oppid;
- sig = p->p_sig;
- jid = p->p_ucred->cr_prison->pr_id;
- PROC_UNLOCK(p);
-
- error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td,
- compress_user_cores, p->p_sig, &vp, &name);
- if (error != 0)
- return (error);
-
- /*
- * Don't dump to non-regular files or files with links.
- * Do not dump into system files. Effective user must own the corefile.
- */
- if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 ||
- vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 ||
- vattr.va_uid != cred->cr_uid) {
- VOP_UNLOCK(vp);
- error = EFAULT;
- goto out;
- }
-
- VOP_UNLOCK(vp);
-
- /* Postpone other writers, including core dumps of other processes. */
- rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
-
- lf.l_whence = SEEK_SET;
- lf.l_start = 0;
- lf.l_len = 0;
- lf.l_type = F_WRLCK;
- locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0);
-
- VATTR_NULL(&vattr);
- vattr.va_size = 0;
- if (set_core_nodump_flag)
- vattr.va_flags = UF_NODUMP;
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- VOP_SETATTR(vp, &vattr, cred);
- VOP_UNLOCK(vp);
- PROC_LOCK(p);
- p->p_acflag |= ACORE;
- PROC_UNLOCK(p);
-
- if (p->p_sysent->sv_coredump != NULL) {
- error = p->p_sysent->sv_coredump(td, vp, limit, 0);
- } else {
- error = ENOSYS;
- }
-
- if (locked) {
- lf.l_type = F_UNLCK;
- VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK);
- }
- vn_rangelock_unlock(vp, rl_cookie);
-
- /*
- * Notify the userland helper that a process triggered a core dump.
- * This allows the helper to run an automated debugging session.
- */
- if (error != 0 || coredump_devctl == 0)
- goto out;
- sb = sbuf_new_auto();
- if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0)
- goto out2;
- sbuf_cat(sb, "comm=\"");
- devctl_safe_quote_sb(sb, fullpath);
- free(freepath, M_TEMP);
- sbuf_cat(sb, "\" core=\"");
-
- /*
- * We can't lookup core file vp directly. When we're replacing a core, and
- * other random times, we flush the name cache, so it will fail. Instead,
- * if the path of the core is relative, add the current dir in front if it.
- */
- if (name[0] != '/') {
- fullpathsize = MAXPATHLEN;
- freepath = malloc(fullpathsize, M_TEMP, M_WAITOK);
- if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) {
- free(freepath, M_TEMP);
- goto out2;
- }
- devctl_safe_quote_sb(sb, fullpath);
- free(freepath, M_TEMP);
- sbuf_putc(sb, '/');
- }
- devctl_safe_quote_sb(sb, name);
- sbuf_putc(sb, '"');
-
- sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d",
- jid, p->p_pid, ppid, sig);
- if (sbuf_finish(sb) == 0)
- devctl_notify("kernel", "signal", "coredump", sbuf_data(sb));
-out2:
- sbuf_delete(sb);
-out:
- error1 = vn_close(vp, FWRITE, cred, td);
- if (error == 0)
- error = error1;
-#ifdef AUDIT
- audit_proc_coredump(td, name, error);
-#endif
- free(name, M_TEMP);
- return (error);
-}
-
/*
* Nonexistent system call-- signal process (may want to handle it). Flag
* error in case process won't see signal immediately (blocked or ignored).
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 46226cc31980..25da134661e9 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -2368,7 +2368,7 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
priv = PRIV_SYSCTL_WRITEJAIL;
#ifdef VIMAGE
else if ((oid->oid_kind & CTLFLAG_VNET) &&
- prison_owns_vnet(req->td->td_ucred))
+ prison_owns_vnet(req->td->td_ucred->cr_prison))
priv = PRIV_SYSCTL_WRITEJAIL;
#endif
else
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index f853af193016..50b040132396 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -571,7 +571,7 @@ threadinit(void)
/*
* Thread structures are specially aligned so that (at least) the
- * 5 lower bits of a pointer to 'struct thead' must be 0. These bits
+ * 5 lower bits of a pointer to 'struct thread' must be 0. These bits
* are used by synchronization primitives to store flags in pointers to
* such structures.
*/
diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c
new file mode 100644
index 000000000000..d425596b5f24
--- /dev/null
+++ b/sys/kern/kern_ucoredump.c
@@ -0,0 +1,299 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/acct.h>
+#include <sys/compressor.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/signalvar.h>
+#include <sys/racct.h>
+#include <sys/resourcevar.h>
+#include <sys/rmlock.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/ucoredump.h>
+#include <sys/wait.h>
+
+static int coredump(struct thread *td, const char **);
+
+/*
+ * Requested corefile compression format, settable through
+ * kern.compress_user_cores (see sysctl_compress_user_cores()).
+ */
+int compress_user_cores = 0;
+
+/*
+ * List of registered coredumpers.  It is modified with coredump_rmlock
+ * write-locked (coredumper_register()/coredumper_unregister()) and walked
+ * with it read-locked (coredump()).
+ */
+static SLIST_HEAD(, coredumper) coredumpers =
+ SLIST_HEAD_INITIALIZER(coredumpers);
+static struct rmlock coredump_rmlock;
+RM_SYSINIT(coredump_lock, &coredump_rmlock, "coredump_lock");
+
+/*
+ * System-wide default for logging signal exits; sigexit() uses it unless the
+ * process has P2_LOGSIGEXIT_CTL set.
+ */
+static int kern_logsigexit = 1;
+SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW,
+ &kern_logsigexit, 0,
+ "Log processes quitting on abnormal signals to syslog(3)");
+
+/* Checked by coredump() for processes with P_SUGID set. */
+static int sugid_coredump;
+SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN,
+ &sugid_coredump, 0, "Allow setuid and setgid processes to dump core");
+
+/* Global switch checked first by coredump(). */
+static int do_coredump = 1;
+SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW,
+ &do_coredump, 0, "Enable/Disable coredumps");
+
+/*
+ * Sysctl handler for kern.compress_user_cores.  Reports the current setting
+ * and, when a new value is supplied, stores it only if it is zero or a format
+ * for which compressor_avail() reports support; any other value is rejected
+ * with EINVAL.
+ */
+static int
+sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS)
+{
+ int format, rc;
+
+ format = compress_user_cores;
+ rc = sysctl_handle_int(oidp, &format, 0, req);
+ if (rc != 0)
+ return (rc);
+
+ /* No new value was supplied, so there is nothing to update. */
+ if (req->newptr == NULL)
+ return (0);
+
+ if (format == 0 || compressor_avail(format)) {
+ compress_user_cores = format;
+ return (0);
+ }
+ return (EINVAL);
+}
+SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores,
+ CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int),
+ sysctl_compress_user_cores, "I",
+ "Enable compression of user corefiles ("
+ __XSTRING(COMPRESS_GZIP) " = gzip, "
+ __XSTRING(COMPRESS_ZSTD) " = zstd)");
+
+int compress_user_cores_level = 6;
+SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN,
+ &compress_user_cores_level, 0,
+ "Corefile compression level");
+
+/*
+ * Add a coredumper to the global list so that coredump() will consider it.
+ * The dumper's reference count is initialized before the entry is published
+ * under the write lock.
+ */
+void
+coredumper_register(struct coredumper *cd)
+{
+
+ blockcount_init(&cd->cd_refcount);
+ rm_wlock(&coredump_rmlock);
+ SLIST_INSERT_HEAD(&coredumpers, cd, cd_entry);
+ rm_wunlock(&coredump_rmlock);
+}
+
+/*
+ * Remove a coredumper from the global list and then wait on its cd_refcount,
+ * which coredump() holds for the duration of a dump through this dumper.
+ * Once the entry is off the list no new references can be taken, since
+ * coredump() acquires its reference while still holding the read lock.
+ */
+void
+coredumper_unregister(struct coredumper *cd)
+{
+
+ rm_wlock(&coredump_rmlock);
+ SLIST_REMOVE(&coredumpers, cd, coredumper, cd_entry);
+ rm_wunlock(&coredump_rmlock);
+
+ /*
+ * Wait for any in-process coredumps to finish before returning.
+ */
+ blockcount_wait(&cd->cd_refcount, NULL, "dumpwait", 0);
+}
+
+/*
+ * Force the current process to exit with the specified signal, dumping core
+ * if appropriate. We bypass the normal tests for masked and caught signals,
+ * allowing unrecoverable failures to terminate the process without changing
+ * signal state. Mark the accounting record with the signal termination.
+ * If dumping core, save the signal number for the debugger. Calls exit and
+ * does not return.
+ *
+ * Must be called with the proc lock held.  Whether the exit is logged is
+ * decided by the per-process P2_LOGSIGEXIT_ENABLE flag when
+ * P2_LOGSIGEXIT_CTL is set, and by the kern.logsigexit sysctl otherwise.
+ */
+void
+sigexit(struct thread *td, int sig)
+{
+ struct proc *p = td->td_proc;
+ int rv;
+ bool logexit;
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ proc_set_p2_wexit(p);
+
+ p->p_acflag |= AXSIG;
+ if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0)
+ logexit = kern_logsigexit != 0;
+ else
+ logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0;
+
+ /*
+ * We must be single-threading to generate a core dump. This
+ * ensures that the registers in the core file are up-to-date.
+ * Also, the ELF dump handler assumes that the thread list doesn't
+ * change out from under it.
+ *
+ * XXX If another thread attempts to single-thread before us
+ * (e.g. via fork()), we won't get a dump at all.
+ */
+ if (sig_do_core(sig) && thread_single(p, SINGLE_NO_EXIT) == 0) {
+ const char *err = NULL;
+
+ p->p_sig = sig;
+ /*
+ * Note that coredump() drops proc lock. On failure it may
+ * set err to a specific reason; if it does not, fall back to
+ * a generic description of the error number.
+ */
+ rv = coredump(td, &err);
+ if (rv == 0) {
+ MPASS(err == NULL);
+ sig |= WCOREFLAG;
+ } else if (err == NULL) {
+ switch (rv) {
+ case EFAULT:
+ err = "bad address";
+ break;
+ case EINVAL:
+ err = "invalid argument";
+ break;
+ case EFBIG:
+ err = "too large";
+ break;
+ default:
+ err = "other error";
+ break;
+ }
+ }
+ /*
+ * Log signals which would cause core dumps
+ * (Log as LOG_INFO to appease those who don't want
+ * these messages.)
+ * XXX : Todo, as well as euid, write out ruid too
+ */
+ if (logexit)
+ log(LOG_INFO,
+ "pid %d (%s), jid %d, uid %d: exited on "
+ "signal %d (%s%s)\n", p->p_pid, p->p_comm,
+ p->p_ucred->cr_prison->pr_id,
+ td->td_ucred->cr_uid, sig &~ WCOREFLAG,
+ err != NULL ? "no core dump - " : "core dumped",
+ err != NULL ? err : "");
+ } else
+ PROC_UNLOCK(p);
+ exit1(td, 0, sig);
+ /* NOTREACHED */
+}
+
+
+/*
+ * Dump a process' core.  This routine does the policy checking
+ * (kern.coredump, kern.sugid_coredump, P2_NOTRACE and the core size limits),
+ * then selects the registered coredumper with the highest probe priority and
+ * hands it the thread and the size limit; the error from that coredumper's
+ * cd_handle method is returned.  When the dump is denied by policy, *errmsg
+ * is set to a description of the reason.
+ *
+ * Must be called with the proc lock held; the lock is dropped before return.
+ */
+static int
+coredump(struct thread *td, const char **errmsg)
+{
+ struct coredumper *iter, *chosen;
+ struct proc *p = td->td_proc;
+ struct rm_priotracker tracker;
+ off_t limit;
+ int error, priority;
+
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td);
+
+ if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) ||
+ (p->p_flag2 & P2_NOTRACE) != 0) {
+ PROC_UNLOCK(p);
+
+ if (!do_coredump)
+ *errmsg = "denied by kern.coredump";
+ else if ((p->p_flag2 & P2_NOTRACE) != 0)
+ *errmsg = "process has trace disabled";
+ else
+ *errmsg = "sugid process denied by kern.sugid_coredump";
+ return (EFAULT);
+ }
+
+ /*
+ * Note that the bulk of limit checking is done after
+ * the corefile is created. The exception is if the limit
+ * for corefiles is 0, in which case we don't bother
+ * creating the corefile at all. This layout means that
+ * a corefile is truncated instead of not being created,
+ * if it is larger than the limit.
+ */
+ limit = (off_t)lim_cur(td, RLIMIT_CORE);
+ if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) {
+ PROC_UNLOCK(p);
+ *errmsg = "coredumpsize limit is 0";
+ return (EFBIG);
+ }
+
+ rm_rlock(&coredump_rmlock, &tracker);
+ priority = -1;
+ chosen = NULL;
+ SLIST_FOREACH(iter, &coredumpers, cd_entry) {
+ if (iter->cd_probe == NULL) {
+ /*
+ * If we haven't found anything of a higher priority
+ * yet, we'll call this a GENERIC. Ideally, we want
+ * coredumper modules to include a probe function.
+ */
+ if (priority < 0) {
+ priority = COREDUMPER_GENERIC;
+ chosen = iter;
+ }
+
+ continue;
+ }
+
+ /*
+ * A negative probe result means the dumper declines this
+ * thread; otherwise the result is its priority.
+ */
+ error = (*iter->cd_probe)(td);
+ if (error < 0)
+ continue;
+
+ /*
+ * Higher priority than previous options.
+ */
+ if (error > priority) {
+ priority = error;
+ chosen = iter;
+ }
+ }
+
+ /*
+ * Currently, we always have the vnode dumper built in. Check this
+ * before chosen is dereferenced below.
+ */
+ MPASS(chosen != NULL);
+
+ /*
+ * Acquire our refcount before we drop the lock so that
+ * coredumper_unregister() can safely assume that the refcount will only
+ * go down once it's dropped the rmlock.
+ */
+ blockcount_acquire(&chosen->cd_refcount, 1);
+ rm_runlock(&coredump_rmlock, &tracker);
+
+ error = ((*chosen->cd_handle)(td, limit));
+ PROC_LOCK_ASSERT(p, MA_NOTOWNED);
+
+ blockcount_release(&chosen->cd_refcount, 1);
+
+ return (error);
+}
diff --git a/sys/kern/subr_compressor.c b/sys/kern/subr_compressor.c
index 280264881241..5d59622e0455 100644
--- a/sys/kern/subr_compressor.c
+++ b/sys/kern/subr_compressor.c
@@ -538,6 +538,12 @@ compressor_init(compressor_cb_t cb, int format, size_t maxiosize, int level,
return (s);
}
+/*
+ * Return the 'format' field of the methods table attached to this stream.
+ */
+int
+compressor_format(const struct compressor *stream)
+{
+ return (stream->methods->format);
+}
+
void
compressor_reset(struct compressor *stream)
{
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index b472aaea89e6..5606b36f772f 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -2269,6 +2269,7 @@ exterr_copyout(struct thread *td)
ue.error = 0;
sz = sizeof(ue.error);
} else {
+ ktrexterr(td);
sz = sizeof(ue) - __offsetof(struct uexterror, error);
}
error = copyout(&ue.error, uloc, sz);
@@ -2335,7 +2336,6 @@ exterr_set(int eerror, int category, const char *mmsg, uintptr_t pp1,
td->td_kexterr.p1 = pp1;
td->td_kexterr.p2 = pp2;
td->td_kexterr.src_line = line;
- ktrexterr(td);
}
return (eerror);
}
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 6f83b875a6b6..85fe48ddd466 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -1134,10 +1134,10 @@ shm_doremove(struct shm_mapping *map)
int
kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
- int shmflags, struct filecaps *fcaps, const char *name __unused)
+ int shmflags, struct filecaps *fcaps, const char *name __unused,
+ struct shmfd *shmfd)
{
struct pwddesc *pdp;
- struct shmfd *shmfd;
struct file *fp;
char *path;
void *rl_cookie;
@@ -1214,23 +1214,41 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode,
if (error != 0)
goto outnofp;
- /* A SHM_ANON path pointer creates an anonymous object. */
+ /*
+ * A SHM_ANON path pointer creates an anonymous object. We allow other
+ * parts of the kernel to pre-populate a shmfd and then materialize an
+ * fd for it here as a means to pass data back up to userland. This
+ * doesn't really make sense for named shm objects, but it makes plenty
+ * of sense for anonymous objects.
+ */
if (userpath == SHM_ANON) {
- /* A read-only anonymous object is pointless. */
- if ((flags & O_ACCMODE) == O_RDONLY) {
- error = EINVAL;
- goto out;
- }
- shmfd = shm_alloc(td->td_ucred, cmode, largepage);
- if (shmfd == NULL) {
- error = ENOMEM;
- goto out;
+ if (shmfd != NULL) {
+ shm_hold(shmfd);
+ } else {
+ /*
+ * A read-only anonymous object is pointless, unless it
+ * was pre-populated by the kernel with the expectation
+ * that a shmfd would later be created for userland to
+ * access it through.
+ */
+ if ((flags & O_ACCMODE) == O_RDONLY) {
+ error = EINVAL;
+ goto out;
+ }
+ shmfd = shm_alloc(td->td_ucred, cmode, largepage);
+ if (shmfd == NULL) {
+ error = ENOMEM;
+ goto out;
+ }
+
+ shmfd->shm_seals = initial_seals;
+ shmfd->shm_flags = shmflags;
}
- shmfd->shm_seals = initial_seals;
- shmfd->shm_flags = shmflags;
} else {
fnv = fnv_32_str(path, FNV1_32_INIT);
sx_xlock(&shm_dict_lock);
+
+ MPASS(shmfd == NULL);
shmfd = shm_lookup(path, fnv);
if (shmfd == NULL) {
/* Object does not yet exist, create it if requested. */
@@ -2173,7 +2191,7 @@ kern_shm_open(struct thread *td, const char *path, int flags, mode_t mode,
struct filecaps *caps)
{
- return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL));
+ return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL, NULL));
}
/*
@@ -2191,7 +2209,7 @@ sys_shm_open2(struct thread *td, struct shm_open2_args *uap)
{
return (kern_shm_open2(td, uap->path, uap->flags, uap->mode,
- uap->shmflags, NULL, uap->name));
+ uap->shmflags, NULL, uap->name, NULL));
}
int
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index 02973146068d..e63fa4c01434 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -222,6 +222,7 @@ typedef struct oaiocb {
#define KAIOCB_CHECKSYNC 0x08
#define KAIOCB_CLEARED 0x10
#define KAIOCB_FINISHED 0x20
+#define KAIOCB_MARKER 0x40
/* ioflags */
#define KAIOCB_IO_FOFFSET 0x01
@@ -584,6 +585,12 @@ aio_cancel_job(struct proc *p, struct kaioinfo *ki, struct kaiocb *job)
int cancelled;
AIO_LOCK_ASSERT(ki, MA_OWNED);
+
+ /*
+ * If we're running down the queue, the process must be single-threaded,
+ * and so no markers should be present.
+ */
+ MPASS((job->jobflags & KAIOCB_MARKER) == 0);
if (job->jobflags & (KAIOCB_CANCELLED | KAIOCB_FINISHED))
return (0);
MPASS((job->jobflags & KAIOCB_CANCELLING) == 0);
@@ -658,7 +665,7 @@ restart:
}
/* Wait for all running I/O to be finished */
- if (TAILQ_FIRST(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) {
+ if (!TAILQ_EMPTY(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) {
ki->kaio_flags |= KAIO_WAKEUP;
msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz);
goto restart;
@@ -1804,6 +1811,8 @@ aio_queue_file(struct file *fp, struct kaiocb *job)
} else if (job->uaiocb.aio_lio_opcode & LIO_SYNC) {
AIO_LOCK(ki);
TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) {
+ if ((job2->jobflags & KAIOCB_MARKER) != 0)
+ continue;
if (job2->fd_file == job->fd_file &&
((job2->uaiocb.aio_lio_opcode & LIO_SYNC) == 0) &&
job2->seqno < job->seqno) {
@@ -2033,7 +2042,7 @@ sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap)
{
struct proc *p = td->td_proc;
struct kaioinfo *ki;
- struct kaiocb *job, *jobn;
+ struct kaiocb *job, *jobn, marker;
struct file *fp;
int error;
int cancelled = 0;
@@ -2058,16 +2067,30 @@ sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap)
}
}
+ /*
+ * We may have to drop the list mutex in order to cancel a job. After
+ * that point it is unsafe to rely on the stability of the list. We
+ * could restart the search from the beginning after canceling a job,
+ * but this may be inefficient. Instead, use a marker job to keep our
+ * place in the list.
+ */
+ memset(&marker, 0, sizeof(marker));
+ marker.jobflags = KAIOCB_MARKER;
+
AIO_LOCK(ki);
TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) {
- if ((uap->fd == job->uaiocb.aio_fildes) &&
- ((uap->aiocbp == NULL) ||
- (uap->aiocbp == job->ujob))) {
+ if (uap->fd == job->uaiocb.aio_fildes &&
+ (uap->aiocbp == NULL || uap->aiocbp == job->ujob) &&
+ (job->jobflags & KAIOCB_MARKER) == 0) {
+ TAILQ_INSERT_AFTER(&ki->kaio_jobqueue, job, &marker,
+ plist);
if (aio_cancel_job(p, ki, job)) {
cancelled++;
} else {
notcancelled++;
}
+ jobn = TAILQ_NEXT(&marker, plist);
+ TAILQ_REMOVE(&ki->kaio_jobqueue, &marker, plist);
if (uap->aiocbp != NULL)
break;
}
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index a314bda164de..bd7caa01e153 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -134,7 +134,7 @@ vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
np->netc_exflags = argp->ex_flags;
np->netc_anon = crget();
np->netc_anon->cr_uid = argp->ex_uid;
- crsetgroups_fallback(np->netc_anon, argp->ex_ngroups,
+ crsetgroups_and_egid(np->netc_anon, argp->ex_ngroups,
argp->ex_groups, GID_NOGROUP);
np->netc_anon->cr_prison = &prison0;
prison_hold(np->netc_anon->cr_prison);
@@ -213,7 +213,7 @@ vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
np->netc_exflags = argp->ex_flags;
np->netc_anon = crget();
np->netc_anon->cr_uid = argp->ex_uid;
- crsetgroups_fallback(np->netc_anon, argp->ex_ngroups, argp->ex_groups,
+ crsetgroups_and_egid(np->netc_anon, argp->ex_ngroups, argp->ex_groups,
GID_NOGROUP);
np->netc_anon->cr_prison = &prison0;
prison_hold(np->netc_anon->cr_prison);
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index 7cb6e2124326..99c9ec9dcd01 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -34,6 +34,7 @@ SUBDIR= \
alq \
${_amd_ecc_inject} \
${_amdgpio} \
+ ${_amdsmu} \
${_amdsbwd} \
${_amdsmn} \
${_amdtemp} \
@@ -772,6 +773,7 @@ _acpi= acpi
_aesni= aesni
.endif
_amd_ecc_inject=amd_ecc_inject
+_amdsmu= amdsmu
_amdsbwd= amdsbwd
_amdsmn= amdsmn
_amdtemp= amdtemp
diff --git a/sys/modules/amdsmu/Makefile b/sys/modules/amdsmu/Makefile
new file mode 100644
index 000000000000..752f57173d61
--- /dev/null
+++ b/sys/modules/amdsmu/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2025 The FreeBSD Foundation
+#
+# This software was developed by Aymeric Wibo <obiwac@freebsd.org>
+# under sponsorship from the FreeBSD Foundation.
+
+.PATH: ${SRCTOP}/sys/dev/amdsmu
+
+KMOD= amdsmu
+SRCS= amdsmu.c
+SRCS+= bus_if.h device_if.h pci_if.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 5b3ee740d75e..0a35fb4095fb 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -76,31 +76,34 @@
* heterogeneous bridges).
*/
-#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"
+#define EXTERR_CATEGORY EXTERR_CAT_BRIDGE
+
#include <sys/param.h>
+#include <sys/ctype.h> /* string functions */
#include <sys/eventhandler.h>
-#include <sys/mbuf.h>
+#include <sys/exterrvar.h>
+#include <sys/jail.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
#include <sys/protosw.h>
+#include <sys/random.h>
#include <sys/systm.h>
-#include <sys/jail.h>
-#include <sys/time.h>
#include <sys/socket.h> /* for net/if.h */
#include <sys/sockio.h>
-#include <sys/ctype.h> /* string functions */
-#include <sys/kernel.h>
-#include <sys/random.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
+#include <sys/time.h>
+
#include <vm/uma.h>
-#include <sys/module.h>
-#include <sys/priv.h>
-#include <sys/proc.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
#include <net/bpf.h>
#include <net/if.h>
@@ -254,8 +257,8 @@ struct bridge_iflist {
uint32_t bif_addrcnt; /* cur. # of addresses */
uint32_t bif_addrexceeded;/* # of address violations */
struct epoch_context bif_epoch_ctx;
- ether_vlanid_t bif_untagged; /* untagged vlan id */
- ifbvlan_set_t bif_vlan_set; /* allowed tagged vlans */
+ ether_vlanid_t bif_pvid; /* port vlan id */
+ ifbvlan_set_t bif_vlan_set; /* if allowed tagged vlans */
};
/*
@@ -404,7 +407,7 @@ static int bridge_ioctl_sma(struct bridge_softc *, void *);
static int bridge_ioctl_sifprio(struct bridge_softc *, void *);
static int bridge_ioctl_sifcost(struct bridge_softc *, void *);
static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *);
-static int bridge_ioctl_sifuntagged(struct bridge_softc *, void *);
+static int bridge_ioctl_sifpvid(struct bridge_softc *, void *);
static int bridge_ioctl_sifvlanset(struct bridge_softc *, void *);
static int bridge_ioctl_gifvlanset(struct bridge_softc *, void *);
static int bridge_ioctl_addspan(struct bridge_softc *, void *);
@@ -625,7 +628,7 @@ static const struct bridge_control bridge_control_table[] = {
{ bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq),
BC_F_COPYIN|BC_F_SUSER },
- { bridge_ioctl_sifuntagged, sizeof(struct ifbreq),
+ { bridge_ioctl_sifpvid, sizeof(struct ifbreq),
BC_F_COPYIN|BC_F_SUSER },
{ bridge_ioctl_sifvlanset, sizeof(struct ifbif_vlan_req),
@@ -986,31 +989,37 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCGDRVSPEC:
case SIOCSDRVSPEC:
if (ifd->ifd_cmd >= bridge_control_table_size) {
- error = EINVAL;
+ error = EXTERROR(EINVAL, "Invalid control command");
break;
}
bc = &bridge_control_table[ifd->ifd_cmd];
if (cmd == SIOCGDRVSPEC &&
(bc->bc_flags & BC_F_COPYOUT) == 0) {
- error = EINVAL;
+ error = EXTERROR(EINVAL,
+ "Inappropriate ioctl for command "
+ "(expected SIOCSDRVSPEC)");
break;
}
else if (cmd == SIOCSDRVSPEC &&
(bc->bc_flags & BC_F_COPYOUT) != 0) {
- error = EINVAL;
+ error = EXTERROR(EINVAL,
+ "Inappropriate ioctl for command "
+ "(expected SIOCGDRVSPEC)");
break;
}
if (bc->bc_flags & BC_F_SUSER) {
error = priv_check(td, PRIV_NET_BRIDGE);
- if (error)
+ if (error) {
+ EXTERROR(error, "PRIV_NET_BRIDGE required");
break;
+ }
}
if (ifd->ifd_len != bc->bc_argsize ||
ifd->ifd_len > sizeof(args)) {
- error = EINVAL;
+ error = EXTERROR(EINVAL, "Invalid argument size");
break;
}
@@ -1062,7 +1071,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
oldmtu = sc->sc_ifp->if_mtu;
if (ifr->ifr_mtu < IF_MINMTU) {
- error = EINVAL;
+ error = EXTERROR(EINVAL,
+ "Requested MTU is lower than IF_MINMTU");
break;
}
if (CK_LIST_EMPTY(&sc->sc_iflist)) {
@@ -1088,6 +1098,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
(*bif->bif_ifp->if_ioctl)(bif->bif_ifp,
SIOCSIFMTU, (caddr_t)ifr);
}
+ EXTERROR(error,
+ "Failed to set MTU on member interface");
} else {
sc->sc_ifp->if_mtu = ifr->ifr_mtu;
}
@@ -1125,14 +1137,14 @@ bridge_mutecaps(struct bridge_softc *sc)
mask = BRIDGE_IFCAPS_MASK;
CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
- /* Every member must support it or its disabled */
+ /* Every member must support it or it's disabled */
mask &= bif->bif_savedcaps;
}
CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
enabled = bif->bif_ifp->if_capenable;
enabled &= ~BRIDGE_IFCAPS_STRIP;
- /* strip off mask bits and enable them again if allowed */
+ /* Strip off mask bits and enable them again if allowed */
enabled &= ~BRIDGE_IFCAPS_MASK;
enabled |= mask;
bridge_set_ifcap(sc, bif, enabled);
@@ -1282,7 +1294,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif,
#endif
break;
}
- /* reneable any interface capabilities */
+ /* Re-enable any interface capabilities */
bridge_set_ifcap(sc, bif, bif->bif_savedcaps);
}
bstp_destroy(&bif->bif_stp); /* prepare to free */
@@ -1318,21 +1330,48 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
ifs = ifunit(req->ifbr_ifsname);
if (ifs == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "No such interface",
+ req->ifbr_ifsname));
if (ifs->if_ioctl == NULL) /* must be supported */
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Interface must support ioctl(2)"));
+
+ /*
+ * If the new interface is a vlan(4), it could be a bridge SVI.
+ * Don't allow such things to be added to bridges.
+ */
+ if (ifs->if_type == IFT_L2VLAN) {
+ struct ifnet *parent;
+ struct epoch_tracker et;
+ bool is_bridge;
+
+ /*
+ * Entering NET_EPOCH with BRIDGE_LOCK held, but this is okay
+ * since we don't sleep here.
+ */
+ NET_EPOCH_ENTER(et);
+ parent = VLAN_TRUNKDEV(ifs);
+ is_bridge = (parent != NULL && parent->if_type == IFT_BRIDGE);
+ NET_EPOCH_EXIT(et);
+
+ if (is_bridge)
+ return (EXTERROR(EINVAL,
+ "Bridge SVI cannot be added to a bridge"));
+ }
/* If it's in the span list, it can't be a member. */
CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
if (ifs == bif->bif_ifp)
- return (EBUSY);
+ return (EXTERROR(EBUSY,
+ "Span interface cannot be a member"));
if (ifs->if_bridge) {
struct bridge_iflist *sbif = ifs->if_bridge;
if (sbif->bif_sc == sc)
- return (EEXIST);
+ return (EXTERROR(EEXIST,
+ "Interface is already a member of this bridge"));
- return (EBUSY);
+ return (EXTERROR(EBUSY,
+ "Interface is already a member of another bridge"));
}
switch (ifs->if_type) {
@@ -1342,7 +1381,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
/* permitted interface types */
break;
default:
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Unsupported interface type"));
}
#ifdef INET6
@@ -1394,11 +1433,15 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) {
#ifdef INET
if (ifa->ifa_addr->sa_family == AF_INET)
- return (EINVAL);
+ return (EXTERROR(EINVAL,
+ "Member interface may not have "
+ "an IPv4 address configured"));
#endif
#ifdef INET6
if (ifa->ifa_addr->sa_family == AF_INET6)
- return (EINVAL);
+ return (EXTERROR(EINVAL,
+ "Member interface may not have "
+ "an IPv6 address configured"));
#endif
}
}
@@ -1420,7 +1463,8 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg)
" new member %s\n", sc->sc_ifp->if_xname,
ifr.ifr_mtu,
ifs->if_xname);
- return (EINVAL);
+ return (EXTERROR(EINVAL,
+ "Failed to set MTU on new member"));
}
}
@@ -1482,7 +1526,7 @@ bridge_ioctl_del(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->ifbr_ifsname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
bridge_delete_member(sc, bif, 0);
@@ -1498,7 +1542,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->ifbr_ifsname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
bp = &bif->bif_stp;
req->ifbr_ifsflags = bif->bif_flags;
@@ -1512,7 +1556,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
req->ifbr_addrcnt = bif->bif_addrcnt;
req->ifbr_addrmax = bif->bif_addrmax;
req->ifbr_addrexceeded = bif->bif_addrexceeded;
- req->ifbr_untagged = bif->bif_untagged;
+ req->ifbr_pvid = bif->bif_pvid;
/* Copy STP state options as flags */
if (bp->bp_operedge)
@@ -1541,12 +1585,12 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->ifbr_ifsname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
bp = &bif->bif_stp;
if (req->ifbr_ifsflags & IFBIF_SPAN)
/* SPAN is readonly */
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Span interface cannot be modified"));
NET_EPOCH_ENTER(et);
@@ -1555,7 +1599,8 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
error = bstp_enable(&bif->bif_stp);
if (error) {
NET_EPOCH_EXIT(et);
- return (error);
+ return (EXTERROR(error,
+ "Failed to enable STP"));
}
}
} else {
@@ -1724,7 +1769,7 @@ bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->ifba_ifsname);
if (bif == NULL) {
NET_EPOCH_EXIT(et);
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
}
/* bridge_rtupdate() may acquire the lock. */
@@ -1858,7 +1903,7 @@ bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->ifbr_ifsname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority));
}
@@ -1871,7 +1916,7 @@ bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->ifbr_ifsname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost));
}
@@ -1884,28 +1929,28 @@ bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->ifbr_ifsname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
bif->bif_addrmax = req->ifbr_addrmax;
return (0);
}
static int
-bridge_ioctl_sifuntagged(struct bridge_softc *sc, void *arg)
+bridge_ioctl_sifpvid(struct bridge_softc *sc, void *arg)
{
struct ifbreq *req = arg;
struct bridge_iflist *bif;
bif = bridge_lookup_member(sc, req->ifbr_ifsname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
- if (req->ifbr_untagged > DOT1Q_VID_MAX)
- return (EINVAL);
+ if (req->ifbr_pvid > DOT1Q_VID_MAX)
+ return (EXTERROR(EINVAL, "Invalid VLAN ID"));
- if (req->ifbr_untagged != DOT1Q_VID_NULL)
+ if (req->ifbr_pvid != DOT1Q_VID_NULL)
bif->bif_flags |= IFBIF_VLANFILTER;
- bif->bif_untagged = req->ifbr_untagged;
+ bif->bif_pvid = req->ifbr_pvid;
return (0);
}
@@ -1917,12 +1962,12 @@ bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->bv_ifname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
/* Reject invalid VIDs. */
if (BRVLAN_TEST(&req->bv_set, DOT1Q_VID_NULL) ||
BRVLAN_TEST(&req->bv_set, DOT1Q_VID_RSVD_IMPL))
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Invalid VLAN ID in set"));
switch (req->bv_op) {
/* Replace the existing vlan set with the new set */
@@ -1942,7 +1987,8 @@ bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg)
/* Invalid or unknown operation */
default:
- return (EINVAL);
+ return (EXTERROR(EINVAL,
+ "Unsupported BRDGSIFVLANSET operation"));
}
/*
@@ -1962,7 +2008,7 @@ bridge_ioctl_gifvlanset(struct bridge_softc *sc, void *arg)
bif = bridge_lookup_member(sc, req->bv_ifname);
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a bridge member"));
BIT_COPY(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set);
return (0);
@@ -1977,14 +2023,16 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
ifs = ifunit(req->ifbr_ifsname);
if (ifs == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "No such interface"));
CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
if (ifs == bif->bif_ifp)
- return (EBUSY);
+ return (EXTERROR(EBUSY,
+ "Interface is already a span port"));
if (ifs->if_bridge != NULL)
- return (EBUSY);
+ return (EXTERROR(EEXIST,
+ "Interface is already a bridge member"));
switch (ifs->if_type) {
case IFT_ETHER:
@@ -1992,7 +2040,7 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg)
case IFT_L2VLAN:
break;
default:
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Unsupported interface type"));
}
bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO);
@@ -2016,14 +2064,14 @@ bridge_ioctl_delspan(struct bridge_softc *sc, void *arg)
ifs = ifunit(req->ifbr_ifsname);
if (ifs == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "No such interface"));
CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next)
if (ifs == bif->bif_ifp)
break;
if (bif == NULL)
- return (ENOENT);
+ return (EXTERROR(ENOENT, "Interface is not a span port"));
bridge_delete_span(sc, bif);
@@ -2278,8 +2326,8 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
* the VLAN header.
*/
if ((bif->bif_flags & IFBIF_VLANFILTER) &&
- bif->bif_untagged != DOT1Q_VID_NULL &&
- VLANTAGOF(m) == bif->bif_untagged) {
+ bif->bif_pvid != DOT1Q_VID_NULL &&
+ VLANTAGOF(m) == bif->bif_pvid) {
m->m_flags &= ~M_VLANTAG;
m->m_pkthdr.ether_vtag = 0;
}
@@ -3145,14 +3193,14 @@ bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m)
* The frame doesn't have a tag. If the interface does not
* have an untagged vlan configured, drop the frame.
*/
- if (sbif->bif_untagged == DOT1Q_VID_NULL)
+ if (sbif->bif_pvid == DOT1Q_VID_NULL)
return (false);
/*
* Otherwise, insert a new tag based on the interface's
* untagged vlan id.
*/
- m->m_pkthdr.ether_vtag = sbif->bif_untagged;
+ m->m_pkthdr.ether_vtag = sbif->bif_pvid;
m->m_flags |= M_VLANTAG;
} else {
/*
@@ -3213,7 +3261,7 @@ bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m)
* If the frame's vlan matches the interfaces's untagged vlan,
* allow it.
*/
- if (vlan == dbif->bif_untagged)
+ if (vlan == dbif->bif_pvid)
return (true);
/*
@@ -3244,10 +3292,11 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc);
/* Check the source address is valid and not multicast. */
- if (ETHER_IS_MULTICAST(dst) ||
- (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
- dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0)
- return (EINVAL);
+ if (ETHER_IS_MULTICAST(dst))
+ return (EXTERROR(EINVAL, "Multicast address not permitted"));
+ if (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 &&
+ dst[3] == 0 && dst[4] == 0 && dst[5] == 0)
+ return (EXTERROR(EINVAL, "Zero address not permitted"));
/*
* A route for this destination might already exist. If so,
@@ -3266,13 +3315,14 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
if (sc->sc_brtcnt >= sc->sc_brtmax) {
sc->sc_brtexceeded++;
BRIDGE_RT_UNLOCK(sc);
- return (ENOSPC);
+ return (EXTERROR(ENOSPC, "Address table is full"));
}
/* Check per interface address limits (if enabled) */
if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) {
bif->bif_addrexceeded++;
BRIDGE_RT_UNLOCK(sc);
- return (ENOSPC);
+ return (EXTERROR(ENOSPC,
+ "Interface address limit exceeded"));
}
/*
@@ -3283,7 +3333,8 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO);
if (brt == NULL) {
BRIDGE_RT_UNLOCK(sc);
- return (ENOMEM);
+ return (EXTERROR(ENOMEM,
+ "Cannot allocate address node"));
}
brt->brt_vnet = curvnet;
@@ -3631,7 +3682,7 @@ bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
do {
dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr);
if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan)
- return (EEXIST);
+ return (EXTERROR(EEXIST, "Address already exists"));
if (dir > 0) {
CK_LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
goto out;
diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h
index 97b63e3d4416..c458dcc152a0 100644
--- a/sys/net/if_bridgevar.h
+++ b/sys/net/if_bridgevar.h
@@ -124,7 +124,7 @@
#define BRDGSPROTO 28 /* set protocol (ifbrparam) */
#define BRDGSTXHC 29 /* set tx hold count (ifbrparam) */
#define BRDGSIFAMAX 30 /* set max interface addrs (ifbreq) */
-#define BRDGSIFUNTAGGED 31 /* set if untagged vlan */
+#define BRDGSIFPVID 31 /* set if PVID */
#define BRDGSIFVLANSET 32 /* set if vlan set */
#define BRDGGIFVLANSET 33 /* get if vlan set */
@@ -144,7 +144,7 @@ struct ifbreq {
uint32_t ifbr_addrcnt; /* member if addr number */
uint32_t ifbr_addrmax; /* member if addr max */
uint32_t ifbr_addrexceeded; /* member if addr violations */
- ether_vlanid_t ifbr_untagged; /* member if untagged vlan */
+ ether_vlanid_t ifbr_pvid; /* member if PVID */
uint8_t pad[32];
};
diff --git a/sys/net/if_ovpn.c b/sys/net/if_ovpn.c
index 853a0556a080..fe3e7bbd7fff 100644
--- a/sys/net/if_ovpn.c
+++ b/sys/net/if_ovpn.c
@@ -34,11 +34,13 @@
#include <sys/epoch.h>
#include <sys/file.h>
#include <sys/filedesc.h>
+#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/nv.h>
+#include <sys/osd.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/rmlock.h>
@@ -131,6 +133,9 @@ struct ovpn_notification {
/* Delete notification */
enum ovpn_del_reason del_reason;
struct ovpn_peer_counters counters;
+
+ /* Float notification */
+ struct sockaddr_storage address;
};
struct ovpn_softc;
@@ -195,6 +200,10 @@ struct ovpn_softc {
struct epoch_context epoch_ctx;
};
+struct ovpn_mtag {
+ struct sockaddr_storage addr;
+};
+
static struct ovpn_kpeer *ovpn_find_peer(struct ovpn_softc *, uint32_t);
static bool ovpn_udp_input(struct mbuf *, int, struct inpcb *,
const struct sockaddr *, void *);
@@ -206,6 +215,8 @@ static void ovpn_free_kkey_dir(struct ovpn_kkey_dir *);
static bool ovpn_check_replay(struct ovpn_kkey_dir *, uint32_t);
static int ovpn_peer_compare(const struct ovpn_kpeer *,
const struct ovpn_kpeer *);
+static bool ovpn_sockaddr_compare(const struct sockaddr *,
+ const struct sockaddr *);
static RB_PROTOTYPE(ovpn_kpeers, ovpn_kpeer, tree, ovpn_peer_compare);
static RB_GENERATE(ovpn_kpeers, ovpn_kpeer, tree, ovpn_peer_compare);
@@ -283,6 +294,43 @@ ovpn_peer_compare(const struct ovpn_kpeer *a, const struct ovpn_kpeer *b)
return (a->peerid - b->peerid);
}
+static bool
+ovpn_sockaddr_compare(const struct sockaddr *a,
+ const struct sockaddr *b)
+{
+ if (a->sa_family != b->sa_family)
+ return (false);
+ MPASS(a->sa_len == b->sa_len);
+
+ switch (a->sa_family) {
+ case AF_INET: {
+ const struct sockaddr_in *a4, *b4;
+
+ a4 = (const struct sockaddr_in *)a;
+ b4 = (const struct sockaddr_in *)b;
+
+ if (a4->sin_port != b4->sin_port)
+ return (false);
+
+ return (a4->sin_addr.s_addr == b4->sin_addr.s_addr);
+ }
+ case AF_INET6: {
+ const struct sockaddr_in6 *a6, *b6;
+
+ a6 = (const struct sockaddr_in6 *)a;
+ b6 = (const struct sockaddr_in6 *)b;
+
+ if (a6->sin6_port != b6->sin6_port)
+ return (false);
+
+ return (memcmp(&a6->sin6_addr, &b6->sin6_addr,
+ sizeof(a6->sin6_addr)) == 0);
+ }
+ default:
+ panic("Unknown address family %d", a->sa_family);
+ }
+}
+
static struct ovpn_kpeer *
ovpn_find_peer(struct ovpn_softc *sc, uint32_t peerid)
{
@@ -394,6 +442,44 @@ ovpn_nvlist_to_sockaddr(const nvlist_t *nvl, struct sockaddr_storage *sa)
return (0);
}
+static int
+ovpn_add_sockaddr(nvlist_t *parent, const char *name, const struct sockaddr *s)
+{
+ nvlist_t *nvl;
+
+ nvl = nvlist_create(0);
+ if (nvl == NULL)
+ return (ENOMEM);
+
+ nvlist_add_number(nvl, "af", s->sa_family);
+
+ switch (s->sa_family) {
+ case AF_INET: {
+ const struct sockaddr_in *s4 = (const struct sockaddr_in *)s;
+
+ nvlist_add_number(nvl, "port", s4->sin_port);
+ nvlist_add_binary(nvl, "address", &s4->sin_addr,
+ sizeof(s4->sin_addr));
+ break;
+ }
+ case AF_INET6: {
+ const struct sockaddr_in6 *s6 = (const struct sockaddr_in6 *)s;
+
+ nvlist_add_number(nvl, "port", s6->sin6_port);
+ nvlist_add_binary(nvl, "address", &s6->sin6_addr,
+ sizeof(s6->sin6_addr));
+ break;
+ }
+ default:
+ nvlist_destroy(nvl);
+ return (EINVAL);
+ }
+
+ nvlist_move_nvlist(parent, name, nvl);
+
+ return (0);
+}
+
static void
ovpn_notify_del_peer(struct ovpn_softc *sc, struct ovpn_kpeer *peer)
{
@@ -446,6 +532,33 @@ ovpn_notify_key_rotation(struct ovpn_softc *sc, struct ovpn_kpeer *peer)
}
}
+static int
+ovpn_notify_float(struct ovpn_softc *sc, uint32_t peerid,
+ const struct sockaddr_storage *remote)
+{
+ struct ovpn_notification *n;
+
+ n = malloc(sizeof(*n), M_OVPN, M_NOWAIT | M_ZERO);
+ if (n == NULL)
+ return (ENOMEM);
+
+ n->peerid = peerid;
+ n->type = OVPN_NOTIF_FLOAT;
+ memcpy(&n->address, remote, sizeof(n->address));
+
+ if (buf_ring_enqueue(sc->notifring, n) != 0) {
+ free(n, M_OVPN);
+ return (ENOMEM);
+ } else if (sc->so != NULL) {
+ /* Wake up userspace */
+ sc->so->so_error = EAGAIN;
+ sorwakeup(sc->so);
+ sowwakeup(sc->so);
+ }
+
+ return (0);
+}
+
static void
ovpn_peer_release_ref(struct ovpn_kpeer *peer, bool locked)
{
@@ -1377,12 +1490,36 @@ opvn_get_pkt(struct ovpn_softc *sc, nvlist_t **onvl)
}
nvlist_add_number(nvl, "peerid", n->peerid);
nvlist_add_number(nvl, "notification", n->type);
- if (n->type == OVPN_NOTIF_DEL_PEER) {
+ switch (n->type) {
+ case OVPN_NOTIF_DEL_PEER: {
nvlist_add_number(nvl, "del_reason", n->del_reason);
/* No error handling, because we want to send the notification
* even if we can't attach the counters. */
ovpn_notif_add_counters(nvl, n);
+ break;
+ }
+ case OVPN_NOTIF_FLOAT: {
+ int ret;
+
+ ret = ovpn_add_sockaddr(nvl, "address",
+ (struct sockaddr *)&n->address);
+
+ if (ret) {
+ /*
+ * Try to re-enqueue the notification. Maybe we'll
+ * have better luck next time. No error handling,
+ * because if we fail to re-enqueue there's nothing we can do.
+ */
+ (void)ovpn_notify_float(sc, n->peerid, &n->address);
+ nvlist_destroy(nvl);
+ free(n, M_OVPN);
+ return (ret);
+ }
+ break;
+ }
+ default:
+ break;
}
free(n, M_OVPN);
@@ -1538,6 +1675,7 @@ ovpn_finish_rx(struct ovpn_softc *sc, struct mbuf *m,
struct rm_priotracker *_ovpn_lock_trackerp)
{
uint32_t af;
+ struct m_tag *mtag;
OVPN_RASSERT(sc);
NET_EPOCH_ASSERT();
@@ -1556,6 +1694,38 @@ ovpn_finish_rx(struct ovpn_softc *sc, struct mbuf *m,
OVPN_RUNLOCK(sc);
+ /* Check if the peer changed to a new source address. */
+ mtag = m_tag_find(m, PACKET_TAG_OVPN, NULL);
+ if (mtag != NULL) {
+ struct ovpn_mtag *ot = (struct ovpn_mtag *)(mtag + 1);
+
+ OVPN_WLOCK(sc);
+
+ /*
+ * Check the address against the peer's remote again, because we may race
+ * against ourselves (i.e. we may have tagged multiple packets to indicate we
+ * floated).
+ */
+ if (ovpn_sockaddr_compare((struct sockaddr *)&ot->addr,
+ (struct sockaddr *)&peer->remote)) {
+ OVPN_WUNLOCK(sc);
+ goto skip_float;
+ }
+
+ /* And notify userspace. */
+ if (ovpn_notify_float(sc, peer->peerid, &ot->addr) == 0) {
+ /*
+ * Update the 'remote' for this peer, but only if
+ * we've actually enqueued the notification.
+ * Otherwise we can try again later.
+ */
+ memcpy(&peer->remote, &ot->addr, sizeof(peer->remote));
+ }
+
+ OVPN_WUNLOCK(sc);
+ }
+
+skip_float:
OVPN_COUNTER_ADD(sc, received_data_pkts, 1);
OVPN_COUNTER_ADD(sc, tunnel_bytes_received, m->m_pkthdr.len);
OVPN_PEER_COUNTER_ADD(peer, pkt_in, 1);
@@ -2318,6 +2488,29 @@ ovpn_udp_input(struct mbuf *m, int off, struct inpcb *inp,
return (true);
}
+ /*
+ * If we got this from a different address than we expected, tag the packet.
+ * We'll deal with notifying userspace later, after we've decrypted and
+ * verified.
+ */
+ if (! ovpn_sockaddr_compare((struct sockaddr *)&peer->remote, sa)) {
+ struct m_tag *mt;
+ struct ovpn_mtag *ot;
+
+ MPASS(sa->sa_len <= sizeof(ot->addr));
+ mt = m_tag_get(PACKET_TAG_OVPN, sizeof(*ot), M_NOWAIT);
+ /*
+ * If we fail to allocate here we'll just try again on the next
+ * packet.
+ */
+ if (mt != NULL) {
+ ot = (struct ovpn_mtag *)(mt + 1);
+ memcpy(&ot->addr, sa, sa->sa_len);
+
+ m_tag_prepend(m, mt);
+ }
+ }
+
if (key->decrypt->cipher == OVPN_CIPHER_ALG_NONE) {
/* Now remove the outer headers */
m_adj_decap(m, sizeof(struct udphdr) + ohdrlen);
@@ -2593,23 +2786,53 @@ vnet_ovpn_init(const void *unused __unused)
VNET_SYSINIT(vnet_ovpn_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
vnet_ovpn_init, NULL);
-static void
-vnet_ovpn_uninit(const void *unused __unused)
+static int
+ovpn_prison_remove(void *obj, void *data __unused)
{
- if_clone_detach(V_ovpn_cloner);
+#ifdef VIMAGE
+ struct prison *pr;
+
+ pr = obj;
+ if (prison_owns_vnet(pr)) {
+ CURVNET_SET(pr->pr_vnet);
+ if (V_ovpn_cloner != NULL) {
+ ifc_detach_cloner(V_ovpn_cloner);
+ V_ovpn_cloner = NULL;
+ }
+ CURVNET_RESTORE();
+ }
+#endif
+ return (0);
}
-VNET_SYSUNINIT(vnet_ovpn_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY,
- vnet_ovpn_uninit, NULL);
static int
ovpnmodevent(module_t mod, int type, void *data)
{
+ static int ovpn_osd_jail_slot;
+
switch (type) {
- case MOD_LOAD:
- /* Done in vnet_ovpn_init() */
+ case MOD_LOAD: {
+ /*
+ * Registration is handled in vnet_ovpn_init(), but cloned
+ * interfaces must be destroyed via PR_METHOD_REMOVE since they
+ * hold a reference to the prison via the UDP socket, which
+ * prevents the prison from being destroyed.
+ */
+ osd_method_t methods[PR_MAXMETHOD] = {
+ [PR_METHOD_REMOVE] = ovpn_prison_remove,
+ };
+ ovpn_osd_jail_slot = osd_jail_register(NULL, methods);
break;
+ }
case MOD_UNLOAD:
- /* Done in vnet_ovpn_uninit() */
+ if (ovpn_osd_jail_slot != 0)
+ osd_jail_deregister(ovpn_osd_jail_slot);
+ CURVNET_SET(vnet0);
+ if (V_ovpn_cloner != NULL) {
+ ifc_detach_cloner(V_ovpn_cloner);
+ V_ovpn_cloner = NULL;
+ }
+ CURVNET_RESTORE();
break;
default:
return (EOPNOTSUPP);
diff --git a/sys/net/if_ovpn.h b/sys/net/if_ovpn.h
index 2d6b8c1e7eff..2a24c35788a9 100644
--- a/sys/net/if_ovpn.h
+++ b/sys/net/if_ovpn.h
@@ -37,6 +37,7 @@
enum ovpn_notif_type {
OVPN_NOTIF_DEL_PEER,
OVPN_NOTIF_ROTATE_KEY,
+ OVPN_NOTIF_FLOAT,
};
enum ovpn_del_reason {
diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c
index 3bab04aa4d38..5e6f65c04b2f 100644
--- a/sys/net/if_tuntap.c
+++ b/sys/net/if_tuntap.c
@@ -74,6 +74,7 @@
#include <sys/malloc.h>
#include <sys/random.h>
#include <sys/ctype.h>
+#include <sys/osd.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -178,6 +179,7 @@ struct tuntap_softc {
static struct mtx tunmtx;
static eventhandler_tag arrival_tag;
static eventhandler_tag clone_tag;
+static int tuntap_osd_jail_slot;
static const char tunname[] = "tun";
static const char tapname[] = "tap";
static const char vmnetname[] = "vmnet";
@@ -497,6 +499,10 @@ vmnet_clone_match(struct if_clone *ifc, const char *name)
return (0);
}
+/*
+ * Create a clone via the ifnet cloning mechanism. Note that this is invoked
+ * indirectly by tunclone() below.
+ */
static int
tun_clone_create(struct if_clone *ifc, char *name, size_t len,
struct ifc_data *ifd, struct ifnet **ifpp)
@@ -532,15 +538,19 @@ tun_clone_create(struct if_clone *ifc, char *name, size_t len,
if (i != 0)
i = tun_create_device(drv, unit, NULL, &dev, name);
if (i == 0) {
- dev_ref(dev);
+ struct tuntap_softc *tp;
+
tuncreate(dev);
- struct tuntap_softc *tp = dev->si_drv1;
+ tp = dev->si_drv1;
*ifpp = tp->tun_ifp;
}
return (i);
}
+/*
+ * Create a clone via devfs access.
+ */
static void
tunclone(void *arg, struct ucred *cred, char *name, int namelen,
struct cdev **dev)
@@ -595,11 +605,12 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen,
}
i = tun_create_device(drv, u, cred, dev, name);
- }
- if (i == 0) {
+ } else {
+ /* Consumed by the dev_clone invoker. */
dev_ref(*dev);
- if_clone_create(name, namelen, NULL);
}
+ if (i == 0)
+ if_clone_create(name, namelen, NULL);
out:
CURVNET_RESTORE();
}
@@ -670,16 +681,6 @@ VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY,
vnet_tun_init, NULL);
static void
-vnet_tun_uninit(const void *unused __unused)
-{
-
- for (u_int i = 0; i < NDRV; ++i)
- if_clone_detach(V_tuntap_driver_cloners[i]);
-}
-VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY,
- vnet_tun_uninit, NULL);
-
-static void
tun_uninit(const void *unused __unused)
{
struct tuntap_driver *drv;
@@ -689,6 +690,16 @@ tun_uninit(const void *unused __unused)
EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag);
EVENTHANDLER_DEREGISTER(dev_clone, clone_tag);
+ CURVNET_SET(vnet0);
+ for (u_int i = 0; i < NDRV; i++) {
+ if_clone_detach(V_tuntap_driver_cloners[i]);
+ V_tuntap_driver_cloners[i] = NULL;
+ }
+ CURVNET_RESTORE();
+
+ if (tuntap_osd_jail_slot != 0)
+ osd_jail_deregister(tuntap_osd_jail_slot);
+
mtx_lock(&tunmtx);
while ((tp = TAILQ_FIRST(&tunhead)) != NULL) {
TAILQ_REMOVE(&tunhead, tp, tun_list);
@@ -724,6 +735,30 @@ tuntap_driver_from_ifnet(const struct ifnet *ifp)
return (NULL);
}
+/*
+ * Remove devices that were created by devfs cloning, as they hold references
+ * which prevent the prison from collapsing, in which state VNET sysuninits will
+ * not be invoked.
+ */
+static int
+tuntap_prison_remove(void *obj, void *data __unused)
+{
+#ifdef VIMAGE
+ struct prison *pr;
+
+ pr = obj;
+ if (prison_owns_vnet(pr)) {
+ CURVNET_SET(pr->pr_vnet);
+ for (u_int i = 0; i < NDRV; i++) {
+ if_clone_detach(V_tuntap_driver_cloners[i]);
+ V_tuntap_driver_cloners[i] = NULL;
+ }
+ CURVNET_RESTORE();
+ }
+#endif
+ return (0);
+}
+
static int
tuntapmodevent(module_t mod, int type, void *data)
{
@@ -738,8 +773,12 @@ tuntapmodevent(module_t mod, int type, void *data)
clone_setup(&drv->clones);
drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx);
}
+ osd_method_t methods[PR_MAXMETHOD] = {
+ [PR_METHOD_REMOVE] = tuntap_prison_remove,
+ };
+ tuntap_osd_jail_slot = osd_jail_register(NULL, methods);
arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event,
- tunrename, 0, 1000);
+ tunrename, 0, 1000);
if (arrival_tag == NULL)
return (ENOMEM);
clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000);
@@ -747,7 +786,7 @@ tuntapmodevent(module_t mod, int type, void *data)
return (ENOMEM);
break;
case MOD_UNLOAD:
- /* See tun_uninit, so it's done after the vnet_sysuninit() */
+ /* See tun_uninit(). */
break;
default:
return EOPNOTSUPP;
@@ -798,6 +837,8 @@ tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr,
args.mda_si_drv1 = tp;
error = make_dev_s(&args, dev, "%s", name);
if (error != 0) {
+ mtx_destroy(&tp->tun_mtx);
+ cv_destroy(&tp->tun_cv);
free(tp, M_TUN);
return (error);
}
@@ -914,7 +955,6 @@ tap_transmit(struct ifnet *ifp, struct mbuf *m)
return (error);
}
-/* XXX: should return an error code so it can fail. */
static void
tuncreate(struct cdev *dev)
{
diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c
index 22fcb7bf7c64..61000018e5a4 100644
--- a/sys/net/if_vlan.c
+++ b/sys/net/if_vlan.c
@@ -2336,6 +2336,18 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = ENOENT;
break;
}
+
+ /*
+ * If the ifp is in a bridge, do not allow setting the device
+ * to a bridge; this prevents having a bridge SVI as a bridge
+ * member (which is not permitted).
+ */
+ if (ifp->if_bridge != NULL && p->if_type == IFT_BRIDGE) {
+ if_rele(p);
+ error = EINVAL;
+ break;
+ }
+
if (vlr.vlr_proto == 0)
vlr.vlr_proto = ETHERTYPE_VLAN;
oldmtu = ifp->if_mtu;
diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c
index 9b81760e58f3..51e7c2fbc4b0 100644
--- a/sys/netinet/ip_fastfwd.c
+++ b/sys/netinet/ip_fastfwd.c
@@ -69,6 +69,7 @@
#include <sys/cdefs.h>
#include "opt_ipstealth.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -102,6 +103,10 @@
#include <machine/in_cksum.h>
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+#include <netinet/sctp_crc32.h>
+#endif
+
#define V_ipsendredirects VNET(ipsendredirects)
static struct mbuf *
@@ -460,6 +465,23 @@ passout:
} else
gw = (const struct sockaddr *)dst;
+ /*
+ * If TCP/UDP header still needs a valid checksum and interface will not
+ * calculate it for us, do it here.
+ */
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &
+ ~nh->nh_ifp->if_hwassist)) {
+ in_delayed_cksum(m);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+ }
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP_SCTP &
+ ~nh->nh_ifp->if_hwassist)) {
+ sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
+ m->m_pkthdr.csum_flags &= ~CSUM_IP_SCTP;
+ }
+#endif
+
/* Handle redirect case. */
redest.s_addr = 0;
if (V_ipsendredirects && osrc.s_addr == ip->ip_src.s_addr &&
diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c
index dc31ffbc2161..5f637cc63df5 100644
--- a/sys/netinet/sctp_input.c
+++ b/sys/netinet/sctp_input.c
@@ -5780,7 +5780,11 @@ sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
goto out;
}
ecn_bits = ip->ip_tos;
- if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
+ if (m->m_pkthdr.csum_flags & (CSUM_SCTP_VALID | CSUM_IP_SCTP)) {
+ /*
+ * Packets with CSUM_IP_SCTP were sent from the local host using
+ * checksum offloading, so checksum verification is not required.
+ */
SCTP_STAT_INCR(sctps_recvhwcrc);
compute_crc = 0;
} else {
diff --git a/sys/netinet/sctp_timer.c b/sys/netinet/sctp_timer.c
index 66af716eea52..7d8cb965ab09 100644
--- a/sys/netinet/sctp_timer.c
+++ b/sys/netinet/sctp_timer.c
@@ -35,7 +35,6 @@
#define _IP_VHL
#include <netinet/sctp_os.h>
#include <netinet/sctp_pcb.h>
-
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_timer.h>
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index b60cdf45af52..22fc99496d34 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -170,6 +170,50 @@
#define NUM_OF_HPTSI_SLOTS 102400
+/* The number of connections after which the dynamic sleep logic kicks in. */
+#define DEFAULT_CONNECTION_THRESHOLD 100
+
+/*
+ * When using the hpts, a TCP stack must make sure
+ * that once a INP_DROPPED flag is applied to a INP
+ * that it does not expect tcp_output() to ever be
+ * called by the hpts. The hpts will *not* call
+ * any output (or input) functions on a TCB that
+ * is in the DROPPED state.
+ *
+ * This implies final ACK's and RST's that might
+ * be sent when a TCB is still around must be
+ * sent from a routine like tcp_respond().
+ */
+#define LOWEST_SLEEP_ALLOWED 50
+#define DEFAULT_MIN_SLEEP 250 /* How many usec's is default for hpts sleep
+ * this determines min granularity of the
+ * hpts. If 1, granularity is 10useconds at
+ * the cost of more CPU (context switching).
+ * Note do not set this to 0.
+ */
+#define DYNAMIC_MIN_SLEEP DEFAULT_MIN_SLEEP
+#define DYNAMIC_MAX_SLEEP 5000 /* 5ms */
+
+/* Thresholds for raising/lowering sleep */
+#define SLOTS_INDICATE_MORE_SLEEP 100 /* This would be 1ms */
+#define SLOTS_INDICATE_LESS_SLEEP 1000 /* This would indicate 10ms */
+/**
+ *
+ * Dynamic adjustment of sleeping times is done in "new" mode
+ * where we are depending on syscall returns and lro returns
+ * to push hpts forward mainly and the timer is only a backstop.
+ *
+ * When we are in the "new" mode i.e. conn_cnt > conn_cnt_thresh
+ * then we do a dynamic adjustment on the time we sleep.
+ * Our threshold is if the lateness of the first client served (in slots) is
+ * greater than slots_indicate_less_sleep (1000 slots or 10ms).
+ * If we were that late, the actual sleep time is adjusted down by 50%.
+ * If the ticks_ran is less than slots_indicate_more_sleep (100 slots
+ * or 1000usecs), the actual sleep time is doubled.
+ *
+ */
+
/* Each hpts has its own p_mtx which is used for locking */
#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED)
#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx)
@@ -244,11 +288,10 @@ static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout);
static void tcp_hpts_thread(void *ctx);
int32_t tcp_min_hptsi_time = DEFAULT_MIN_SLEEP;
-static int conn_cnt_thresh = DEFAULT_CONNECTION_THESHOLD;
+static int conn_cnt_thresh = DEFAULT_CONNECTION_THRESHOLD;
static int32_t dynamic_min_sleep = DYNAMIC_MIN_SLEEP;
static int32_t dynamic_max_sleep = DYNAMIC_MAX_SLEEP;
-
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hpts, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"TCP Hpts controls");
SYSCTL_NODE(_net_inet_tcp_hpts, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
@@ -366,7 +409,7 @@ sysctl_net_inet_tcp_hpts_max_sleep(SYSCTL_HANDLER_ARGS)
new = hpts_sleep_max;
error = sysctl_handle_int(oidp, &new, 0, req);
if (error == 0 && req->newptr) {
- if ((new < (dynamic_min_sleep/HPTS_TICKS_PER_SLOT)) ||
+ if ((new < (dynamic_min_sleep/HPTS_USECS_PER_SLOT)) ||
(new > HPTS_MAX_SLEEP_ALLOWED))
error = EINVAL;
else
@@ -404,15 +447,15 @@ SYSCTL_PROC(_net_inet_tcp_hpts, OID_AUTO, minsleep,
&sysctl_net_inet_tcp_hpts_min_sleep, "IU",
"The minimum time the hpts must sleep before processing more slots");
-static int ticks_indicate_more_sleep = TICKS_INDICATE_MORE_SLEEP;
-static int ticks_indicate_less_sleep = TICKS_INDICATE_LESS_SLEEP;
+static int slots_indicate_more_sleep = SLOTS_INDICATE_MORE_SLEEP;
+static int slots_indicate_less_sleep = SLOTS_INDICATE_LESS_SLEEP;
static int tcp_hpts_no_wake_over_thresh = 1;
SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, more_sleep, CTLFLAG_RW,
- &ticks_indicate_more_sleep, 0,
+ &slots_indicate_more_sleep, 0,
"If we only process this many or less on a timeout, we need longer sleep on the next callout");
SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, less_sleep, CTLFLAG_RW,
- &ticks_indicate_less_sleep, 0,
+ &slots_indicate_less_sleep, 0,
"If we process this many or more on a timeout, we need less sleep on the next callout");
SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, nowake_over_thresh, CTLFLAG_RW,
&tcp_hpts_no_wake_over_thresh, 0,
@@ -453,7 +496,7 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb *tp, struct timeval *tv,
log.u_bbr.inflight = slots_to_run;
log.u_bbr.applimited = hpts->overidden_sleep;
log.u_bbr.delivered = hpts->saved_curtick;
- log.u_bbr.timeStamp = tcp_tv_to_usectick(tv);
+ log.u_bbr.timeStamp = tcp_tv_to_usec(tv);
log.u_bbr.epoch = hpts->saved_curslot;
log.u_bbr.lt_epoch = hpts->saved_prev_slot;
log.u_bbr.pkts_out = hpts->p_delayed_by;
@@ -877,7 +920,7 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
return (slot_on);
}
/* Get the current time relative to the wheel */
- wheel_cts = tcp_tv_to_hptstick(&tv);
+ wheel_cts = tcp_tv_to_hpts_slot(&tv);
/* Map it onto the wheel */
wheel_slot = tick_to_wheel(wheel_cts);
/* Now what's the max we can place it at? */
@@ -949,7 +992,7 @@ tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line, struct hpts_
* We need to reschedule the hpts's time-out.
*/
hpts->p_hpts_sleep_time = slot;
- need_new_to = slot * HPTS_TICKS_PER_SLOT;
+ need_new_to = slot * HPTS_USECS_PER_SLOT;
}
}
/*
@@ -1104,7 +1147,7 @@ tcp_hptsi(struct tcp_hpts_entry *hpts, bool from_callout)
hpts->p_lasttick = hpts->p_curtick;
hpts->p_curtick = tcp_gethptstick(&tv);
- tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usectick(&tv);
+ tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usec(&tv);
orig_exit_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
if ((hpts->p_on_queue_cnt == 0) ||
(hpts->p_lasttick == hpts->p_curtick)) {
@@ -1121,7 +1164,7 @@ again:
HPTS_MTX_ASSERT(hpts);
slots_to_run = hpts_slots_diff(hpts->p_prev_slot, hpts->p_cur_slot);
if (((hpts->p_curtick - hpts->p_lasttick) >
- ((NUM_OF_HPTSI_SLOTS-1) * HPTS_TICKS_PER_SLOT)) &&
+ ((NUM_OF_HPTSI_SLOTS-1) * HPTS_USECS_PER_SLOT)) &&
(hpts->p_on_queue_cnt != 0)) {
/*
* Wheel wrap is occuring, basically we
@@ -1202,7 +1245,7 @@ again:
* was not any (i.e. if slots_to_run == 1, no delay).
*/
hpts->p_delayed_by = (slots_to_run - (i + 1)) *
- HPTS_TICKS_PER_SLOT;
+ HPTS_USECS_PER_SLOT;
runningslot = hpts->p_runningslot;
hptsh = &hpts->p_hptss[runningslot];
@@ -1446,7 +1489,7 @@ no_one:
goto again;
}
no_run:
- tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usectick(&tv);
+ tcp_pace.cts_last_ran[hpts->p_num] = tcp_tv_to_usec(&tv);
/*
* Set flag to tell that we are done for
* any slot input that happens during
@@ -1569,7 +1612,7 @@ __tcp_run_hpts(void)
ticks_ran = tcp_hptsi(hpts, false);
/* We may want to adjust the sleep values here */
if (hpts->p_on_queue_cnt >= conn_cnt_thresh) {
- if (ticks_ran > ticks_indicate_less_sleep) {
+ if (ticks_ran > slots_indicate_less_sleep) {
struct timeval tv;
sbintime_t sb;
@@ -1579,7 +1622,7 @@ __tcp_run_hpts(void)
/* Reschedule with new to value */
tcp_hpts_set_max_sleep(hpts, 0);
tv.tv_sec = 0;
- tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_SLOT;
+ tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT;
/* Validate its in the right ranges */
if (tv.tv_usec < hpts->p_mysleep.tv_usec) {
hpts->overidden_sleep = tv.tv_usec;
@@ -1601,7 +1644,7 @@ __tcp_run_hpts(void)
callout_reset_sbt_on(&hpts->co, sb, 0,
hpts_timeout_swi, hpts, hpts->p_cpu,
(C_DIRECT_EXEC | C_PREL(tcp_hpts_precision)));
- } else if (ticks_ran < ticks_indicate_more_sleep) {
+ } else if (ticks_ran < slots_indicate_more_sleep) {
/* For the further sleep, don't reschedule hpts */
hpts->p_mysleep.tv_usec *= 2;
if (hpts->p_mysleep.tv_usec > dynamic_max_sleep)
@@ -1683,7 +1726,7 @@ tcp_hpts_thread(void *ctx)
hpts->p_hpts_active = 1;
ticks_ran = tcp_hptsi(hpts, true);
tv.tv_sec = 0;
- tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_SLOT;
+ tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT;
if ((hpts->p_on_queue_cnt > conn_cnt_thresh) && (hpts->hit_callout_thresh == 0)) {
hpts->hit_callout_thresh = 1;
atomic_add_int(&hpts_that_need_softclock, 1);
@@ -1697,11 +1740,11 @@ tcp_hpts_thread(void *ctx)
* Only adjust sleep time if we were
* called from the callout i.e. direct_wake == 0.
*/
- if (ticks_ran < ticks_indicate_more_sleep) {
+ if (ticks_ran < slots_indicate_more_sleep) {
hpts->p_mysleep.tv_usec *= 2;
if (hpts->p_mysleep.tv_usec > dynamic_max_sleep)
hpts->p_mysleep.tv_usec = dynamic_max_sleep;
- } else if (ticks_ran > ticks_indicate_less_sleep) {
+ } else if (ticks_ran > slots_indicate_less_sleep) {
hpts->p_mysleep.tv_usec /= 2;
if (hpts->p_mysleep.tv_usec < dynamic_min_sleep)
hpts->p_mysleep.tv_usec = dynamic_min_sleep;
@@ -1948,7 +1991,7 @@ tcp_hpts_mod_load(void)
hpts->p_hpts_sleep_time = hpts_sleep_max;
hpts->p_num = i;
hpts->p_curtick = tcp_gethptstick(&tv);
- tcp_pace.cts_last_ran[i] = tcp_tv_to_usectick(&tv);
+ tcp_pace.cts_last_ran[i] = tcp_tv_to_usec(&tv);
hpts->p_prev_slot = hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
hpts->p_cpu = 0xffff;
hpts->p_nxt_slot = hpts_slot(hpts->p_cur_slot, 1);
@@ -1995,7 +2038,7 @@ tcp_hpts_mod_load(void)
}
}
tv.tv_sec = 0;
- tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_SLOT;
+ tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_USECS_PER_SLOT;
hpts->sleeping = tv.tv_usec;
sb = tvtosbt(tv);
callout_reset_sbt_on(&hpts->co, sb, 0,
diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
index f5856ed8e688..6172baf2a062 100644
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -26,14 +26,38 @@
#ifndef __tcp_hpts_h__
#define __tcp_hpts_h__
-/* Number of useconds in a hpts tick */
-#define HPTS_TICKS_PER_SLOT 10
+/* Number of useconds represented by an hpts slot */
+#define HPTS_USECS_PER_SLOT 10
#define HPTS_MS_TO_SLOTS(x) ((x * 100) + 1)
#define HPTS_USEC_TO_SLOTS(x) ((x+9) /10)
#define HPTS_USEC_IN_SEC 1000000
#define HPTS_MSEC_IN_SEC 1000
#define HPTS_USEC_IN_MSEC 1000
+static inline uint32_t
+tcp_tv_to_hpts_slot(const struct timeval *sv)
+{
+ return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_USECS_PER_SLOT));
+}
+
+static inline uint32_t
+tcp_tv_to_usec(const struct timeval *sv)
+{
+ return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec));
+}
+
+static inline uint32_t
+tcp_tv_to_msec(const struct timeval *sv)
+{
+ return ((uint32_t) ((sv->tv_sec * HPTS_MSEC_IN_SEC) + (sv->tv_usec/HPTS_USEC_IN_MSEC)));
+}
+
+static inline uint64_t
+tcp_tv_to_lusec(const struct timeval *sv)
+{
+ return ((uint64_t)((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec));
+}
+
struct hpts_diag {
uint32_t p_hpts_active; /* bbr->flex7 x */
uint32_t p_nxt_slot; /* bbr->flex1 x */
@@ -66,52 +90,16 @@ struct hpts_diag {
#define PACE_PKT_OUTPUT 0x40 /* Output Packets being paced */
#define PACE_TMR_MASK (PACE_TMR_KEEP|PACE_TMR_PERSIT|PACE_TMR_RXT|PACE_TMR_TLP|PACE_TMR_RACK|PACE_TMR_DELACK)
-#define DEFAULT_CONNECTION_THESHOLD 100
+#ifdef _KERNEL
/*
- * When using the hpts, a TCP stack must make sure
- * that once a INP_DROPPED flag is applied to a INP
- * that it does not expect tcp_output() to ever be
- * called by the hpts. The hpts will *not* call
- * any output (or input) functions on a TCB that
- * is in the DROPPED state.
- *
- * This implies final ACK's and RST's that might
- * be sent when a TCB is still around must be
- * sent from a routine like tcp_respond().
- */
-#define LOWEST_SLEEP_ALLOWED 50
-#define DEFAULT_MIN_SLEEP 250 /* How many usec's is default for hpts sleep
- * this determines min granularity of the
- * hpts. If 1, granularity is 10useconds at
- * the cost of more CPU (context switching).
- * Note do not set this to 0.
- */
-#define DYNAMIC_MIN_SLEEP DEFAULT_MIN_SLEEP
-#define DYNAMIC_MAX_SLEEP 5000 /* 5ms */
-
-/* Thresholds for raising/lowering sleep */
-#define TICKS_INDICATE_MORE_SLEEP 100 /* This would be 1ms */
-#define TICKS_INDICATE_LESS_SLEEP 1000 /* This would indicate 10ms */
-/**
- *
- * Dynamic adjustment of sleeping times is done in "new" mode
- * where we are depending on syscall returns and lro returns
- * to push hpts forward mainly and the timer is only a backstop.
- *
- * When we are in the "new" mode i.e. conn_cnt > conn_cnt_thresh
- * then we do a dynamic adjustment on the time we sleep.
- * Our threshold is if the lateness of the first client served (in ticks) is
- * greater than or equal too ticks_indicate_more_sleep (10ms
- * or 10000 ticks). If we were that late, the actual sleep time
- * is adjusted down by 50%. If the ticks_ran is less than
- * ticks_indicate_more_sleep (100 ticks or 1000usecs).
- *
- */
+ * The following are the definitions for the kernel HPTS interface for managing
+ * the HPTS ring and the TCBs on it.
+*/
-#ifdef _KERNEL
void tcp_hpts_init(struct tcpcb *);
void tcp_hpts_remove(struct tcpcb *);
+
static inline bool
tcp_in_hpts(struct tcpcb *tp)
{
@@ -151,51 +139,12 @@ uint32_t tcp_hpts_insert_diag(struct tcpcb *tp, uint32_t slot, int32_t line,
void tcp_set_hpts(struct tcpcb *tp);
-void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason);
-
-void tcp_lro_hpts_init(void);
-void tcp_lro_hpts_uninit(void);
-
-extern int32_t tcp_min_hptsi_time;
-
-#endif /* _KERNEL */
-
-/*
- * The following functions should also be available
- * to userspace as well.
- */
-static inline uint32_t
-tcp_tv_to_hptstick(const struct timeval *sv)
-{
- return ((sv->tv_sec * 100000) + (sv->tv_usec / HPTS_TICKS_PER_SLOT));
-}
-
-static inline uint32_t
-tcp_tv_to_usectick(const struct timeval *sv)
-{
- return ((uint32_t) ((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec));
-}
-
-static inline uint32_t
-tcp_tv_to_mssectick(const struct timeval *sv)
-{
- return ((uint32_t) ((sv->tv_sec * HPTS_MSEC_IN_SEC) + (sv->tv_usec/HPTS_USEC_IN_MSEC)));
-}
-
-static inline uint64_t
-tcp_tv_to_lusectick(const struct timeval *sv)
-{
- return ((uint64_t)((sv->tv_sec * HPTS_USEC_IN_SEC) + sv->tv_usec));
-}
-
-#ifdef _KERNEL
-
extern int32_t tcp_min_hptsi_time;
static inline int32_t
get_hpts_min_sleep_time(void)
{
- return (tcp_min_hptsi_time + HPTS_TICKS_PER_SLOT);
+ return (tcp_min_hptsi_time + HPTS_USECS_PER_SLOT);
}
static inline uint32_t
@@ -206,7 +155,7 @@ tcp_gethptstick(struct timeval *sv)
if (sv == NULL)
sv = &tv;
microuptime(sv);
- return (tcp_tv_to_hptstick(sv));
+ return (tcp_tv_to_hpts_slot(sv));
}
static inline uint64_t
@@ -217,7 +166,7 @@ tcp_get_u64_usecs(struct timeval *tv)
if (tv == NULL)
tv = &tvd;
microuptime(tv);
- return (tcp_tv_to_lusectick(tv));
+ return (tcp_tv_to_lusec(tv));
}
static inline uint32_t
@@ -228,8 +177,15 @@ tcp_get_usecs(struct timeval *tv)
if (tv == NULL)
tv = &tvd;
microuptime(tv);
- return (tcp_tv_to_usectick(tv));
+ return (tcp_tv_to_usec(tv));
}
+/*
+ * LRO HPTS initialization and uninitialization, only for internal use by the
+ * HPTS code.
+ */
+void tcp_lro_hpts_init(void);
+void tcp_lro_hpts_uninit(void);
+
#endif /* _KERNEL */
#endif /* __tcp_hpts_h__ */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index de428ae1af6f..d58cc69b7625 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -650,6 +650,12 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
th->th_sum = in6_cksum_pseudo(ip6, tlen,
IPPROTO_TCP, m->m_pkthdr.csum_data);
th->th_sum ^= 0xffff;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP6_TCP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required.
+ */
+ th->th_sum = 0;
} else
th->th_sum = in6_cksum(m, IPPROTO_TCP, off0, tlen);
if (th->th_sum) {
@@ -710,6 +716,12 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
htonl(m->m_pkthdr.csum_data + tlen +
IPPROTO_TCP));
th->th_sum ^= 0xffff;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP_TCP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required.
+ */
+ th->th_sum = 0;
} else {
struct ipovly *ipov = (struct ipovly *)ip;
diff --git a/sys/netinet/tcp_lro_hpts.c b/sys/netinet/tcp_lro_hpts.c
index 7e756285da45..43587285fe26 100644
--- a/sys/netinet/tcp_lro_hpts.c
+++ b/sys/netinet/tcp_lro_hpts.c
@@ -188,7 +188,7 @@ tcp_lro_log(struct tcpcb *tp, const struct lro_ctrl *lc,
log.u_bbr.cur_del_rate = (uintptr_t)m;
log.u_bbr.bw_inuse = (uintptr_t)le->m_head;
bintime2timeval(&lc->lro_last_queue_time, &btv);
- log.u_bbr.flex6 = tcp_tv_to_usectick(&btv);
+ log.u_bbr.flex6 = tcp_tv_to_usec(&btv);
log.u_bbr.flex7 = le->compressed;
log.u_bbr.pacing_gain = le->uncompressed;
if (in_epoch(net_epoch_preempt))
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index b232d3f08fe6..ce4e9f30020c 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -2173,7 +2173,7 @@ bbr_log_rtt_sample(struct tcp_bbr *bbr, uint32_t rtt, uint32_t tsin)
log.u_bbr.flex3 = bbr->r_ctl.rc_ack_hdwr_delay;
log.u_bbr.flex4 = bbr->rc_tp->ts_offset;
log.u_bbr.flex5 = bbr->r_ctl.rc_target_at_state;
- log.u_bbr.pkts_out = tcp_tv_to_mssectick(&bbr->rc_tv);
+ log.u_bbr.pkts_out = tcp_tv_to_msec(&bbr->rc_tv);
log.u_bbr.flex6 = tsin;
log.u_bbr.flex7 = 0;
log.u_bbr.flex8 = bbr->rc_ack_was_delayed;
@@ -2241,13 +2241,13 @@ bbr_log_ack_event(struct tcp_bbr *bbr, struct tcphdr *th, struct tcpopt *to, uin
mbuf_tstmp2timespec(m, &ts);
tv.tv_sec = ts.tv_sec;
tv.tv_usec = ts.tv_nsec / 1000;
- log.u_bbr.lt_epoch = tcp_tv_to_usectick(&tv);
+ log.u_bbr.lt_epoch = tcp_tv_to_usec(&tv);
} else {
log.u_bbr.lt_epoch = 0;
}
if (m->m_flags & M_TSTMP_LRO) {
mbuf_tstmp2timeval(m, &tv);
- log.u_bbr.flex5 = tcp_tv_to_usectick(&tv);
+ log.u_bbr.flex5 = tcp_tv_to_usec(&tv);
} else {
/* No arrival timestamp */
log.u_bbr.flex5 = 0;
@@ -6792,7 +6792,7 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
(ack_type == BBR_CUM_ACKED) &&
(to->to_flags & TOF_TS) &&
(to->to_tsecr != 0)) {
- t = tcp_tv_to_mssectick(&bbr->rc_tv) - to->to_tsecr;
+ t = tcp_tv_to_msec(&bbr->rc_tv) - to->to_tsecr;
if (t < 1)
t = 1;
t *= MS_IN_USEC;
@@ -7330,7 +7330,7 @@ bbr_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th,
uint32_t ts, now, rtt;
ts = bbr_ts_convert(to->to_tsecr);
- now = bbr_ts_convert(tcp_tv_to_mssectick(&bbr->rc_tv));
+ now = bbr_ts_convert(tcp_tv_to_msec(&bbr->rc_tv));
rtt = now - ts;
if (rtt < 1)
rtt = 1;
@@ -8461,7 +8461,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, struct socket *so,
}
if ((to->to_flags & TOF_TS) != 0 &&
SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
tp->ts_recent = to->to_tsval;
}
/*
@@ -8893,7 +8893,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((to->to_flags & TOF_TS) != 0) {
uint32_t t, rtt;
- t = tcp_tv_to_mssectick(&bbr->rc_tv);
+ t = tcp_tv_to_msec(&bbr->rc_tv);
if (TSTMP_GEQ(t, to->to_tsecr)) {
rtt = t - to->to_tsecr;
if (rtt == 0) {
@@ -9034,7 +9034,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN | TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
tp->ts_recent = to->to_tsval;
}
tp->snd_wnd = tiwin;
@@ -9067,7 +9067,7 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, struct socket *so,
if ((to->to_flags & TOF_TS) != 0) {
uint32_t t, rtt;
- t = tcp_tv_to_mssectick(&bbr->rc_tv);
+ t = tcp_tv_to_msec(&bbr->rc_tv);
if (TSTMP_GEQ(t, to->to_tsecr)) {
rtt = t - to->to_tsecr;
if (rtt == 0) {
@@ -9258,7 +9258,7 @@ bbr_do_established(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN | TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
tp->ts_recent = to->to_tsval;
}
/*
@@ -9355,7 +9355,7 @@ bbr_do_close_wait(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN | TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
tp->ts_recent = to->to_tsval;
}
/*
@@ -9486,7 +9486,7 @@ bbr_do_fin_wait_1(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN | TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
tp->ts_recent = to->to_tsval;
}
/*
@@ -9602,7 +9602,7 @@ bbr_do_closing(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN | TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
tp->ts_recent = to->to_tsval;
}
/*
@@ -9704,7 +9704,7 @@ bbr_do_lastack(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN | TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
tp->ts_recent = to->to_tsval;
}
/*
@@ -9818,7 +9818,7 @@ bbr_do_fin_wait_2(struct mbuf *m, struct tcphdr *th, struct socket *so,
SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
((thflags & (TH_SYN | TH_FIN)) != 0))) {
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
tp->ts_recent = to->to_tsval;
}
/*
@@ -11327,7 +11327,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
mbuf_tstmp2timespec(m, &ts);
bbr->rc_tv.tv_sec = ts.tv_sec;
bbr->rc_tv.tv_usec = ts.tv_nsec / 1000;
- bbr->r_ctl.rc_rcvtime = cts = tcp_tv_to_usectick(&bbr->rc_tv);
+ bbr->r_ctl.rc_rcvtime = cts = tcp_tv_to_usec(&bbr->rc_tv);
} else if (m->m_flags & M_TSTMP_LRO) {
/* Next the arrival timestamp */
struct timespec ts;
@@ -11335,7 +11335,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
mbuf_tstmp2timespec(m, &ts);
bbr->rc_tv.tv_sec = ts.tv_sec;
bbr->rc_tv.tv_usec = ts.tv_nsec / 1000;
- bbr->r_ctl.rc_rcvtime = cts = tcp_tv_to_usectick(&bbr->rc_tv);
+ bbr->r_ctl.rc_rcvtime = cts = tcp_tv_to_usec(&bbr->rc_tv);
} else {
/*
* Ok just get the current time.
@@ -11376,7 +11376,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
*/
if ((to.to_flags & TOF_TS) && (to.to_tsecr != 0)) {
to.to_tsecr -= tp->ts_offset;
- if (TSTMP_GT(to.to_tsecr, tcp_tv_to_mssectick(&bbr->rc_tv)))
+ if (TSTMP_GT(to.to_tsecr, tcp_tv_to_msec(&bbr->rc_tv)))
to.to_tsecr = 0;
}
/*
@@ -11414,7 +11414,7 @@ bbr_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
(tp->t_flags & TF_REQ_TSTMP)) {
tp->t_flags |= TF_RCVD_TSTMP;
tp->ts_recent = to.to_tsval;
- tp->ts_recent_age = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->ts_recent_age = tcp_tv_to_msec(&bbr->rc_tv);
} else
tp->t_flags &= ~TF_REQ_TSTMP;
if (to.to_flags & TOF_MSS)
@@ -11870,7 +11870,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
bbr = (struct tcp_bbr *)tp->t_fb_ptr;
/* We take a cache hit here */
memcpy(&bbr->rc_tv, tv, sizeof(struct timeval));
- cts = tcp_tv_to_usectick(&bbr->rc_tv);
+ cts = tcp_tv_to_usec(&bbr->rc_tv);
inp = bbr->rc_inp;
hpts_calling = !!(tp->t_flags2 & TF2_HPTS_CALLS);
tp->t_flags2 &= ~TF2_HPTS_CALLS;
@@ -12885,7 +12885,7 @@ send:
/* Timestamps. */
if ((tp->t_flags & TF_RCVD_TSTMP) ||
((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
- to.to_tsval = tcp_tv_to_mssectick(&bbr->rc_tv) + tp->ts_offset;
+ to.to_tsval = tcp_tv_to_msec(&bbr->rc_tv) + tp->ts_offset;
to.to_tsecr = tp->ts_recent;
to.to_flags |= TOF_TS;
local_options += TCPOLEN_TIMESTAMP + 2;
@@ -12893,7 +12893,7 @@ send:
/* Set receive buffer autosizing timestamp. */
if (tp->rfbuf_ts == 0 &&
(so->so_rcv.sb_flags & SB_AUTOSIZE))
- tp->rfbuf_ts = tcp_tv_to_mssectick(&bbr->rc_tv);
+ tp->rfbuf_ts = tcp_tv_to_msec(&bbr->rc_tv);
/* Selective ACK's. */
if (flags & TH_SYN)
to.to_flags |= TOF_SACKPERM;
@@ -14123,17 +14123,17 @@ bbr_switch_failed(struct tcpcb *tp)
toval = bbr->rc_pacer_started - cts;
} else {
/* one slot please */
- toval = HPTS_TICKS_PER_SLOT;
+ toval = HPTS_USECS_PER_SLOT;
}
} else if (bbr->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
if (TSTMP_GT(bbr->r_ctl.rc_timer_exp, cts)) {
toval = bbr->r_ctl.rc_timer_exp - cts;
} else {
/* one slot please */
- toval = HPTS_TICKS_PER_SLOT;
+ toval = HPTS_USECS_PER_SLOT;
}
} else
- toval = HPTS_TICKS_PER_SLOT;
+ toval = HPTS_USECS_PER_SLOT;
(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval),
__LINE__, &diag);
bbr_log_hpts_diag(bbr, cts, &diag);
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 940a4024bb73..d6bbfeb886d9 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -604,7 +604,7 @@ rack_get_lt_bw(struct tcp_rack *rack)
/* Include all the current bytes too */
microuptime(&tv);
bytes += (rack->rc_tp->snd_una - rack->r_ctl.lt_seq);
- tim += (tcp_tv_to_lusectick(&tv) - rack->r_ctl.lt_timemark);
+ tim += (tcp_tv_to_lusec(&tv) - rack->r_ctl.lt_timemark);
}
if ((bytes != 0) && (tim != 0))
return ((bytes * (uint64_t)1000000) / tim);
@@ -2245,7 +2245,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int *capped)
ent = rack->r_ctl.rc_last_sft;
microuptime(&tv);
- timenow = tcp_tv_to_lusectick(&tv);
+ timenow = tcp_tv_to_lusec(&tv);
if (timenow >= ent->deadline) {
/* No time left we do DGP only */
rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
@@ -2888,7 +2888,7 @@ rack_log_rtt_upd(struct tcpcb *tp, struct tcp_rack *rack, uint32_t t, uint32_t l
log.u_bbr.lt_epoch = rack->r_ctl.rc_time_probertt_entered;
log.u_bbr.cur_del_rate = rack->r_ctl.rc_lower_rtt_us_cts;
log.u_bbr.delRate = rack->r_ctl.rc_gp_srtt;
- log.u_bbr.bw_inuse = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
+ log.u_bbr.bw_inuse = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time);
log.u_bbr.bw_inuse <<= 32;
if (rsm)
log.u_bbr.bw_inuse |= ((uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]);
@@ -3644,7 +3644,7 @@ rack_enough_for_measurement(struct tcpcb *tp, struct tcp_rack *rack, tcp_seq th_
}
/* Now what about time? */
srtts = (rack->r_ctl.rc_gp_srtt * rack_min_srtts);
- tim = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time) - tp->gput_ts;
+ tim = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time) - tp->gput_ts;
if ((tim >= srtts) && (IN_RECOVERY(rack->rc_tp->t_flags) == 0)) {
/*
* We do not allow a measurement if we are in recovery
@@ -4891,7 +4891,7 @@ rack_do_goodput_measurement(struct tcpcb *tp, struct tcp_rack *rack,
uint64_t resid_bw, subpart = 0, addpart = 0, srtt;
int did_add = 0;
- us_cts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
+ us_cts = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time);
segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
if (TSTMP_GEQ(us_cts, tp->gput_ts))
tim = us_cts - tp->gput_ts;
@@ -5355,7 +5355,7 @@ skip_measurement:
rack->r_ctl.rc_gp_lowrtt = 0xffffffff;
rack->r_ctl.rc_gp_high_rwnd = rack->rc_tp->snd_wnd;
- tp->gput_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
+ tp->gput_ts = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time);
rack->app_limited_needs_set = 0;
tp->gput_seq = th_ack;
if (rack->in_probe_rtt)
@@ -5490,7 +5490,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack, uint32_t th_ack, uint
rack->r_ctl.lt_bw_bytes += (tp->snd_max - rack->r_ctl.lt_seq);
rack->r_ctl.lt_seq = tp->snd_max;
- tmark = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
+ tmark = tcp_tv_to_lusec(&rack->r_ctl.act_rcv_time);
if (tmark >= rack->r_ctl.lt_timemark) {
rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark);
}
@@ -6390,7 +6390,7 @@ rack_enter_persist(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, tcp_se
rack->r_ctl.lt_bw_bytes += (snd_una - rack->r_ctl.lt_seq);
rack->r_ctl.lt_seq = snd_una;
- tmark = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
+ tmark = tcp_tv_to_lusec(&rack->r_ctl.act_rcv_time);
if (tmark >= rack->r_ctl.lt_timemark) {
rack->r_ctl.lt_bw_time += (tmark - rack->r_ctl.lt_timemark);
}
@@ -6592,22 +6592,22 @@ rack_start_hpts_timer (struct tcp_rack *rack, struct tcpcb *tp, uint32_t cts,
* on the clock. We always have a min
* 10 slots (10 x 10 i.e. 100 usecs).
*/
- if (slot <= HPTS_TICKS_PER_SLOT) {
+ if (slot <= HPTS_USECS_PER_SLOT) {
/* We gain delay */
- rack->r_ctl.rc_agg_delayed += (HPTS_TICKS_PER_SLOT - slot);
- slot = HPTS_TICKS_PER_SLOT;
+ rack->r_ctl.rc_agg_delayed += (HPTS_USECS_PER_SLOT - slot);
+ slot = HPTS_USECS_PER_SLOT;
} else {
/* We take off some */
- rack->r_ctl.rc_agg_delayed -= (slot - HPTS_TICKS_PER_SLOT);
- slot = HPTS_TICKS_PER_SLOT;
+ rack->r_ctl.rc_agg_delayed -= (slot - HPTS_USECS_PER_SLOT);
+ slot = HPTS_USECS_PER_SLOT;
}
} else {
slot -= rack->r_ctl.rc_agg_delayed;
rack->r_ctl.rc_agg_delayed = 0;
/* Make sure we have 100 useconds at minimum */
- if (slot < HPTS_TICKS_PER_SLOT) {
- rack->r_ctl.rc_agg_delayed = HPTS_TICKS_PER_SLOT - slot;
- slot = HPTS_TICKS_PER_SLOT;
+ if (slot < HPTS_USECS_PER_SLOT) {
+ rack->r_ctl.rc_agg_delayed = HPTS_USECS_PER_SLOT - slot;
+ slot = HPTS_USECS_PER_SLOT;
}
if (rack->r_ctl.rc_agg_delayed == 0)
rack->r_late = 0;
@@ -8780,7 +8780,7 @@ tcp_rack_xmit_timer_commit(struct tcp_rack *rack, struct tcpcb *tp)
}
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_PATHRTT, imax(0, rack->r_ctl.rack_rs.rs_us_rtt));
#endif
- rack->r_ctl.last_rcv_tstmp_for_rtt = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time);
+ rack->r_ctl.last_rcv_tstmp_for_rtt = tcp_tv_to_msec(&rack->r_ctl.act_rcv_time);
/*
* the retransmit should happen at rtt + 4 * rttvar. Because of the
* way we do the smoothing, srtt and rttvar will each average +1/2
@@ -8886,8 +8886,8 @@ rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
rack->r_ctl.rc_rack_min_rtt = 1;
}
}
- if (TSTMP_GT(tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time), rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]))
- us_rtt = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time) - (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
+ if (TSTMP_GT(tcp_tv_to_usec(&rack->r_ctl.act_rcv_time), rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)]))
+ us_rtt = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time) - (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
else
us_rtt = tcp_get_usecs(NULL) - (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt-1)];
if (us_rtt == 0)
@@ -8896,7 +8896,7 @@ rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
/* Kick the RTT to the CC */
CC_ALGO(tp)->rttsample(&tp->t_ccv, us_rtt, 1, rsm->r_fas);
}
- rack_apply_updated_usrtt(rack, us_rtt, tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time));
+ rack_apply_updated_usrtt(rack, us_rtt, tcp_tv_to_usec(&rack->r_ctl.act_rcv_time));
if (ack_type == SACKED) {
rack_log_rtt_sample_calc(rack, t, (uint32_t)rsm->r_tim_lastsent[(rsm->r_rtr_cnt - 1)], cts, 1);
tcp_rack_xmit_timer(rack, t + 1, len_acked, us_rtt, 2 , rsm, rsm->r_rtr_cnt);
@@ -8991,8 +8991,8 @@ rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
* we retransmitted. This is because
* we match the timestamps.
*/
- if (TSTMP_GT(tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time), rsm->r_tim_lastsent[i]))
- us_rtt = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time) - (uint32_t)rsm->r_tim_lastsent[i];
+ if (TSTMP_GT(tcp_tv_to_usec(&rack->r_ctl.act_rcv_time), rsm->r_tim_lastsent[i]))
+ us_rtt = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time) - (uint32_t)rsm->r_tim_lastsent[i];
else
us_rtt = tcp_get_usecs(NULL) - (uint32_t)rsm->r_tim_lastsent[i];
CC_ALGO(tp)->rttsample(&tp->t_ccv, us_rtt, 1, rsm->r_fas);
@@ -9185,7 +9185,7 @@ rack_need_set_test(struct tcpcb *tp,
seq = tp->gput_seq;
ts = tp->gput_ts;
rack->app_limited_needs_set = 0;
- tp->gput_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
+ tp->gput_ts = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time);
/* Do we start at a new end? */
if ((use_which == RACK_USE_BEG) &&
SEQ_GEQ(rsm->r_start, tp->gput_seq)) {
@@ -10820,7 +10820,7 @@ rack_log_ack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th, int entered
changed = th_ack - rsm->r_start;
if (changed) {
rack_process_to_cumack(tp, rack, th_ack, cts, to,
- tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time));
+ tcp_tv_to_lusec(&rack->r_ctl.act_rcv_time));
}
if ((to->to_flags & TOF_SACK) == 0) {
/* We are done nothing left and no sack. */
@@ -11698,7 +11698,7 @@ rack_req_check_for_comp(struct tcp_rack *rack, tcp_seq th_ack)
rack_log_hybrid_sends(rack, ent, __LINE__);
/* calculate the time based on the ack arrival */
data = ent->end - ent->start;
- laa = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
+ laa = tcp_tv_to_lusec(&rack->r_ctl.act_rcv_time);
if (ent->flags & TCP_TRK_TRACK_FLG_FSND) {
if (ent->first_send > ent->localtime)
ftim = ent->first_send;
@@ -11844,7 +11844,7 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
* less than and we have not closed our window.
*/
if (SEQ_LT(th->th_ack, tp->snd_una) && (sbspace(&so->so_rcv) > ctf_fixed_maxseg(tp))) {
- rack->r_ctl.rc_reorder_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
+ rack->r_ctl.rc_reorder_ts = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time);
if (rack->r_ctl.rc_reorder_ts == 0)
rack->r_ctl.rc_reorder_ts = 1;
}
@@ -14368,17 +14368,17 @@ rack_switch_failed(struct tcpcb *tp)
toval = rack->r_ctl.rc_last_output_to - cts;
} else {
/* one slot please */
- toval = HPTS_TICKS_PER_SLOT;
+ toval = HPTS_USECS_PER_SLOT;
}
} else if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) {
if (TSTMP_GT(rack->r_ctl.rc_timer_exp, cts)) {
toval = rack->r_ctl.rc_timer_exp - cts;
} else {
/* one slot please */
- toval = HPTS_TICKS_PER_SLOT;
+ toval = HPTS_USECS_PER_SLOT;
}
} else
- toval = HPTS_TICKS_PER_SLOT;
+ toval = HPTS_USECS_PER_SLOT;
(void)tcp_hpts_insert_diag(tp, HPTS_USEC_TO_SLOTS(toval),
__LINE__, &diag);
rack_log_hpts_diag(rack, cts, &diag, &tv);
@@ -14743,12 +14743,12 @@ rack_init(struct tcpcb *tp, void **ptr)
rack->r_ctl.rack_per_of_gp_ss = 250;
}
rack->r_ctl.rack_per_of_gp_probertt = rack_per_of_gp_probertt;
- rack->r_ctl.rc_tlp_rxt_last_time = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time);
- rack->r_ctl.last_rcv_tstmp_for_rtt = tcp_tv_to_mssectick(&rack->r_ctl.act_rcv_time);
+ rack->r_ctl.rc_tlp_rxt_last_time = tcp_tv_to_msec(&rack->r_ctl.act_rcv_time);
+ rack->r_ctl.last_rcv_tstmp_for_rtt = tcp_tv_to_msec(&rack->r_ctl.act_rcv_time);
setup_time_filter_small(&rack->r_ctl.rc_gp_min_rtt, FILTER_TYPE_MIN,
rack_probertt_filter_life);
- us_cts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
+ us_cts = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time);
rack->r_ctl.rc_lower_rtt_us_cts = us_cts;
rack->r_ctl.rc_time_of_last_probertt = us_cts;
rack->r_ctl.rc_went_idle_time = us_cts;
@@ -14957,7 +14957,7 @@ rack_init(struct tcpcb *tp, void **ptr)
if (TSTMP_GT(qr.timer_pacing_to, us_cts))
tov = qr.timer_pacing_to - us_cts;
else
- tov = HPTS_TICKS_PER_SLOT;
+ tov = HPTS_USECS_PER_SLOT;
}
if (qr.timer_hpts_flags & PACE_TMR_MASK) {
rack->r_ctl.rc_timer_exp = qr.timer_timer_exp;
@@ -14965,7 +14965,7 @@ rack_init(struct tcpcb *tp, void **ptr)
if (TSTMP_GT(qr.timer_timer_exp, us_cts))
tov = qr.timer_timer_exp - us_cts;
else
- tov = HPTS_TICKS_PER_SLOT;
+ tov = HPTS_USECS_PER_SLOT;
}
}
rack_log_chg_info(tp, rack, 4,
@@ -15385,7 +15385,7 @@ rack_log_input_packet(struct tcpcb *tp, struct tcp_rack *rack, struct tcp_ackent
ts.tv_nsec = ae->timestamp % 1000000000;
ltv.tv_sec = ts.tv_sec;
ltv.tv_usec = ts.tv_nsec / 1000;
- log.u_bbr.lt_epoch = tcp_tv_to_usectick(&ltv);
+ log.u_bbr.lt_epoch = tcp_tv_to_usec(&ltv);
} else if (ae->flags & TSTMP_LRO) {
/* Record the LRO the arrival timestamp */
log.u_bbr.flex3 = M_TSTMP_LRO;
@@ -15393,7 +15393,7 @@ rack_log_input_packet(struct tcpcb *tp, struct tcp_rack *rack, struct tcp_ackent
ts.tv_nsec = ae->timestamp % 1000000000;
ltv.tv_sec = ts.tv_sec;
ltv.tv_usec = ts.tv_nsec / 1000;
- log.u_bbr.flex5 = tcp_tv_to_usectick(&ltv);
+ log.u_bbr.flex5 = tcp_tv_to_usec(&ltv);
}
log.u_bbr.timeStamp = tcp_get_usecs(&ltv);
/* Log the rcv time */
@@ -15564,7 +15564,7 @@ rack_log_pcm(struct tcp_rack *rack, uint8_t mod, uint32_t flex1, uint32_t flex2,
(void)tcp_get_usecs(&tv);
memset(&log, 0, sizeof(log));
- log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv);
+ log.u_bbr.timeStamp = tcp_tv_to_usec(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.flex8 = mod;
log.u_bbr.flex1 = flex1;
@@ -15747,8 +15747,8 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
the_win = tp->snd_wnd;
win_seq = tp->snd_wl1;
win_upd_ack = tp->snd_wl2;
- cts = tcp_tv_to_usectick(tv);
- ms_cts = tcp_tv_to_mssectick(tv);
+ cts = tcp_tv_to_usec(tv);
+ ms_cts = tcp_tv_to_msec(tv);
rack->r_ctl.rc_rcvtime = cts;
segsiz = ctf_fixed_maxseg(tp);
if ((rack->rc_gp_dyn_mul) &&
@@ -15864,7 +15864,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
* or it could be a keep-alive or persists
*/
if (SEQ_LT(ae->ack, tp->snd_una) && (sbspace(&so->so_rcv) > segsiz)) {
- rack->r_ctl.rc_reorder_ts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
+ rack->r_ctl.rc_reorder_ts = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time);
if (rack->r_ctl.rc_reorder_ts == 0)
rack->r_ctl.rc_reorder_ts = 1;
}
@@ -15883,7 +15883,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
}
if (rack->forced_ack) {
rack_handle_probe_response(rack, tiwin,
- tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time));
+ tcp_tv_to_usec(&rack->r_ctl.act_rcv_time));
}
#ifdef TCP_ACCOUNTING
win_up_req = 1;
@@ -15930,7 +15930,7 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb
rack->r_ctl.act_rcv_time = *tv;
}
rack_process_to_cumack(tp, rack, ae->ack, cts, to,
- tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time));
+ tcp_tv_to_lusec(&rack->r_ctl.act_rcv_time));
#ifdef TCP_REQUEST_TRK
rack_req_check_for_comp(rack, high_seq);
#endif
@@ -16398,7 +16398,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
* must process the ack coming in but need to defer sending
* anything becase a pacing timer is running.
*/
- us_cts = tcp_tv_to_usectick(tv);
+ us_cts = tcp_tv_to_usec(tv);
if (m->m_flags & M_ACKCMP) {
/*
* All compressed ack's are ack's by definition so
@@ -16466,8 +16466,8 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
if (m->m_flags & M_ACKCMP) {
panic("Impossible reach m has ackcmp? m:%p tp:%p", m, tp);
}
- cts = tcp_tv_to_usectick(tv);
- ms_cts = tcp_tv_to_mssectick(tv);
+ cts = tcp_tv_to_usec(tv);
+ ms_cts = tcp_tv_to_msec(tv);
nsegs = m->m_pkthdr.lro_nsegs;
counter_u64_add(rack_proc_non_comp_ack, 1);
#ifdef TCP_ACCOUNTING
@@ -16595,13 +16595,13 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
mbuf_tstmp2timespec(m, &ts);
ltv.tv_sec = ts.tv_sec;
ltv.tv_usec = ts.tv_nsec / 1000;
- log.u_bbr.lt_epoch = tcp_tv_to_usectick(&ltv);
+ log.u_bbr.lt_epoch = tcp_tv_to_usec(&ltv);
} else if (m->m_flags & M_TSTMP_LRO) {
/* Record the LRO the arrival timestamp */
mbuf_tstmp2timespec(m, &ts);
ltv.tv_sec = ts.tv_sec;
ltv.tv_usec = ts.tv_nsec / 1000;
- log.u_bbr.flex5 = tcp_tv_to_usectick(&ltv);
+ log.u_bbr.flex5 = tcp_tv_to_usec(&ltv);
}
log.u_bbr.timeStamp = tcp_get_usecs(&ltv);
/* Log the rcv time */
@@ -16819,7 +16819,7 @@ rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
}
if (thflags & TH_FIN)
tcp_log_end_status(tp, TCP_EI_STATUS_CLIENT_FIN);
- us_cts = tcp_tv_to_usectick(&rack->r_ctl.act_rcv_time);
+ us_cts = tcp_tv_to_usec(&rack->r_ctl.act_rcv_time);
if ((rack->rc_gp_dyn_mul) &&
(rack->use_fixed_rate == 0) &&
(rack->rc_always_pace)) {
@@ -19442,7 +19442,7 @@ again:
}
if ((error == 0) && (rack->lt_bw_up == 0)) {
/* Unlikely */
- rack->r_ctl.lt_timemark = tcp_tv_to_lusectick(tv);
+ rack->r_ctl.lt_timemark = tcp_tv_to_lusec(tv);
rack->r_ctl.lt_seq = tp->snd_una;
rack->lt_bw_up = 1;
} else if ((error == 0) &&
@@ -19785,7 +19785,7 @@ rack_output(struct tcpcb *tp)
#endif
early = 0;
cts = tcp_get_usecs(&tv);
- ms_cts = tcp_tv_to_mssectick(&tv);
+ ms_cts = tcp_tv_to_msec(&tv);
if (((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) == 0) &&
tcp_in_hpts(rack->rc_tp)) {
/*
@@ -20023,7 +20023,7 @@ rack_output(struct tcpcb *tp)
again:
sendalot = 0;
cts = tcp_get_usecs(&tv);
- ms_cts = tcp_tv_to_mssectick(&tv);
+ ms_cts = tcp_tv_to_msec(&tv);
tso = 0;
mtu = 0;
if (TCPS_HAVEESTABLISHED(tp->t_state) &&
@@ -22090,7 +22090,7 @@ out:
}
if (rsm == NULL) {
if (rack->lt_bw_up == 0) {
- rack->r_ctl.lt_timemark = tcp_tv_to_lusectick(&tv);
+ rack->r_ctl.lt_timemark = tcp_tv_to_lusec(&tv);
rack->r_ctl.lt_seq = tp->snd_una;
rack->lt_bw_up = 1;
} else if (((rack_seq + len) - rack->r_ctl.lt_seq) > 0x7fffffff) {
@@ -22838,7 +22838,7 @@ process_hybrid_pacing(struct tcp_rack *rack, struct tcp_hybrid_req *hybrid)
rack->r_ctl.rc_fixed_pacing_rate_ca = 0;
rack->r_ctl.rc_fixed_pacing_rate_ss = 0;
/* Now allocate or find our entry that will have these settings */
- sft = tcp_req_alloc_req_full(rack->rc_tp, &hybrid->req, tcp_tv_to_lusectick(&tv), 0);
+ sft = tcp_req_alloc_req_full(rack->rc_tp, &hybrid->req, tcp_tv_to_lusec(&tv), 0);
if (sft == NULL) {
rack->rc_tp->tcp_hybrid_error++;
/* no space, where would it have gone? */
diff --git a/sys/netinet/tcp_stacks/rack_pcm.c b/sys/netinet/tcp_stacks/rack_pcm.c
index 101e6826536c..759bfda98357 100644
--- a/sys/netinet/tcp_stacks/rack_pcm.c
+++ b/sys/netinet/tcp_stacks/rack_pcm.c
@@ -174,7 +174,7 @@ rack_update_pcm_ack(struct tcp_rack *rack, int was_cumack, uint32_t start, uint3
/*
* Record ACK data.
*/
- ack_arrival = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
+ ack_arrival = tcp_tv_to_lusec(&rack->r_ctl.act_rcv_time);
if (SEQ_GT(end, rack->r_ctl.pcm_i.eseq)) {
/* Trim the end to the end of our range if it is beyond */
end = rack->r_ctl.pcm_i.eseq;
@@ -242,7 +242,7 @@ skip_ack_accounting:
e = &rack->r_ctl.pcm_s[i];
memset(&log, 0, sizeof(log));
- log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv);
+ log.u_bbr.timeStamp = tcp_tv_to_usec(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.flex8 = 1;
log.u_bbr.flex1 = e->sseq;
@@ -286,7 +286,7 @@ skip_ack_accounting:
* Prev time holds the last ack arrival time.
*/
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
- log.u_bbr.timeStamp = tcp_tv_to_usectick(&tv);
+ log.u_bbr.timeStamp = tcp_tv_to_usec(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.flex8 = 2;
log.u_bbr.flex1 = rack->r_ctl.pcm_i.sseq;
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index 42cfb919e263..df8f293f9426 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -560,6 +560,12 @@ udp_input(struct mbuf **mp, int *offp, int proto)
ip->ip_dst.s_addr, htonl((u_short)len +
m->m_pkthdr.csum_data + proto));
uh_sum ^= 0xffff;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP_UDP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required.
+ */
+ uh_sum = 0;
} else {
char b[offsetof(struct ipovly, ih_src)];
struct ipovly *ipov = (struct ipovly *)ip;
diff --git a/sys/netinet6/ip6_fastfwd.c b/sys/netinet6/ip6_fastfwd.c
index 0ed313bd49a5..7139267722b7 100644
--- a/sys/netinet6/ip6_fastfwd.c
+++ b/sys/netinet6/ip6_fastfwd.c
@@ -27,6 +27,7 @@
#include <sys/cdefs.h>
#include "opt_inet6.h"
#include "opt_ipstealth.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -54,6 +55,10 @@
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+#include <netinet/sctp_crc32.h>
+#endif
+
static int
ip6_findroute(struct nhop_object **pnh, const struct sockaddr_in6 *dst,
struct mbuf *m)
@@ -277,6 +282,29 @@ passout:
ip6->ip6_hlim -= IPV6_HLIMDEC;
}
+ /*
+ * If the TCP/UDP header still needs a valid checksum and the interface
+ * will not calculate it for us, do it here.
+ */
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
+ ~nh->nh_ifp->if_hwassist)) {
+ int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
+
+ if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len)
+ goto drop;
+ in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+ }
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP &
+ ~nh->nh_ifp->if_hwassist)) {
+ int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
+
+ sctp_delayed_cksum(m, offset);
+ m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP;
+ }
+#endif
+
m_clrprotoflags(m); /* Avoid confusing lower layers. */
IP_PROBE(send, NULL, NULL, ip6, nh->nh_ifp, NULL, ip6);
diff --git a/sys/netinet6/ip6_forward.c b/sys/netinet6/ip6_forward.c
index ad8c95c9363c..f6c09b0ac7bc 100644
--- a/sys/netinet6/ip6_forward.c
+++ b/sys/netinet6/ip6_forward.c
@@ -75,6 +75,10 @@
#include <netipsec/ipsec_support.h>
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+#include <netinet/sctp_crc32.h>
+#endif
+
/*
* Forward a packet. If some error occurs return the sender
* an icmp packet. Note we can't always generate a meaningful
@@ -389,6 +393,29 @@ pass:
goto bad;
}
+ /*
+ * If TCP/UDP header still needs a valid checksum and interface will not
+ * calculate it for us, do it here.
+ */
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 &
+ ~nh->nh_ifp->if_hwassist)) {
+ int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
+
+ if (offset < sizeof(struct ip6_hdr) || offset > m->m_pkthdr.len)
+ goto bad;
+ in6_delayed_cksum(m, m->m_pkthdr.len - offset, offset);
+ m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+ }
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+ if (__predict_false(m->m_pkthdr.csum_flags & CSUM_IP6_SCTP &
+ ~nh->nh_ifp->if_hwassist)) {
+ int offset = ip6_lasthdr(m, 0, IPPROTO_IPV6, NULL);
+
+ sctp_delayed_cksum(m, offset);
+ m->m_pkthdr.csum_flags &= ~CSUM_IP6_SCTP;
+ }
+#endif
+
/* Currently LLE layer stores embedded IPv6 addresses */
if (IN6_IS_SCOPE_LINKLOCAL(&dst.sin6_addr)) {
in6_set_unicast_scopeid(&dst.sin6_addr, dst.sin6_scope_id);
diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c
index 0987ea7e99ad..08702a2e81ab 100644
--- a/sys/netinet6/scope6.c
+++ b/sys/netinet6/scope6.c
@@ -505,8 +505,23 @@ in6_set_unicast_scopeid(struct in6_addr *in6, uint32_t scopeid)
struct ifnet*
in6_getlinkifnet(uint32_t zoneid)
{
+ struct ifnet *ifp;
- return (ifnet_byindex((u_short)zoneid));
+ ifp = ifnet_byindex((u_short)zoneid);
+
+ if (ifp == NULL)
+ return (NULL);
+
+ /* An interface might not be IPv6 capable. */
+ if (ifp->if_afdata[AF_INET6] == NULL) {
+ log(LOG_NOTICE,
+ "%s: embedded scope points to an interface without "
+ "IPv6: %s%%%d.\n", __func__,
+ if_name(ifp), zoneid);
+ return (NULL);
+ }
+
+ return (ifp);
}
/*
diff --git a/sys/netinet6/sctp6_usrreq.c b/sys/netinet6/sctp6_usrreq.c
index 8964ccf54c54..c4716fdafb6e 100644
--- a/sys/netinet6/sctp6_usrreq.c
+++ b/sys/netinet6/sctp6_usrreq.c
@@ -139,7 +139,11 @@ sctp6_input_with_port(struct mbuf **i_pak, int *offp, uint16_t port)
goto out;
}
ecn_bits = IPV6_TRAFFIC_CLASS(ip6);
- if (m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) {
+ if (m->m_pkthdr.csum_flags & (CSUM_SCTP_VALID | CSUM_IP6_SCTP)) {
+ /*
+ * Packets with CSUM_IP6_SCTP were sent from the local host
+ * using checksum offloading. Checksum not required.
+ */
SCTP_STAT_INCR(sctps_recvhwcrc);
compute_crc = 0;
} else {
diff --git a/sys/netinet6/udp6_usrreq.c b/sys/netinet6/udp6_usrreq.c
index b3ed16fda713..0027cf3bd230 100644
--- a/sys/netinet6/udp6_usrreq.c
+++ b/sys/netinet6/udp6_usrreq.c
@@ -434,6 +434,12 @@ udp6_input(struct mbuf **mp, int *offp, int proto)
uh_sum = in6_cksum_pseudo(ip6, ulen, nxt,
m->m_pkthdr.csum_data);
uh_sum ^= 0xffff;
+ } else if (m->m_pkthdr.csum_flags & CSUM_IP6_UDP) {
+ /*
+ * Packet from local host (maybe from a VM).
+ * Checksum not required.
+ */
+ uh_sum = 0;
} else
uh_sum = in6_cksum_partial(m, nxt, off, plen, ulen);
diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c
index ce323910af3f..e7908d6f3a44 100644
--- a/sys/netlink/netlink_io.c
+++ b/sys/netlink/netlink_io.c
@@ -308,6 +308,7 @@ static void
npt_clear(struct nl_pstate *npt)
{
lb_clear(&npt->lb);
+ npt->cookie = NULL;
npt->error = 0;
npt->err_msg = NULL;
npt->err_off = 0;
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 41e9ca27912d..79c298c18b46 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -7399,7 +7399,7 @@ pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif,
{
struct pf_sctp_multihome_job *j, *tmp;
struct pf_sctp_source *i;
- int ret __unused;
+ int ret;
struct pf_kstate *sm = NULL;
struct pf_krule *ra = NULL;
struct pf_krule *r = &V_pf_default_rule;
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index ea9f7fe441c6..9abc07c36788 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -2092,19 +2092,18 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
int rs_num;
int error = 0;
- if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) {
- error = EINVAL;
- goto errout_unlocked;
- }
+#define ERROUT(x) ERROUT_FUNCTION(errout, x)
+#define ERROUT_UNLOCKED(x) ERROUT_FUNCTION(errout_unlocked, x)
-#define ERROUT(x) ERROUT_FUNCTION(errout, x)
+ if ((rule->return_icmp >> 8) > ICMP_MAXTYPE)
+ ERROUT_UNLOCKED(EINVAL);
if ((error = pf_rule_checkaf(rule)))
- ERROUT(error);
+ ERROUT_UNLOCKED(error);
if (pf_validate_range(rule->src.port_op, rule->src.port))
- ERROUT(EINVAL);
+ ERROUT_UNLOCKED(EINVAL);
if (pf_validate_range(rule->dst.port_op, rule->dst.port))
- ERROUT(EINVAL);
+ ERROUT_UNLOCKED(EINVAL);
if (rule->ifname[0])
kif = pf_kkif_create(M_WAITOK);
@@ -2294,6 +2293,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
return (0);
#undef ERROUT
+#undef ERROUT_UNLOCKED
errout:
PF_RULES_WUNLOCK();
PF_CONFIG_UNLOCK();
diff --git a/sys/riscv/include/vmm_dev.h b/sys/riscv/include/vmm_dev.h
index 856ff0778b95..4d30d5a1c35b 100644
--- a/sys/riscv/include/vmm_dev.h
+++ b/sys/riscv/include/vmm_dev.h
@@ -34,6 +34,8 @@
#ifndef _VMM_DEV_H_
#define _VMM_DEV_H_
+#include <sys/domainset.h>
+
#include <machine/vmm.h>
struct vm_memmap {
@@ -56,6 +58,9 @@ struct vm_memseg {
int segid;
size_t len;
char name[VM_MAX_SUFFIXLEN + 1];
+ domainset_t *ds_mask;
+ size_t ds_mask_size;
+ int ds_policy;
};
struct vm_register {
diff --git a/sys/rpc/authunix_prot.c b/sys/rpc/authunix_prot.c
index 7b531946488a..b107d5541c50 100644
--- a/sys/rpc/authunix_prot.c
+++ b/sys/rpc/authunix_prot.c
@@ -96,8 +96,12 @@ xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred)
if (!xdr_uint32_t(xdrs, &cred->cr_gid))
return (FALSE);
- /* XXXKE Fix this is cr_gid gets separated out. */
if (xdrs->x_op == XDR_ENCODE) {
+ /*
+ * Note that this is a `struct xucred`, which maintains its
+ * historical layout: the egid is counted in cr_ngroups and
+ * stored in cr_groups[0].
+ */
ngroups = cred->cr_ngroups - 1;
if (ngroups > NGRPS)
ngroups = NGRPS;
diff --git a/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c b/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c
index b1790dd167d5..51077c71822c 100644
--- a/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c
+++ b/sys/rpc/rpcsec_gss/svc_rpcsec_gss.c
@@ -537,7 +537,7 @@ rpc_gss_svc_getcred(struct svc_req *req, struct ucred **crp, int *flavorp)
cr = client->cl_cred = crget();
cr->cr_uid = cr->cr_ruid = cr->cr_svuid = uc->uid;
cr->cr_rgid = cr->cr_svgid = uc->gid;
- crsetgroups_fallback(cr, uc->gidlen, uc->gidlist, uc->gid);
+ crsetgroups_and_egid(cr, uc->gidlen, uc->gidlist, uc->gid);
cr->cr_prison = curthread->td_ucred->cr_prison;
prison_hold(cr->cr_prison);
*crp = crhold(cr);
diff --git a/sys/rpc/svc_auth.c b/sys/rpc/svc_auth.c
index 92f1ee0f2844..acbb1112e270 100644
--- a/sys/rpc/svc_auth.c
+++ b/sys/rpc/svc_auth.c
@@ -39,6 +39,7 @@
*/
#include <sys/param.h>
+#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
@@ -191,7 +192,7 @@ svc_getcred(struct svc_req *rqst, struct ucred **crp, int *flavorp)
return (FALSE);
cr = crget();
cr->cr_uid = cr->cr_ruid = cr->cr_svuid = xprt->xp_uid;
- crsetgroups(cr, xprt->xp_ngrps, xprt->xp_gidp);
+ crsetgroups_and_egid(cr, xprt->xp_ngrps, xprt->xp_gidp, GID_NOGROUP);
cr->cr_rgid = cr->cr_svgid = cr->cr_gid;
cr->cr_prison = curthread->td_ucred->cr_prison;
prison_hold(cr->cr_prison);
@@ -206,7 +207,7 @@ svc_getcred(struct svc_req *rqst, struct ucred **crp, int *flavorp)
return (FALSE);
cr = crget();
cr->cr_uid = cr->cr_ruid = cr->cr_svuid = xcr->cr_uid;
- crsetgroups(cr, xcr->cr_ngroups, xcr->cr_groups);
+ crsetgroups_and_egid(cr, xcr->cr_ngroups, xcr->cr_groups, GID_NOGROUP);
cr->cr_rgid = cr->cr_svgid = cr->cr_gid;
cr->cr_prison = curthread->td_ucred->cr_prison;
prison_hold(cr->cr_prison);
diff --git a/sys/rpc/svc_auth_unix.c b/sys/rpc/svc_auth_unix.c
index b10ef33be704..963f4f272964 100644
--- a/sys/rpc/svc_auth_unix.c
+++ b/sys/rpc/svc_auth_unix.c
@@ -89,8 +89,12 @@ _svcauth_unix(struct svc_req *rqst, struct rpc_msg *msg)
stat = AUTH_BADCRED;
goto done;
}
- /* XXXKE Fix this if cr_gid gets separated out. */
for (i = 0; i < gid_len; i++) {
+ /*
+ * Note that this is a `struct xucred`, which maintains
+ * its historical layout: the egid is counted in
+ * cr_ngroups and stored in cr_groups[0].
+ */
if (i + 1 < XU_NGROUPS)
xcr->cr_groups[i + 1] = IXDR_GET_INT32(buf);
else
diff --git a/sys/sys/compressor.h b/sys/sys/compressor.h
index cad9080b46ff..e59eeabec2cd 100644
--- a/sys/sys/compressor.h
+++ b/sys/sys/compressor.h
@@ -42,6 +42,7 @@ struct compressor;
bool compressor_avail(int format);
struct compressor *compressor_init(compressor_cb_t cb, int format,
size_t maxiosize, int level, void *arg);
+int compressor_format(const struct compressor *stream);
void compressor_reset(struct compressor *stream);
int compressor_write(struct compressor *stream, void *data,
size_t len);
diff --git a/sys/sys/domainset.h b/sys/sys/domainset.h
index f98b175e9bc8..f3dc92ec6383 100644
--- a/sys/sys/domainset.h
+++ b/sys/sys/domainset.h
@@ -113,6 +113,20 @@ void domainset_zero(void);
* returned value will not match the key pointer.
*/
struct domainset *domainset_create(const struct domainset *);
+
+/*
+ * Remove empty domains from a given domainset.
+ * Returns 'false' if the domainset consists entirely of empty domains.
+ */
+bool domainset_empty_vm(struct domainset *domain);
+
+/*
+ * Validate and populate a domainset structure according to the specified
+ * policy and mask.
+ */
+int domainset_populate(struct domainset *domain, const domainset_t *mask, int policy,
+ size_t mask_size);
+
#ifdef _SYS_SYSCTL_H_
int sysctl_handle_domainset(SYSCTL_HANDLER_ARGS);
#endif
diff --git a/sys/sys/exec.h b/sys/sys/exec.h
index 4bf114a7c698..580a5372c4db 100644
--- a/sys/sys/exec.h
+++ b/sys/sys/exec.h
@@ -57,16 +57,6 @@ struct ps_strings {
unsigned int ps_nenvstr; /* the number of environment strings */
};
-/* Coredump output parameters. */
-struct coredump_params {
- off_t offset;
- struct ucred *active_cred;
- struct ucred *file_cred;
- struct thread *td;
- struct vnode *vp;
- struct compressor *comp;
-};
-
struct image_params;
struct execsw {
@@ -105,16 +95,6 @@ int exec_unregister(const struct execsw *);
enum uio_seg;
-#define CORE_BUF_SIZE (16 * 1024)
-
-int core_write(struct coredump_params *, const void *, size_t, off_t,
- enum uio_seg, size_t *);
-int core_output(char *, size_t, off_t, struct coredump_params *, void *);
-int sbuf_drain_core_output(void *, const char *, int);
-
-extern int coredump_pack_fileinfo;
-extern int coredump_pack_vmmapinfo;
-
/*
* note: name##_mod cannot be const storage because the
* linker_file_sysinit() function modifies _file in the
diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h
index cab94ac511a5..80cff53b3576 100644
--- a/sys/sys/exterr_cat.h
+++ b/sys/sys/exterr_cat.h
@@ -18,6 +18,8 @@
#define EXTERR_CAT_FUSE 4
#define EXTERR_CAT_INOTIFY 5
#define EXTERR_CAT_GENIO 6
+#define EXTERR_CAT_BRIDGE 7
+#define EXTERR_CAT_SWAP 8
#endif
diff --git a/sys/sys/imgact_elf.h b/sys/sys/imgact_elf.h
index c9444e5aec41..2845a9dbc1e2 100644
--- a/sys/sys/imgact_elf.h
+++ b/sys/sys/imgact_elf.h
@@ -45,6 +45,7 @@
{(pos)->a_type = (id); (pos)->a_un.a_ptr = (ptr); (pos)++;}
#endif
+struct coredump_writer;
struct image_params;
struct thread;
struct vnode;
@@ -114,7 +115,7 @@ bool __elfN(brand_inuse)(Elf_Brandinfo *entry);
int __elfN(insert_brand_entry)(Elf_Brandinfo *entry);
int __elfN(remove_brand_entry)(Elf_Brandinfo *entry);
int __elfN(freebsd_fixup)(uintptr_t *, struct image_params *);
-int __elfN(coredump)(struct thread *, struct vnode *, off_t, int);
+int __elfN(coredump)(struct thread *, struct coredump_writer *, off_t, int);
size_t __elfN(populate_note)(int, void *, void *, size_t, void **);
int __elfN(freebsd_copyout_auxargs)(struct image_params *, uintptr_t);
void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t, int);
diff --git a/sys/sys/jail.h b/sys/sys/jail.h
index 08caa9f49270..24c420e2c976 100644
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -435,7 +435,7 @@ void prison0_init(void);
bool prison_allow(struct ucred *, unsigned);
int prison_check(struct ucred *cred1, struct ucred *cred2);
bool prison_check_nfsd(struct ucred *cred);
-bool prison_owns_vnet(struct ucred *);
+bool prison_owns_vnet(struct prison *pr);
int prison_canseemount(struct ucred *cred, struct mount *mp);
void prison_enforce_statfs(struct ucred *cred, struct mount *mp,
struct statfs *sp);
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index c75094aea450..f9141bf70742 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -641,16 +641,15 @@ m_epg_pagelen(const struct mbuf *m, int pidx, int pgoff)
/*
* Flags indicating checksum, segmentation and other offload work to be
- * done, or already done, by hardware or lower layers. It is split into
- * separate inbound and outbound flags.
+ * done, or already done, by hardware or lower layers.
*
- * Outbound flags that are set by upper protocol layers requesting lower
+ * Flags that are set by upper protocol layers requesting lower
* layers, or ideally the hardware, to perform these offloading tasks.
- * For outbound packets this field and its flags can be directly tested
- * against ifnet if_hwassist. Note that the outbound and the inbound flags do
- * not collide right now but they could be allowed to (as long as the flags are
- * scrubbed appropriately when the direction of an mbuf changes). CSUM_BITS
- * would also have to split into CSUM_BITS_TX and CSUM_BITS_RX.
+ * Before passing packets to a network interface this field and its flags can
+ * be directly tested against ifnet if_hwassist. Note that the flags
+ * CSUM_IP_SCTP, CSUM_IP_TCP, and CSUM_IP_UDP can appear on input processing
+ * of SCTP, TCP, and UDP. In such a case the checksum will not be computed or
+ * validated by SCTP, TCP, or UDP, since the packet has not been on the wire.
*
* CSUM_INNER_<x> is the same as CSUM_<x> but it applies to the inner frame.
* The CSUM_ENCAP_<x> bits identify the outer encapsulation.
@@ -679,7 +678,7 @@ m_epg_pagelen(const struct mbuf *m, int pidx, int pgoff)
#define CSUM_ENCAP_VXLAN 0x00040000 /* VXLAN outer encapsulation */
#define CSUM_ENCAP_RSVD1 0x00080000
-/* Inbound checksum support where the checksum was verified by hardware. */
+/* Flags used to indicate that the checksum was verified by hardware. */
#define CSUM_INNER_L3_CALC 0x00100000
#define CSUM_INNER_L3_VALID 0x00200000
#define CSUM_INNER_L4_CALC 0x00400000
@@ -1391,6 +1390,7 @@ extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */
#define PACKET_TAG_PF_REASSEMBLED 31
#define PACKET_TAG_IPSEC_ACCEL_OUT 32 /* IPSEC accel out */
#define PACKET_TAG_IPSEC_ACCEL_IN 33 /* IPSEC accel in */
+#define PACKET_TAG_OVPN 34 /* if_ovpn */
/* Specific cookies and tags. */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index f941f021a423..f7abc740ddc3 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -74,7 +74,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1500054
+#define __FreeBSD_version 1500056
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index 23e8426b26ee..8f181b7beee6 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -403,6 +403,7 @@ int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **);
void sigfastblock_clear(struct thread *td);
void sigfastblock_fetch(struct thread *td);
int sig_intr(void);
+bool sig_do_core(int);
void siginit(struct proc *p);
void signotify(struct thread *td);
void sigqueue_delete(struct sigqueue *queue, int sig);
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index fd183ffbc7a4..8237165b84ce 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -60,6 +60,7 @@ struct rusage;
struct sched_param;
struct sembuf;
union semun;
+struct shmfd;
struct sockaddr;
struct spacectl_range;
struct stat;
@@ -337,7 +338,7 @@ int kern_shm_open(struct thread *td, const char *userpath, int flags,
mode_t mode, struct filecaps *fcaps);
int kern_shm_open2(struct thread *td, const char *path, int flags,
mode_t mode, int shmflags, struct filecaps *fcaps,
- const char *name);
+ const char *name, struct shmfd *shmfd);
int kern_shmat(struct thread *td, int shmid, const void *shmaddr,
int shmflg);
int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf,
diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h
index 4ddfc8516053..1714fa5a7416 100644
--- a/sys/sys/sysent.h
+++ b/sys/sys/sysent.h
@@ -90,6 +90,7 @@ struct sysent { /* system call table */
#define SY_THR_STATIC_KLD SY_THR_STATIC
#endif
+struct coredump_writer;
struct image_params;
struct proc;
struct __sigset;
@@ -108,7 +109,8 @@ struct sysentvec {
int *sv_szsigcode; /* size of sigtramp code */
int sv_sigcodeoff;
char *sv_name; /* name of binary type */
- int (*sv_coredump)(struct thread *, struct vnode *, off_t, int);
+ int (*sv_coredump)(struct thread *, struct coredump_writer *,
+ off_t, int);
/* function to dump core, or NULL */
int sv_elf_core_osabi;
const char *sv_elf_core_abi_vendor;
diff --git a/sys/sys/ucoredump.h b/sys/sys/ucoredump.h
new file mode 100644
index 000000000000..0a51ee7f50c8
--- /dev/null
+++ b/sys/sys/ucoredump.h
@@ -0,0 +1,99 @@
+/*
+ *
+ * Copyright (c) 2015 Mark Johnston <markj@FreeBSD.org>
+ * Copyright (c) 2025 Kyle Evans <kevans@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ */
+
+#ifndef _SYS_UCOREDUMP_H_
+#define _SYS_UCOREDUMP_H_
+
+#ifdef _KERNEL
+
+#include <sys/_uio.h>
+#include <sys/blockcount.h>
+#include <sys/queue.h>
+
+/* Coredump output parameters. */
+struct coredump_params;
+struct coredump_writer;
+struct thread;
+struct ucred;
+
+typedef int coredump_init_fn(const struct coredump_writer *,
+ const struct coredump_params *);
+typedef int coredump_write_fn(const struct coredump_writer *, const void *, size_t,
+ off_t, enum uio_seg, struct ucred *, size_t *, struct thread *);
+typedef int coredump_extend_fn(const struct coredump_writer *, off_t,
+ struct ucred *);
+
+struct coredump_vnode_ctx {
+ struct vnode *vp;
+ struct ucred *fcred;
+};
+
+coredump_write_fn core_vn_write;
+coredump_extend_fn core_vn_extend;
+
+struct coredump_writer {
+ void *ctx;
+ coredump_init_fn *init_fn;
+ coredump_write_fn *write_fn;
+ coredump_extend_fn *extend_fn;
+};
+
+struct coredump_params {
+ off_t offset;
+ struct ucred *active_cred;
+ struct thread *td;
+ const struct coredump_writer *cdw;
+ struct compressor *comp;
+};
+
+#define CORE_BUF_SIZE (16 * 1024)
+
+int core_write(struct coredump_params *, const void *, size_t, off_t,
+ enum uio_seg, size_t *);
+int core_output(char *, size_t, off_t, struct coredump_params *, void *);
+int sbuf_drain_core_output(void *, const char *, int);
+
+extern int coredump_pack_fileinfo;
+extern int coredump_pack_vmmapinfo;
+
+extern int compress_user_cores;
+extern int compress_user_cores_level;
+
+typedef int coredumper_probe_fn(struct thread *);
+
+/*
+ * Some arbitrary values for coredumper probes to return. The highest priority
+ * we can find wins. It's somewhat expected that a coredumper may want to bid
+ * differently based on the process in question. Note that probe functions will
+ * be called with the proc lock held, so they must not sleep.
+ */
+#define COREDUMPER_NOMATCH (-1) /* Decline to touch it */
+#define COREDUMPER_GENERIC (0) /* I handle coredumps */
+#define COREDUMPER_SPECIAL (50) /* Special handler */
+#define COREDUMPER_HIGH_PRIORITY (100) /* High-priority handler */
+
+/*
+ * The handle functions will be called with the proc lock held, and should
+ * return with the proc lock dropped.
+ */
+typedef int coredumper_handle_fn(struct thread *, off_t);
+
+struct coredumper {
+ SLIST_ENTRY(coredumper) cd_entry;
+ const char *cd_name;
+ coredumper_probe_fn *cd_probe;
+ coredumper_handle_fn *cd_handle;
+ blockcount_t cd_refcount;
+};
+
+void coredumper_register(struct coredumper *);
+void coredumper_unregister(struct coredumper *);
+
+#endif /* _KERNEL */
+#endif /* _SYS_UCOREDUMP_H_ */
diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h
index ddd8f3ddb63d..9c1d8545af34 100644
--- a/sys/sys/ucred.h
+++ b/sys/sys/ucred.h
@@ -44,6 +44,7 @@
* Flags for cr_flags.
*/
#define CRED_FLAG_CAPMODE 0x00000001 /* In capability mode. */
+#define CRED_FLAG_GROUPSET 0x00000002 /* Groups have been set. */
/*
* Number of groups inlined in 'struct ucred'. It must stay reasonably low as
@@ -76,15 +77,12 @@ struct ucred {
u_int cr_users; /* (c) proc + thread using this cred */
u_int cr_flags; /* credential flags */
struct auditinfo_addr cr_audit; /* Audit properties. */
+ int cr_ngroups; /* number of supplementary groups */
#define cr_startcopy cr_uid
uid_t cr_uid; /* effective user id */
uid_t cr_ruid; /* real user id */
uid_t cr_svuid; /* saved user id */
- /*
- * XXXOC: On the next ABI change, please move 'cr_ngroups' out of the
- * copied area (crcopy() already copes with this change).
- */
- int cr_ngroups; /* number of groups */
+ gid_t cr_gid; /* effective group id */
gid_t cr_rgid; /* real group id */
gid_t cr_svgid; /* saved group id */
struct uidinfo *cr_uidinfo; /* per euid resource consumption */
@@ -111,8 +109,20 @@ struct ucred {
struct xucred {
u_int cr_version; /* structure layout version */
uid_t cr_uid; /* effective user id */
- short cr_ngroups; /* number of groups */
- gid_t cr_groups[XU_NGROUPS]; /* groups */
+ short cr_ngroups; /* number of groups (incl. cr_gid). */
+ union {
+ /*
+ * Special little hack to avoid needing a cr_gid macro, which
+ * would cause problems if one were to use it with struct ucred
+ * which also has a cr_groups member.
+ */
+ struct {
+ gid_t cr_gid; /* effective group id */
+ gid_t cr_sgroups[XU_NGROUPS - 1];
+ };
+
+ gid_t cr_groups[XU_NGROUPS]; /* groups */
+ };
union {
void *_cr_unused1; /* compatibility with old ucred */
pid_t cr_pid;
@@ -120,9 +130,6 @@ struct xucred {
};
#define XUCRED_VERSION 0
-/* This can be used for both ucred and xucred structures. */
-#define cr_gid cr_groups[0]
-
struct mac;
/*
* Structure to pass as an argument to the setcred() system call.
@@ -235,8 +242,8 @@ void crcowfree(struct thread *td);
void cru2x(struct ucred *cr, struct xucred *xcr);
void cru2xt(struct thread *td, struct xucred *xcr);
void crsetgroups(struct ucred *cr, int ngrp, const gid_t *groups);
-void crsetgroups_fallback(struct ucred *cr, int ngrp, const gid_t *groups,
- const gid_t fallback);
+void crsetgroups_and_egid(struct ucred *cr, int ngrp, const gid_t *groups,
+ const gid_t default_egid);
bool cr_xids_subset(struct ucred *active_cred, struct ucred *obj_cred);
/*
diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
index c291c1dc2b95..85ed93fd359d 100644
--- a/sys/sys/unistd.h
+++ b/sys/sys/unistd.h
@@ -156,6 +156,8 @@
#define _PC_DEALLOC_PRESENT 65
#define _PC_NAMEDATTR_ENABLED 66
#define _PC_HAS_NAMEDATTR 67
+#define _PC_XATTR_ENABLED _PC_NAMEDATTR_ENABLED /* Solaris Compatible */
+#define _PC_XATTR_EXISTS _PC_HAS_NAMEDATTR /* Solaris Compatible */
#define _PC_HAS_HIDDENSYSTEM 68
#endif
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 3f4aec02ba49..67cd6fb4b738 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -274,7 +274,7 @@ void
softdep_setup_remove(struct buf *bp,
struct inode *dp,
struct inode *ip,
- int isrmdir)
+ bool isrmdir)
{
panic("softdep_setup_remove called");
@@ -285,7 +285,7 @@ softdep_setup_directory_change(struct buf *bp,
struct inode *dp,
struct inode *ip,
ino_t newinum,
- int isrmdir)
+ u_int newparent)
{
panic("softdep_setup_directory_change called");
@@ -765,7 +765,7 @@ static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
static void handle_workitem_freefile(struct freefile *);
static int handle_workitem_remove(struct dirrem *, int);
static struct dirrem *newdirrem(struct buf *, struct inode *,
- struct inode *, int, struct dirrem **);
+ struct inode *, bool, struct dirrem **);
static struct indirdep *indirdep_lookup(struct mount *, struct inode *,
struct buf *);
static void cancel_indirdep(struct indirdep *, struct buf *,
@@ -9169,7 +9169,7 @@ softdep_setup_remove(
struct buf *bp, /* buffer containing directory block */
struct inode *dp, /* inode for the directory being modified */
struct inode *ip, /* inode for directory entry being removed */
- int isrmdir) /* indicates if doing RMDIR */
+ bool isrmdir) /* indicates if doing RMDIR */
{
struct dirrem *dirrem, *prevdirrem;
struct inodedep *inodedep;
@@ -9361,7 +9361,7 @@ newdirrem(
struct buf *bp, /* buffer containing directory block */
struct inode *dp, /* inode for the directory being modified */
struct inode *ip, /* inode for directory entry being removed */
- int isrmdir, /* indicates if doing RMDIR */
+ bool isrmdir, /* indicates if doing RMDIR */
struct dirrem **prevdirremp) /* previously referenced inode, if any */
{
int offset;
@@ -9490,7 +9490,7 @@ newdirrem(
dirrem->dm_state |= COMPLETE;
cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref);
#ifdef INVARIANTS
- if (isrmdir == 0) {
+ if (!isrmdir) {
struct worklist *wk;
LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list)
@@ -9525,7 +9525,7 @@ softdep_setup_directory_change(
struct inode *dp, /* inode for the directory being modified */
struct inode *ip, /* inode for directory entry being removed */
ino_t newinum, /* new inode number for changed entry */
- int isrmdir) /* indicates if doing RMDIR */
+ u_int newparent) /* 0, 1, or inode number of new parent */
{
int offset;
struct diradd *dap = NULL;
@@ -9558,10 +9558,10 @@ softdep_setup_directory_change(
/*
* Allocate a new dirrem and ACQUIRE_LOCK.
*/
- dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
+ dirrem = newdirrem(bp, dp, ip, newparent != 0, &prevdirrem);
pagedep = dirrem->dm_pagedep;
/*
- * The possible values for isrmdir:
+ * The possible values for newparent:
* 0 - non-directory file rename
* 1 - directory rename within same directory
* inum - directory rename to new directory of given inode number
@@ -9572,7 +9572,7 @@ softdep_setup_directory_change(
* the DIRCHG flag to tell handle_workitem_remove to skip the
* followup dirrem.
*/
- if (isrmdir > 1)
+ if (newparent > 1)
dirrem->dm_state |= DIRCHG;
/*
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index ccd9046a5fa8..111fb1cb40b3 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -66,8 +66,8 @@ void ufs_makedirentry(struct inode *, struct componentname *,
struct direct *);
int ufs_direnter(struct vnode *, struct vnode *, struct direct *,
struct componentname *, struct buf *);
-int ufs_dirremove(struct vnode *, struct inode *, int, int);
-int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, int);
+int ufs_dirremove(struct vnode *, struct inode *, int, bool);
+int ufs_dirrewrite(struct inode *, struct inode *, ino_t, int, u_int);
int ufs_lookup_ino(struct vnode *, struct vnode **, struct componentname *,
ino_t *);
int ufs_getlbns(struct vnode *, ufs2_daddr_t, struct indir *, int *);
@@ -93,9 +93,9 @@ int softdep_setup_directory_add(struct buf *, struct inode *, off_t,
ino_t, struct buf *, int);
void softdep_change_directoryentry_offset(struct buf *, struct inode *,
caddr_t, caddr_t, caddr_t, int);
-void softdep_setup_remove(struct buf *,struct inode *, struct inode *, int);
+void softdep_setup_remove(struct buf *,struct inode *, struct inode *, bool);
void softdep_setup_directory_change(struct buf *, struct inode *,
- struct inode *, ino_t, int);
+ struct inode *, ino_t, u_int);
void softdep_change_linkcnt(struct inode *);
int softdep_slowdown(struct vnode *);
void softdep_setup_create(struct inode *, struct inode *);
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 3f9c95e934fc..fd0539c40c0d 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1101,7 +1101,7 @@ ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp,
* to the size of the previous entry.
*/
int
-ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir)
+ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, bool isrmdir)
{
struct inode *dp;
struct direct *ep, *rep;
@@ -1224,7 +1224,7 @@ out:
*/
int
ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
- int isrmdir)
+ u_int newparent)
{
struct buf *bp;
struct direct *ep;
@@ -1267,7 +1267,8 @@ ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype,
if (!OFSFMT(vdp))
ep->d_type = newtype;
if (DOINGSOFTDEP(vdp)) {
- softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
+ softdep_setup_directory_change(bp, dp, oip, newinum,
+ newparent);
bdwrite(bp);
} else {
if (DOINGASYNC(vdp)) {
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 17308706c3f4..ffc993aef9fc 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1051,7 +1051,7 @@ ufs_remove(
#ifdef UFS_GJOURNAL
ufs_gjournal_orphan(vp);
#endif
- error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
+ error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, false);
if (ip->i_nlink <= 0)
vp->v_vflag |= VV_NOSYNC;
if (IS_SNAPSHOT(ip)) {
@@ -1209,7 +1209,7 @@ ufs_whiteout(
#endif
cnp->cn_flags &= ~DOWHITEOUT;
- error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
+ error = ufs_dirremove(dvp, NULL, cnp->cn_flags, false);
break;
default:
panic("ufs_whiteout: unknown op");
@@ -1268,7 +1268,8 @@ ufs_rename(
struct inode *fip, *tip, *tdp, *fdp;
struct direct newdir;
off_t endoff;
- int doingdirectory, newparent;
+ int doingdirectory;
+ u_int newparent;
int error = 0;
struct mount *mp;
ino_t ino;
@@ -1475,7 +1476,7 @@ relock:
* the user must have write permission in the source so
* as to be able to change "..".
*/
- if (doingdirectory && newparent) {
+ if (doingdirectory && newparent != 0) {
error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread);
if (error)
goto unlockout;
@@ -1538,7 +1539,7 @@ relock:
if (tip == NULL) {
if (ITODEV(tdp) != ITODEV(fip))
panic("ufs_rename: EXDEV");
- if (doingdirectory && newparent) {
+ if (doingdirectory && newparent != 0) {
/*
* Account for ".." in new directory.
* When source and destination have the same
@@ -1631,7 +1632,7 @@ relock:
goto bad;
}
if (doingdirectory) {
- if (!newparent) {
+ if (newparent == 0) {
tdp->i_effnlink--;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
@@ -1641,11 +1642,11 @@ relock:
softdep_change_linkcnt(tip);
}
error = ufs_dirrewrite(tdp, tip, fip->i_number,
- IFTODT(fip->i_mode),
- (doingdirectory && newparent) ? newparent : doingdirectory);
+ IFTODT(fip->i_mode), (doingdirectory && newparent != 0) ?
+ newparent : doingdirectory);
if (error) {
if (doingdirectory) {
- if (!newparent) {
+ if (newparent == 0) {
tdp->i_effnlink++;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(tdp);
@@ -1668,7 +1669,7 @@ relock:
* disk, so when running with that code we avoid doing
* them now.
*/
- if (!newparent) {
+ if (newparent == 0) {
tdp->i_nlink--;
DIP_SET_NLINK(tdp, tdp->i_nlink);
UFS_INODE_SET_FLAG(tdp, IN_CHANGE);
@@ -1697,7 +1698,7 @@ relock:
* parent directory must be decremented
* and ".." set to point to the new parent.
*/
- if (doingdirectory && newparent) {
+ if (doingdirectory && newparent != 0) {
/*
* Set the directory depth based on its new parent.
*/
@@ -1727,7 +1728,7 @@ relock:
"rename: missing .. entry");
cache_purge(fdvp);
}
- error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
+ error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, false);
/*
* The kern_renameat() looks up the fvp using the DELETE flag, which
* causes the removal of the name cache entry for fvp.
@@ -2037,7 +2038,6 @@ ufs_mkdir(
{
#ifdef QUOTA
struct ucred ucred, *ucp;
- gid_t ucred_group;
ucp = cnp->cn_cred;
#endif
/*
@@ -2064,13 +2064,8 @@ ufs_mkdir(
*/
ucred.cr_ref = 1;
ucred.cr_uid = ip->i_uid;
-
- /*
- * XXXKE Fix this is cr_gid gets separated out
- */
- ucred.cr_ngroups = 1;
- ucred.cr_groups = &ucred_group;
- ucred.cr_gid = ucred_group = dp->i_gid;
+ ucred.cr_gid = dp->i_gid;
+ ucred.cr_ngroups = 0;
ucp = &ucred;
}
#endif
@@ -2308,7 +2303,7 @@ ufs_rmdir(
ip->i_effnlink--;
if (DOINGSOFTDEP(vp))
softdep_setup_rmdir(dp, ip);
- error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
+ error = ufs_dirremove(dvp, ip, cnp->cn_flags, true);
if (error) {
dp->i_effnlink++;
ip->i_effnlink++;
@@ -2801,7 +2796,6 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
{
#ifdef QUOTA
struct ucred ucred, *ucp;
- gid_t ucred_group;
ucp = cnp->cn_cred;
#endif
/*
@@ -2827,13 +2821,8 @@ ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
*/
ucred.cr_ref = 1;
ucred.cr_uid = ip->i_uid;
-
- /*
- * XXXKE Fix this is cr_gid gets separated out
- */
- ucred.cr_ngroups = 1;
- ucred.cr_groups = &ucred_group;
- ucred.cr_gid = ucred_group = pdir->i_gid;
+ ucred.cr_gid = pdir->i_gid;
+ ucred.cr_ngroups = 0;
ucp = &ucred;
#endif
} else {
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index d6bd06226d04..c01b9e45a32b 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -65,9 +65,9 @@
* from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
*/
-#include <sys/cdefs.h>
#include "opt_vm.h"
+#define EXTERR_CATEGORY EXTERR_CAT_SWAP
#include <sys/param.h>
#include <sys/bio.h>
#include <sys/blist.h>
@@ -76,6 +76,7 @@
#include <sys/disk.h>
#include <sys/disklabel.h>
#include <sys/eventhandler.h>
+#include <sys/exterrvar.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
@@ -2686,7 +2687,7 @@ swapon_check_swzone(void)
}
}
-static void
+static int
swaponsomething(struct vnode *vp, void *id, u_long nblks,
sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags)
{
@@ -2701,6 +2702,8 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks,
*/
nblks &= ~(ctodb(1) - 1);
nblks = dbtoc(nblks);
+ if (nblks == 0)
+ return (EXTERROR(EINVAL, "swap device too small"));
sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO);
sp->sw_blist = blist_create(nblks, M_WAITOK);
@@ -2742,6 +2745,8 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks,
swp_sizecheck();
mtx_unlock(&sw_dev_mtx);
EVENTHANDLER_INVOKE(swapon, sp);
+
+ return (0);
}
/*
@@ -3273,6 +3278,7 @@ swapongeom_locked(struct cdev *dev, struct vnode *vp)
cp->index = 1; /* Number of active I/Os, plus one for being active. */
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
g_attach(cp, pp);
+
/*
* XXX: Every time you think you can improve the margin for
* footshooting, somebody depends on the ability to do so:
@@ -3280,16 +3286,20 @@ swapongeom_locked(struct cdev *dev, struct vnode *vp)
* set an exclusive count :-(
*/
error = g_access(cp, 1, 1, 0);
+
+ if (error == 0) {
+ nblks = pp->mediasize / DEV_BSIZE;
+ error = swaponsomething(vp, cp, nblks, swapgeom_strategy,
+ swapgeom_close, dev2udev(dev),
+ (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0);
+ if (error != 0)
+ g_access(cp, -1, -1, 0);
+ }
if (error != 0) {
g_detach(cp);
g_destroy_consumer(cp);
- return (error);
}
- nblks = pp->mediasize / DEV_BSIZE;
- swaponsomething(vp, cp, nblks, swapgeom_strategy,
- swapgeom_close, dev2udev(dev),
- (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0);
- return (0);
+ return (error);
}
static int
@@ -3378,9 +3388,11 @@ swaponvp(struct thread *td, struct vnode *vp, u_long nblks)
if (error != 0)
return (error);
- swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close,
+ error = swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close,
NODEV, 0);
- return (0);
+ if (error != 0)
+ VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
+ return (error);
}
static int
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index bbae55895c2c..b239a6ffb4ce 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -396,7 +396,7 @@ vm_page_blacklist_load(char **list, char **end)
}
*list = ptr;
if (ptr != NULL)
- *end = ptr + len;
+ *end = ptr + len - 1;
else
*end = NULL;
return;
diff --git a/targets/pseudo/userland/misc/Makefile.depend b/targets/pseudo/userland/misc/Makefile.depend
index d3c97fc56b40..546800004d11 100644
--- a/targets/pseudo/userland/misc/Makefile.depend
+++ b/targets/pseudo/userland/misc/Makefile.depend
@@ -54,7 +54,6 @@ DIRDEPS.x86sys= \
.if ${MK_ZFS} != "no"
DIRDEPS.x86sys+= \
stand/i386/gptzfsboot \
- stand/i386/zfsboot \
stand/i386/zfsloader \
DIRDEPS+= \
diff --git a/tests/sys/net/if_bridge_test.sh b/tests/sys/net/if_bridge_test.sh
index cd38adea28ad..c0c085f22273 100755
--- a/tests/sys/net/if_bridge_test.sh
+++ b/tests/sys/net/if_bridge_test.sh
@@ -1221,6 +1221,29 @@ vlan_qinq_cleanup()
vnet_cleanup
}
+# Adding a bridge SVI to a bridge should not be allowed.
+atf_test_case "bridge_svi_in_bridge" "cleanup"
+bridge_svi_in_bridge_head()
+{
+ atf_set descr 'adding a bridge SVI to a bridge is not allowed (1)'
+ atf_set require.user root
+}
+
+bridge_svi_in_bridge_body()
+{
+ vnet_init
+ vnet_init_bridge
+
+ bridge=$(vnet_mkbridge)
+ atf_check -s exit:0 ifconfig ${bridge}.1 create
+ atf_check -s exit:1 -e ignore ifconfig ${bridge} addm ${bridge}.1
+}
+
+bridge_svi_in_bridge_cleanup()
+{
+ vnet_cleanup
+}
+
atf_init_test_cases()
{
atf_add_test_case "bridge_transmit_ipv4_unicast"
@@ -1247,4 +1270,5 @@ atf_init_test_cases()
atf_add_test_case "vlan_ifconfig_tagged"
atf_add_test_case "vlan_svi"
atf_add_test_case "vlan_qinq"
+ atf_add_test_case "bridge_svi_in_bridge"
}
diff --git a/tests/sys/net/if_ovpn/if_ovpn.sh b/tests/sys/net/if_ovpn/if_ovpn.sh
index 26807a095455..c42344da1a3b 100644
--- a/tests/sys/net/if_ovpn/if_ovpn.sh
+++ b/tests/sys/net/if_ovpn/if_ovpn.sh
@@ -1314,6 +1314,96 @@ multihome6_cleanup()
ovpn_cleanup
}
+atf_test_case "float" "cleanup"
+float_head()
+{
+ atf_set descr 'Test peer float notification'
+ atf_set require.user root
+}
+
+float_body()
+{
+ ovpn_init
+
+ l=$(vnet_mkepair)
+
+ vnet_mkjail a ${l}a
+ jexec a ifconfig ${l}a 192.0.2.1/24 up
+ jexec a ifconfig lo0 127.0.0.1/8 up
+ vnet_mkjail b ${l}b
+ jexec b ifconfig ${l}b 192.0.2.2/24 up
+
+ # Sanity check
+ atf_check -s exit:0 -o ignore jexec a ping -c 1 192.0.2.2
+
+ ovpn_start a "
+ dev ovpn0
+ dev-type tun
+ proto udp4
+
+ cipher AES-256-GCM
+ auth SHA256
+
+ local 192.0.2.1
+ server 198.51.100.0 255.255.255.0
+ ca $(atf_get_srcdir)/ca.crt
+ cert $(atf_get_srcdir)/server.crt
+ key $(atf_get_srcdir)/server.key
+ dh $(atf_get_srcdir)/dh.pem
+
+ mode server
+ script-security 2
+ auth-user-pass-verify /usr/bin/true via-env
+ topology subnet
+
+ keepalive 2 10
+
+ management 192.0.2.1 1234
+ "
+ ovpn_start b "
+ dev tun0
+ dev-type tun
+
+ client
+
+ remote 192.0.2.1
+ auth-user-pass $(atf_get_srcdir)/user.pass
+
+ ca $(atf_get_srcdir)/ca.crt
+ cert $(atf_get_srcdir)/client.crt
+ key $(atf_get_srcdir)/client.key
+ dh $(atf_get_srcdir)/dh.pem
+
+ keepalive 2 10
+ "
+
+ # Give the tunnel time to come up
+ sleep 10
+
+ atf_check -s exit:0 -o ignore jexec b ping -c 3 198.51.100.1
+
+ # We expect the client on 192.0.2.2
+ if ! echo "status" | jexec a nc -N 192.0.2.1 1234 | grep 192.0.2.2; then
+ atf_fail "Client not found in status list!"
+ fi
+
+ # Now change the client IP
+ jexec b ifconfig ${l}b 192.0.2.3/24 up
+
+ # And wait for keepalives to trigger the float notification
+ sleep 5
+
+ # So the client now has the new address in userspace
+ if ! echo "status" | jexec a nc -N 192.0.2.1 1234 | grep 192.0.2.3; then
+ atf_fail "Client not found in status list!"
+ fi
+}
+
+float_cleanup()
+{
+ ovpn_cleanup
+}
+
atf_init_test_cases()
{
atf_add_test_case "4in4"
@@ -1332,4 +1422,5 @@ atf_init_test_cases()
atf_add_test_case "destroy_unused"
atf_add_test_case "multihome4"
atf_add_test_case "multihome6"
+ atf_add_test_case "float"
}
diff --git a/tests/sys/net/if_vlan.sh b/tests/sys/net/if_vlan.sh
index 424eac705b94..8122203337e2 100755
--- a/tests/sys/net/if_vlan.sh
+++ b/tests/sys/net/if_vlan.sh
@@ -333,6 +333,32 @@ conflict_id_cleanup()
}
+# If a vlan interface is in a bridge, changing the vlandev to refer to
+# a bridge should not be allowed.
+atf_test_case "bridge_vlandev" "cleanup"
+bridge_vlandev_head()
+{
+ atf_set descr 'transforming a bridge member vlan into an SVI is not allowed'
+ atf_set require.user root
+}
+
+bridge_vlandev_body()
+{
+ vnet_init
+ vnet_init_bridge
+
+ bridge=$(vnet_mkbridge)
+ vlan=$(vnet_mkvlan)
+
+ atf_check -s exit:0 ifconfig ${bridge} addm ${vlan}
+ atf_check -s exit:1 -e ignore ifconfig ${vlan} vlan 1 vlandev ${bridge}
+}
+
+bridge_vlandev_cleanup()
+{
+ vnet_cleanup
+}
+
atf_init_test_cases()
{
atf_add_test_case "basic"
@@ -343,4 +369,5 @@ atf_init_test_cases()
atf_add_test_case "qinq_setflags"
atf_add_test_case "bpf_pcp"
atf_add_test_case "conflict_id"
+ atf_add_test_case "bridge_vlandev"
}
diff --git a/tests/sys/netpfil/pf/nat64.py b/tests/sys/netpfil/pf/nat64.py
index 5cc4713a16cc..a5890fc4a161 100644
--- a/tests/sys/netpfil/pf/nat64.py
+++ b/tests/sys/netpfil/pf/nat64.py
@@ -33,7 +33,7 @@ from atf_python.sys.net.tools import ToolsHelper
from atf_python.sys.net.vnet import VnetTestTemplate
class TestNAT64(VnetTestTemplate):
- REQUIRED_MODULES = [ "pf" ]
+ REQUIRED_MODULES = [ "pf", "pflog" ]
TOPOLOGY = {
"vnet1": {"ifaces": ["if1"]},
"vnet2": {"ifaces": ["if1", "if2"]},
@@ -92,12 +92,15 @@ class TestNAT64(VnetTestTemplate):
def vnet2_handler(self, vnet):
ifname = vnet.iface_alias_map["if1"].name
+ ToolsHelper.print_output("/sbin/sysctl net.inet6.ip6.forwarding=1")
ToolsHelper.print_output("/sbin/route add default 192.0.2.2")
ToolsHelper.print_output("/sbin/pfctl -e")
ToolsHelper.pf_rules([
"pass inet6 proto icmp6",
"pass in on %s inet6 af-to inet from 192.0.2.1" % ifname])
+ vnet.pipe.send(socket.if_nametoindex("pflog0"))
+
@pytest.mark.require_user("root")
@pytest.mark.require_progs(["scapy"])
def test_tcp_rst(self):
@@ -287,3 +290,39 @@ class TestNAT64(VnetTestTemplate):
reply = sp.sr1(packet, timeout=3)
# We don't expect a reply to a corrupted packet
assert not reply
+
+ @pytest.mark.require_user("root")
+ @pytest.mark.require_progs(["scapy"])
+ def test_noip6(self):
+ """
+ PR 288263: link-local target address in icmp6 ADVERT can cause NULL deref
+ """
+ ifname = self.vnet.iface_alias_map["if1"].name
+ gw_mac = self.vnet.iface_alias_map["if1"].epairb.ether
+ scopeid = self.wait_object(self.vnet_map["vnet2"].pipe)
+ ToolsHelper.print_output("/sbin/route -6 add default 2001:db8::1")
+
+ import scapy.all as sp
+
+ pkt = sp.Ether(dst=gw_mac) \
+ / sp.IPv6(dst="64:ff9b::203.0.113.2") \
+ / sp.ICMPv6ND_NA(tgt="FFA2:%x:2821:125F:1D27:B3B2:3F6F:C43C" % scopeid)
+ pkt.show()
+ sp.hexdump(pkt)
+ s = DelayedSend(pkt, sendif=ifname)
+
+ packets = sp.sniff(iface=ifname, timeout=5)
+ for r in packets:
+ r.show()
+
+ # Try scope id that likely doesn't have an interface at all
+ pkt = sp.Ether(dst=gw_mac) \
+ / sp.IPv6(dst="64:ff9b::203.0.113.2") \
+ / sp.ICMPv6ND_NA(tgt="FFA2:%x:2821:125F:1D27:B3B2:3F6F:C43C" % 255)
+ pkt.show()
+ sp.hexdump(pkt)
+ s = DelayedSend(pkt, sendif=ifname)
+
+ packets = sp.sniff(iface=ifname, timeout=5)
+ for r in packets:
+ r.show()
diff --git a/tools/boot/install-boot.sh b/tools/boot/install-boot.sh
index 217bf0ff1457..10e62dd32ba8 100755
--- a/tools/boot/install-boot.sh
+++ b/tools/boot/install-boot.sh
@@ -294,27 +294,9 @@ boot_nogeli_mbr_ufs_both() {
boot_nogeli_mbr_ufs_uefi $1 $2 $3
}
+# ZFS+MBR+BIOS is not a supported configuration
boot_nogeli_mbr_zfs_legacy() {
- dev=$1
- dst=$2
-
- # search to find the BSD slice
- s=$(find_part $dev "freebsd")
- if [ -z "$s" ] ; then
- die "No BSD slice found"
- fi
- idx=$(find_part ${dev}s${s} "freebsd-zfs")
- if [ -z "$idx" ] ; then
- die "No freebsd-zfs slice found"
- fi
- # search to find the freebsd-zfs partition within the slice
- # Or just assume it is 'a' because it has to be since it fails otherwise
- doit gpart bootcode -b ${dst}/boot/mbr ${dev}
- dd if=${dst}/boot/zfsboot of=/tmp/zfsboot1 count=1
- doit gpart bootcode -b /tmp/zfsboot1 ${dev}s${s} # Put boot1 into the start of part
- sysctl kern.geom.debugflags=0x10 # Put boot2 into ZFS boot slot
- doit dd if=${dst}/boot/zfsboot of=/dev/${dev}s${s}a skip=1 seek=1024
- sysctl kern.geom.debugflags=0x0
+ exit 1
}
boot_nogeli_mbr_zfs_uefi() {
@@ -322,7 +304,6 @@ boot_nogeli_mbr_zfs_uefi() {
}
boot_nogeli_mbr_zfs_both() {
- boot_nogeli_mbr_zfs_legacy $1 $2 $3
boot_nogeli_mbr_zfs_uefi $1 $2 $3
}
diff --git a/tools/boot/rootgen.sh b/tools/boot/rootgen.sh
index d87eb481e2c1..2cd65bdd180d 100755
--- a/tools/boot/rootgen.sh
+++ b/tools/boot/rootgen.sh
@@ -202,33 +202,6 @@ mk_nogeli_mbr_ufs_both() {
rm -f ${src}/etc/fstab
}
-mk_nogeli_mbr_zfs_legacy() {
- src=$1
- img=$2
- mntpt=$3
- geli=$4
- scheme=$5
- fs=$6
- bios=$7
- pool=nogeli-mbr-zfs-legacy
-
- zfs_extra $src $dst
- makefs -t zfs -s 200m \
- -o poolname=${pool} -o bootfs=${pool} -o rootpath=/ \
- ${img}.s1a ${src} ${dst}
- # The old boot1/boot2 boot split is also used by zfs. We need to extract zfsboot1
- # from this image. Since there's no room in the mbr format for the rest of the loader,
- # it will load the zfsboot loader from the reserved for bootloader area of the ZFS volume
- # being booted, hence the need to dd it into the raw img later.
- # Please note: zfsboot only works with partition 'a' which must be the root
- # partition / zfs volume
- dd if=${src}/boot/zfsboot of=${dst}/zfsboot1 count=1
- mkimg -s bsd -b ${dst}zfsboot1 -p freebsd-zfs:=${img}.s1a -o ${img}.s1
- dd if=${src}/boot/zfsboot of=${img}.s1a skip=1 seek=1024
- mkimg -a 1 -s mbr -b ${src}/boot/mbr -p freebsd:=${img}.s1 -o ${img}
- rm -rf ${dst}
-}
-
mk_nogeli_mbr_zfs_uefi() {
src=$1
img=$2
@@ -244,38 +217,11 @@ mk_nogeli_mbr_zfs_uefi() {
makefs -t zfs -s 200m \
-o poolname=${pool} -o bootfs=${pool} -o rootpath=/ \
${img}.s2a ${src} ${dst}
- mkimg -s bsd -b ${dst}zfsboot1 -p freebsd-zfs:=${img}.s2a -o ${img}.s2
+ mkimg -s bsd -p freebsd-zfs:=${img}.s2a -o ${img}.s2
mkimg -a 1 -s mbr -b ${src}/boot/mbr -p efi:=${img}.s1 -p freebsd:=${img}.s2 -o ${img}
rm -rf ${dst}
}
-mk_nogeli_mbr_zfs_both() {
- src=$1
- img=$2
- mntpt=$3
- geli=$4
- scheme=$5
- fs=$6
- bios=$7
- pool=nogeli-mbr-zfs-both
-
- zfs_extra $src $dst
- make_esp_file ${img}.s1 ${espsize} ${src}/boot/loader.efi
- makefs -t zfs -s 200m \
- -o poolname=${pool} -o bootfs=${pool} -o rootpath=/ \
- ${img}.s2a ${src} ${dst}
- # The old boot1/boot2 boot split is also used by zfs. We need to extract zfsboot1
- # from this image. Since there's no room in the mbr format for the rest of the loader,
- # it will load the zfsboot loader from the reserved for bootloader area of the ZFS volume
- # being booted, hence the need to dd it into the raw img later.
- # Please note: zfsboot only works with partition 'a' which must be the root
- # partition / zfs volume
- dd if=${src}/boot/zfsboot of=${dst}/zfsboot1 count=1
- mkimg -s bsd -b ${dst}zfsboot1 -p freebsd-zfs:=${img}.s2a -o ${img}.s2
- dd if=${src}/boot/zfsboot of=${img}.s1a skip=1 seek=1024
- mkimg -a 1 -s mbr -b ${src}/boot/mbr -p efi:=${img}.s1 -p freebsd:=${img}.s2 -o ${img}
-}
-
mk_geli_gpt_ufs_legacy() {
src=$1
img=$2
@@ -728,6 +674,10 @@ for arch in amd64; do
for scheme in gpt mbr; do
for fs in ufs zfs; do
for bios in legacy uefi both; do
+ # ZFS+MBR+BIOS is not supported
+ if [ "$scheme" = "mbr" -a "$fs" = "zfs" -a "$bios" != "uefi" ]; then
+ continue
+ fi
make_one_image ${arch} ${geli} ${scheme} ${fs} ${bios}
done
done
@@ -750,6 +700,11 @@ for arch in i386; do
for bios in legacy; do
# The legacy boot is shared with amd64 so those routines could
# likely be used here.
+
+ # ZFS+MBR+BIOS is not supported
+ if [ "$scheme" = "mbr" -a "$fs" = "zfs" -a "$bios" != "uefi" ]; then
+ continue
+ fi
make_one_image ${arch} ${geli} ${scheme} ${fs} ${bios}
done
done
diff --git a/tools/build/cross-build/include/mac/endian.h b/tools/build/cross-build/include/mac/endian.h
new file mode 100644
index 000000000000..11788044f05a
--- /dev/null
+++ b/tools/build/cross-build/include/mac/endian.h
@@ -0,0 +1,7 @@
+/*
+ * Copyright (c) 2025 John Baldwin <jhb@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/endian.h>
diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc
index f7eb1e979d07..580be4362a18 100644
--- a/tools/build/mk/OptionalObsoleteFiles.inc
+++ b/tools/build/mk/OptionalObsoleteFiles.inc
@@ -357,7 +357,6 @@ OLD_FILES+=boot/shortcuts.4th
OLD_FILES+=boot/support.4th
OLD_FILES+=boot/userboot.so
OLD_FILES+=boot/version.4th
-OLD_FILES+=boot/zfsboot
OLD_FILES+=boot/zfsloader
OLD_FILES+=usr/lib/kgzldr.o
OLD_FILES+=usr/share/man/man5/loader.conf.5.gz
@@ -374,7 +373,6 @@ OLD_FILES+=usr/share/man/man8/menu.4th.8.gz
OLD_FILES+=usr/share/man/man8/menusets.4th.8.gz
OLD_FILES+=usr/share/man/man8/pxeboot.8.gz
OLD_FILES+=usr/share/man/man8/version.4th.8.gz
-OLD_FILES+=usr/share/man/man8/zfsboot.8.gz
OLD_FILES+=usr/share/man/man8/zfsloader.8.gz
.endif
@@ -3713,33 +3711,33 @@ OLD_FILES+=usr/lib/krb5/plugins/preauth/test.so
OLD_FILES+=usr/lib/krb5/plugins/tls/k5tls.so
OLD_FILES+=usr/lib/libcom_err.a
OLD_LIBS+=usr/lib/libcom_err.so
-OLD_LIBS+=usr/lib/libcom_err.so.121
+OLD_LIBS+=usr/lib/libcom_err.so.122
OLD_FILES+=usr/lib/libgssapi_krb5.so
-OLD_LIBS+=usr/lib/libgssapi_krb5.so.121
+OLD_LIBS+=usr/lib/libgssapi_krb5.so.122
OLD_FILES+=usr/lib/libgssrpc.so
-OLD_LIBS+=usr/lib/libgssrpc.so.121
+OLD_LIBS+=usr/lib/libgssrpc.so.122
OLD_FILES+=usr/lib/libk5crypto.so
-OLD_LIBS+=usr/lib/libk5crypto.so.121
+OLD_LIBS+=usr/lib/libk5crypto.so.122
OLD_FILES+=usr/lib/libkadm5clnt.so
OLD_FILES+=usr/lib/libkadm5clnt_mit.so
-OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.121
+OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.122
OLD_FILES+=usr/lib/libkadm5srv.so
OLD_FILES+=usr/lib/libkadm5srv_mit.so
-OLD_LIBS+=usr/lib/libkadm5srv_mit.so.121
+OLD_LIBS+=usr/lib/libkadm5srv_mit.so.122
OLD_FILES+=usr/lib/libkdb5.so
-OLD_LIBS+=usr/lib/libkdb5.so.121
+OLD_LIBS+=usr/lib/libkdb5.so.122
OLD_FILES+=usr/lib/libkrad.so
-OLD_LIBS+=usr/lib/libkrad.so.121
+OLD_LIBS+=usr/lib/libkrad.so.122
OLD_FILES+=usr/lib/libkrb5.so
-OLD_LIBS+=usr/lib/libkrb5.so.121
+OLD_LIBS+=usr/lib/libkrb5.so.122
OLD_FILES+=usr/lib/libkrb5profile.a
OLD_FILES+=usr/lib/libkrb5profile.so
-OLD_LIBS+=usr/lib/libkrb5profile.so.121
+OLD_LIBS+=usr/lib/libkrb5profile.so.122
OLD_FILES+=usr/lib/libkrb5support.a
OLD_FILES+=usr/lib/libkrb5support.so
-OLD_LIBS+=usr/lib/libkrb5support.so.121
+OLD_LIBS+=usr/lib/libkrb5support.so.122
OLD_FILES+=usr/lib/libverto.so
-OLD_LIBS+=usr/lib/libverto.so.121
+OLD_LIBS+=usr/lib/libverto.so.122
OLD_FILES+=usr/libdata/pkgconfig/gssrpc.pc
OLD_FILES+=usr/libdata/pkgconfig/kadm-client.pc
OLD_FILES+=usr/libdata/pkgconfig/kadm-server.pc
@@ -5770,36 +5768,36 @@ OLD_FILES+=usr/lib/krb5/plugins/preauth/pkinit.so
OLD_FILES+=usr/lib/krb5/plugins/preauth/spake.so
OLD_FILES+=usr/lib/krb5/plugins/preauth/test.so
OLD_FILES+=usr/lib/krb5/plugins/tls/k5tls.so
-OLD_LIBS+=usr/lib/libcom_err.so.121
-OLD_LIBS+=usr/lib/libgssapi_krb5.so.121
+OLD_LIBS+=usr/lib/libcom_err.so.122
+OLD_LIBS+=usr/lib/libgssapi_krb5.so.122
OLD_FILES+=usr/lib/libgssrpc.a
OLD_FILES+=usr/lib/libgssrpc.so
-OLD_LIBS+=usr/lib/libgssrpc.so.121
+OLD_LIBS+=usr/lib/libgssrpc.so.122
OLD_FILES+=usr/lib/libk5crypto.a
OLD_FILES+=usr/lib/libk5crypto.so
-OLD_LIBS+=usr/lib/libk5crypto.so.121
+OLD_LIBS+=usr/lib/libk5crypto.so.122
OLD_FILES+=usr/lib/libkadm5clnt_mit.a
OLD_FILES+=usr/lib/libkadm5clnt_mit.so
-OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.121
+OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.122
OLD_FILES+=usr/lib/libkadm5srv_mit.a
OLD_FILES+=usr/lib/libkadm5srv_mit.so
-OLD_LIBS+=usr/lib/libkadm5srv_mit.so.121
+OLD_LIBS+=usr/lib/libkadm5srv_mit.so.122
OLD_FILES+=usr/lib/libkdb5.a
OLD_FILES+=usr/lib/libkdb5.so
-OLD_LIBS+=usr/lib/libkdb5.so.121
+OLD_LIBS+=usr/lib/libkdb5.so.122
OLD_FILES+=usr/lib/libkrad.so
OLD_FILES+=usr/lib/libkrad.a
-OLD_LIBS+=usr/lib/libkrad.so.121
-OLD_LIBS+=usr/lib/libkrb5.so.121
+OLD_LIBS+=usr/lib/libkrad.so.122
+OLD_LIBS+=usr/lib/libkrb5.so.122
OLD_FILES+=usr/lib/libkrb5profile.a
OLD_FILES+=usr/lib/libkrb5profile.so
-OLD_LIBS+=usr/lib/libkrb5profile.so.121
+OLD_LIBS+=usr/lib/libkrb5profile.so.122
OLD_FILES+=usr/lib/libkrb5support.a
OLD_FILES+=usr/lib/libkrb5support.so
-OLD_LIBS+=usr/lib/libkrb5support.so.121
+OLD_LIBS+=usr/lib/libkrb5support.so.122
OLD_FILES+=usr/lib/libverto.a
OLD_FILES+=usr/lib/libverto.so
-OLD_LIBS+=usr/lib/libverto.so.121
+OLD_LIBS+=usr/lib/libverto.so.122
OLD_FILES+=usr/libdata/pkgconfig/gssrpc.pc
OLD_FILES+=usr/libdata/pkgconfig/kadm-client.pc
OLD_FILES+=usr/libdata/pkgconfig/kadm-server.pc
@@ -12277,7 +12275,6 @@ OLD_FILES+=usr/share/snmp/mibs/BEGEMOT-WIRELESS-MIB.txt
.if ${MK_ZFS} == no
OLD_FILES+=boot/gptzfsboot
-OLD_FILES+=boot/zfsboot
OLD_FILES+=boot/zfsloader
OLD_FILES+=etc/rc.d/zfs
OLD_FILES+=etc/rc.d/zfsbe
@@ -12380,7 +12377,6 @@ OLD_FILES+=usr/share/man/man8/gptzfsboot.8.gz
OLD_FILES+=usr/share/man/man8/zdb.8.gz
OLD_FILES+=usr/share/man/man8/zfs-program.8.gz
OLD_FILES+=usr/share/man/man8/zfs.8.gz
-OLD_FILES+=usr/share/man/man8/zfsboot.8.gz
OLD_FILES+=usr/share/man/man8/zfsbootcfg.8.gz
OLD_FILES+=usr/share/man/man8/zfsd.8.gz
OLD_FILES+=usr/share/man/man8/zfsloader.8.gz
diff --git a/usr.bin/bmake/Makefile.inc b/usr.bin/bmake/Makefile.inc
index 5140bd18bb37..a064563a2283 100644
--- a/usr.bin/bmake/Makefile.inc
+++ b/usr.bin/bmake/Makefile.inc
@@ -3,6 +3,8 @@ MK_host_egacy= no
.sinclude <src.opts.mk>
+PACKAGE?= bmake
+
.if defined(.PARSEDIR)
# make sure this is available to unit-tests/Makefile
.export SRCTOP
diff --git a/usr.bin/clang/clang-scan-deps/Makefile b/usr.bin/clang/clang-scan-deps/Makefile
index 16fecdb88867..8da12faccc45 100644
--- a/usr.bin/clang/clang-scan-deps/Makefile
+++ b/usr.bin/clang/clang-scan-deps/Makefile
@@ -10,13 +10,14 @@ SRCS+= ClangScanDeps.cpp \
.include "${SRCTOP}/lib/clang/clang.pre.mk"
CFLAGS+= -I${.OBJDIR}
-TDFILE= Opts.td
-INCFILE= ${TDFILE:.td=.inc}
+
+INCFILE= Opts.inc
+TDFILE= ${LLVM_BASE}/${SRCDIR}/Opts.td
GENOPT= -gen-opt-parser-defs
${INCFILE}: ${TDFILE}
${LLVM_TBLGEN} ${GENOPT} -I ${LLVM_SRCS}/include -d ${.TARGET:C/$/.d/} \
- -o ${.TARGET} ${.ALLSRC}
+ -o ${.TARGET} ${TDFILE}
TGHDRS+= ${INCFILE}
DEPENDFILES+= ${TGHDRS:C/$/.d/}
diff --git a/usr.bin/clang/clang.prog.mk b/usr.bin/clang/clang.prog.mk
index 36c601bcbe36..3baf3d0baf0f 100644
--- a/usr.bin/clang/clang.prog.mk
+++ b/usr.bin/clang/clang.prog.mk
@@ -31,7 +31,7 @@ DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${LIBPRIV}${lib}.${LIBEXT}
LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${LIBPRIV}${lib}.${LIBEXT}
.endfor
-PACKAGE= clang
+PACKAGE?= clang
.if ${.MAKE.OS} == "FreeBSD" || !defined(BOOTSTRAPPING)
LIBADD+= execinfo
diff --git a/usr.bin/clang/llvm-ar/Makefile b/usr.bin/clang/llvm-ar/Makefile
index fd12b1ddef57..e019c89b3581 100644
--- a/usr.bin/clang/llvm-ar/Makefile
+++ b/usr.bin/clang/llvm-ar/Makefile
@@ -1,5 +1,6 @@
.include <src.opts.mk>
+PACKAGE= toolchain
PROG_CXX= llvm-ar
MAN= llvm-ar.1 llvm-ranlib.1
diff --git a/usr.bin/clang/llvm-nm/Makefile b/usr.bin/clang/llvm-nm/Makefile
index 825faf74719b..7e089d1b408d 100644
--- a/usr.bin/clang/llvm-nm/Makefile
+++ b/usr.bin/clang/llvm-nm/Makefile
@@ -1,5 +1,6 @@
.include <src.opts.mk>
+PACKAGE= toolchain
PROG_CXX= llvm-nm
SRCDIR= llvm/tools/llvm-nm
diff --git a/usr.bin/clang/llvm-size/Makefile b/usr.bin/clang/llvm-size/Makefile
index 2860a0069538..9d3505cdd319 100644
--- a/usr.bin/clang/llvm-size/Makefile
+++ b/usr.bin/clang/llvm-size/Makefile
@@ -1,5 +1,6 @@
.include <src.opts.mk>
+PACKAGE= toolchain
PROG_CXX= llvm-size
SRCDIR= llvm/tools/llvm-size
diff --git a/usr.bin/clang/llvm.prog.mk b/usr.bin/clang/llvm.prog.mk
index f702082e31bd..c369fe8d5944 100644
--- a/usr.bin/clang/llvm.prog.mk
+++ b/usr.bin/clang/llvm.prog.mk
@@ -25,7 +25,7 @@ DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${LIBPRIV}${lib}.${LIBEXT}
LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${LIBPRIV}${lib}.${LIBEXT}
.endfor
-PACKAGE= clang
+PACKAGE?= clang
.if ${.MAKE.OS} == "FreeBSD" || !defined(BOOTSTRAPPING)
LIBADD+= execinfo
diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1
index 3012ae472015..b16c4bcc95a2 100644
--- a/usr.bin/find/find.1
+++ b/usr.bin/find/find.1
@@ -28,7 +28,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd November 23, 2024
+.Dd July 26, 2025
.Dt FIND 1
.Os
.Sh NAME
@@ -1096,7 +1096,77 @@ as zero;
It is not yet implemented.
.It Format:
One or two characters, described below, which indicates the information to display.
-XXX need to write this.
+.Bl -tag -width Ds
+.It p
+Path to the file.
+.It f
+Filename without directories.
+.It h
+Path relative to the starting point, or '.' if that's empty for some reason.
+.It P
+Unimplemented -- File with command line arg.
+.It H
+Unimplemented -- Command line arg.
+.It g
+gid in human readable form.
+.It G
+gid as a number.
+.It u
+uid in human readable form.
+.It U
+uid as a number.
+.It m
+File permission mode in octal.
+.It M
+File mode in
+.Xr ls 1
+standard form.
+.It k
+File size in KiB (units of 1024 bytes).
+.It b
+File size in blocks (always in 512-byte units, even if the underlying
+storage block size differs).
+.It s
+Size in bytes of the file.
+.It S
+Sparseness of the file.
+The blocks the file occupies times 512 divided by the file size.
+.It d
+Depth in the tree.
+.It D
+Device number for the file.
+.It F
+Unimplemented -- Filesystem type where the file resides.
+.It l
+Object of the symbolic link.
+.It i
+Inode of the file.
+.It n
+Number of hard links.
+.It y
+Unimplemented -- Type of the file.
+.It Y
+Unimplemented -- Type of the file with loop detection.
+.It a
+Access time of the file.
+.It A
+Access time of the file in strftime format.
+Takes an additional argument.
+.It B
+Birth time of the file in strftime format.
+Takes an additional argument.
+.It c
+Creation time of the file.
+.It C
+Creation time of the file in strftime format.
+Takes an additional argument.
+.It t
+Modification time of the file.
+.It T
+Modification time of the file in strftime format.
+Takes an additional argument.
+.El
+Any format not listed is not supported, though the error changes.
.El
.El
.Sh ENVIRONMENT
diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c
index 11455b395022..b260a71ef4a9 100644
--- a/usr.bin/find/function.c
+++ b/usr.bin/find/function.c
@@ -1449,11 +1449,12 @@ c_printf(OPTION *option, char ***argvp)
{
PLAN *new;
- isoutput = 1;
/*
* XXX We could scan the format looking for stat-dependent formats, and
- * turn off the stat if there's none: `%p`/`%f`/`%h` don't need a stat.
+ * leave the nostat bit set for trivial cases: `%p`/`%f`/`%h`.
*/
+ isoutput = 1;
+ ftsoptions &= ~FTS_NOSTAT;
new = palloc(option);
new->c_data = nextarg(option, argvp);
diff --git a/usr.bin/pom/pom.c b/usr.bin/pom/pom.c
index db0033373b47..bcfbcadc8238 100644
--- a/usr.bin/pom/pom.c
+++ b/usr.bin/pom/pom.c
@@ -83,6 +83,7 @@ main(int argc, char **argv)
err(1, "unable to limit capabitilities for stdio");
caph_cache_catpages();
+ caph_cache_tzdata();
if (caph_enter() < 0)
err(1, "unable to enter capability mode");
diff --git a/usr.bin/sockstat/Makefile b/usr.bin/sockstat/Makefile
index 188432dfc27e..7254511f21c6 100644
--- a/usr.bin/sockstat/Makefile
+++ b/usr.bin/sockstat/Makefile
@@ -2,7 +2,7 @@
PROG= sockstat
-LIBADD= jail
+LIBADD= jail xo
.if ${MK_CASPER} != "no"
LIBADD+= casper
diff --git a/usr.bin/sockstat/sockstat.1 b/usr.bin/sockstat/sockstat.1
index 4832a09764fd..091911cd0879 100644
--- a/usr.bin/sockstat/sockstat.1
+++ b/usr.bin/sockstat/sockstat.1
@@ -25,7 +25,7 @@
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd June 30, 2025
+.Dd July 17, 2025
.Dt SOCKSTAT 1
.Os
.Sh NAME
@@ -33,6 +33,7 @@
.Nd list open sockets
.Sh SYNOPSIS
.Nm
+.Op Fl -libxo
.Op Fl 46ACcfIiLlnqSsUuvw
.Op Fl j Ar jail
.Op Fl p Ar ports
@@ -46,6 +47,13 @@ domain sockets.
.Pp
The following options are available:
.Bl -tag -width Fl
+.It Fl -libxo
+Generate output via
+.Xr libxo 3
+in a selection of different human and machine readable formats.
+See
+.Xr xo_options 7
+for details on command line arguments.
.It Fl 4
Show
.Dv AF_INET
@@ -229,6 +237,11 @@ Show TCP IPv6 sockets which are listening and connected (default):
.Bd -literal -offset indent
$ sockstat -6 -P tcp
.Ed
+.Pp
+Show all sockets in JSON format with neat alignment:
+.Bd -literal -offset indent
+$ sockstat --libxo json,pretty
+.Ed
.Sh SEE ALSO
.Xr fstat 1 ,
.Xr netstat 1 ,
@@ -237,6 +250,8 @@ $ sockstat -6 -P tcp
.Xr inet 4 ,
.Xr inet6 4 ,
.Xr protocols 5
+.Xr libxo 3 ,
+.Xr xo_options 7
.Sh HISTORY
The
.Nm
diff --git a/usr.bin/sockstat/sockstat.c b/usr.bin/sockstat/sockstat.c
index d0540c54a1aa..7355eaa272a0 100644
--- a/usr.bin/sockstat/sockstat.c
+++ b/usr.bin/sockstat/sockstat.c
@@ -55,7 +55,6 @@
#include <capsicum_helpers.h>
#include <ctype.h>
-#include <err.h>
#include <errno.h>
#include <inttypes.h>
#include <jail.h>
@@ -67,6 +66,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <libxo/xo.h>
#include <libcasper.h>
#include <casper/cap_net.h>
@@ -74,6 +74,7 @@
#include <casper/cap_pwd.h>
#include <casper/cap_sysctl.h>
+#define SOCKSTAT_XO_VERSION "1"
#define sstosin(ss) ((struct sockaddr_in *)(ss))
#define sstosin6(ss) ((struct sockaddr_in6 *)(ss))
#define sstosun(ss) ((struct sockaddr_un *)(ss))
@@ -197,7 +198,7 @@ static bool
_check_ksize(size_t received_size, size_t expected_size, const char *struct_name)
{
if (received_size != expected_size) {
- warnx("%s size mismatch: expected %zd, received %zd",
+ xo_warnx("%s size mismatch: expected %zd, received %zd",
struct_name, expected_size, received_size);
return false;
}
@@ -209,7 +210,7 @@ static void
_enforce_ksize(size_t received_size, size_t expected_size, const char *struct_name)
{
if (received_size != expected_size) {
- errx(1, "fatal: struct %s size mismatch: expected %zd, received %zd",
+ xo_errx(1, "fatal: struct %s size mismatch: expected %zd, received %zd",
struct_name, expected_size, received_size);
}
}
@@ -227,7 +228,7 @@ get_proto_type(const char *proto)
else
pent = getprotobyname(proto);
if (pent == NULL) {
- warn("cap_getprotobyname");
+ xo_warn("cap_getprotobyname");
return (-1);
}
return (pent->p_proto);
@@ -248,7 +249,7 @@ init_protos(int num)
}
if ((protos = malloc(sizeof(int) * proto_count)) == NULL)
- err(1, "malloc");
+ xo_err(1, "malloc");
numprotos = proto_count;
}
@@ -282,17 +283,17 @@ parse_ports(const char *portspec)
if (ports == NULL)
if ((ports = calloc(65536 / INT_BIT, sizeof(int))) == NULL)
- err(1, "calloc()");
+ xo_err(1, "calloc()");
p = portspec;
while (*p != '\0') {
if (!isdigit(*p))
- errx(1, "syntax error in port range");
+ xo_errx(1, "syntax error in port range");
for (q = p; *q != '\0' && isdigit(*q); ++q)
/* nothing */ ;
for (port = 0; p < q; ++p)
port = port * 10 + digittoint(*p);
if (port < 0 || port > 65535)
- errx(1, "invalid port number");
+ xo_errx(1, "invalid port number");
SET_PORT(port);
switch (*p) {
case '-':
@@ -310,7 +311,7 @@ parse_ports(const char *portspec)
for (end = 0; p < q; ++p)
end = end * 10 + digittoint(*p);
if (end < port || end > 65535)
- errx(1, "invalid port number");
+ xo_errx(1, "invalid port number");
while (port++ < end)
SET_PORT(port);
if (*p == ',')
@@ -395,15 +396,15 @@ gather_sctp(void)
varname = "net.inet.sctp.assoclist";
if (cap_sysctlbyname(capsysctl, varname, 0, &len, 0, 0) < 0) {
if (errno != ENOENT)
- err(1, "cap_sysctlbyname()");
+ xo_err(1, "cap_sysctlbyname()");
return;
}
if ((buf = (char *)malloc(len)) == NULL) {
- err(1, "malloc()");
+ xo_err(1, "malloc()");
return;
}
if (cap_sysctlbyname(capsysctl, varname, buf, &len, 0, 0) < 0) {
- err(1, "cap_sysctlbyname()");
+ xo_err(1, "cap_sysctlbyname()");
free(buf);
return;
}
@@ -411,7 +412,7 @@ gather_sctp(void)
offset = sizeof(struct xsctp_inpcb);
while ((offset < len) && (xinpcb->last == 0)) {
if ((sock = calloc(1, sizeof *sock)) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
sock->socket = xinpcb->socket;
sock->proto = IPPROTO_SCTP;
sock->protoname = "sctp";
@@ -439,7 +440,7 @@ gather_sctp(void)
if (xladdr->last == 1)
break;
if ((laddr = calloc(1, sizeof(struct addr))) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
switch (xladdr->address.sa.sa_family) {
case AF_INET:
#define __IN_IS_ADDR_LOOPBACK(pina) \
@@ -461,7 +462,7 @@ gather_sctp(void)
htons(xinpcb->local_port));
break;
default:
- errx(1, "address family %d not supported",
+ xo_errx(1, "address family %d not supported",
xladdr->address.sa.sa_family);
}
laddr->next = NULL;
@@ -474,7 +475,7 @@ gather_sctp(void)
if (sock->laddr == NULL) {
if ((sock->laddr =
calloc(1, sizeof(struct addr))) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
sock->laddr->address.ss_family = sock->family;
if (sock->family == AF_INET)
sock->laddr->address.ss_len =
@@ -485,7 +486,7 @@ gather_sctp(void)
local_all_loopback = 0;
}
if ((sock->faddr = calloc(1, sizeof(struct addr))) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
sock->faddr->address.ss_family = sock->family;
if (sock->family == AF_INET)
sock->faddr->address.ss_len =
@@ -512,7 +513,7 @@ gather_sctp(void)
no_stcb = 0;
if (opt_c) {
if ((sock = calloc(1, sizeof *sock)) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
sock->socket = xinpcb->socket;
sock->proto = IPPROTO_SCTP;
sock->protoname = "sctp";
@@ -542,7 +543,7 @@ gather_sctp(void)
continue;
laddr = calloc(1, sizeof(struct addr));
if (laddr == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
switch (xladdr->address.sa.sa_family) {
case AF_INET:
#define __IN_IS_ADDR_LOOPBACK(pina) \
@@ -564,7 +565,7 @@ gather_sctp(void)
htons(xstcb->local_port));
break;
default:
- errx(1,
+ xo_errx(1,
"address family %d not supported",
xladdr->address.sa.sa_family);
}
@@ -587,7 +588,7 @@ gather_sctp(void)
continue;
faddr = calloc(1, sizeof(struct addr));
if (faddr == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
switch (xraddr->address.sa.sa_family) {
case AF_INET:
#define __IN_IS_ADDR_LOOPBACK(pina) \
@@ -609,7 +610,7 @@ gather_sctp(void)
htons(xstcb->remote_port));
break;
default:
- errx(1,
+ xo_errx(1,
"address family %d not supported",
xraddr->address.sa.sa_family);
}
@@ -673,7 +674,7 @@ gather_inet(int proto)
protoname = "div";
break;
default:
- errx(1, "protocol %d not supported", proto);
+ xo_errx(1, "protocol %d not supported", proto);
}
buf = NULL;
@@ -682,7 +683,7 @@ gather_inet(int proto)
do {
for (;;) {
if ((buf = realloc(buf, bufsize)) == NULL)
- err(1, "realloc()");
+ xo_err(1, "realloc()");
len = bufsize;
if (cap_sysctlbyname(capsysctl, varname, buf, &len,
NULL, 0) == 0)
@@ -690,7 +691,7 @@ gather_inet(int proto)
if (errno == ENOENT)
goto out;
if (errno != ENOMEM || len != bufsize)
- err(1, "cap_sysctlbyname()");
+ xo_err(1, "cap_sysctlbyname()");
bufsize *= 2;
}
xig = (struct xinpgen *)buf;
@@ -701,7 +702,7 @@ gather_inet(int proto)
} while (xig->xig_gen != exig->xig_gen && retry--);
if (xig->xig_gen != exig->xig_gen && opt_v)
- warnx("warning: data may be inconsistent");
+ xo_warnx("warning: data may be inconsistent");
for (;;) {
xig = (struct xinpgen *)(void *)((char *)xig + xig->xig_len);
@@ -722,7 +723,7 @@ gather_inet(int proto)
goto out;
break;
default:
- errx(1, "protocol %d not supported", proto);
+ xo_errx(1, "protocol %d not supported", proto);
}
so = &xip->xi_socket;
if ((xip->inp_vflag & vflag) == 0)
@@ -748,15 +749,15 @@ gather_inet(int proto)
continue;
} else {
if (opt_v)
- warnx("invalid vflag 0x%x", xip->inp_vflag);
+ xo_warnx("invalid vflag 0x%x", xip->inp_vflag);
continue;
}
if ((sock = calloc(1, sizeof(*sock))) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
if ((laddr = calloc(1, sizeof *laddr)) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
if ((faddr = calloc(1, sizeof *faddr)) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
sock->socket = so->xso_so;
sock->pcb = so->so_pcb;
sock->splice_socket = so->so_splice_so;
@@ -822,7 +823,9 @@ gather_unix(int proto)
break;
case SOCK_SEQPACKET:
varname = "net.local.seqpacket.pcblist";
- protoname = "seqpac";
+ protoname = (xo_get_style(NULL) == XO_STYLE_TEXT)
+ ? "seqpac"
+ : "seqpacket";
break;
default:
abort();
@@ -833,13 +836,13 @@ gather_unix(int proto)
do {
for (;;) {
if ((buf = realloc(buf, bufsize)) == NULL)
- err(1, "realloc()");
+ xo_err(1, "realloc()");
len = bufsize;
if (cap_sysctlbyname(capsysctl, varname, buf, &len,
NULL, 0) == 0)
break;
if (errno != ENOMEM || len != bufsize)
- err(1, "cap_sysctlbyname()");
+ xo_err(1, "cap_sysctlbyname()");
bufsize *= 2;
}
xug = (struct xunpgen *)buf;
@@ -851,7 +854,7 @@ gather_unix(int proto)
} while (xug->xug_gen != exug->xug_gen && retry--);
if (xug->xug_gen != exug->xug_gen && opt_v)
- warnx("warning: data may be inconsistent");
+ xo_warnx("warning: data may be inconsistent");
for (;;) {
xug = (struct xunpgen *)(void *)((char *)xug + xug->xug_len);
@@ -864,11 +867,11 @@ gather_unix(int proto)
(xup->unp_conn != 0 && !opt_c))
continue;
if ((sock = calloc(1, sizeof(*sock))) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
if ((laddr = calloc(1, sizeof *laddr)) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
if ((faddr = calloc(1, sizeof *faddr)) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
sock->socket = xup->xu_socket.xso_so;
sock->pcb = xup->xu_unpp;
sock->proto = proto;
@@ -899,21 +902,21 @@ getfiles(void)
olen = len = sizeof(*xfiles);
if ((xfiles = malloc(len)) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
while (cap_sysctlbyname(capsysctl, "kern.file", xfiles, &len, 0, 0)
== -1) {
if (errno != ENOMEM || len != olen)
- err(1, "cap_sysctlbyname()");
+ xo_err(1, "cap_sysctlbyname()");
olen = len *= 2;
if ((xfiles = realloc(xfiles, len)) == NULL)
- err(1, "realloc()");
+ xo_err(1, "realloc()");
}
if (len > 0)
enforce_ksize(xfiles->xf_size, struct xfile);
nfiles = len / sizeof(*xfiles);
if ((files = malloc(nfiles * sizeof(struct file))) == NULL)
- err(1, "malloc()");
+ xo_err(1, "malloc()");
for (int i = 0; i < nfiles; i++) {
files[i].xf_data = xfiles[i].xf_data;
@@ -932,6 +935,7 @@ formataddr(struct sockaddr_storage *ss, char *buf, size_t bufsize)
struct sockaddr_un *sun;
char addrstr[NI_MAXHOST] = { '\0', '\0' };
int error, off, port = 0;
+ const bool is_text_style = (xo_get_style(NULL) == XO_STYLE_TEXT);
switch (ss->ss_family) {
case AF_INET:
@@ -947,6 +951,11 @@ formataddr(struct sockaddr_storage *ss, char *buf, size_t bufsize)
case AF_UNIX:
sun = sstosun(ss);
off = (int)((char *)&sun->sun_path - (char *)sun);
+ if (!is_text_style) {
+ xo_emit("{:path/%.*s}", sun->sun_len - off,
+ sun->sun_path);
+ return 0;
+ }
return snprintf(buf, bufsize, "%.*s",
sun->sun_len - off, sun->sun_path);
}
@@ -954,7 +963,12 @@ formataddr(struct sockaddr_storage *ss, char *buf, size_t bufsize)
error = cap_getnameinfo(capnet, sstosa(ss), ss->ss_len,
addrstr, sizeof(addrstr), NULL, 0, NI_NUMERICHOST);
if (error)
- errx(1, "cap_getnameinfo()");
+ xo_errx(1, "cap_getnameinfo()");
+ }
+ if (!is_text_style) {
+ xo_emit("{:address/%s}", addrstr);
+ xo_emit("{:port/%d}", port);
+ return 0;
}
if (port == 0)
return snprintf(buf, bufsize, "%s:*", addrstr);
@@ -977,7 +991,7 @@ getprocname(pid_t pid)
== -1) {
/* Do not warn if the process exits before we get its name. */
if (errno != ESRCH)
- warn("cap_sysctl()");
+ xo_warn("cap_sysctl()");
return ("??");
}
return (proc.ki_comm);
@@ -999,7 +1013,7 @@ getprocjid(pid_t pid)
== -1) {
/* Do not warn if the process exits before we get its jid. */
if (errno != ESRCH)
- warn("cap_sysctl()");
+ xo_warn("cap_sysctl()");
return (-1);
}
return (proc.ki_jid);
@@ -1099,13 +1113,15 @@ format_unix_faddr(struct addr *faddr, char *buf, size_t bufsize) {
#define SAFESIZE (buf == NULL ? 0 : bufsize - pos)
size_t pos = 0;
- /* Remote peer we connect(2) to, if any. */
+ const bool is_text_style = (xo_get_style(NULL) == XO_STYLE_TEXT);
if (faddr->conn != 0) {
+ /* Remote peer we connect(2) to, if any. */
struct sock *p;
- pos += strlcpy(SAFEBUF, "-> ", SAFESIZE);
+ if (is_text_style)
+ pos += strlcpy(SAFEBUF, "-> ", SAFESIZE);
p = RB_FIND(pcbs_t, &pcbs,
&(struct sock){ .pcb = faddr->conn });
- if (__predict_false(p == NULL)) {
+ if (__predict_false(p == NULL) && is_text_style) {
/* XXGL: can this happen at all? */
pos += snprintf(SAFEBUF, SAFESIZE, "??");
} else if (p->laddr->address.ss_len == 0) {
@@ -1114,34 +1130,52 @@ format_unix_faddr(struct addr *faddr, char *buf, size_t bufsize) {
&(struct file){ .xf_data =
p->socket });
if (f != NULL) {
- pos += snprintf(SAFEBUF, SAFESIZE, "[%lu %d]",
- (u_long)f->xf_pid, f->xf_fd);
+ if (is_text_style) {
+ pos += snprintf(SAFEBUF, SAFESIZE,
+ "[%lu %d]", (u_long)f->xf_pid,
+ f->xf_fd);
+ } else {
+ xo_open_list("connections");
+ xo_open_instance("connections");
+ xo_emit("{:pid/%lu}", (u_long)f->xf_pid);
+ xo_emit("{:fd/%d}", f->xf_fd);
+ xo_close_instance("connections");
+ xo_close_list("connections");
+ }
}
} else
pos += formataddr(&p->laddr->address,
SAFEBUF, SAFESIZE);
- }
- /* Remote peer(s) connect(2)ed to us, if any. */
- if (faddr->firstref != 0) {
+ } else if (faddr->firstref != 0) {
+ /* Remote peer(s) connect(2)ed to us, if any. */
struct sock *p;
struct file *f;
kvaddr_t ref = faddr->firstref;
bool fref = true;
- pos += snprintf(SAFEBUF, SAFESIZE, " <- ");
-
+ if (is_text_style)
+ pos += snprintf(SAFEBUF, SAFESIZE, " <- ");
+ xo_open_list("connections");
while ((p = RB_FIND(pcbs_t, &pcbs,
&(struct sock){ .pcb = ref })) != 0) {
f = RB_FIND(files_t, &ftree,
&(struct file){ .xf_data = p->socket });
if (f != NULL) {
- pos += snprintf(SAFEBUF, SAFESIZE,
- "%s[%lu %d]", fref ? "" : ",",
- (u_long)f->xf_pid, f->xf_fd);
+ if (is_text_style) {
+ pos += snprintf(SAFEBUF, SAFESIZE,
+ "%s[%lu %d]", fref ? "" : ",",
+ (u_long)f->xf_pid, f->xf_fd);
+ } else {
+ xo_open_instance("connections");
+ xo_emit("{:pid/%lu}", (u_long)f->xf_pid);
+ xo_emit("{:fd/%d}", f->xf_fd);
+ xo_close_instance("connections");
+ }
}
ref = p->faddr->nextref;
fref = false;
}
+ xo_close_list("connections");
}
return pos;
}
@@ -1183,7 +1217,7 @@ calculate_sock_column_widths(struct col_widths *cw, struct sock *s)
while (laddr != NULL || faddr != NULL) {
if (opt_w && s->family == AF_UNIX) {
if ((laddr == NULL) || (faddr == NULL))
- errx(1, "laddr = %p or faddr = %p is NULL",
+ xo_errx(1, "laddr = %p or faddr = %p is NULL",
(void *)laddr, (void *)faddr);
if (laddr->address.ss_len > 0)
len = formataddr(&laddr->address, NULL, 0);
@@ -1298,6 +1332,7 @@ calculate_column_widths(struct col_widths *cw)
struct sock *s;
struct passwd *pwd;
+ cap_setpassent(cappwd, 1);
for (xf = files, n = 0; n < nfiles; ++n, ++xf) {
if (xf->xf_data == 0)
continue;
@@ -1345,65 +1380,104 @@ display_sock(struct sock *s, struct col_widths *cw, char *buf, size_t bufsize)
laddr = s->laddr;
faddr = s->faddr;
first = true;
+ const bool is_text_style = (xo_get_style(NULL) == XO_STYLE_TEXT);
snprintf(buf, bufsize, "%s%s%s",
s->protoname,
s->vflag & INP_IPV4 ? "4" : "",
s->vflag & INP_IPV6 ? "6" : "");
- printf(" %-*s", cw->proto, buf);
+ xo_emit(" {:proto/%-*s}", cw->proto, buf);
while (laddr != NULL || faddr != NULL) {
if (s->family == AF_UNIX) {
if ((laddr == NULL) || (faddr == NULL))
- errx(1, "laddr = %p or faddr = %p is NULL",
+ xo_errx(1, "laddr = %p or faddr = %p is NULL",
(void *)laddr, (void *)faddr);
- if (laddr->address.ss_len > 0)
+ if (laddr->address.ss_len > 0) {
+ xo_open_container("local");
formataddr(&laddr->address, buf, bufsize);
- else if (laddr->address.ss_len == 0 && faddr->conn == 0)
- strlcpy(buf, "(not connected)", bufsize);
- else
- strlcpy(buf, "??", bufsize);
- printf(" %-*.*s", cw->local_addr, cw->local_addr, buf);
- if (format_unix_faddr(faddr, buf, bufsize) == 0)
- strlcpy(buf, "??", bufsize);
- printf(" %-*.*s", cw->foreign_addr,
- cw->foreign_addr, buf);
+ if (is_text_style) {
+ xo_emit(" {:/%-*.*s}", cw->local_addr,
+ cw->local_addr, buf);
+ }
+ xo_close_container("local");
+ } else if (laddr->address.ss_len == 0 &&
+ faddr->conn == 0 && is_text_style) {
+ xo_emit(" {:/%-*.*s}", cw->local_addr,
+ cw->local_addr, "(not connected)");
+ } else if (is_text_style) {
+ xo_emit(" {:/%-*.*s}", cw->local_addr,
+ cw->local_addr, "??");
+ }
+ if (faddr->conn != 0 || faddr->firstref != 0) {
+ xo_open_container("foreign");
+ int len = format_unix_faddr(faddr, buf,
+ bufsize);
+ if (len == 0 && is_text_style)
+ xo_emit(" {:/%-*s}",
+ cw->foreign_addr, "??");
+ else if (is_text_style)
+ xo_emit(" {:/%-*.*s}", cw->foreign_addr,
+ cw->foreign_addr, buf);
+ xo_close_container("foreign");
+ } else if (is_text_style)
+ xo_emit(" {:/%-*s}", cw->foreign_addr, "??");
} else {
- if (laddr != NULL)
+ if (laddr != NULL) {
+ xo_open_container("local");
formataddr(&laddr->address, buf, bufsize);
- else
- strlcpy(buf, "??", bufsize);
- printf(" %-*.*s", cw->local_addr, cw->local_addr, buf);
- if (faddr != NULL)
+ if (is_text_style) {
+ xo_emit(" {:/%-*.*s}", cw->local_addr,
+ cw->local_addr, buf);
+ }
+ xo_close_container("local");
+ } else if (is_text_style)
+ xo_emit(" {:/%-*.*s}", cw->local_addr,
+ cw->local_addr, "??");
+ if (faddr != NULL) {
+ xo_open_container("foreign");
formataddr(&faddr->address, buf, bufsize);
- else
- strlcpy(buf, "??", bufsize);
- printf(" %-*.*s", cw->foreign_addr,
- cw->foreign_addr, buf);
+ if (is_text_style) {
+ xo_emit(" {:/%-*.*s}", cw->foreign_addr,
+ cw->foreign_addr, buf);
+ }
+ xo_close_container("foreign");
+ } else if (is_text_style) {
+ xo_emit(" {:/%-*.*s}", cw->foreign_addr,
+ cw->foreign_addr, "??");
+ }
+ }
+ if (opt_A) {
+ snprintf(buf, bufsize, "%#*" PRIx64,
+ cw->pcb_kva, s->pcb);
+ xo_emit(" {:pcb-kva/%s}", buf);
}
- if (opt_A)
- printf(" %#*" PRIx64, cw->pcb_kva, s->pcb);
if (opt_f)
- printf(" %*d", cw->fib, s->fibnum);
+ xo_emit(" {:fib/%*d}", cw->fib, s->fibnum);
if (opt_I) {
if (s->splice_socket != 0) {
struct sock *sp;
sp = RB_FIND(socks_t, &socks, &(struct sock)
{ .socket = s->splice_socket });
- if (sp != NULL)
+ if (sp != NULL) {
+ xo_open_container("splice");
formataddr(&sp->laddr->address,
buf, bufsize);
- else
+ xo_close_container("splice");
+ } else if (is_text_style)
strlcpy(buf, "??", bufsize);
- } else
+ } else if (is_text_style)
strlcpy(buf, "??", bufsize);
- printf(" %-*s", cw->splice_address, buf);
+ if (is_text_style)
+ xo_emit(" {:/%-*s}", cw->splice_address, buf);
}
if (opt_i) {
if (s->proto == IPPROTO_TCP || s->proto == IPPROTO_UDP)
- printf(" %*" PRIu64, cw->inp_gencnt,
+ {
+ snprintf(buf, bufsize, "%" PRIu64,
s->inp_gencnt);
- else
- printf(" %*s", cw->inp_gencnt, "??");
+ xo_emit(" {:id/%*s}", cw->inp_gencnt, buf);
+ } else if (is_text_style)
+ xo_emit(" {:/%*s}", cw->inp_gencnt, "??");
}
if (opt_U) {
if (faddr != NULL &&
@@ -1414,10 +1488,10 @@ display_sock(struct sock *s, struct col_widths *cw, char *buf, size_t bufsize)
(s->proto == IPPROTO_TCP &&
s->state != TCPS_CLOSED &&
s->state != TCPS_LISTEN))) {
- printf(" %*u", cw->encaps,
+ xo_emit(" {:encaps/%*u}", cw->encaps,
ntohs(faddr->encaps_port));
- } else
- printf(" %*s", cw->encaps, "??");
+ } else if (is_text_style)
+ xo_emit(" {:/%*s}", cw->encaps, "??");
}
if (opt_s) {
if (faddr != NULL &&
@@ -1425,10 +1499,10 @@ display_sock(struct sock *s, struct col_widths *cw, char *buf, size_t bufsize)
s->state != SCTP_CLOSED &&
s->state != SCTP_BOUND &&
s->state != SCTP_LISTEN) {
- printf(" %-*s", cw->path_state,
+ xo_emit(" {:path-state/%-*s}", cw->path_state,
sctp_path_state(faddr->state));
- } else
- printf(" %-*s", cw->path_state, "??");
+ } else if (is_text_style)
+ xo_emit(" {:/%-*s}", cw->path_state, "??");
}
if (first) {
if (opt_s) {
@@ -1436,47 +1510,52 @@ display_sock(struct sock *s, struct col_widths *cw, char *buf, size_t bufsize)
s->proto == IPPROTO_TCP) {
switch (s->proto) {
case IPPROTO_SCTP:
- printf(" %-*s", cw->conn_state,
- sctp_conn_state(s->state));
+ xo_emit(" {:path-state/%-*s}",
+ cw->path_state,
+ sctp_path_state(
+ faddr->state));
break;
case IPPROTO_TCP:
if (s->state >= 0 &&
s->state < TCP_NSTATES)
- printf(" %-*s",
- cw->conn_state,
- tcpstates[s->state]);
- else
- printf(" %-*s",
- cw->conn_state, "??");
+ xo_emit(" {:conn-state/%-*s}",
+ cw->conn_state,
+ tcpstates[s->state]);
+ else if (is_text_style)
+ xo_emit(" {:/%-*s}",
+ cw->conn_state, "??");
break;
}
- } else
- printf(" %-*s", cw->conn_state, "??");
+ } else if (is_text_style)
+ xo_emit(" {:/%-*s}",
+ cw->conn_state, "??");
}
if (opt_S) {
if (s->proto == IPPROTO_TCP)
- printf(" %-*s", cw->stack, s->stack);
- else
- printf(" %-*s", cw->stack, "??");
+ xo_emit(" {:stack/%-*s}",
+ cw->stack, s->stack);
+ else if (is_text_style)
+ xo_emit(" {:/%-*s}",
+ cw->stack, "??");
}
if (opt_C) {
if (s->proto == IPPROTO_TCP)
- printf(" %-*s", cw->cc, s->cc);
- else
- printf(" %-*s", cw->cc, "??");
+ xo_emit(" {:cc/%-*s}", cw->cc, s->cc);
+ else if (is_text_style)
+ xo_emit(" {:/%-*s}", cw->cc, "??");
}
}
if (laddr != NULL)
laddr = laddr->next;
if (faddr != NULL)
faddr = faddr->next;
- if (laddr != NULL || faddr != NULL)
- printf("%-*s %-*s %-*s %-*s %-*s", cw->user, "",
- cw->command, "", cw->pid, "", cw->fd, "",
- cw->proto, "");
+ if (is_text_style && (laddr != NULL || faddr != NULL))
+ xo_emit("{:/%-*s} {:/%-*s} {:/%*s} {:/%*s}",
+ cw->user, "??", cw->command, "??",
+ cw->pid, "??", cw->fd, "??");
first = false;
}
- printf("\n");
+ xo_emit("\n");
}
static void
@@ -1490,56 +1569,63 @@ display(void)
const size_t bufsize = 512;
void *buf;
if ((buf = (char *)malloc(bufsize)) == NULL) {
- err(1, "malloc()");
+ xo_err(1, "malloc()");
return;
}
- cw = (struct col_widths) {
- .user = strlen("USER"),
- .command = 10,
- .pid = strlen("PID"),
- .fd = strlen("FD"),
- .proto = strlen("PROTO"),
- .local_addr = opt_w ? strlen("LOCAL ADDRESS") : 21,
- .foreign_addr = opt_w ? strlen("FOREIGN ADDRESS") : 21,
- .pcb_kva = 18,
- .fib = strlen("FIB"),
- .splice_address = strlen("SPLICE ADDRESS"),
- .inp_gencnt = strlen("ID"),
- .encaps = strlen("ENCAPS"),
- .path_state = strlen("PATH STATE"),
- .conn_state = strlen("CONN STATE"),
- .stack = strlen("STACK"),
- .cc = strlen("CC"),
- };
- calculate_column_widths(&cw);
+ if (xo_get_style(NULL) == XO_STYLE_TEXT) {
+ cw = (struct col_widths) {
+ .user = strlen("USER"),
+ .command = 10,
+ .pid = strlen("PID"),
+ .fd = strlen("FD"),
+ .proto = strlen("PROTO"),
+ .local_addr = opt_w ? strlen("LOCAL ADDRESS") : 21,
+ .foreign_addr = opt_w ? strlen("FOREIGN ADDRESS") : 21,
+ .pcb_kva = 18,
+ .fib = strlen("FIB"),
+ .splice_address = strlen("SPLICE ADDRESS"),
+ .inp_gencnt = strlen("ID"),
+ .encaps = strlen("ENCAPS"),
+ .path_state = strlen("PATH STATE"),
+ .conn_state = strlen("CONN STATE"),
+ .stack = strlen("STACK"),
+ .cc = strlen("CC"),
+ };
+ calculate_column_widths(&cw);
+ } else
+ memset(&cw, 0, sizeof(cw));
+ xo_set_version(SOCKSTAT_XO_VERSION);
+ xo_open_container("sockstat");
+ xo_open_list("socket");
if (!opt_q) {
- printf("%-*s %-*s %*s %*s %-*s %-*s %-*s",
- cw.user, "USER", cw.command, "COMMAND",
- cw.pid, "PID", cw.fd, "FD", cw.proto, "PROTO",
- cw.local_addr, "LOCAL ADDRESS",
- cw.foreign_addr,"FOREIGN ADDRESS");
+ xo_emit("{T:/%-*s} {T:/%-*s} {T:/%*s} {T:/%*s} {T:/%-*s} "
+ "{T:/%-*s} {T:/%-*s}", cw.user, "USER", cw.command,
+ "COMMAND", cw.pid, "PID", cw.fd, "FD", cw.proto,
+ "PROTO", cw.local_addr, "LOCAL ADDRESS",
+ cw.foreign_addr, "FOREIGN ADDRESS");
if (opt_A)
- printf(" %-*s", cw.pcb_kva, "PCB KVA");
+ xo_emit(" {T:/%-*s}", cw.pcb_kva, "PCB KVA");
if (opt_f)
/* RT_MAXFIBS is 65535. */
- printf(" %*s", cw.fib, "FIB");
+ xo_emit(" {T:/%*s}", cw.fib, "FIB");
if (opt_I)
- printf(" %-*s", cw.splice_address, "SPLICE ADDRESS");
+ xo_emit(" {T:/%-*s}", cw.splice_address,
+ "SPLICE ADDRESS");
if (opt_i)
- printf(" %*s", cw.inp_gencnt, "ID");
+ xo_emit(" {T:/%*s}", cw.inp_gencnt, "ID");
if (opt_U)
- printf(" %*s", cw.encaps, "ENCAPS");
+ xo_emit(" {T:/%*s}", cw.encaps, "ENCAPS");
if (opt_s) {
- printf(" %-*s", cw.path_state, "PATH STATE");
- printf(" %-*s", cw.conn_state, "CONN STATE");
+ xo_emit(" {T:/%-*s}", cw.path_state, "PATH STATE");
+ xo_emit(" {T:/%-*s}", cw.conn_state, "CONN STATE");
}
if (opt_S)
- printf(" %-*s", cw.stack, "STACK");
+ xo_emit(" {T:/%-*s}", cw.stack, "STACK");
if (opt_C)
- printf(" %-*s", cw.cc, "CC");
- printf("\n");
+ xo_emit(" {T:/%-*s}", cw.cc, "CC");
+ xo_emit("\n");
}
cap_setpassent(cappwd, 1);
for (xf = files, n = 0; n < nfiles; ++n, ++xf) {
@@ -1550,17 +1636,24 @@ display(void)
s = RB_FIND(socks_t, &socks,
&(struct sock){ .socket = xf->xf_data});
if (s != NULL && check_ports(s)) {
+ xo_open_instance("socket");
s->shown = 1;
if (opt_n ||
(pwd = cap_getpwuid(cappwd, xf->xf_uid)) == NULL)
- printf("%-*lu", cw.user, (u_long)xf->xf_uid);
+ xo_emit("{:user/%-*lu}", cw.user,
+ (u_long)xf->xf_uid);
+ else
+ xo_emit("{:user/%-*s}", cw.user, pwd->pw_name);
+ if (xo_get_style(NULL) == XO_STYLE_TEXT)
+ xo_emit(" {:/%-*.10s}", cw.command,
+ getprocname(xf->xf_pid));
else
- printf("%-*s", cw.user, pwd->pw_name);
- printf(" %-*.*s", cw.command, cw.command,
- getprocname(xf->xf_pid));
- printf(" %*lu", cw.pid, (u_long)xf->xf_pid);
- printf(" %*d", cw.fd, xf->xf_fd);
+ xo_emit(" {:command/%-*s}", cw.command,
+ getprocname(xf->xf_pid));
+ xo_emit(" {:pid/%*lu}", cw.pid, (u_long)xf->xf_pid);
+ xo_emit(" {:fd/%*d}", cw.fd, xf->xf_fd);
display_sock(s, &cw, buf, bufsize);
+ xo_close_instance("socket");
}
}
if (opt_j >= 0)
@@ -1568,20 +1661,33 @@ display(void)
SLIST_FOREACH(s, &nosocks, socket_list) {
if (!check_ports(s))
continue;
- printf("%-*s %-*s %*s %*s", cw.user, "??", cw.command, "??",
- cw.pid, "??", cw.fd, "??");
+ xo_open_instance("socket");
+ if (xo_get_style(NULL) == XO_STYLE_TEXT)
+ xo_emit("{:/%-*s} {:/%-*s} {:/%*s} {:/%*s}",
+ cw.user, "??", cw.command, "??",
+ cw.pid, "??", cw.fd, "??");
display_sock(s, &cw, buf, bufsize);
+ xo_close_instance("socket");
}
RB_FOREACH(s, socks_t, &socks) {
if (s->shown)
continue;
if (!check_ports(s))
continue;
- printf("%-*s %-*s %*s %*s", cw.user, "??", cw.command, "??",
- cw.pid, "??", cw.fd, "??");
+ xo_open_instance("socket");
+ if (xo_get_style(NULL) == XO_STYLE_TEXT)
+ xo_emit("{:/%-*s} {:/%-*s} {:/%*s} {:/%*s}",
+ cw.user, "??", cw.command, "??",
+ cw.pid, "??", cw.fd, "??");
display_sock(s, &cw, buf, bufsize);
+ xo_close_instance("socket");
}
+ xo_close_list("socket");
+ xo_close_container("sockstat");
+ if (xo_finish() < 0)
+ xo_err(1, "stdout");
free(buf);
+ cap_endpwent(cappwd);
}
static int
@@ -1597,7 +1703,7 @@ set_default_protos(void)
pname = default_protos[pindex];
prot = cap_getprotobyname(capnetdb, pname);
if (prot == NULL)
- err(1, "cap_getprotobyname: %s", pname);
+ xo_err(1, "cap_getprotobyname: %s", pname);
protos[pindex] = prot->p_proto;
}
numprotos = pindex;
@@ -1643,8 +1749,10 @@ jail_getvnet(int jid)
static void
usage(void)
{
- errx(1,
- "usage: sockstat [-46ACcfIiLlnqSsUuvw] [-j jid] [-p ports] [-P protocols]");
+ xo_error(
+"usage: sockstat [--libxo] [-46ACcfIiLlnqSsUuvw] [-j jid] [-p ports]\n"
+" [-P protocols]\n");
+ exit(1);
}
int
@@ -1657,6 +1765,9 @@ main(int argc, char *argv[])
int protos_defined = -1;
int o, i;
+ argc = xo_parse_args(argc, argv);
+ if (argc < 0)
+ exit(1);
opt_j = -1;
while ((o = getopt(argc, argv, "46ACcfIij:Llnp:P:qSsUuvw")) != -1)
switch (o) {
@@ -1687,7 +1798,7 @@ main(int argc, char *argv[])
case 'j':
opt_j = jail_getid(optarg);
if (opt_j < 0)
- errx(1, "jail_getid: %s", jail_errmsg);
+ xo_errx(1, "jail_getid: %s", jail_errmsg);
break;
case 'L':
opt_L = true;
@@ -1738,10 +1849,10 @@ main(int argc, char *argv[])
if (opt_j > 0) {
switch (jail_getvnet(opt_j)) {
case -1:
- errx(2, "jail_getvnet: %s", jail_errmsg);
+ xo_errx(2, "jail_getvnet: %s", jail_errmsg);
case JAIL_SYS_NEW:
if (jail_attach(opt_j) < 0)
- err(3, "jail_attach()");
+ xo_err(3, "jail_attach()");
/* Set back to -1 for normal output in vnet jail. */
opt_j = -1;
break;
@@ -1752,31 +1863,31 @@ main(int argc, char *argv[])
capcas = cap_init();
if (capcas == NULL)
- err(1, "Unable to contact Casper");
+ xo_err(1, "Unable to contact Casper");
if (caph_enter_casper() < 0)
- err(1, "Unable to enter capability mode");
+ xo_err(1, "Unable to enter capability mode");
capnet = cap_service_open(capcas, "system.net");
if (capnet == NULL)
- err(1, "Unable to open system.net service");
+ xo_err(1, "Unable to open system.net service");
capnetdb = cap_service_open(capcas, "system.netdb");
if (capnetdb == NULL)
- err(1, "Unable to open system.netdb service");
+ xo_err(1, "Unable to open system.netdb service");
capsysctl = cap_service_open(capcas, "system.sysctl");
if (capsysctl == NULL)
- err(1, "Unable to open system.sysctl service");
+ xo_err(1, "Unable to open system.sysctl service");
cappwd = cap_service_open(capcas, "system.pwd");
if (cappwd == NULL)
- err(1, "Unable to open system.pwd service");
+ xo_err(1, "Unable to open system.pwd service");
cap_close(capcas);
limit = cap_net_limit_init(capnet, CAPNET_ADDR2NAME);
if (limit == NULL)
- err(1, "Unable to init cap_net limits");
+ xo_err(1, "Unable to init cap_net limits");
if (cap_net_limit(limit) < 0)
- err(1, "Unable to apply limits");
+ xo_err(1, "Unable to apply limits");
if (cap_pwd_limit_cmds(cappwd, pwdcmds, nitems(pwdcmds)) < 0)
- err(1, "Unable to apply pwd commands limits");
+ xo_err(1, "Unable to apply pwd commands limits");
if (cap_pwd_limit_fields(cappwd, pwdfields, nitems(pwdfields)) < 0)
- err(1, "Unable to apply pwd commands limits");
+ xo_err(1, "Unable to apply pwd commands limits");
if ((!opt_4 && !opt_6) && protos_defined != -1)
opt_4 = opt_6 = true;
diff --git a/usr.bin/strings/Makefile b/usr.bin/strings/Makefile
index 8e2572810947..c01e775b0b89 100644
--- a/usr.bin/strings/Makefile
+++ b/usr.bin/strings/Makefile
@@ -1,5 +1,7 @@
.include <src.opts.mk>
+PACKAGE= toolchain
+
ELFTCDIR= ${SRCTOP}/contrib/elftoolchain
.PATH: ${ELFTCDIR}/strings
diff --git a/usr.bin/xargs/tests/Makefile b/usr.bin/xargs/tests/Makefile
index 1a9265b88b4e..9fa8ff11fac2 100644
--- a/usr.bin/xargs/tests/Makefile
+++ b/usr.bin/xargs/tests/Makefile
@@ -1,6 +1,6 @@
PACKAGE= tests
-TAP_TESTS_SH= legacy_test
+ATF_TESTS_SH= xargs_test
${PACKAGE}FILES+= regress.0.in
${PACKAGE}FILES+= regress.0.out
@@ -23,6 +23,5 @@ ${PACKAGE}FILES+= regress.n3.out
${PACKAGE}FILES+= regress.normal.out
${PACKAGE}FILES+= regress.quotes.in
${PACKAGE}FILES+= regress.quotes.out
-${PACKAGE}FILES+= regress.sh
.include <bsd.test.mk>
diff --git a/usr.bin/xargs/tests/legacy_test.sh b/usr.bin/xargs/tests/legacy_test.sh
deleted file mode 100644
index 3c7842d07bf0..000000000000
--- a/usr.bin/xargs/tests/legacy_test.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sh
-
-SRCDIR="$(dirname "${0}")"; export SRCDIR
-
-m4 "${SRCDIR}/../regress.m4" "${SRCDIR}/regress.sh" | sh
diff --git a/usr.bin/xargs/tests/regress.sh b/usr.bin/xargs/tests/regress.sh
deleted file mode 100644
index e65a5a703505..000000000000
--- a/usr.bin/xargs/tests/regress.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-
-echo 1..23
-
-REGRESSION_START($1)
-
-REGRESSION_TEST(`normal', `xargs echo The <${SRCDIR}/regress.in')
-REGRESSION_TEST(`I', `xargs -I% echo The % % % %% % % <${SRCDIR}/regress.in')
-REGRESSION_TEST(`J', `xargs -J% echo The % again. <${SRCDIR}/regress.in')
-REGRESSION_TEST(`L', `xargs -L3 echo <${SRCDIR}/regress.in')
-REGRESSION_TEST(`P1', `xargs -P1 echo <${SRCDIR}/regress.in')
-REGRESSION_TEST(`R', `xargs -I% -R1 echo The % % % %% % % <${SRCDIR}/regress.in')
-REGRESSION_TEST(`R-1', `xargs -I% -R-1 echo The % % % %% % % <${SRCDIR}/regress.in')
-REGRESSION_TEST(`n1', `xargs -n1 echo <${SRCDIR}/regress.in')
-REGRESSION_TEST(`n2', `xargs -n2 echo <${SRCDIR}/regress.in')
-argmax=$(sysctl -n kern.argmax)
-REGRESSION_TEST(`nargmax', `xargs -n$argmax <${SRCDIR}/regress.in')
-REGRESSION_TEST(`n2P0',`xargs -n2 -P0 echo <${SRCDIR}/regress.in | sort')
-REGRESSION_TEST(`n3', `xargs -n3 echo <${SRCDIR}/regress.in')
-REGRESSION_TEST(`0', `xargs -0 -n1 echo <${SRCDIR}/regress.0.in')
-REGRESSION_TEST(`0I', `xargs -0 -I% echo The % %% % <${SRCDIR}/regress.0.in')
-REGRESSION_TEST(`0J', `xargs -0 -J% echo The % again. <${SRCDIR}/regress.0.in')
-REGRESSION_TEST(`0L', `xargs -0 -L2 echo <${SRCDIR}/regress.0.in')
-REGRESSION_TEST(`0P1', `xargs -0 -P1 echo <${SRCDIR}/regress.0.in')
-REGRESSION_TEST(`quotes', `xargs -n1 echo <${SRCDIR}/regress.quotes.in')
-
-REGRESSION_TEST_FREEFORM(`parallel1', `echo /var/empty /var/empty | xargs -n1 -P2 test -d; [ $? = 0 ]')
-REGRESSION_TEST_FREEFORM(`parallel2', `echo /var/empty /var/empty/nodir | xargs -n1 -P2 test -d; [ $? = 1 ]')
-REGRESSION_TEST_FREEFORM(`parallel3', `echo /var/empty/nodir /var/empty | xargs -n1 -P2 test -d; [ $? = 1 ]')
-REGRESSION_TEST_FREEFORM(`parallel4', `echo /var/empty/nodir /var/empty/nodir | xargs -n1 -P2 test -d; [ $? = 1 ]')
-REGRESSION_TEST_FREEFORM(`ntoobig', `seq 42 | xargs -n$((argmax+1)); [ $? = 1 ]')
-
-REGRESSION_END()
diff --git a/usr.bin/xargs/tests/xargs_test.sh b/usr.bin/xargs/tests/xargs_test.sh
new file mode 100755
index 000000000000..12c9407a7e45
--- /dev/null
+++ b/usr.bin/xargs/tests/xargs_test.sh
@@ -0,0 +1,193 @@
+#
+# Copyright (c) 2002 Juli Mallett <jmallett@FreeBSD.org>
+# Copyright (c) 2025 Dag-Erling Smørgrav <des@FreeBSD.org>
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+
+SRCDIR=$(atf_get_srcdir)
+
+atf_test_case xargs_normal
+xargs_normal_body()
+{
+ atf_check -o file:${SRCDIR}/regress.normal.out \
+ xargs echo The <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_I
+xargs_I_body()
+{
+ atf_check -o file:${SRCDIR}/regress.I.out \
+ xargs -I% echo The % % % %% % % <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_J
+xargs_J_body()
+{
+ atf_check -o file:${SRCDIR}/regress.J.out \
+ xargs -J% echo The % again. <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_L
+xargs_L_body()
+{
+ atf_check -o file:${SRCDIR}/regress.L.out \
+ xargs -L3 echo <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_P1
+xargs_P1_body()
+{
+ atf_check -o file:${SRCDIR}/regress.P1.out \
+ xargs -P1 echo <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_R
+xargs_R_body()
+{
+ atf_check -o file:${SRCDIR}/regress.R.out \
+ xargs -I% -R1 echo The % % % %% % % <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_R_1
+xargs_R_1_body()
+{
+ atf_check -o file:${SRCDIR}/regress.R-1.out \
+ xargs -I% -R-1 echo The % % % %% % % <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_n1
+xargs_n1_body()
+{
+ atf_check -o file:${SRCDIR}/regress.n1.out \
+ xargs -n1 echo <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_n2
+xargs_n2_body()
+{
+ atf_check -o file:${SRCDIR}/regress.n2.out \
+ xargs -n2 echo <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_nargmax
+xargs_nargmax_body()
+{
+ argmax=$(sysctl -n kern.argmax)
+ atf_check -o file:${SRCDIR}/regress.nargmax.out \
+ xargs -n$((argmax)) <${SRCDIR}/regress.in
+ atf_check -s exit:1 -e match:"too large" \
+ xargs -n$((argmax+1)) <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_n2P0
+xargs_n2P0_body()
+{
+ atf_check -o save:regress.out \
+ xargs -n2 -P0 echo <${SRCDIR}/regress.in
+ atf_check -o file:${SRCDIR}/regress.n2P0.out \
+ sort regress.out
+}
+
+atf_test_case xargs_n3
+xargs_n3_body()
+{
+ atf_check -o file:${SRCDIR}/regress.n3.out \
+ xargs -n3 echo <${SRCDIR}/regress.in
+}
+
+atf_test_case xargs_0
+xargs_0_body()
+{
+ atf_check -o file:${SRCDIR}/regress.0.out \
+ xargs -0 -n1 echo <${SRCDIR}/regress.0.in
+}
+
+atf_test_case xargs_0I
+xargs_0I_body()
+{
+ atf_check -o file:${SRCDIR}/regress.0I.out \
+ xargs -0 -I% echo The % %% % <${SRCDIR}/regress.0.in
+}
+
+atf_test_case xargs_0J
+xargs_0J_body()
+{
+ atf_check -o file:${SRCDIR}/regress.0J.out \
+ xargs -0 -J% echo The % again. <${SRCDIR}/regress.0.in
+}
+
+atf_test_case xargs_0L
+xargs_0L_body()
+{
+ atf_check -o file:${SRCDIR}/regress.0L.out \
+ xargs -0 -L2 echo <${SRCDIR}/regress.0.in
+}
+
+atf_test_case xargs_0P1
+xargs_0P1_body()
+{
+ atf_check -o file:${SRCDIR}/regress.0P1.out \
+ xargs -0 -P1 echo <${SRCDIR}/regress.0.in
+}
+
+atf_test_case xargs_quotes
+xargs_quotes_body()
+{
+ atf_check -o file:${SRCDIR}/regress.quotes.out \
+ xargs -n1 echo <${SRCDIR}/regress.quotes.in
+}
+
+atf_test_case xargs_parallel1
+xargs_parallel1_body()
+{
+ echo /var/empty /var/empty >input
+ atf_check xargs -n1 -P2 test -d <input
+}
+
+atf_test_case xargs_parallel2
+xargs_parallel2_body()
+{
+ echo /var/empty /var/empty/nodir >input
+ atf_check -s exit:1 xargs -n1 -P2 test -d <input
+}
+
+atf_test_case xargs_parallel3
+xargs_parallel3_body()
+{
+ echo /var/empty/nodir /var/empty >input
+ atf_check -s exit:1 xargs -n1 -P2 test -d <input
+}
+
+atf_test_case xargs_parallel4
+xargs_parallel4_body()
+{
+ echo /var/empty/nodir /var/empty/nodir >input
+ atf_check -s exit:1 xargs -n1 -P2 test -d <input
+}
+
+atf_init_test_cases()
+{
+ atf_add_test_case xargs_normal
+ atf_add_test_case xargs_I
+ atf_add_test_case xargs_J
+ atf_add_test_case xargs_L
+ atf_add_test_case xargs_P1
+ atf_add_test_case xargs_R
+ atf_add_test_case xargs_R_1
+ atf_add_test_case xargs_n1
+ atf_add_test_case xargs_n2
+ atf_add_test_case xargs_nargmax
+ atf_add_test_case xargs_n2P0
+ atf_add_test_case xargs_n3
+ atf_add_test_case xargs_0
+ atf_add_test_case xargs_0I
+ atf_add_test_case xargs_0J
+ atf_add_test_case xargs_0L
+ atf_add_test_case xargs_0P1
+ atf_add_test_case xargs_quotes
+ atf_add_test_case xargs_parallel1
+ atf_add_test_case xargs_parallel2
+ atf_add_test_case xargs_parallel3
+ atf_add_test_case xargs_parallel4
+}
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index 85864da57af2..6ff8dd8e273b 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -37,9 +37,12 @@
*/
#include <sys/param.h>
+#include <sys/cpuset.h>
+#include <sys/domainset.h>
#include <sys/endian.h>
#include <sys/errno.h>
#include <sys/stat.h>
+#include <sys/tree.h>
#include <err.h>
#include <paths.h>
@@ -50,7 +53,9 @@
#include <string.h>
#include <unistd.h>
+#include <dev/vmm/vmm_mem.h>
#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
#include <vmmapi.h>
#include "bhyverun.h"
@@ -79,6 +84,22 @@ static char basl_template[MAXPATHLEN];
static char basl_stemplate[MAXPATHLEN];
/*
+ * SRAT vCPU affinity info.
+ */
+struct acpi_vcpu_affinity_entry {
+ RB_ENTRY(acpi_vcpu_affinity_entry) entry; /* red-black tree linkage */
+ int vcpuid; /* tree key, see vcpu_affinity_cmp() */
+ int domain; /* domain id recorded for this vCPU */
+};
+
+static int vcpu_affinity_cmp(struct acpi_vcpu_affinity_entry *const a1,
+ struct acpi_vcpu_affinity_entry *const a2);
+static RB_HEAD(vcpu_affinities,
+ acpi_vcpu_affinity_entry) aff_head = RB_INITIALIZER(&aff_head);
+RB_GENERATE_STATIC(vcpu_affinities, acpi_vcpu_affinity_entry, entry,
+ vcpu_affinity_cmp);
+
+/*
* State for dsdt_line(), dsdt_indent(), and dsdt_unindent().
*/
static FILE *dsdt_fp;
@@ -121,6 +142,31 @@ acpi_tables_add_device(const struct acpi_device *const dev)
return (0);
}
+/*
+ * Ordering function for the vcpu_affinities tree: entries are keyed by
+ * vcpuid only.  Returns -1, 0 or 1.
+ */
+static int
+vcpu_affinity_cmp(struct acpi_vcpu_affinity_entry *a1,
+    struct acpi_vcpu_affinity_entry *a2)
+{
+	if (a1->vcpuid < a2->vcpuid)
+		return (-1);
+	if (a1->vcpuid > a2->vcpuid)
+		return (1);
+	return (0);
+}
+
+/*
+ * Record that vCPU 'vcpuid' belongs to domain 'domain'.
+ *
+ * Returns 0 on success, ENOMEM if the entry cannot be allocated, or
+ * EEXIST if an entry for 'vcpuid' is already present in the tree.
+ */
+int
+acpi_add_vcpu_affinity(int vcpuid, int domain)
+{
+	struct acpi_vcpu_affinity_entry *aff;
+
+	aff = calloc(1, sizeof(*aff));
+	if (aff == NULL)
+		return (ENOMEM);
+
+	aff->vcpuid = vcpuid;
+	aff->domain = domain;
+
+	/* A non-NULL result means the key was already in the tree. */
+	if (RB_INSERT(vcpu_affinities, &aff_head, aff) == NULL)
+		return (0);
+
+	free(aff);
+	return (EEXIST);
+}
+
/*
* Helper routines for writing to the DSDT from other modules.
*/
@@ -726,6 +772,83 @@ build_spcr(struct vmctx *const ctx)
return (0);
}
+/*
+ * Build the ACPI SRAT table.
+ *
+ * When no vCPU affinity has been registered through
+ * acpi_add_vcpu_affinity() the table is not built and 0 is returned.
+ * Otherwise the table contains one memory affinity structure per guest
+ * memory mapping, followed by one processor affinity structure per
+ * registered vCPU, and is registered with the RSDT.
+ *
+ * NOTE(review): BASL_EXEC() is defined outside this hunk; it presumably
+ * returns from this function when the wrapped call fails - confirm.
+ */
+static int
+build_srat(struct vmctx *const ctx)
+{
+	ACPI_TABLE_SRAT srat;
+	ACPI_SRAT_MEM_AFFINITY srat_mem_affinity;
+	ACPI_SRAT_CPU_AFFINITY srat_cpu_affinity;
+
+	struct acpi_vcpu_affinity_entry *ep;
+	struct basl_table *table;
+	int segid, domain;
+	int _flags, _prot;
+	vm_ooffset_t _off;
+	size_t maplen;
+	uint64_t gpa;
+	int ret;
+
+	if (RB_EMPTY(&aff_head))
+		return (0);
+
+	memset(&srat, 0, sizeof(srat));
+	BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_SRAT,
+	    BASL_TABLE_ALIGNMENT));
+	BASL_EXEC(basl_table_append_header(table, ACPI_SIG_SRAT, 1, 1));
+	srat.TableRevision = 1;
+	BASL_EXEC(basl_table_append_content(table, &srat, sizeof(srat)));
+
+	/*
+	 * Iterate over the VM's memory maps and add
+	 * a 'Memory Affinity Structure' for each mapping.
+	 */
+	gpa = 0;
+	while (1) {
+		ret = vm_mmap_getnext(ctx, &gpa, &segid, &_off, &maplen, &_prot,
+		    &_flags);
+		if (ret) {
+			/* Any failure ends the walk over the mappings. */
+			break;
+		}
+
+		if (segid >= VM_SYSMEM && segid < VM_BOOTROM) {
+			domain = segid - VM_SYSMEM;
+		} else {
+			/* Treat devmem segs as domain 0. */
+			domain = 0;
+		}
+		memset(&srat_mem_affinity, 0, sizeof(srat_mem_affinity));
+		srat_mem_affinity.Header.Type = ACPI_SRAT_TYPE_MEMORY_AFFINITY;
+		srat_mem_affinity.Header.Length = sizeof(srat_mem_affinity);
+		srat_mem_affinity.ProximityDomain = htole32(domain);
+		srat_mem_affinity.BaseAddress = htole64(gpa);
+		srat_mem_affinity.Length = htole64(maplen);
+		/* Flags is written exactly once, in little-endian form. */
+		srat_mem_affinity.Flags = htole32(ACPI_SRAT_MEM_ENABLED);
+		BASL_EXEC(basl_table_append_bytes(table, &srat_mem_affinity,
+		    sizeof(srat_mem_affinity)));
+		gpa += maplen;
+	}
+
+	/*
+	 * Iterate over each "vCPUid to domain id" mapping and emit a
+	 * 'Processor Local APIC/SAPIC Affinity Structure' for each entry.
+	 */
+	RB_FOREACH(ep, vcpu_affinities, &aff_head) {
+		memset(&srat_cpu_affinity, 0, sizeof(srat_cpu_affinity));
+		srat_cpu_affinity.Header.Type = ACPI_SRAT_TYPE_CPU_AFFINITY;
+		srat_cpu_affinity.Header.Length = sizeof(srat_cpu_affinity);
+		/* Only the low 8 bits of the domain and vCPU id are stored. */
+		srat_cpu_affinity.ProximityDomainLo = (uint8_t)ep->domain;
+		srat_cpu_affinity.ApicId = (uint8_t)ep->vcpuid;
+		srat_cpu_affinity.Flags = htole32(ACPI_SRAT_CPU_USE_AFFINITY);
+		BASL_EXEC(basl_table_append_bytes(table, &srat_cpu_affinity,
+		    sizeof(srat_cpu_affinity)));
+	}
+
+	BASL_EXEC(basl_table_register_to_rsdt(table));
+
+	return (0);
+}
+
int
acpi_build(struct vmctx *ctx, int ncpu)
{
@@ -765,6 +888,7 @@ acpi_build(struct vmctx *ctx, int ncpu)
BASL_EXEC(build_mcfg(ctx));
BASL_EXEC(build_facs(ctx));
BASL_EXEC(build_spcr(ctx));
+ BASL_EXEC(build_srat(ctx));
/* Build ACPI device-specific tables such as a TPM2 table. */
const struct acpi_device_list_entry *entry;
diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h
index 4b557993d67f..f4d24d63800e 100644
--- a/usr.sbin/bhyve/acpi.h
+++ b/usr.sbin/bhyve/acpi.h
@@ -56,7 +56,8 @@ struct vmctx;
int acpi_build(struct vmctx *ctx, int ncpu);
void acpi_raise_gpe(struct vmctx *ctx, unsigned bit);
int acpi_tables_add_device(const struct acpi_device *const dev);
-void dsdt_line(const char *fmt, ...);
+int acpi_add_vcpu_affinity(int vcpuid, int domain);
+void dsdt_line(const char *fmt, ...) __printflike(1, 2);
void dsdt_fixed_ioport(uint16_t iobase, uint16_t length);
void dsdt_fixed_irq(uint8_t irq);
void dsdt_fixed_mem32(uint32_t base, uint32_t length);
diff --git a/usr.sbin/bhyve/amd64/bhyverun_machdep.c b/usr.sbin/bhyve/amd64/bhyverun_machdep.c
index 85af124b5536..dad8f1e52e4e 100644
--- a/usr.sbin/bhyve/amd64/bhyverun_machdep.c
+++ b/usr.sbin/bhyve/amd64/bhyverun_machdep.c
@@ -91,6 +91,7 @@ bhyve_usage(int code)
" -K: PS2 keyboard layout\n"
" -l: LPC device configuration\n"
" -m: memory size\n"
+ " -n: NUMA domain specification\n"
" -o: set config 'var' to 'value'\n"
" -P: vmexit from the guest on pause\n"
" -p: pin 'vcpu' to 'hostcpu'\n"
@@ -117,9 +118,9 @@ bhyve_optparse(int argc, char **argv)
int c;
#ifdef BHYVE_SNAPSHOT
- optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:";
+ optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:n:l:K:U:r:";
#else
- optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:";
+ optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:n:l:K:U:";
#endif
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
@@ -194,6 +195,15 @@ bhyve_optparse(int argc, char **argv)
case 'm':
set_config_value("memory.size", optarg);
break;
+ case 'n':
+ if (bhyve_numa_parse(optarg) != 0)
+ errx(EX_USAGE,
+ "invalid NUMA configuration "
+ "'%s'",
+ optarg);
+ if (!get_config_bool("acpi_tables"))
+ errx(EX_USAGE, "NUMA emulation requires ACPI");
+ break;
case 'o':
if (!bhyve_parse_config_option(optarg)) {
errx(EX_USAGE,
diff --git a/usr.sbin/bhyve/amd64/xmsr.c b/usr.sbin/bhyve/amd64/xmsr.c
index cd80e4ef782e..7c174728f4fa 100644
--- a/usr.sbin/bhyve/amd64/xmsr.c
+++ b/usr.sbin/bhyve/amd64/xmsr.c
@@ -204,6 +204,15 @@ emulate_rdmsr(struct vcpu *vcpu __unused, uint32_t num, uint64_t *val)
*val = 1;
break;
+ case MSR_VM_CR:
+ /*
+ * We currently don't support nested virt.
+ * Windows seems to ignore the cpuid bits and reads this
+ * MSR anyways.
+ */
+ *val = VM_CR_SVMDIS;
+ break;
+
default:
error = -1;
break;
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
index 62e567fd359d..89c0b23961a8 100644
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -269,8 +269,56 @@ or
(either upper or lower case)
to indicate a multiple of kilobytes, megabytes, gigabytes, or terabytes.
If no suffix is given, the value is assumed to be in megabytes.
-.Pp
The default is 256M.
+.Pp
+.It Fl n Ar id Ns Cm \&, Ns Ar size Ns Cm \&, Ns Ar cpus Ns Op Cm \&, Ns Ar domain_policy
+Configure guest NUMA domains.
+This option applies only to the amd64 platform.
+.Pp
+The
+.Fl n
+option allows the guest physical address space to be partitioned into domains.
+The layout of each domain is encoded in an ACPI table
+visible to the guest operating system.
+The
+.Fl n
+option also allows the specification of a
+.Xr domainset 9
+memory allocation policy for the host memory backing a given NUMA domain.
+A guest can have up to 8 NUMA domains.
+This feature requires that the guest use a boot ROM, and in
+particular cannot be used if the guest was initialized using
+.Xr bhyveload 8 .
+.Pp
+Each domain is identified by a numerical
+.Em id .
+The domain memory
+.Em size
+is specified using the same format as the
+.Fl m
+flag.
+The sum of all
+.Em size
+parameters overrides the total VM memory size specified by the
+.Fl m
+flag.
+However, if at least one domain memory size parameter is
+missing, the total VM memory size will be equally distributed across
+all emulated domains.
+The
+.Em cpus
+parameter specifies the set of CPUs that are part of the domain.
+The
+.Em domain_policy
+parameter may be optionally used to configure the
+.Xr domainset 9
+host NUMA memory allocation policy for an emulated
+domain.
+See the
+.Fl n
+flag in
+.Xr cpuset 1
+for a list of valid NUMA memory allocation policies and their formats.
.It Fl o Ar var Ns Cm = Ns Ar value
Set the configuration variable
.Ar var
@@ -1202,6 +1250,33 @@ using this configuration file, use flag
.Bd -literal -offset indent
/usr/sbin/bhyve -k configfile vm0
.Ed
+.Pp
+Run a UEFI virtual machine with four CPUs and two emulated NUMA domains:
+.Bd -literal -offset indent
+bhyve -c 4 -w -H \\
+ -s 0,hostbridge \\
+ -s 4,ahci-hd,disk.img \\
+ -s 31,lpc -l com1,stdio \\
+ -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \\
+ -n id=0,size=4G,cpus=0-1 \\
+ -n id=1,size=4G,cpus=2-3 \\
+ numavm
+.Ed
+.Pp
+Assuming a host machine with two NUMA domains,
+run a UEFI virtual machine with two CPUs using a
+.Ar prefer
+.Xr domainset 9
+policy to allocate guest memory from the first host NUMA domain only.
+.Bd -literal -offset indent
+bhyve -c 2 -w -H \\
+ -s 0,hostbridge \\
+ -s 4,ahci-hd,disk.img \\
+ -s 31,lpc -l com1,stdio \\
+ -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \\
+ -n id=0,size=4G,cpus=0-1,domain_policy=prefer:0 \\
+ numavm
+.Ed
.Sh SEE ALSO
.Xr bhyve 4 ,
.Xr netgraph 4 ,
@@ -1211,7 +1286,8 @@ using this configuration file, use flag
.Xr bhyve_config 5 ,
.Xr ethers 5 ,
.Xr bhyvectl 8 ,
-.Xr bhyveload 8
+.Xr bhyveload 8 ,
+.Xr domainset 9
.Pp
.Rs
.%A Intel
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index be9cd1611700..9ead49582a7d 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -30,6 +30,8 @@
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
+#include <sys/cpuset.h>
+#include <sys/domainset.h>
#include <sys/mman.h>
#ifdef BHYVE_SNAPSHOT
#include <sys/socket.h>
@@ -54,6 +56,7 @@
#include <fcntl.h>
#endif
#include <libgen.h>
+#include <libutil.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
@@ -68,6 +71,7 @@
#include <libxo/xo.h>
#endif
+#include <dev/vmm/vmm_mem.h>
#include <vmmapi.h>
#include "acpi.h"
@@ -108,6 +112,9 @@ static const int BSP = 0;
static cpuset_t cpumask;
+static struct vm_mem_domain guest_domains[VM_MAXMEMDOM];
+static int guest_ndomains = 0;
+
static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu);
static struct vcpu_info {
@@ -179,6 +186,118 @@ parse_int_value(const char *key, const char *value, int minval, int maxval)
return (lval);
}
+/*
+ * Parse one '-n' NUMA domain specification of the form
+ * "id=N[,size=S][,cpus=C][,domain_policy=P]" and store the given values
+ * under the "domains.<id>" configuration node, creating the node if it
+ * does not exist yet.
+ *
+ * The id is limited to [0, VM_MAXMEMDOM - 1] because calc_mem_affinity()
+ * only ever looks up that many "domains.<id>" nodes; a larger id would
+ * otherwise be accepted here and then silently ignored.
+ *
+ * Returns 0 on success and -1 if the specification is empty, contains an
+ * unknown key, or lacks an id.
+ */
+int
+bhyve_numa_parse(const char *opt)
+{
+	int id = -1;
+	nvlist_t *nvl;
+	char *cp, *str, *tofree;
+	char pathbuf[64] = { 0 };
+	char *size = NULL, *cpus = NULL, *domain_policy = NULL;
+
+	if (*opt == '\0') {
+		return (-1);
+	}
+
+	tofree = str = strdup(opt);
+	if (str == NULL)
+		errx(4, "Failed to allocate memory");
+
+	while ((cp = strsep(&str, ",")) != NULL) {
+		/* Tolerate empty fields such as a trailing comma. */
+		if (*cp == '\0')
+			continue;
+
+		if (strncmp(cp, "id=", strlen("id=")) == 0) {
+			id = parse_int_value("id", cp + strlen("id="), 0,
+			    VM_MAXMEMDOM - 1);
+		} else if (strncmp(cp, "size=", strlen("size=")) == 0) {
+			size = cp + strlen("size=");
+		} else if (strncmp(cp,
+		    "domain_policy=", strlen("domain_policy=")) == 0) {
+			domain_policy = cp + strlen("domain_policy=");
+		} else if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) {
+			cpus = cp + strlen("cpus=");
+		} else {
+			/* Do not silently drop a mistyped key. */
+			EPRINTLN("Unknown NUMA option '%s' in '%s'", cp, opt);
+			goto out;
+		}
+	}
+
+	if (id == -1) {
+		EPRINTLN("Missing NUMA domain ID in '%s'", opt);
+		goto out;
+	}
+
+	snprintf(pathbuf, sizeof(pathbuf), "domains.%d", id);
+	nvl = find_config_node(pathbuf);
+	if (nvl == NULL)
+		nvl = create_config_node(pathbuf);
+	if (size != NULL)
+		set_config_value_node(nvl, "size", size);
+	if (domain_policy != NULL)
+		set_config_value_node(nvl, "domain_policy", domain_policy);
+	if (cpus != NULL)
+		set_config_value_node(nvl, "cpus", cpus);
+
+	free(tofree);
+	return (0);
+
+out:
+	free(tofree);
+	return (-1);
+}
+
+/*
+ * Fill guest_domains[] and guest_ndomains from the "domains.<N>"
+ * configuration nodes, scanning N upwards from 0 and stopping at the
+ * first missing node or at VM_MAXMEMDOM.
+ *
+ * With no configured domain, a single domain of 'vm_memsize' bytes is
+ * set up.  If any configured domain lacks a "size" value, every domain
+ * is given vm_memsize / guest_ndomains bytes instead.
+ */
+static void
+calc_mem_affinity(size_t vm_memsize)
+{
+	struct vm_mem_domain *md;
+	const char *size_str, *policy_str;
+	nvlist_t *node;
+	char path[64] = { 0 };
+	bool missing_size = false;
+	int ndom;
+
+	for (ndom = 0; ndom < VM_MAXMEMDOM; ndom++) {
+		snprintf(path, sizeof(path), "domains.%d", ndom);
+		node = find_config_node(path);
+		if (node == NULL)
+			break;
+		md = &guest_domains[ndom];
+
+		size_str = get_config_value_node(node, "size");
+		if (size_str == NULL) {
+			missing_size = true;
+		} else if (vm_parse_memsize(size_str, &md->size)) {
+			errx(EX_USAGE, "invalid memsize for domain %d: '%s'",
+			    ndom, size_str);
+		}
+
+		md->ds_mask = calloc(1, sizeof(domainset_t));
+		if (md->ds_mask == NULL)
+			errx(EX_OSERR, "Failed to allocate domainset mask");
+		md->ds_size = sizeof(domainset_t);
+
+		policy_str = get_config_value_node(node, "domain_policy");
+		if (policy_str != NULL) {
+			if (domainset_parselist(policy_str, md->ds_mask,
+			    &md->ds_policy) != CPUSET_PARSE_OK) {
+				errx(EX_USAGE,
+				    "failed to parse domain policy '%s'",
+				    policy_str);
+			}
+		} else {
+			/* No policy given: mark it invalid, clear the mask. */
+			md->ds_policy = DOMAINSET_POLICY_INVALID;
+			DOMAINSET_ZERO(md->ds_mask);
+		}
+	}
+
+	guest_ndomains = ndom;
+	if (guest_ndomains == 0) {
+		/*
+		 * No domains were specified - create domain
+		 * 0 holding all CPUs and memory.
+		 */
+		guest_ndomains = 1;
+		guest_domains[0].size = vm_memsize;
+		return;
+	}
+	if (!missing_size)
+		return;
+
+	warnx("At least one domain memory size was not specified, distributing"
+	    " total VM memory size across all domains");
+	for (ndom = 0; ndom < guest_ndomains; ndom++)
+		guest_domains[ndom].size = vm_memsize / guest_ndomains;
+}
+
/*
* Set the sockets, cores, threads, and guest_cpus variables based on
* the configured topology.
@@ -340,6 +459,56 @@ build_vcpumaps(void)
}
}
+/*
+ * Register the vCPU-to-domain assignment of every configured guest
+ * domain via acpi_add_vcpu_affinity().
+ *
+ * Each "domains.<N>" node must carry a "cpus" value; the process exits
+ * with status 4 if one is missing or if a vCPU cannot be registered.
+ * When only the implicit default domain exists (no "domains.0" node),
+ * all guest_ncpus vCPUs are assigned to domain 0.
+ */
+static void
+set_vcpu_affinities(void)
+{
+	int cpu, error;
+	nvlist_t *nvl = NULL;
+	cpuset_t cpus;
+	const char *value;
+	char pathbuf[64] = { 0 };
+
+	for (int dom = 0; dom < guest_ndomains; dom++) {
+		snprintf(pathbuf, sizeof(pathbuf), "domains.%d", dom);
+		nvl = find_config_node(pathbuf);
+		if (nvl == NULL)
+			break;
+
+		value = get_config_value_node(nvl, "cpus");
+		if (value == NULL) {
+			EPRINTLN("Missing CPU set for domain %d", dom);
+			exit(4);
+		}
+
+		parse_cpuset(dom, value, &cpus);
+		CPU_FOREACH_ISSET(cpu, &cpus) {
+			error = acpi_add_vcpu_affinity(cpu, dom);
+			if (error) {
+				/*
+				 * The failure reason is the returned code;
+				 * errno is not set on the EEXIST path.
+				 */
+				EPRINTLN(
+				    "Unable to set vCPU %d affinity for domain %d: %s",
+				    cpu, dom, strerror(error));
+				exit(4);
+			}
+		}
+	}
+	if (guest_ndomains > 1 || nvl != NULL)
+		return;
+
+	/*
+	 * If we're dealing with one domain and no cpuset was provided, create a
+	 * default one holding all cpus.
+	 */
+	for (cpu = 0; cpu < guest_ncpus; cpu++) {
+		error = acpi_add_vcpu_affinity(cpu, 0);
+		if (error) {
+			EPRINTLN(
+			    "Unable to set vCPU %d affinity for domain %d: %s",
+			    cpu, 0, strerror(error));
+			exit(4);
+		}
+	}
+}
+
void *
paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len)
{
@@ -713,18 +882,21 @@ main(int argc, char *argv[])
vcpu_info[vcpuid].vcpu = vm_vcpu_open(ctx, vcpuid);
}
+ calc_mem_affinity(memsize);
memflags = 0;
if (get_config_bool_default("memory.wired", false))
memflags |= VM_MEM_F_WIRED;
if (get_config_bool_default("memory.guest_in_core", false))
memflags |= VM_MEM_F_INCORE;
vm_set_memflags(ctx, memflags);
- error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
+ error = vm_setup_memory_domains(ctx, VM_MMAP_ALL, guest_domains,
+ guest_ndomains);
if (error) {
fprintf(stderr, "Unable to setup memory (%d)\n", errno);
exit(4);
}
+ set_vcpu_affinities();
init_mem(guest_ncpus);
init_bootrom(ctx);
if (bhyve_init_platform(ctx, bsp) != 0)
diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h
index 005de6dc5410..0a7bbd72a19c 100644
--- a/usr.sbin/bhyve/bhyverun.h
+++ b/usr.sbin/bhyve/bhyverun.h
@@ -73,6 +73,7 @@ void bhyve_parse_gdb_options(const char *opt);
#endif
int bhyve_pincpu_parse(const char *opt);
int bhyve_topology_parse(const char *opt);
+int bhyve_numa_parse(const char *opt);
void bhyve_init_vcpu(struct vcpu *vcpu);
void bhyve_start_vcpu(struct vcpu *vcpu, bool bsp);
diff --git a/usr.sbin/bhyve/bootrom.c b/usr.sbin/bhyve/bootrom.c
index e4adaca55947..339974cb2017 100644
--- a/usr.sbin/bhyve/bootrom.c
+++ b/usr.sbin/bhyve/bootrom.c
@@ -31,6 +31,7 @@
#include <sys/mman.h>
#include <sys/stat.h>
+#include <dev/vmm/vmm_mem.h>
#include <machine/vmm.h>
#include <err.h>
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index 2f04a488d9c1..9d6060e3e254 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -42,6 +42,7 @@
#include <stdbool.h>
#include <sysexits.h>
+#include <dev/vmm/vmm_mem.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
diff --git a/usr.sbin/bhyve/pci_fbuf.c b/usr.sbin/bhyve/pci_fbuf.c
index 125428e0b772..1e3ec77c15b0 100644
--- a/usr.sbin/bhyve/pci_fbuf.c
+++ b/usr.sbin/bhyve/pci_fbuf.c
@@ -29,6 +29,7 @@
#include <sys/types.h>
#include <sys/mman.h>
+#include <dev/vmm/vmm_mem.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
#include <vmmapi.h>
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
index 9d38ae9168a1..a82078f6e036 100644
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -38,6 +38,7 @@
#include <dev/io/iodev.h>
#include <dev/pci/pcireg.h>
+#include <dev/vmm/vmm_mem.h>
#include <vm/vm.h>
diff --git a/usr.sbin/bhyve/tpm_ppi_qemu.c b/usr.sbin/bhyve/tpm_ppi_qemu.c
index 01b8493e7273..6974b574b983 100644
--- a/usr.sbin/bhyve/tpm_ppi_qemu.c
+++ b/usr.sbin/bhyve/tpm_ppi_qemu.c
@@ -207,7 +207,7 @@ tpm_ppi_write_dsdt_regions(void *sc __unused)
* Used for TCG Platform Reset Attack Mitigation
*/
dsdt_line("OperationRegion(TPP3, SystemMemory, 0x%8x, 1)",
- TPM_PPI_ADDRESS + sizeof(struct tpm_ppi_qemu));
+ TPM_PPI_ADDRESS + (uint32_t)sizeof(struct tpm_ppi_qemu));
dsdt_line("Field(TPP3, ByteAcc, NoLock, Preserve)");
dsdt_line("{");
dsdt_line(" MOVV, 8,");
diff --git a/usr.sbin/bsdinstall/bsdinstall.8 b/usr.sbin/bsdinstall/bsdinstall.8
index 8fadacab9189..181abdcf9d05 100644
--- a/usr.sbin/bsdinstall/bsdinstall.8
+++ b/usr.sbin/bsdinstall/bsdinstall.8
@@ -451,7 +451,7 @@ Each option must be preceded by the -O flag to be taken into consideration
or the pool will not be created due to errors using the command
.Cm zpool .
Default:
-.Dq Li "-O compress=lz4 -O atime=off"
+.Dq Li "-O compression=on -O atime=off"
.It Ev ZFSBOOT_BEROOT_NAME
Name for the boot environment parent dataset.
This is a non-mountable dataset meant to be a parent dataset where different
diff --git a/usr.sbin/bsdinstall/scripts/bootconfig b/usr.sbin/bsdinstall/scripts/bootconfig
index 9b330801e409..41243ad14b9b 100755
--- a/usr.sbin/bsdinstall/scripts/bootconfig
+++ b/usr.sbin/bsdinstall/scripts/bootconfig
@@ -74,7 +74,7 @@ update_uefi_bootentry()
fi
$DIALOG --backtitle "$OSNAME Installer" --title 'Boot Configuration' \
- --yesno "There are multiple \"$OSNAME\" EFI boot entries. Would you like to remove them all and add a new one?" 0 0
+ --yesno "One or more \"$OSNAME\" EFI boot manager entries already exist. Would you like to remove them all and add a new one?" 0 0
if [ $? -eq $DIALOG_OK ]; then
for entry in $(efibootmgr | awk "\$NF == \"$EFI_LABEL_NAME\" { sub(/.*Boot/,\"\", \$1); sub(/\*/,\"\", \$1); print \$1 }"); do
efibootmgr -B -b ${entry}
diff --git a/usr.sbin/bsdinstall/scripts/pkgbase.in b/usr.sbin/bsdinstall/scripts/pkgbase.in
index cf8e84de6923..d123394c170e 100755
--- a/usr.sbin/bsdinstall/scripts/pkgbase.in
+++ b/usr.sbin/bsdinstall/scripts/pkgbase.in
@@ -165,7 +165,9 @@ local function select_packages(pkg, options)
table.insert(components["src"], package)
elseif package == "FreeBSD-tests" or package:match("^FreeBSD%-tests%-.*") then
table.insert(components["tests"], package)
- elseif package:match("^FreeBSD%-kernel%-.*") then
+ elseif package:match("^FreeBSD%-kernel%-.*") and
+ package ~= "FreeBSD-kernel-man"
+ then
-- Kernels other than FreeBSD-kernel-generic are ignored
if package == "FreeBSD-kernel-generic" then
table.insert(components["kernel"], package)
diff --git a/usr.sbin/bsdinstall/scripts/zfsboot b/usr.sbin/bsdinstall/scripts/zfsboot
index 60feec28e888..a3c1e2ddb89f 100755
--- a/usr.sbin/bsdinstall/scripts/zfsboot
+++ b/usr.sbin/bsdinstall/scripts/zfsboot
@@ -51,7 +51,7 @@ f_include $BSDCFG_SHARE/variable.subr
#
# Default options to use when creating zroot pool
#
-: ${ZFSBOOT_POOL_CREATE_OPTIONS:=-O compress=lz4 -O atime=off}
+: ${ZFSBOOT_POOL_CREATE_OPTIONS:=-O compression=on -O atime=off}
#
# Default name for the boot environment parent dataset
diff --git a/usr.sbin/bsnmpd/modules/snmp_wlan/wlan_sys.c b/usr.sbin/bsnmpd/modules/snmp_wlan/wlan_sys.c
index b129e42b9d85..e80b53dcf44e 100644
--- a/usr.sbin/bsnmpd/modules/snmp_wlan/wlan_sys.c
+++ b/usr.sbin/bsnmpd/modules/snmp_wlan/wlan_sys.c
@@ -2167,7 +2167,7 @@ wlan_add_new_scan_result(struct wlan_iface *wif,
return (-1);
sr->opchannel = wlan_channel_flags_to_snmp_phy(isr->isr_flags);
- sr->rssi = isr->isr_rssi;
+ sr->rssi = (isr->isr_rssi / 2) - isr->isr_noise;
sr->frequency = isr->isr_freq;
sr->noise = isr->isr_noise;
sr->bintval = isr->isr_intval;
diff --git a/usr.sbin/chroot/chroot.8 b/usr.sbin/chroot/chroot.8
index f26b7e937da9..4a1a5a396631 100644
--- a/usr.sbin/chroot/chroot.8
+++ b/usr.sbin/chroot/chroot.8
@@ -25,7 +25,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd July 20, 2021
+.Dd July 25, 2025
.Dt CHROOT 8
.Os
.Sh NAME
@@ -52,13 +52,15 @@ or an interactive copy of the user's login shell.
The options are as follows:
.Bl -tag -width "-G group[,group ...]"
.It Fl G Ar group Ns Op Cm \&, Ns Ar group ...
-Run the command with the permissions of the specified groups.
+Run the command with the specified groups as supplementary groups.
.It Fl g Ar group
-Run the command with the permissions of the specified
-.Ar group .
+Run the command with the specified
+.Ar group
+as the real, effective and saved groups.
.It Fl u Ar user
-Run the command as the
-.Ar user .
+Run the command with the specified
+.Ar user
+as the real, effective and saved users.
.It Fl n
Use the
.Dv PROC_NO_NEW_PRIVS_CTL
diff --git a/usr.sbin/makefs/zfs/dsl.c b/usr.sbin/makefs/zfs/dsl.c
index 8a8cee7c82b2..1977521d7f92 100644
--- a/usr.sbin/makefs/zfs/dsl.c
+++ b/usr.sbin/makefs/zfs/dsl.c
@@ -119,7 +119,7 @@ dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir)
if (nvlist_find_string(pdir->propsnv, "mountpoint",
&tmp) == 0) {
- easprintf(&mountpoint, "%s%s%s", tmp,
+ (void)easprintf(&mountpoint, "%s%s%s", tmp,
tmp[strlen(tmp) - 1] == '/' ? "" : "/",
origmountpoint);
free(tmp);
@@ -127,7 +127,7 @@ dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir)
break;
}
- easprintf(&mountpoint, "%s/%s", pdir->name,
+ (void)easprintf(&mountpoint, "%s/%s", pdir->name,
origmountpoint);
free(origmountpoint);
}
@@ -175,22 +175,22 @@ dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key,
"the root path `%s'", val, zfs->rootpath);
}
}
- nvlist_add_string(nvl, key, val);
+ (void)nvlist_add_string(nvl, key, val);
} else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 ||
strcmp(key, "setuid") == 0) {
if (strcmp(val, "on") == 0)
- nvlist_add_uint64(nvl, key, 1);
+ (void)nvlist_add_uint64(nvl, key, 1);
else if (strcmp(val, "off") == 0)
- nvlist_add_uint64(nvl, key, 0);
+ (void)nvlist_add_uint64(nvl, key, 0);
else
errx(1, "invalid value `%s' for %s", val, key);
} else if (strcmp(key, "canmount") == 0) {
if (strcmp(val, "noauto") == 0)
- nvlist_add_uint64(nvl, key, 2);
+ (void)nvlist_add_uint64(nvl, key, 2);
else if (strcmp(val, "on") == 0)
- nvlist_add_uint64(nvl, key, 1);
+ (void)nvlist_add_uint64(nvl, key, 1);
else if (strcmp(val, "off") == 0)
- nvlist_add_uint64(nvl, key, 0);
+ (void)nvlist_add_uint64(nvl, key, 0);
else
errx(1, "invalid value `%s' for %s", val, key);
} else if (strcmp(key, "compression") == 0) {
@@ -237,7 +237,7 @@ dsl_metadir_alloc(zfs_opt_t *zfs, const char *name)
zfs_dsl_dir_t *dir;
char *path;
- easprintf(&path, "%s/%s", zfs->poolname, name);
+ (void)easprintf(&path, "%s/%s", zfs->poolname, name);
dir = dsl_dir_alloc(zfs, path);
free(path);
return (dir);
@@ -322,11 +322,11 @@ dsl_init(zfs_opt_t *zfs)
* user didn't override the defaults.
*/
if (nvpair_find(zfs->rootdsldir->propsnv, "compression") == NULL) {
- nvlist_add_uint64(zfs->rootdsldir->propsnv, "compression",
- ZIO_COMPRESS_OFF);
+ (void)nvlist_add_uint64(zfs->rootdsldir->propsnv,
+ "compression", ZIO_COMPRESS_OFF);
}
if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) {
- nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint",
+ (void)nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint",
zfs->rootpath);
}
}
@@ -431,6 +431,7 @@ dsl_dir_alloc(zfs_opt_t *zfs, const char *name)
STAILQ_INIT(&l);
STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next);
origname = dirname = nextdir = estrdup(name);
+ parent = NULL;
for (lp = &l;; lp = &parent->children) {
dirname = strsep(&nextdir, "/");
if (nextdir == NULL)
diff --git a/usr.sbin/makefs/zfs/fs.c b/usr.sbin/makefs/zfs/fs.c
index 073dce3ce697..75f6e30e1500 100644
--- a/usr.sbin/makefs/zfs/fs.c
+++ b/usr.sbin/makefs/zfs/fs.c
@@ -28,6 +28,7 @@
* SUCH DAMAGE.
*/
+#include <sys/param.h>
#include <sys/stat.h>
#include <assert.h>
@@ -383,22 +384,34 @@ fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur,
links = 1; /* .. */
objsize = 1; /* .. */
- /*
- * The size of a ZPL directory is the number of entries
- * (including "." and ".."), and the link count is the number of
- * entries which are directories (including "." and "..").
- */
- for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child;
- c != NULL; c = c->next) {
- switch (c->type) {
- case S_IFDIR:
- links++;
- /* FALLTHROUGH */
- case S_IFREG:
- case S_IFLNK:
- objsize++;
- break;
+ if ((cur->inode->flags & FI_ROOT) == 0 ) {
+ /*
+ * The size of a ZPL directory is the number of entries
+ * (including "." and ".."), and the link count is the
+ * number of entries which are directories
+ * (including "." and "..").
+ */
+ for (fsnode *c =
+ fsnode_isroot(cur) ? cur->next : cur->child;
+ c != NULL; c = c->next) {
+ switch (c->type) {
+ case S_IFDIR:
+ links++;
+ /* FALLTHROUGH */
+ case S_IFREG:
+ case S_IFLNK:
+ objsize++;
+ break;
+ }
}
+ } else {
+ /*
+ * The root directory's children belong to a
+ * different dataset, so this directory is
+ * empty in the current objset.
+ */
+ links++; /* . */
+ objsize++; /* . */
}
/* The root directory is its own parent. */
@@ -734,7 +747,7 @@ fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index,
assert(sizeof(layout[0]) == 2);
- snprintf(ti, sizeof(ti), "%u", index);
+ (void)snprintf(ti, sizeof(ti), "%u", index);
zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt,
(const uint8_t *)layout);
}
diff --git a/usr.sbin/makefs/zfs/objset.c b/usr.sbin/makefs/zfs/objset.c
index 6be732db477a..f47953ac4339 100644
--- a/usr.sbin/makefs/zfs/objset.c
+++ b/usr.sbin/makefs/zfs/objset.c
@@ -28,6 +28,7 @@
* SUCH DAMAGE.
*/
+#include <sys/param.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
diff --git a/usr.sbin/makefs/zfs/vdev.c b/usr.sbin/makefs/zfs/vdev.c
index ef9e681af2da..afcce402cb13 100644
--- a/usr.sbin/makefs/zfs/vdev.c
+++ b/usr.sbin/makefs/zfs/vdev.c
@@ -28,6 +28,7 @@
* SUCH DAMAGE.
*/
+#include <sys/param.h>
#include <assert.h>
#include <fcntl.h>
#include <stdlib.h>
diff --git a/usr.sbin/makefs/zfs/zap.c b/usr.sbin/makefs/zfs/zap.c
index decf5fc6a473..316d1446cecf 100644
--- a/usr.sbin/makefs/zfs/zap.c
+++ b/usr.sbin/makefs/zfs/zap.c
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*/
-#include <sys/types.h>
+#include <sys/param.h>
#include <sys/endian.h>
#include <assert.h>
@@ -172,14 +172,14 @@ zap_add_uint64_self(zfs_zap_t *zap, uint64_t val)
{
char name[32];
- snprintf(name, sizeof(name), "%jx", (uintmax_t)val);
+ (void)snprintf(name, sizeof(name), "%jx", (uintmax_t)val);
zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val);
}
void
zap_add_string(zfs_zap_t *zap, const char *name, const char *val)
{
- zap_add(zap, name, 1, strlen(val) + 1, val);
+ zap_add(zap, name, 1, strlen(val) + 1, (const uint8_t *)val);
}
bool
@@ -221,7 +221,8 @@ zap_micro_write(zfs_opt_t *zfs, zfs_zap_t *zap)
STAILQ_FOREACH(ent, &zap->kvps, next) {
memcpy(&ment->mze_value, ent->valp, ent->intsz * ent->intcnt);
ment->mze_cd = cd++;
- strlcpy(ment->mze_name, ent->name, sizeof(ment->mze_name));
+ (void)strlcpy(ment->mze_name, ent->name,
+ sizeof(ment->mze_name));
ment++;
}
@@ -247,6 +248,7 @@ zap_fat_write_array_chunk(zap_leaf_t *l, uint16_t li, size_t sz,
struct zap_leaf_array *la;
assert(sz <= ZAP_MAXVALUELEN);
+ assert(sz > 0);
for (uint16_t n, resid = sz; resid > 0; resid -= n, val += n, li++) {
n = MIN(resid, ZAP_LEAF_ARRAY_BYTES);
@@ -503,7 +505,8 @@ zap_fat_write(zfs_opt_t *zfs, zfs_zap_t *zap)
le->le_value_intlen = ent->intsz;
le->le_value_numints = ent->intcnt;
le->le_hash = ent->hash;
- zap_fat_write_array_chunk(&l, *lptr + 1, namelen, ent->name);
+ zap_fat_write_array_chunk(&l, *lptr + 1, namelen,
+ (uint8_t *)ent->name);
zap_fat_write_array_chunk(&l, *lptr + 1 + nnamechunks,
ent->intcnt * ent->intsz, ent->valp);
}
diff --git a/usr.sbin/syslogd/syslogd.c b/usr.sbin/syslogd/syslogd.c
index 726cedc17b1d..fe7427130b78 100644
--- a/usr.sbin/syslogd/syslogd.c
+++ b/usr.sbin/syslogd/syslogd.c
@@ -2571,7 +2571,7 @@ syslogd_cap_enter(void)
if (cap_syslogd == NULL)
err(1, "Failed to open the syslogd.casper libcasper service");
cap_net = cap_service_open(cap_casper, "system.net");
- if (cap_syslogd == NULL)
+ if (cap_net == NULL)
err(1, "Failed to open the system.net libcasper service");
cap_close(cap_casper);
limit = cap_net_limit_init(cap_net,