diff options
200 files changed, 4650 insertions, 2144 deletions
diff --git a/.github/workflows/checklist.yml b/.github/workflows/checklist.yml index 7f7b0d51f46e..ecc3939f34b8 100644 --- a/.github/workflows/checklist.yml +++ b/.github/workflows/checklist.yml @@ -89,7 +89,7 @@ jobs: /* Loop for each key in "checklist". */ for (const c in checklist) msg += "- " + c + "<sup>" + checklist[c].join(", ") + "</sup>\n"; - msg += "\nPlease review CONTRIBUTING.md, then update and push your branch again.\n" + msg += "\nPlease review [CONTRIBUTING.md](https://github.com/freebsd/freebsd-src/blob/main/CONTRIBUTING.md), then update and push your branch again.\n" comment_func({ owner: context.repo.owner, diff --git a/Makefile.inc1 b/Makefile.inc1 index 010f5ac2bb55..b66743e154eb 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -2627,8 +2627,6 @@ _kerberos5_bootstrap_tools= \ krb5/util/compile_et \ krb5/util/support \ krb5/util/et \ - lib/ncurses/tinfo \ - lib/libedit \ krb5/util/ss \ krb5/util/profile \ krb5/util/verto diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index 385e891049e7..83fb2d3f3a2c 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -51,6 +51,47 @@ # xargs -n1 | sort | uniq -d; # done +# 20250728: zfsboot (MBR) removed +OLD_FILES+=boot/zfsboot +OLD_FILES+=usr/share/man/man8/zfsboot.8.gz + +# 20250728: Machine versions of 'runq.h' do not exist anymore +OLD_FILES+=usr/include/machine/runq.h + +# 20250726: MIT KRB5 DSO bump +OLD_LIBS+=usr/lib/libcom_err.so.121 +OLD_LIBS+=usr/lib/libgssapi_krb5.so.121 +OLD_LIBS+=usr/lib/libgssrpc.so.121 +OLD_LIBS+=usr/lib/libk5crypto.so.121 +OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.121 +OLD_LIBS+=usr/lib/libkadm5srv_mit.so.121 +OLD_LIBS+=usr/lib/libkdb5.so.121 +OLD_LIBS+=usr/lib/libkrad.so.121 +OLD_LIBS+=usr/lib/libkrb5.so.121 +OLD_LIBS+=usr/lib/libkrb5profile.so.121 +OLD_LIBS+=usr/lib/libkrb5support.so.121 +OLD_LIBS+=usr/lib/libverto.so.121 +OLD_LIBS+=usr/lib/libcom_err.so.121 +OLD_LIBS+=usr/lib/libgssapi_krb5.so.121 +OLD_LIBS+=usr/lib/libgssrpc.so.121 +OLD_LIBS+=usr/lib/libk5crypto.so.121 +OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.121 +OLD_LIBS+=usr/lib/libkadm5srv_mit.so.121 +OLD_LIBS+=usr/lib/libkdb5.so.121 +OLD_LIBS+=usr/lib/libkrad.so.121 +OLD_LIBS+=usr/lib/libkrb5.so.121 +OLD_LIBS+=usr/lib/libkrb5profile.so.121 +OLD_LIBS+=usr/lib/libkrb5support.so.121 +OLD_LIBS+=usr/lib/libverto.so.121 + +# 20250726: xargs tests rewritten +OLD_FILES+=usr/tests/usr.bin/xargs/legacy_test +OLD_FILES+=usr/tests/usr.bin/xargs/regress.n2147483647.out +OLD_FILES+=usr/tests/usr.bin/xargs/regress.sh + +# 20250726: This file is now installed in /etc/dma +OLD_FILES+=usr/share/examples/dma/auth.conf + # 20250725: libbsnmp bumped to version 7 OLD_LIBS+=usr/lib/libbsnmp.so.6 @@ -27,6 +27,29 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 15.x IS SLOW: world, or to merely disable the most expensive debugging functionality at runtime, run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20250730: + The usbhid(4) USB HID driver is now enabled by default, and will be + used in preference to other USB HID drivers like ukbd(4), ums(4), and + uhid(4). Work on a FIDO/U2F driver and moused(8) is in progress. + The default is being switched now so that we can find and fix any + additional issues prior to FreeBSD 15.0. + + To revert to the previous USB HID driver behavior, set the loader + tunable hw.usb.usbhid_enable=0. + +20250727: + bmake (i.e., /usr/bin/make and /usr/share/mk) has moved to a new + package, FreeBSD-bmake. If you use pkgbase and you need make, you + should install this package. + +20250727: + LLVM's debugging assertions are now disabled in main by default. + The WITH_LLVM_ASSERTIONS src.conf(5) knob should be used to + enable it when working on LLVM or requesting help with it. + +20250726: + amd64 kernel configurations must contain "options SMP". + 20250725: gssd(8) has been moved to a new package, FreeBSD-gssd. If you use pkgbase and you need gssd, you should install this package. diff --git a/bin/cpuset/Makefile b/bin/cpuset/Makefile index d6f58db62901..639dd9812171 100644 --- a/bin/cpuset/Makefile +++ b/bin/cpuset/Makefile @@ -1,6 +1,6 @@ PROG= cpuset -LIBADD= jail +LIBADD= jail util SYMLINKS+= ../..${BINDIR}/cpuset /usr/bin/cpuset diff --git a/bin/cpuset/cpuset.c b/bin/cpuset/cpuset.c index 82ffcaeec252..7416e100a3c6 100644 --- a/bin/cpuset/cpuset.c +++ b/bin/cpuset/cpuset.c @@ -43,6 +43,7 @@ #include <err.h> #include <errno.h> #include <jail.h> +#include <libutil.h> #include <limits.h> #include <stdio.h> #include <stdlib.h> @@ -69,154 +70,6 @@ static cpuwhich_t which; static void usage(void) __dead2; -struct numa_policy { - const char *name; - int policy; -}; - -static struct numa_policy policies[] = { - { "round-robin", DOMAINSET_POLICY_ROUNDROBIN }, - { "rr", DOMAINSET_POLICY_ROUNDROBIN }, - { "first-touch", DOMAINSET_POLICY_FIRSTTOUCH }, - { "ft", DOMAINSET_POLICY_FIRSTTOUCH }, - { "prefer", DOMAINSET_POLICY_PREFER }, - { "interleave", DOMAINSET_POLICY_INTERLEAVE}, - { "il", DOMAINSET_POLICY_INTERLEAVE}, - { NULL, DOMAINSET_POLICY_INVALID } -}; - -static void printset(struct bitset *mask, int size); - -static void -parselist(char *list, struct bitset *mask, int size) -{ - enum { NONE, NUM, DASH } state; - int lastnum; - int curnum; - char *l; - - state = NONE; - curnum = lastnum = 0; - for (l = list; *l != '\0';) { - if (isdigit(*l)) { - curnum = atoi(l); - if (curnum >= size) - errx(EXIT_FAILURE, - "List entry %d exceeds maximum of %d", - curnum, size - 1); - while (isdigit(*l)) - l++; - switch (state) { - case NONE: - lastnum = curnum; - state = NUM; - break; - case DASH: - for (; lastnum <= curnum; lastnum++) - BIT_SET(size, lastnum, mask); - state = NONE; - break; - case NUM: - default: - goto parserr; - } - continue; - } - switch (*l) { - case ',': - switch (state) { - case NONE: - break; - case NUM: - BIT_SET(size, curnum, mask); - state = NONE; - break; - case DASH: - goto parserr; - break; - } - break; - case '-': - if (state != NUM) - goto parserr; - state = DASH; - break; - default: - goto parserr; - } - l++; - } - switch (state) { - case NONE: - break; - case NUM: - BIT_SET(size, curnum, mask); - break; - case DASH: - goto parserr; - } - return; -parserr: - errx(EXIT_FAILURE, "Malformed list %s", list); -} - -static void -parsecpulist(char *list, cpuset_t *mask) -{ - - if (strcasecmp(list, "all") == 0) { - if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, - sizeof(*mask), mask) != 0) - err(EXIT_FAILURE, "getaffinity"); - return; - } - parselist(list, (struct bitset *)mask, CPU_SETSIZE); -} - -/* - * permissively parse policy:domain list - * allow: - * round-robin:0-4 explicit - * round-robin:all explicit root domains - * 0-4 implicit root policy - * round-robin implicit root domains - * all explicit root domains and implicit policy - */ -static void -parsedomainlist(char *list, domainset_t *mask, int *policyp) -{ - domainset_t rootmask; - struct numa_policy *policy; - char *l; - int p; - - /* - * Use the rootset's policy as the default for unspecified policies. - */ - if (cpuset_getdomain(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, - sizeof(rootmask), &rootmask, &p) != 0) - err(EXIT_FAILURE, "getdomain"); - - l = list; - for (policy = &policies[0]; policy->name != NULL; policy++) { - if (strncasecmp(l, policy->name, strlen(policy->name)) == 0) { - p = policy->policy; - l += strlen(policy->name); - if (*l != ':' && *l != '\0') - errx(EXIT_FAILURE, "Malformed list %s", list); - if (*l == ':') - l++; - break; - } - } - *policyp = p; - if (strcasecmp(l, "all") == 0 || *l == '\0') { - DOMAINSET_COPY(&rootmask, mask); - return; - } - parselist(l, (struct bitset *)mask, DOMAINSET_SETSIZE); -} - static void printset(struct bitset *mask, int size) { @@ -327,11 +180,11 @@ main(int argc, char *argv[]) break; case 'l': lflag = 1; - parsecpulist(optarg, &mask); + cpuset_parselist(optarg, &mask); break; case 'n': nflag = 1; - parsedomainlist(optarg, &domains, &policy); + domainset_parselist(optarg, &domains, &policy); break; case 'p': pflag = 1; diff --git a/crypto/krb5/src/util/ss/listen.c b/crypto/krb5/src/util/ss/listen.c index 08427df1e5c7..fe18475447be 100644 --- a/crypto/krb5/src/util/ss/listen.c +++ b/crypto/krb5/src/util/ss/listen.c @@ -14,9 +14,6 @@ #include <termios.h> #include <sys/param.h> -#ifdef __FreeBSD__ -#include <edit/readline/readline.h> -#else #if defined(HAVE_LIBEDIT) #include <editline/readline.h> #elif defined(HAVE_READLINE) @@ -25,7 +22,6 @@ #else #define NO_READLINE #endif -#endif static ss_data *current_info; static jmp_buf listen_jmpb; diff --git a/crypto/openssh/sshd_config b/crypto/openssh/sshd_config index a17484b1da2d..88c93386db65 100644 --- a/crypto/openssh/sshd_config +++ b/crypto/openssh/sshd_config @@ -56,12 +56,15 @@ AuthorizedKeysFile .ssh/authorized_keys # Don't read the user's ~/.rhosts and ~/.shosts files #IgnoreRhosts yes -# Change to yes to enable built-in password authentication. +# Change to "yes" to enable built-in password authentication. # Note that passwords may also be accepted via KbdInteractiveAuthentication. #PasswordAuthentication no #PermitEmptyPasswords no -# Change to no to disable PAM authentication +# Change to "no" to disable keyboard-interactive authentication. Depending on +# the system's configuration, this may involve passwords, challenge-response, +# one-time passwords or some combination of these and other methods. +# Keyboard interactive authentication is also used for PAM authentication. #KbdInteractiveAuthentication yes # Kerberos options diff --git a/etc/gss-krb5/mech b/etc/gss-krb5/mech index 94fed68a24eb..b13f665705c5 100644 --- a/etc/gss-krb5/mech +++ b/etc/gss-krb5/mech @@ -1,10 +1,10 @@ # # Name OID Library name Kernel module -kerberosv5 1.2.840.113554.1.2.2 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5 -kerberosv5 1.2.840.113554.1.2.3 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5 -kerberosv5 1.3.6.1.5.5.2 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5 -kerberosv5 1.2.840.48018.1.2.2.1 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5 -kerberosv5 1.2.840.48018.1.2.2.2 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5 -kerberosv5 1.2.840.48018.1.2.2.4 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5 -kerberosv5 1.2.840.48018.1.2.2.5 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5 -kerberosv5 1.3.5.1.5.2 /usr/lib/libgssapi_krb5.so.121 kgssapi_krb5 +kerberosv5 1.2.840.113554.1.2.2 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5 +kerberosv5 1.2.840.113554.1.2.3 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5 +kerberosv5 1.3.6.1.5.5.2 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5 +kerberosv5 1.2.840.48018.1.2.2.1 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5 +kerberosv5 1.2.840.48018.1.2.2.2 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5 +kerberosv5 1.2.840.48018.1.2.2.4 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5 +kerberosv5 1.2.840.48018.1.2.2.5 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5 +kerberosv5 1.3.5.1.5.2 /usr/lib/libgssapi_krb5.so.122 kgssapi_krb5 diff --git a/krb5/include/autoconf.h b/krb5/include/autoconf.h index 24039611c7e7..fe281d136954 100644 --- a/krb5/include/autoconf.h +++ b/krb5/include/autoconf.h @@ -5,9 +5,6 @@ #ifndef KRB5_AUTOCONF_H #define KRB5_AUTOCONF_H -#include <sys/types.h> -#include <machine/param.h> - /* Define if AES-NI support is enabled */ /* #undef AESNI */ @@ -263,7 +260,7 @@ #define HAVE_LIBCRYPTO 1 /* Define if building with libedit. */ -#define HAVE_LIBEDIT 1 +/* #undef HAVE_LIBEDIT */ /* Define to 1 if you have the `nsl' library (-lnsl). */ /* #undef HAVE_LIBNSL */ diff --git a/krb5/lib/Makefile.inc b/krb5/lib/Makefile.inc index 6d86e7a35146..b6e5f6275039 100644 --- a/krb5/lib/Makefile.inc +++ b/krb5/lib/Makefile.inc @@ -15,4 +15,4 @@ KRB5_KRB5LIBDIR= ${KRB5_SRCLIBDIR}/krb5 KRB5_K5CRYPTODIR= ${KRB5_SRCLIBDIR}/crypto SHLIBDIR?= /usr/lib -SHLIB_MAJOR?= 121 +SHLIB_MAJOR?= 122 diff --git a/krb5/plugins/Makefile.inc b/krb5/plugins/Makefile.inc index e888f79acd49..d98ed1d3887b 100644 --- a/krb5/plugins/Makefile.inc +++ b/krb5/plugins/Makefile.inc @@ -12,6 +12,6 @@ MK_INSTALLLIB= no SHLIB_NAME?= ${LIB}.so.${SHLIB_MAJOR} PLUGINSDIR= ${LIBDIR_BASE}/krb5/plugins SHLIBDIR= ${LIBDIR} -SHLIB_MAJOR= 121 +SHLIB_MAJOR= 122 .include "../Makefile.inc" diff --git a/krb5/usr.bin/kadmin/Makefile b/krb5/usr.bin/kadmin/Makefile index b2a094795d48..182cabb8f9f6 100644 --- a/krb5/usr.bin/kadmin/Makefile +++ b/krb5/usr.bin/kadmin/Makefile @@ -9,8 +9,8 @@ PROG= kadmin -LIBADD= kadmin_common edit kadm5clnt_mit gssrpc gssapi_krb5 krb5 k5crypto \ - com_err krb5ss krb5profile krb5support tinfow sys +LIBADD= kadmin_common kadm5clnt_mit gssrpc gssapi_krb5 krb5 k5crypto \ + com_err krb5ss krb5profile krb5support sys SRCS= keytab.c diff --git a/krb5/usr.bin/ktutil/Makefile b/krb5/usr.bin/ktutil/Makefile index 6bcb4877ed6f..15991cb49bce 100644 --- a/krb5/usr.bin/ktutil/Makefile +++ b/krb5/usr.bin/ktutil/Makefile @@ -9,7 +9,7 @@ PROG= ktutil -LIBADD= edit krb5 k5crypto com_err krb5profile krb5support krb5ss tinfow sys +LIBADD= krb5 k5crypto com_err krb5profile krb5support krb5ss sys SRCS= ktutil.c \ ktutil_ct.c \ diff --git a/krb5/usr.sbin/kadmin.local/Makefile b/krb5/usr.sbin/kadmin.local/Makefile index 4b99f490bd7b..3930c0fc4694 100644 --- a/krb5/usr.sbin/kadmin.local/Makefile +++ b/krb5/usr.sbin/kadmin.local/Makefile @@ -11,8 +11,8 @@ PACKAGE= kerberos-kdc PROG= kadmin.local -LIBADD= kadmin_common edit kadm5srv_mit kdb5 gssrpc gssapi_krb5 krb5 \ - k5crypto com_err krb5profile krb5support krb5ss tinfow sys +LIBADD= kadmin_common kadm5srv_mit kdb5 gssrpc gssapi_krb5 krb5 \ + k5crypto com_err krb5profile krb5support krb5ss sys SRCS= keytab_local.c diff --git a/krb5/util/Makefile.inc b/krb5/util/Makefile.inc index 858c1eb48dd9..95b93a793d77 100644 --- a/krb5/util/Makefile.inc +++ b/krb5/util/Makefile.inc @@ -10,4 +10,4 @@ .include "../Makefile.inc" SHLIBDIR?= /usr/lib -SHLIB_MAJOR?= 121 +SHLIB_MAJOR?= 122 diff --git a/krb5/util/ss/Makefile b/krb5/util/ss/Makefile index e7e025184284..2c43f2b5934f 100644 --- a/krb5/util/ss/Makefile +++ b/krb5/util/ss/Makefile @@ -47,6 +47,11 @@ CFLAGS+=-I${KRB5_DIR}/util/ss \ -I${.OBJDIR:H} \ -I${.OBJDIR} +.if !defined(BOOTSTRAPPING) +CFLAGS+= -DHAVE_READLINE=1 \ + -I${SYSROOT:U${DESTDIR}}/${INCLUDEDIR}/edit +.endif + GEN= std_rqs.c ${GEN_SS_ERR_C} ${GEN_SS_ERR_H} GEN_SCRIPTS= ct_c.awk ct_c.sed mk_cmds GEN_SS_ERR_C= ${GEN_SS_ERR:S/.et$/.c/} diff --git a/lib/libc/db/hash/hash.c b/lib/libc/db/hash/hash.c index cc96fb5ce326..1eb01ee0f0c5 100644 --- a/lib/libc/db/hash/hash.c +++ b/lib/libc/db/hash/hash.c @@ -99,11 +99,6 @@ __hash_open(const char *file, int flags, int mode, DB *dbp; int bpages, hdrsize, new_table, nsegs, save_errno; - if ((flags & O_ACCMODE) == O_WRONLY) { - errno = EINVAL; - return (NULL); - } - if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB)))) return (NULL); hashp->fp = -1; @@ -115,6 +110,10 @@ __hash_open(const char *file, int flags, int mode, * we can check accesses. */ hashp->flags = flags; + if ((flags & O_ACCMODE) == O_WRONLY) { + flags &= ~O_WRONLY; + flags |= O_RDWR; + } if (file) { if ((hashp->fp = _open(file, flags | O_CLOEXEC, mode)) == -1) @@ -180,7 +179,7 @@ __hash_open(const char *file, int flags, int mode, __buf_init(hashp, DEF_BUFSIZE); hashp->new_file = new_table; - hashp->save_file = file && (hashp->flags & O_RDWR); + hashp->save_file = file && (flags & O_RDWR); hashp->cbucket = -1; if (!(dbp = (DB *)malloc(sizeof(DB)))) { save_errno = errno; @@ -524,6 +523,10 @@ hash_get(const DB *dbp, const DBT *key, DBT *data, u_int32_t flag) hashp->error = errno = EINVAL; return (ERROR); } + if ((hashp->flags & O_ACCMODE) == O_WRONLY) { + hashp->error = errno = EPERM; + return (ERROR); + } return (hash_access(hashp, HASH_GET, (DBT *)key, data)); } diff --git a/lib/libc/db/man/dbm.3 b/lib/libc/db/man/dbm.3 index c5a83c7acef4..30787600ad2d 100644 --- a/lib/libc/db/man/dbm.3 +++ b/lib/libc/db/man/dbm.3 @@ -13,7 +13,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 2, 2022 +.Dd July 25, 2025 .Dt DBM 3 .Os .Sh NAME @@ -99,9 +99,6 @@ is a typical value for .Li 0660 is a typical value for .Fa mode . -.Dv O_WRONLY -is not allowed in -.Fa flags . The pointer returned by .Fn dbm_open identifies the database and is the diff --git a/lib/libc/db/man/dbopen.3 b/lib/libc/db/man/dbopen.3 index 64cef88506d8..7fe515f17849 100644 --- a/lib/libc/db/man/dbopen.3 +++ b/lib/libc/db/man/dbopen.3 @@ -76,13 +76,10 @@ are as specified to the .Xr open 2 routine, however, only the .Dv O_CREAT , O_EXCL , O_EXLOCK , O_NOFOLLOW , O_NONBLOCK , -.Dv O_RDONLY , O_RDWR , O_SHLOCK , O_SYNC +.Dv O_RDONLY , O_RDWR , O_SHLOCK , O_SYNC, O_WRONLY, and .Dv O_TRUNC flags are meaningful. -(Note, opening a database file -.Dv O_WRONLY -is not possible.) .\"Three additional options may be specified by .\".Em or Ns 'ing .\"them into the diff --git a/lib/libc/tests/db/Makefile b/lib/libc/tests/db/Makefile index 54b38b94a581..cc181cc81160 100644 --- a/lib/libc/tests/db/Makefile +++ b/lib/libc/tests/db/Makefile @@ -8,6 +8,7 @@ PROGS+= h_lfsr ${PACKAGE}FILES+= README ATF_TESTS_C+= dbm_open_test +ATF_TESTS_C+= dbm_perm_test NETBSD_ATF_TESTS_C+= db_hash_seq_test NETBSD_ATF_TESTS_SH+= db_test diff --git a/lib/libc/tests/db/dbm_open_test.c b/lib/libc/tests/db/dbm_open_test.c index 18d398e16b2a..8a3e888bf72c 100644 --- a/lib/libc/tests/db/dbm_open_test.c +++ b/lib/libc/tests/db/dbm_open_test.c @@ -4,14 +4,15 @@ * SPDX-License-Identifier: BSD-2-Clause */ -#include <sys/mman.h> - #include <fcntl.h> #include <ndbm.h> #include <stdio.h> #include <atf-c.h> +static const char *path = "tmp"; +static const char *dbname = "tmp.db"; + ATF_TC(dbm_open_missing_test); ATF_TC_HEAD(dbm_open_missing_test, tc) { @@ -21,23 +22,31 @@ ATF_TC_HEAD(dbm_open_missing_test, tc) ATF_TC_BODY(dbm_open_missing_test, tc) { - const char *path = "tmp"; - const char *dbname = "tmp.db"; /* * POSIX.1 specifies that a missing database file should * always get created if O_CREAT is present, except when * O_EXCL is specified as well. */ - ATF_CHECK(dbm_open(path, O_RDONLY, _PROT_ALL) == NULL); + ATF_CHECK(dbm_open(path, O_RDONLY, 0755) == NULL); + ATF_REQUIRE(!atf_utils_file_exists(dbname)); + ATF_CHECK(dbm_open(path, O_RDONLY | O_CREAT, 0755) != NULL); + ATF_REQUIRE(atf_utils_file_exists(dbname)); + ATF_CHECK(dbm_open(path, O_RDONLY | O_CREAT | O_EXCL, 0755) == NULL); +} + +ATF_TC_WITHOUT_HEAD(dbm_open_wronly_test); +ATF_TC_BODY(dbm_open_wronly_test, tc) +{ + ATF_CHECK(dbm_open(path, O_WRONLY, 0755) == NULL); ATF_REQUIRE(!atf_utils_file_exists(dbname)); - ATF_CHECK(dbm_open(path, O_RDONLY | O_CREAT, _PROT_ALL) != NULL); + ATF_CHECK(dbm_open(path, O_WRONLY | O_CREAT, 0755) != NULL); ATF_REQUIRE(atf_utils_file_exists(dbname)); - ATF_CHECK(dbm_open(path, O_RDONLY | O_CREAT | O_EXCL, _PROT_ALL) == NULL); } ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, dbm_open_missing_test); + ATF_TP_ADD_TC(tp, dbm_open_wronly_test); return (atf_no_error()); } diff --git a/lib/libc/tests/db/dbm_perm_test.c b/lib/libc/tests/db/dbm_perm_test.c new file mode 100644 index 000000000000..c07210292014 --- /dev/null +++ b/lib/libc/tests/db/dbm_perm_test.c @@ -0,0 +1,98 @@ +/*- + * Copyright (c) 2025 Klara, Inc. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <errno.h> +#include <fcntl.h> +#include <ndbm.h> +#include <stdio.h> + +#include <atf-c.h> + +static const char *path = "tmp"; +static const char *dbname = "tmp.db"; + +static void +create_db(void) +{ + DB *db; + datum data, key; + + data.dptr = "bar"; + data.dsize = strlen("bar"); + key.dptr = "foo"; + key.dsize = strlen("foo"); + + db = dbm_open(path, O_RDWR | O_CREAT, 0755); + ATF_CHECK(db != NULL); + ATF_REQUIRE(atf_utils_file_exists(dbname)); + ATF_REQUIRE(dbm_store(db, key, data, DBM_INSERT) != -1); + dbm_close(db); +} + +ATF_TC_WITHOUT_HEAD(dbm_rdonly_test); +ATF_TC_BODY(dbm_rdonly_test, tc) +{ + DB *db; + datum data, key; + + bzero(&data, sizeof(data)); + key.dptr = "foo"; + key.dsize = strlen("foo"); + create_db(); + + db = dbm_open(path, O_RDONLY, 0755); + data = dbm_fetch(db, key); + ATF_REQUIRE(data.dptr != NULL); + ATF_REQUIRE(strncmp((const char*)data.dptr, "bar", data.dsize) == 0); + ATF_REQUIRE(dbm_store(db, key, data, DBM_REPLACE) == -1); + ATF_REQUIRE(errno == EPERM); +} + +ATF_TC_WITHOUT_HEAD(dbm_wronly_test); +ATF_TC_BODY(dbm_wronly_test, tc) +{ + DB *db; + datum data, key; + + key.dptr = "foo"; + key.dsize = strlen("foo"); + data.dptr = "baz"; + data.dsize = strlen("baz"); + create_db(); + + db = dbm_open(path, O_WRONLY, 0755); + data = dbm_fetch(db, key); + ATF_REQUIRE(data.dptr == NULL); + ATF_REQUIRE(errno == EPERM); + ATF_REQUIRE(dbm_store(db, key, data, DBM_REPLACE) != -1); +} + +ATF_TC_WITHOUT_HEAD(dbm_rdwr_test); +ATF_TC_BODY(dbm_rdwr_test, tc) +{ + DB *db; + datum data, key; + + key.dptr = "foo"; + key.dsize = strlen("foo"); + create_db(); + + db = dbm_open(path, O_RDWR, 0755); + data = dbm_fetch(db, key); + ATF_REQUIRE(data.dptr != NULL); + data.dptr = "baz"; + data.dsize = strlen("baz"); + ATF_REQUIRE(dbm_store(db, key, data, DBM_REPLACE) != -1); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, dbm_rdonly_test); + ATF_TP_ADD_TC(tp, dbm_wronly_test); + ATF_TP_ADD_TC(tp, dbm_rdwr_test); + + return (atf_no_error()); +} diff --git a/lib/libnvmf/libnvmf.h b/lib/libnvmf/libnvmf.h index 7cdd7e433455..6b38fd286596 100644 --- a/lib/libnvmf/libnvmf.h +++ b/lib/libnvmf/libnvmf.h @@ -111,8 +111,13 @@ const void *nvmf_capsule_cqe(const struct nvmf_capsule *nc); /* Return a string name for a transport type. */ const char *nvmf_transport_type(uint8_t trtype); -/* Validate a NVMe Qualified Name. */ +/* + * Validate a NVMe Qualified Name. The second version enforces + * stricter checks inline with the specification. The first version + * enforces more minimal checks. + */ bool nvmf_nqn_valid(const char *nqn); +bool nvmf_nqn_valid_strict(const char *nqn); /* Controller-specific APIs. */ diff --git a/lib/libnvmf/nvmf_controller.c b/lib/libnvmf/nvmf_controller.c index 971dccbe039e..f26f11633e03 100644 --- a/lib/libnvmf/nvmf_controller.c +++ b/lib/libnvmf/nvmf_controller.c @@ -7,6 +7,7 @@ #include <sys/utsname.h> #include <assert.h> +#include <ctype.h> #include <errno.h> #include <string.h> #include <unistd.h> @@ -15,6 +16,55 @@ #include "internal.h" #include "nvmft_subr.h" +bool +nvmf_nqn_valid_strict(const char *nqn) +{ + size_t len; + + if (!nvmf_nqn_valid(nqn)) + return (false); + + /* + * Stricter checks from the spec. Linux does not seem to + * require these. + */ + len = strlen(nqn); + + /* + * NVMF_NQN_MIN_LEN does not include '.' and require at least + * one character of a domain name. + */ + if (len < NVMF_NQN_MIN_LEN + 2) + return (false); + if (memcmp("nqn.", nqn, strlen("nqn.")) != 0) + return (false); + nqn += strlen("nqn."); + + /* Next 4 digits must be a year. */ + for (u_int i = 0; i < 4; i++) { + if (!isdigit(nqn[i])) + return (false); + } + nqn += 4; + + /* '-' between year and month. */ + if (nqn[0] != '-') + return (false); + nqn++; + + /* 2 digit month. */ + for (u_int i = 0; i < 2; i++) { + if (!isdigit(nqn[i])) + return (false); + } + nqn += 2; + + /* '.' between month and reverse domain name. */ + if (nqn[0] != '.') + return (false); + return (true); +} + void nvmf_init_cqe(void *cqe, const struct nvmf_capsule *nc, uint16_t status) { diff --git a/lib/libsys/getsockopt.2 b/lib/libsys/getsockopt.2 index 8839b61597a2..3867824681d7 100644 --- a/lib/libsys/getsockopt.2 +++ b/lib/libsys/getsockopt.2 @@ -593,6 +593,15 @@ specified amount of time has elapsed since the initial call to If .Fa sp_fd is -1, the socket will be unspliced immediately. +A successful +.Xr select 2 , +.Xr poll 2 , +or +.Xr kqueue 2 +operation testing the ability to read from the source socket indicates +that the splicing has terminated and at least one byte is available for +reading. +When one of the sockets gets closed, splicing ends. .Pp When passed to .Fn getsockopt , diff --git a/lib/libthr/thread/thr_getthreadid_np.c b/lib/libthr/thread/thr_getthreadid_np.c index ade332519dfb..ffecd0bc7ea9 100644 --- a/lib/libthr/thread/thr_getthreadid_np.c +++ b/lib/libthr/thread/thr_getthreadid_np.c @@ -36,7 +36,7 @@ __weak_reference(_thr_getthreadid_np, _pthread_getthreadid_np); __weak_reference(_thr_getthreadid_np, pthread_getthreadid_np); /* - * Provide the equivelant to AIX pthread_getthreadid_np() function. + * Provide the equivalent to AIX pthread_getthreadid_np() function. */ int _thr_getthreadid_np(void) diff --git a/lib/libutil/Makefile b/lib/libutil/Makefile index 0639745d08fc..2d92c5ba1916 100644 --- a/lib/libutil/Makefile +++ b/lib/libutil/Makefile @@ -38,6 +38,7 @@ MAN+= cpuset.3 expand_number.3 flopen.3 fparseln.3 ftime.3 getlocalbase.3 \ property.3 pty.3 quotafile.3 realhostname.3 realhostname_sa.3 \ _secure_path.3 trimdomain.3 uucplock.3 pw_util.3 MAN+= login.conf.5 +MLINKS+=cpuset.3 domainset_parselist.3 MLINKS+=flopen.3 flopenat.3 MLINKS+=kld.3 kld_isloaded.3 kld.3 kld_load.3 MLINKS+=login_auth.3 auth_cat.3 login_auth.3 auth_checknologin.3 diff --git a/lib/libutil/cpuset.3 b/lib/libutil/cpuset.3 index be29d5309ef0..47dffd209ee6 100644 --- a/lib/libutil/cpuset.3 +++ b/lib/libutil/cpuset.3 @@ -22,21 +22,22 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd October 31, 2017 +.Dd June 24, 2025 .Dt CPUSET 3 .Os .Sh NAME -.Nm cpuset_parselist -.Nd utility functions for -.Xr cpuset 2 -handling +.Nm cpuset_parselist , +.Nm domainset_parselist +.Nd utility functions for cpuset(2) handling .Sh LIBRARY .Lb libutil .Sh SYNOPSIS .In sys/cpuset.h .In libutil.h .Ft int -.Fn cpuset_parselist "const char *cpu-list" "cpuset_t *mask" +.Fn cpuset_parselist "const char *cpu_list" "cpuset_t *mask" +.Ft int +.Fn domainset_parselist "const char *domain_policy" "domainset_t *domain_mask" "int *policyp" .Sh DESCRIPTION The .Fn cpuset_parselist @@ -52,6 +53,27 @@ numbers. A special list of .Dq all may be specified in which case the list includes all CPUs from the root set. +.Pp +The +.Fn domainset_parselist +function parses a +.Xr domainset 9 +memory domain allocation policy +specified by +.Va domain_policy +filling the +.Va domain_mask +and the +.Va policyp . +A valid +.Va domain_policy +is formatted as +.Ar policy:domain-list . +See the +.Ar -n +flag in +.Xr cpuset 1 +for a list of valid domain policies. .Sh RETURN VALUES Return values can be the following .Bl -tag -width Er @@ -60,19 +82,30 @@ The parsing was successful .It Dv CPUSET_PARSE_ERROR The .Va cpu-list +or +.Va domain-policy format is invalid .It Dv CPUSET_PARSE_GETAFFINITY The .Xr cpuset_getaffinity 2 call has failed .It Dv CPUSET_PARSE_INVALID_CPU -The number of supported CPUs has been exceeded. +The number of supported CPUs or NUMA domains has been exceeded. The maximum number being -.Va CPU_SETSIZE . +.Va CPU_SETSIZE +and +.Va DOMAINSET_SETSIZE +respectively. +.It Dv CPUSET_PARSE_GETDOMAIN +The +.Xr cpuset_getdomain 2 +call has failed .El .Sh SEE ALSO .Xr cpuset 1 , .Xr cpuset 2 , -.Xr cpuset 9 +.Xr numa 4 , +.Xr cpuset 9 , +.Xr domainset 9 .Sh AUTHORS .An Jeffrey Roberson Aq Mt jeff@FreeBSD.org diff --git a/lib/libutil/cpuset.c b/lib/libutil/cpuset.c index 3c374bfa6cac..d4840af7e175 100644 --- a/lib/libutil/cpuset.c +++ b/lib/libutil/cpuset.c @@ -27,34 +27,48 @@ * SUCH DAMAGE. */ +#include <sys/cdefs.h> +#define _WANT_FREEBSD_BITSET + #include <sys/types.h> #include <sys/cpuset.h> +#include <sys/domainset.h> #include <stdlib.h> #include <string.h> #include <libutil.h> #include <ctype.h> -int -cpuset_parselist(const char *list, cpuset_t *mask) +struct numa_policy { + const char *name; + int policy; +}; + +static const struct numa_policy policies[] = { + { "round-robin", DOMAINSET_POLICY_ROUNDROBIN }, + { "rr", DOMAINSET_POLICY_ROUNDROBIN }, + { "first-touch", DOMAINSET_POLICY_FIRSTTOUCH }, + { "ft", DOMAINSET_POLICY_FIRSTTOUCH }, + { "prefer", DOMAINSET_POLICY_PREFER }, + { "interleave", DOMAINSET_POLICY_INTERLEAVE}, + { "il", DOMAINSET_POLICY_INTERLEAVE}, + { NULL, DOMAINSET_POLICY_INVALID } +}; + +static int +parselist(const char *list, struct bitset *mask, int size) { enum { NONE, NUM, DASH } state; int lastnum; int curnum; const char *l; - if (strcasecmp(list, "all") == 0) { - if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, - sizeof(*mask), mask) != 0) - return (CPUSET_PARSE_GETAFFINITY); - return (CPUSET_PARSE_OK); - } state = NONE; curnum = lastnum = 0; for (l = list; *l != '\0';) { if (isdigit(*l)) { curnum = atoi(l); - if (curnum > CPU_SETSIZE) + if (curnum >= size) return (CPUSET_PARSE_INVALID_CPU); while (isdigit(*l)) l++; @@ -65,7 +79,7 @@ cpuset_parselist(const char *list, cpuset_t *mask) break; case DASH: for (; lastnum <= curnum; lastnum++) - CPU_SET(lastnum, mask); + BIT_SET(size, lastnum, mask); state = NONE; break; case NUM: @@ -80,7 +94,7 @@ cpuset_parselist(const char *list, cpuset_t *mask) case NONE: break; case NUM: - CPU_SET(curnum, mask); + BIT_SET(size, curnum, mask); state = NONE; break; case DASH: @@ -102,7 +116,7 @@ cpuset_parselist(const char *list, cpuset_t *mask) case NONE: break; case NUM: - CPU_SET(curnum, mask); + BIT_SET(size, curnum, mask); break; case DASH: goto parserr; @@ -111,3 +125,63 @@ cpuset_parselist(const char *list, cpuset_t *mask) parserr: return (CPUSET_PARSE_ERROR); } + +/* + * permissively parse policy:domain list + * allow: + * round-robin:0-4 explicit + * round-robin:all explicit root domains + * 0-4 implicit root policy + * round-robin implicit root domains + * all explicit root domains and implicit policy + */ +int +domainset_parselist(const char *list, domainset_t *mask, int *policyp) +{ + domainset_t rootmask; + const struct numa_policy *policy; + const char *l; + int p; + + /* + * Use the rootset's policy as the default for unspecified policies. + */ + if (cpuset_getdomain(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, + sizeof(rootmask), &rootmask, &p) != 0) + return (CPUSET_PARSE_GETDOMAIN); + + if (list == NULL || strcasecmp(list, "all") == 0 || *list == '\0') { + *policyp = p; + DOMAINSET_COPY(&rootmask, mask); + return (CPUSET_PARSE_OK); + } + + l = list; + for (policy = &policies[0]; policy->name != NULL; policy++) { + if (strncasecmp(l, policy->name, strlen(policy->name)) == 0) { + p = policy->policy; + l += strlen(policy->name); + if (*l != ':' && *l != '\0') + return (CPUSET_PARSE_ERROR); + if (*l == ':') + l++; + break; + } + } + *policyp = p; + + return (parselist(l, (struct bitset *)mask, DOMAINSET_SETSIZE)); +} + +int +cpuset_parselist(const char *list, cpuset_t *mask) +{ + if (strcasecmp(list, "all") == 0) { + if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1, + sizeof(*mask), mask) != 0) + return (CPUSET_PARSE_GETAFFINITY); + return (CPUSET_PARSE_OK); + } + + return (parselist(list, (struct bitset *)mask, CPU_SETSIZE)); +} diff --git a/lib/libutil/libutil.h b/lib/libutil/libutil.h index 919855184caf..7d8bfdf67fac 100644 --- a/lib/libutil/libutil.h +++ b/lib/libutil/libutil.h @@ -213,7 +213,13 @@ int cpuset_parselist(const char *list, cpuset_t *mask); #define CPUSET_PARSE_OK 0 #define CPUSET_PARSE_GETAFFINITY -1 #define CPUSET_PARSE_ERROR -2 -#define CPUSET_PARSE_INVALID_CPU -3 +#define CPUSET_PARSE_OUT_OF_RANGE -3 +#define CPUSET_PARSE_GETDOMAIN -4 +#define CPUSET_PARSE_INVALID_CPU CPUSET_PARSE_OUT_OF_RANGE /* backwards compat */ +#endif + +#ifdef _SYS_DOMAINSET_H_ +int domainset_parselist(const char *list, domainset_t *mask, int *policyp); #endif __END_DECLS diff --git a/lib/libvmmapi/Makefile b/lib/libvmmapi/Makefile index 1866c8fa5e7c..6dd0deeaa9c0 100644 --- a/lib/libvmmapi/Makefile +++ b/lib/libvmmapi/Makefile @@ -1,6 +1,6 @@ PACKAGE=lib${LIB} LIB= vmmapi -SHLIB_MAJOR= 6 +SHLIB_MAJOR= 7 SRCS= vmmapi.c INCS= vmmapi.h diff --git a/lib/libvmmapi/internal.h b/lib/libvmmapi/internal.h index aa7b1d8e6a93..4afe1cab3460 100644 --- a/lib/libvmmapi/internal.h +++ b/lib/libvmmapi/internal.h @@ -8,12 +8,7 @@ #define __VMMAPI_INTERNAL_H__ #include <sys/types.h> - -enum { - VM_MEMSEG_LOW, - VM_MEMSEG_HIGH, - VM_MEMSEG_COUNT, -}; +#include <dev/vmm/vmm_mem.h> struct vmctx { int fd; /* device file descriptor */ @@ -21,7 +16,9 @@ struct vmctx { struct { vm_paddr_t base; vm_size_t size; - } memsegs[VM_MEMSEG_COUNT]; + } memsegs[VM_MAX_MEMSEGS]; + size_t lowmem_size; + size_t highmem_size; int memflags; char *baseaddr; char *name; diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index a1a5d56ff8a2..77f0f8f5c581 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -28,13 +28,14 @@ #include <sys/param.h> #include <sys/capsicum.h> +#include <sys/cpuset.h> +#include <sys/domainset.h> #include <sys/sysctl.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/linker.h> #include <sys/module.h> #include <sys/_iovec.h> -#include <sys/cpuset.h> #include <capsicum_helpers.h> #include <err.h> @@ -322,8 +323,8 @@ vm_get_guestmem_from_ctx(struct vmctx *ctx, char **guest_baseaddr, { *guest_baseaddr = ctx->baseaddr; - *lowmem_size = ctx->memsegs[VM_MEMSEG_LOW].size; - *highmem_size = ctx->memsegs[VM_MEMSEG_HIGH].size; + *lowmem_size = ctx->lowmem_size; + *highmem_size = ctx->highmem_size; return (0); } @@ -379,7 +380,8 @@ cmpseg(size_t len, const char *str, size_t len2, const char *str2) } static int -vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name) +vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name, + int ds_policy, domainset_t *ds_mask, size_t ds_size) { struct vm_memseg memseg; size_t n; @@ -407,6 +409,13 @@ vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name) bzero(&memseg, sizeof(struct vm_memseg)); memseg.segid = segid; memseg.len = len; + if (ds_mask == NULL) { + memseg.ds_policy = DOMAINSET_POLICY_INVALID; + } else { + memseg.ds_policy = ds_policy; + memseg.ds_mask = ds_mask; + memseg.ds_mask_size = ds_size; + } if (name != NULL) { n = strlcpy(memseg.name, name, sizeof(memseg.name)); if (n >= sizeof(memseg.name)) { @@ -442,13 +451,14 @@ vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf, } static int -setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base) +map_memory_segment(struct vmctx *ctx, int segid, vm_paddr_t gpa, size_t len, + size_t segoff, char *base) { char *ptr; int error, flags; /* Map 'len' bytes starting at 'gpa' in the guest address space */ - error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL); + error = vm_mmap_memseg(ctx, gpa, segid, segoff, len, PROT_ALL); if (error) return (error); @@ -464,65 +474,136 @@ setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base) return (0); } +/* + * Allocates and maps virtual machine memory segments according + * to the NUMA topology specified by the 'doms' array. + * + * The domains are laid out sequentially in the guest's physical address space. + * The [VM_LOWMEM_LIMIT, VM_HIGHMEM_BASE) address range is skipped and + * left unmapped. + */ int -vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) +vm_setup_memory_domains(struct vmctx *ctx, enum vm_mmap_style vms, + struct vm_mem_domain *doms, int ndoms) { - size_t objsize, len; - vm_paddr_t gpa; + size_t low_len, len, totalsize; + struct vm_mem_domain *dom; + struct vm_memseg memseg; char *baseaddr, *ptr; - int error; + int error, i, segid; + vm_paddr_t gpa; + /* Sanity checks. */ assert(vms == VM_MMAP_ALL); - - /* - * If 'memsize' cannot fit entirely in the 'lowmem' segment then create - * another 'highmem' segment above VM_HIGHMEM_BASE for the remainder. - */ - if (memsize > VM_LOWMEM_LIMIT) { - ctx->memsegs[VM_MEMSEG_LOW].size = VM_LOWMEM_LIMIT; - ctx->memsegs[VM_MEMSEG_HIGH].size = memsize - VM_LOWMEM_LIMIT; - objsize = VM_HIGHMEM_BASE + ctx->memsegs[VM_MEMSEG_HIGH].size; - } else { - ctx->memsegs[VM_MEMSEG_LOW].size = memsize; - ctx->memsegs[VM_MEMSEG_HIGH].size = 0; - objsize = memsize; + if (doms == NULL || ndoms <= 0 || ndoms > VM_MAXMEMDOM) { + errno = EINVAL; + return (-1); } - error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL); - if (error) - return (error); + /* Calculate total memory size. */ + totalsize = 0; + for (i = 0; i < ndoms; i++) + totalsize += doms[i].size; + + if (totalsize > VM_LOWMEM_LIMIT) + totalsize = VM_HIGHMEM_BASE + (totalsize - VM_LOWMEM_LIMIT); /* * Stake out a contiguous region covering the guest physical memory * and the adjoining guard regions. */ - len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE; + len = VM_MMAP_GUARD_SIZE + totalsize + VM_MMAP_GUARD_SIZE; ptr = mmap(NULL, len, PROT_NONE, MAP_GUARD | MAP_ALIGNED_SUPER, -1, 0); if (ptr == MAP_FAILED) return (-1); - baseaddr = ptr + VM_MMAP_GUARD_SIZE; - if (ctx->memsegs[VM_MEMSEG_HIGH].size > 0) { - gpa = VM_HIGHMEM_BASE; - len = ctx->memsegs[VM_MEMSEG_HIGH].size; - error = setup_memory_segment(ctx, gpa, len, baseaddr); - if (error) - return (error); - } - if (ctx->memsegs[VM_MEMSEG_LOW].size > 0) { - gpa = 0; - len = ctx->memsegs[VM_MEMSEG_LOW].size; - error = setup_memory_segment(ctx, gpa, len, baseaddr); - if (error) - return (error); - } + /* + * Allocate and map memory segments for the virtual machine. + */ + gpa = VM_LOWMEM_LIMIT > 0 ? 0 : VM_HIGHMEM_BASE; + ctx->lowmem_size = 0; + ctx->highmem_size = 0; + for (i = 0; i < ndoms; i++) { + segid = VM_SYSMEM + i; + dom = &doms[i]; + + /* + * Check if the memory segment already exists. + * If 'ndoms' is greater than one, refuse to proceed if the + * memseg already exists. If only one domain was requested, use + * the existing segment to preserve the behaviour of the previous + * implementation. + * + * Splitting existing memory segments is tedious and + * error-prone, which is why we don't support NUMA + * domains for bhyveload(8)-loaded VMs. + */ + error = vm_get_memseg(ctx, segid, &len, memseg.name, + sizeof(memseg.name)); + if (error == 0 && len != 0) { + if (ndoms != 1) { + errno = EEXIST; + return (-1); + } else + doms[0].size = len; + } else { + error = vm_alloc_memseg(ctx, segid, dom->size, NULL, + dom->ds_policy, dom->ds_mask, dom->ds_size); + if (error) + return (error); + } + /* + * If a domain is split by VM_LOWMEM_LIMIT then break + * its segment mapping into two parts, one below VM_LOWMEM_LIMIT + * and one above VM_HIGHMEM_BASE. + */ + if (gpa <= VM_LOWMEM_LIMIT && + gpa + dom->size > VM_LOWMEM_LIMIT) { + low_len = VM_LOWMEM_LIMIT - gpa; + error = map_memory_segment(ctx, segid, gpa, low_len, 0, + baseaddr); + if (error) + return (error); + ctx->lowmem_size = VM_LOWMEM_LIMIT; + /* Map the remainder. */ + gpa = VM_HIGHMEM_BASE; + len = dom->size - low_len; + error = map_memory_segment(ctx, segid, gpa, len, + low_len, baseaddr); + if (error) + return (error); + } else { + len = dom->size; + error = map_memory_segment(ctx, segid, gpa, len, 0, + baseaddr); + if (error) + return (error); + } + if (gpa <= VM_LOWMEM_LIMIT) + ctx->lowmem_size += len; + else + ctx->highmem_size += len; + gpa += len; + } ctx->baseaddr = baseaddr; return (0); } +int +vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) +{ + struct vm_mem_domain dom0; + + memset(&dom0, 0, sizeof(dom0)); + dom0.ds_policy = DOMAINSET_POLICY_INVALID; + dom0.size = memsize; + + return (vm_setup_memory_domains(ctx, vms, &dom0, 1)); +} + /* * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in * the lowmem or highmem regions. @@ -535,13 +616,13 @@ vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) { vm_size_t lowsize, highsize; - lowsize = ctx->memsegs[VM_MEMSEG_LOW].size; + lowsize = ctx->lowmem_size; if (lowsize > 0) { if (gaddr < lowsize && len <= lowsize && gaddr + len <= lowsize) return (ctx->baseaddr + gaddr); } - highsize = ctx->memsegs[VM_MEMSEG_HIGH].size; + highsize = ctx->highmem_size; if (highsize > 0 && gaddr >= VM_HIGHMEM_BASE) { if (gaddr < VM_HIGHMEM_BASE + highsize && len <= highsize && gaddr + len <= VM_HIGHMEM_BASE + highsize) @@ -559,12 +640,12 @@ vm_rev_map_gpa(struct vmctx *ctx, void *addr) offaddr = (char *)addr - ctx->baseaddr; - lowsize = ctx->memsegs[VM_MEMSEG_LOW].size; + lowsize = ctx->lowmem_size; if (lowsize > 0) if (offaddr <= lowsize) return (offaddr); - highsize = ctx->memsegs[VM_MEMSEG_HIGH].size; + highsize = ctx->highmem_size; if (highsize > 0) if (offaddr >= VM_HIGHMEM_BASE && offaddr < VM_HIGHMEM_BASE + highsize) @@ -583,8 +664,7 @@ vm_get_name(struct vmctx *ctx) size_t vm_get_lowmem_size(struct vmctx *ctx) { - - return (ctx->memsegs[VM_MEMSEG_LOW].size); + return (ctx->lowmem_size); } vm_paddr_t @@ -597,8 +677,7 @@ vm_get_highmem_base(struct vmctx *ctx __unused) size_t vm_get_highmem_size(struct vmctx *ctx) { - - return (ctx->memsegs[VM_MEMSEG_HIGH].size); + return (ctx->highmem_size); } void * @@ -616,7 +695,7 @@ vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len) goto done; } - error = vm_alloc_memseg(ctx, segid, len, name); + error = vm_alloc_memseg(ctx, segid, len, name, 0, NULL, 0); if (error) goto done; diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index 440064ad13cb..b637c45d1eff 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -40,7 +40,7 @@ * API version for out-of-tree consumers like grub-bhyve for making compile * time decisions. */ -#define VMMAPI_VERSION 0200 /* 2 digit major followed by 2 digit minor */ +#define VMMAPI_VERSION 0300 /* 2 digit major followed by 2 digit minor */ struct iovec; struct vcpu; @@ -64,16 +64,12 @@ enum vm_mmap_style { #define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */ #define VM_MEM_F_WIRED 0x02 /* guest memory is wired */ -/* - * Identifiers for memory segments: - * - vm_setup_memory() uses VM_SYSMEM for the system memory segment. - * - the remaining identifiers can be used to create devmem segments. - */ -enum { - VM_SYSMEM, - VM_BOOTROM, - VM_FRAMEBUFFER, - VM_PCIROM, +/* Memory size and allocation policy for a single NUMA domain. */ +struct vm_mem_domain { + size_t size; + int ds_policy; + domainset_t *ds_mask; + size_t ds_size; }; __BEGIN_DECLS @@ -127,7 +123,9 @@ struct vcpu *vm_vcpu_open(struct vmctx *ctx, int vcpuid); void vm_vcpu_close(struct vcpu *vcpu); int vcpu_id(struct vcpu *vcpu); int vm_parse_memsize(const char *optarg, size_t *memsize); -int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s); +int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s); +int vm_setup_memory_domains(struct vmctx *ctx, enum vm_mmap_style s, + struct vm_mem_domain *doms, int ndoms); void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len); /* inverse operation to vm_map_gpa - extract guest address from host pointer */ vm_paddr_t vm_rev_map_gpa(struct vmctx *ctx, void *addr); diff --git a/libexec/comsat/comsat.c b/libexec/comsat/comsat.c index d5d1eedeb5f3..cb00ee4a9392 100644 --- a/libexec/comsat/comsat.c +++ b/libexec/comsat/comsat.c @@ -113,29 +113,24 @@ mailfor(char *name) char *file; off_t offset; int folder; - char buf[sizeof(_PATH_MAILDIR) + sizeof(utp->ut_user) + 1]; - char buf2[sizeof(_PATH_MAILDIR) + sizeof(utp->ut_user) + 1]; + char buf[MAXPATHLEN]; - if (!(cp = strchr(name, '@'))) + if ((cp = strchr(name, '@')) == NULL) return; *cp = '\0'; offset = strtoll(cp + 1, NULL, 10); - if (!(cp = strchr(cp + 1, ':'))) - file = name; - else - file = cp + 1; - sprintf(buf, "%s/%.*s", _PATH_MAILDIR, (int)sizeof(utp->ut_user), - name); - if (*file != '/') { - sprintf(buf2, "%s/%.*s", _PATH_MAILDIR, - (int)sizeof(utp->ut_user), file); - file = buf2; + if ((cp = strchr(cp + 1, ':')) != NULL && + strchr((file = cp + 1), '/') == NULL) { + snprintf(buf, sizeof(buf), "%s/%s", _PATH_MAILDIR, file); + folder = 1; + } else { + snprintf(buf, sizeof(buf), "%s/%s", _PATH_MAILDIR, name); + folder = 0; } - folder = strcmp(buf, file); setutxent(); while ((utp = getutxent()) != NULL) if (utp->ut_type == USER_PROCESS && !strcmp(utp->ut_user, name)) - notify(utp, file, offset, folder); + notify(utp, buf, offset, folder); endutxent(); } @@ -159,8 +154,7 @@ notify(struct utmpx *utp, char file[], off_t offset, int folder) utp->ut_line); return; } - (void)snprintf(tty, sizeof(tty), "%s%.*s", - _PATH_DEV, (int)sizeof(utp->ut_line), utp->ut_line); + (void)snprintf(tty, sizeof(tty), "%s%s", _PATH_DEV, utp->ut_line); if (stat(tty, &stb) == -1 || !(stb.st_mode & (S_IXUSR | S_IXGRP))) { dsyslog(LOG_DEBUG, "%s: wrong mode on %s", utp->ut_user, tty); return; @@ -187,26 +181,20 @@ notify(struct utmpx *utp, char file[], off_t offset, int folder) initgroups(p->pw_name, p->pw_gid) == -1 || setgid(p->pw_gid) == -1 || setuid(p->pw_uid) == -1) - return; + _exit(1); - switch (stb.st_mode & (S_IXUSR | S_IXGRP)) { - case S_IXUSR: - case (S_IXUSR | S_IXGRP): + if (stb.st_mode & S_IXUSR) { (void)fprintf(tp, "%s\007New mail for %s@%.*s\007 has arrived%s%s%s:%s----%s", cr, utp->ut_user, (int)sizeof(hostname), hostname, folder ? cr : "", folder ? "to " : "", folder ? file : "", cr, cr); jkfprintf(tp, file, offset); - break; - case S_IXGRP: + } else if (stb.st_mode & S_IXGRP) { (void)fprintf(tp, "\007"); (void)fflush(tp); (void)sleep(1); (void)fprintf(tp, "\007"); - break; - default: - break; } (void)fclose(tp); _exit(0); diff --git a/libexec/dma/dmagent/Makefile b/libexec/dma/dmagent/Makefile index 5f7deeea0b05..f707cfa3264f 100644 --- a/libexec/dma/dmagent/Makefile +++ b/libexec/dma/dmagent/Makefile @@ -15,14 +15,16 @@ SRCS= aliases_parse.y \ net.c \ spool.c \ util.c -MAN8= dma.8 +MAN= dma.8 MLINKS= dma.8 dma.conf.5 -CONFS= dma.conf +CONFSMODE= 0640 +CONFSGRP= mail +CONFS= auth.conf dma.conf CONFSDIR= ${CONFDIR}/dma CFLAGS+= -DOPENSSL_API_COMPAT=0x10100000L YFLAGS+= -i CLEANFILES= aliases_parse.i -FILES= auth.conf mailer.conf +FILES= mailer.conf FILESDIR= ${SHAREDIR}/examples/dma BINMODE= 2555 diff --git a/release/packages/ucl/bmake-all.ucl b/release/packages/ucl/bmake-all.ucl new file mode 100644 index 000000000000..ee8175d1dd8a --- /dev/null +++ b/release/packages/ucl/bmake-all.ucl @@ -0,0 +1,5 @@ +comment = "Program maintenance utility" +desc = <<EOD +make(1) allows programs to be built from source files based on a specification +of the program's dependencies called a Makefile. +EOD diff --git a/release/packages/ucl/sendmail.ucl b/release/packages/ucl/sendmail.ucl new file mode 100644 index 000000000000..c79775eb8af4 --- /dev/null +++ b/release/packages/ucl/sendmail.ucl @@ -0,0 +1,7 @@ +deps { + # sendmail requires make to build its configuration file. + "bmake": { + version = "${VERSION}" + origin = "base" + } +} diff --git a/release/packages/ucl/yp.ucl b/release/packages/ucl/yp.ucl new file mode 100644 index 000000000000..14b2327e56d1 --- /dev/null +++ b/release/packages/ucl/yp.ucl @@ -0,0 +1,7 @@ +deps { + # YP requires bmake to rebuild the database. + "bmake": { + version = "${VERSION}" + origin = "base" + } +} diff --git a/sbin/dhclient/dhclient.c b/sbin/dhclient/dhclient.c index cbab3fa2973c..5d2a7453578b 100644 --- a/sbin/dhclient/dhclient.c +++ b/sbin/dhclient/dhclient.c @@ -539,7 +539,7 @@ main(int argc, char *argv[]) setproctitle("%s", ifi->name); /* setgroups(2) is not permitted in capability mode. */ - if (setgroups(1, &pw->pw_gid) != 0) + if (setgroups(0, NULL) != 0) error("can't restrict groups: %m"); if (caph_enter_casper() < 0) diff --git a/sbin/ifconfig/af_inet6.c b/sbin/ifconfig/af_inet6.c index 17dc068ee875..7986edf490b4 100644 --- a/sbin/ifconfig/af_inet6.c +++ b/sbin/ifconfig/af_inet6.c @@ -759,7 +759,7 @@ static struct afswtch af_inet6 = { #else .af_difaddr = NL_RTM_DELADDR, .af_aifaddr = NL_RTM_NEWADDR, - .af_ridreq = &in6_add, + .af_ridreq = &in6_del, .af_addreq = &in6_add, .af_exec = in6_exec_nl, #endif diff --git a/sbin/ifconfig/ifbridge.c b/sbin/ifconfig/ifbridge.c index 3465dc223ada..a75c37e640a2 100644 --- a/sbin/ifconfig/ifbridge.c +++ b/sbin/ifconfig/ifbridge.c @@ -80,6 +80,20 @@ get_val(const char *cp, u_long *valp) } static int +get_vlan_id(const char *cp, ether_vlanid_t *valp) +{ + u_long val; + + if (get_val(cp, &val) == -1) + return (-1); + if (val < DOT1Q_VID_MIN || val > DOT1Q_VID_MAX) + return (-1); + + *valp = (ether_vlanid_t)val; + return (0); +} + +static int do_cmd(if_ctx *ctx, u_long op, void *arg, size_t argsize, int set) { struct ifdrv ifd = {}; @@ -242,8 +256,8 @@ bridge_status(if_ctx *ctx) else printf(" <unknown state %d>", state); } - if (member->ifbr_untagged != 0) - printf(" untagged %u", (unsigned)member->ifbr_untagged); + if (member->ifbr_pvid != 0) + printf(" untagged %u", (unsigned)member->ifbr_pvid); print_vlans(&bridge->member_vlans[i]); printf("\n"); } @@ -614,25 +628,15 @@ static void setbridge_untagged(if_ctx *ctx, const char *ifn, const char *vlanid) { struct ifbreq req; - u_long val; memset(&req, 0, sizeof(req)); + strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); - if (get_val(vlanid, &val) < 0) + if (get_vlan_id(vlanid, &req.ifbr_pvid) < 0) errx(1, "invalid VLAN identifier: %s", vlanid); - /* - * Reject vlan 0, since it's not a valid vlan identifier and has a - * special meaning in the kernel interface. - */ - if (val == 0) - errx(1, "invalid VLAN identifier: %lu", val); - - strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); - req.ifbr_untagged = val; - - if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0) - err(1, "BRDGSIFUNTAGGED %s", vlanid); + if (do_cmd(ctx, BRDGSIFPVID, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFPVID %s", vlanid); } static void @@ -643,10 +647,10 @@ unsetbridge_untagged(if_ctx *ctx, const char *ifn, int dummy __unused) memset(&req, 0, sizeof(req)); strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); - req.ifbr_untagged = 0; + req.ifbr_pvid = 0; - if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0) - err(1, "BRDGSIFUNTAGGED"); + if (do_cmd(ctx, BRDGSIFPVID, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFPVID"); } static void diff --git a/sbin/ifconfig/tests/inet6.sh b/sbin/ifconfig/tests/inet6.sh index edfd88d93af7..22399915a64d 100644 --- a/sbin/ifconfig/tests/inet6.sh +++ b/sbin/ifconfig/tests/inet6.sh @@ -76,8 +76,38 @@ broadcast_cleanup() vnet_cleanup } +atf_test_case "delete6" "cleanup" +delete6_head() +{ + atf_set descr 'Test removing IPv6 addresses' + atf_set require.user root +} + +delete6_body() +{ + vnet_init + + ep=$(vnet_mkepair) + + atf_check -s exit:0 \ + ifconfig ${ep}a inet6 fe80::42/64 + atf_check -s exit:0 -o match:"fe80::42%${ep}" \ + ifconfig ${ep}a inet6 + + atf_check -s exit:0 \ + ifconfig ${ep}a inet6 -alias fe80::42 + atf_check -s exit:0 -o not-match:"fe80::42%${ep}" \ + ifconfig ${ep}a inet6 +} + +delete6_cleanup() +{ + vnet_cleanup +} + atf_init_test_cases() { atf_add_test_case netmask atf_add_test_case broadcast + atf_add_test_case delete6 } diff --git a/sbin/mount_fusefs/Makefile b/sbin/mount_fusefs/Makefile index e683b35f0c8a..a237ba99eb6b 100644 --- a/sbin/mount_fusefs/Makefile +++ b/sbin/mount_fusefs/Makefile @@ -20,7 +20,7 @@ DEBUG_FLAGS+= -DFUSE4BSD_VERSION="\"${F4BVERS}\"" PACKAGE=runtime PROG= mount_fusefs -MAN8= mount_fusefs.8 +MAN= mount_fusefs.8 LIBADD= util .include <bsd.prog.mk> diff --git a/sbin/ping/Makefile b/sbin/ping/Makefile index b4e3f115b245..30c68cbaba52 100644 --- a/sbin/ping/Makefile +++ b/sbin/ping/Makefile @@ -32,8 +32,6 @@ CFLAGS+=-DWITH_CASPER CFLAGS+=-DIPSEC LIBADD+= ipsec -CFLAGS+= -Wno-error=unused-but-set-variable - HAS_TESTS= SUBDIR.${MK_TESTS}+= tests diff --git a/sbin/recoverdisk/recoverdisk.c b/sbin/recoverdisk/recoverdisk.c index e1b283e54a93..f13a1f211863 100644 --- a/sbin/recoverdisk/recoverdisk.c +++ b/sbin/recoverdisk/recoverdisk.c @@ -715,7 +715,7 @@ main(int argc, char * const argv[]) int64_t sz; int error; time_t t_now, t_report, t_save; - unsigned snapshot = 60, unsaved; + time_t snapshot = 60, unsaved; setbuf(stdout, NULL); setbuf(stderr, NULL); diff --git a/sbin/zfsbootcfg/zfsbootcfg.8 b/sbin/zfsbootcfg/zfsbootcfg.8 index 5e7f02b2578c..3831adfc81bd 100644 --- a/sbin/zfsbootcfg/zfsbootcfg.8 +++ b/sbin/zfsbootcfg/zfsbootcfg.8 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd July 22, 2020 +.Dd July 28, 2025 .Dt ZFSBOOTCFG 8 .Os .Sh NAME @@ -44,14 +44,11 @@ is used to set .Xr boot.config 5 Ns -style options to be used by -.Xr zfsboot 8 , .Xr gptzfsboot 8 or .Xr loader 8 the next time the machine is booted. Once -.Xr zfsboot 8 -or .Xr gptzfsboot 8 or .Xr loader 8 @@ -130,8 +127,7 @@ To clear the boot options: .Xr boot.config 5 , .Xr bectl 8 , .Xr gptzfsboot 8 , -.Xr loader 8 , -.Xr zfsboot 8 +.Xr loader 8 .Sh HISTORY .Nm appeared in diff --git a/share/man/man4/usbhid.4 b/share/man/man4/usbhid.4 index 5109bbe72de6..e5ba370cd025 100644 --- a/share/man/man4/usbhid.4 +++ b/share/man/man4/usbhid.4 @@ -21,7 +21,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 12, 2021 +.Dd July 30, 2025 .Dt USBHID 4 .Os .Sh NAME @@ -60,7 +60,7 @@ and make its priority greater than other USB HID drivers, such as .Xr ums 4 , and .Xr uhid 4 . -Default is 0. +Default is 1. .El .Bl -tag -width indent .It Va hw.usb.usbhid.debug diff --git a/share/man/man4/vtnet.4 b/share/man/man4/vtnet.4 index 270366488a98..8b99cd9f17b9 100644 --- a/share/man/man4/vtnet.4 +++ b/share/man/man4/vtnet.4 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 22, 2012 +.Dd July 29, 2025 .Dt VTNET 4 .Os .Sh NAME @@ -70,6 +70,11 @@ prompt before booting the kernel or stored in .It Va hw.vtnet. Ns Ar X Ns Va .csum_disable This tunable disables receive and send checksum offload. The default value is 0. +.It Va hw.vtnet.fixup_needs_csum +.It Va hw.vtnet. Ns Ar X Ns Va .fixup_needs_csum +This tunable enforces the calculation of a valid checksum for NEEDS_CSUM +packets. +The default value is 0. .It Va hw.vtnet.tso_disable .It Va hw.vtnet. Ns Ar X Ns Va .tso_disable This tunable disables TSO. @@ -91,6 +96,22 @@ The number of queue pairs used is the lesser of the maximum supported by the driver and the hypervisor, the number of CPUs present in the guest, and this tunable if not zero. The default value is 0. +.It Va hw.vtnet.tso_maxlen +.It Va hw.vtnet. Ns Ar X Ns Va .tso_maxlen +This tunable sets the TSO burst limit. +The default value is 65535. +.It Va hw.vtnet.rx_process_limit +.It Va hw.vtnet. Ns Ar X Ns Va .rx_process_limit +This tunable sets the number of RX segments processed in one pass. +The default value is 1024. +.It Va hw.vtnet.lro_entry_count +.It Va hw.vtnet. Ns Ar X Ns Va .lro_entry_count +This tunable sets the software LRO entry count. +The default value is 128, the minimum value is 8. +.It Va hw.vtnet.lro_mbufq_depth +.It Va hw.vtnet. Ns Ar X Ns Va .lro_mbufq_depth +This tunable sets the depth of the software LRO mbuf queue. +The default value is 0. .It Va hw.vtnet.altq_disable This tunable disables ALTQ support, allowing the use of multiqueue instead. This option applies to all interfaces. diff --git a/share/man/man5/core.5 b/share/man/man5/core.5 index 8efc8c970014..628fdb7920bb 100644 --- a/share/man/man5/core.5 +++ b/share/man/man5/core.5 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd November 12, 2023 +.Dd July 17, 2025 .Dt CORE 5 .Os .Sh NAME @@ -48,26 +48,6 @@ a system crash. (In this event, the decision to save the core file is arbitrary, see .Xr savecore 8 . ) .Pp -The maximum size of a core file is limited by the -.Dv RLIMIT_CORE -.Xr setrlimit 2 -limit. -Files which would be larger than the limit are not created. -.Pp -With a large limit, a process that had mapped a very large, -and perhaps sparsely populated, virtual memory region, could take -a very long time to create core dumps. -The system ignores all signals sent to a process writing a core file, except -.Dv SIGKILL -which terminates the writing and causes immediate exit of the process. -The behavior of -.Dv SIGKILL -can be disabled by setting tunable -.Xr sysctl 8 -variable -.Va kern.core_dump_can_intr -to zero. -.Pp The name of the file is controlled via the .Xr sysctl 8 variable @@ -107,6 +87,26 @@ yielding the traditional .Fx behaviour. .Pp +The maximum size of a core file is limited by the +.Dv RLIMIT_CORE +.Xr setrlimit 2 +limit. +Files which would be larger than the limit are not created. +.Pp +With a large limit, a process that had mapped a very large, +and perhaps sparsely populated, virtual memory region, could take +a very long time to create core dumps. +The system ignores all signals sent to a process writing a core file, except +.Dv SIGKILL +which terminates the writing and causes immediate exit of the process. +The behavior of +.Dv SIGKILL +can be disabled by setting tunable +.Xr sysctl 8 +variable +.Va kern.core_dump_can_intr +to zero. +.Pp By default, a process that changes user or group credentials whether real or effective will not create a corefile. This behaviour can be @@ -116,11 +116,13 @@ variable .Va kern.sugid_coredump to 1. .Pp -Corefiles can be compressed by the kernel if the following item -is included in the kernel configuration file: +Corefiles can be compressed by the kernel if one of the following items +are included in the kernel configuration file: .Bl -tag -width "1234567890" -compact -offset "12345" .It options GZIO +.It options +ZSTDIO .El .Pp The following sysctl control core file compression: diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index a3db00aed42f..f93d3f9fc69f 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,5 +1,5 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. -.Dd July 14, 2025 +.Dd July 27, 2025 .Dt SRC.CONF 5 .Os .Sh NAME @@ -940,8 +940,9 @@ amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64 and powerpc/powerpc64le Do not build the LLD linker during the bootstrap phase of the build. To be able to build the system an alternate linker must be provided via XLD. -.It Va WITHOUT_LLVM_ASSERTIONS -Disable debugging assertions in LLVM. +.It Va WITH_LLVM_ASSERTIONS +Enable debugging assertions in LLVM. +Use when working on or requesting help with LLVM components. .It Va WITHOUT_LLVM_BINUTILS Install ELF Tool Chain's binary utilities instead of LLVM's. This includes diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index b73e47b3ef4d..5bcde3030ebc 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -69,6 +69,7 @@ MAN= accept_filter.9 \ config_intrhook.9 \ contigmalloc.9 \ copy.9 \ + coredumper_register.9 \ counter.9 \ cpu_machdep.9 \ cpuset.9 \ @@ -905,6 +906,7 @@ MLINKS+=copy.9 copyin.9 \ copy.9 copyout.9 \ copy.9 copyout_nofault.9 \ copy.9 copystr.9 +MLINKS+=coredumper_register.9 coredumper_unregister.9 MLINKS+=counter.9 counter_u64_alloc.9 \ counter.9 counter_u64_free.9 \ counter.9 counter_u64_add.9 \ diff --git a/share/man/man9/coredumper_register.9 b/share/man/man9/coredumper_register.9 new file mode 100644 index 000000000000..f4c9eb4a1bf6 --- /dev/null +++ b/share/man/man9/coredumper_register.9 @@ -0,0 +1,168 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Kyle Evans <kevans@FreeBSD.org> +.\" +.Dd July 23, 2025 +.Dt COREDUMPER_REGISTER 9 +.Os +.Sh NAME +.Nm coredumper_register , +.Nm coredumper_unregister +.Nd loadable user coredumper support +.Sh SYNOPSIS +.In sys/ucoredump.h +.Ft void +.Fn coredumper_register "struct coredumper *cd" +.Ft void +.Fn coredumper_unregister "struct coredumper *cd" +.Pp +.Ft int +.Fn coredumper_probe_fn "struct thread *td" +.Ft int +.Fn coredumper_handle_fn "struct thread *td" "off_t limit" +.Bd -literal +/* Incomplete, but the useful members are depicted here. */ +struct coredumper { + const char *cd_name; + coredumper_probe_fn *cd_probe; + coredumper_handle_fn *cd_handle; +}; +.Ed +.Pp +.Ft int +.Fn coredump_init_fn "const struct coredump_writer *" \ +"const struct coredump_params *" +.Ft int +.Fn coredump_write_fn "const struct coredump_writer *" "const void *" "size_t" \ +"off_t" "enum uio_seg" "struct ucred *" "size_t *" "struct thread *" +.Ft int +.Fn coredump_extend_fn "const struct coredump_writer *" "off_t" "struct ucred *" +.Bd -literal +struct coredump_writer { + void *ctx; + coredump_init_fn *init_fn; + coredump_write_fn *write_fn; + coredump_extend_fn *extend_fn; +}; +.Ed +.Sh DESCRIPTION +The +.Nm +mechanism provides a path for kernel modules to register a new user process core +dumper. +The expected use of +.Nm +is for a module to define the fields of the struct coredumper listed above, then +call +.Fn coredumper_register +at +.Dv MOD_LOAD +time. +A corresponding +.Fn coredumper_unregister +should be called at +.Dv MOD_UNLOAD +time. +Note that +.Fn coredumper_unregister +will block until the specified coredumper is no longer processing coredumps. +.Pp +When a user process is preparing to start dumping core, the kernel will execute +the +.Fn cd_probe +function for each coredumper currently registered. +The +.Fn cd_probe +function is expected to return either -1 if it would decline to dump the +process, or a priority level greater than 0. +The coredumper with the highest priority will handle the coredump. +The following default priorities are defined: +.Bl -tag -width indent +.It Dv COREDUMPER_NOMATCH +This dumper declines dumping the process. +.It Dv COREDUMPER_GENERIC +This dumper will dump the process at the lowest priority. +This priority is not recommended, as the default vnode dumper will bid at +.Dv COREDUMPER_GENERIC +as well. +.It Dv COREDUMPER_SPECIAL +This dumper provides special behavior, and will dump the process at a higher +priority. +.It Dv COREDUMPER_HIGHPRIORITY +This dumper would prefer to handle this coredump. +This may be used by, for instance, a custom or vendor-specific coredump +mechanism that wishes to preempt others. +.El +.Pp +Note that this system has been designed such that the +.Fn cd_probe +function can examine the process in question and make an informed decision. +Different processes being dumped could probe at different priorities in the +same coredumper. +.Pp +Once the highest priority coredumper has been selected, the +.Fn cd_handle +function will be invoked. +The +.Fn cd_handle +will receive both the thread and the +.Dv RLIMIT_CORE +.Xr setrlimit 2 +.Fa limit . +The proc lock will be held on entry, and should be unlocked before the handler +returns. +The +.Fa limit +is typically passed to the +.Fn sv_coredump +that belongs to the process's +.Va p_sysent . +.Pp +The +.Fn cd_handle +function should return either 0 if the dump was successful, or an appropriate +.Xr errno 2 +otherwise. +.Ss Customized Coredump Writers +Custom coredumpers can define their own +.Dv coredump_writer +to pass to +.Fn sv_coredump . +.Pp +The +.Va ctx +member is opaque and only to be used by the coredumper itself. +.Pp +The +.Va init_fn +function, if it's provided, will be called by the +.Fn sv_coredump +implementation before any data is to be written. +This allows the writer implementation to record any coredump parameters that it +might need to capture, or setup the object to be written to. +.Pp +The +.Va write_fn +function will be called by the +.Fn sv_coredump +implementation to write out data. +The +.Va extend_fn +function will be called to enlarge the coredump, in the sense that a hole is +created in any difference between the current size and the new size. +For convenience, the +.Fn core_vn_write +and +.Fn core_vn_extend +functions used by the vnode coredumper are exposed in +.In sys/ucordumper.h , +and the +.Dv coredump_vnode_ctx +defined there should be populated with the vnode to write to. +.Sh SEE ALSO +.Xr setrlimit 2 , +.Xr core 5 +.Sh AUTHORS +This manual page was written by +.An Kyle Evans Aq Mt kevans@FreeBSD.org . diff --git a/share/man/man9/domainset.9 b/share/man/man9/domainset.9 index 816ce29f04f7..702c9f83a88b 100644 --- a/share/man/man9/domainset.9 +++ b/share/man/man9/domainset.9 @@ -22,7 +22,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd April 14, 2021 +.Dd June 24, 2025 .Dt DOMAINSET 9 .Os .Sh NAME @@ -54,6 +54,8 @@ struct domainset { .Ft struct domainset * .Fn domainset_create "const struct domainset *key" .Ft int +.Fn domainset_populate "struct domainset *domain" "domainset_t *mask" "int policy" "size_t mask_size" +.Ft int .Fn sysctl_handle_domainset "SYSCTL_HANDLER_ARGS" .Sh DESCRIPTION The @@ -137,6 +139,7 @@ These policies should be used in preference to to avoid blocking indefinitely on a .Dv M_WAITOK request. +.Pp The .Fn domainset_create function takes a partially filled in domainset as a key and returns a @@ -148,6 +151,17 @@ is an immutable type that is shared among all matching keys and must not be modified after return. .Pp The +.Fn domainset_populate +function fills a +.Vt domainset +struct using a domain mask and policy. +It is used for validating and +translating a domain mask and policy into a +.Vt domainset +struct when creating a custom domainset using +.Vt domainset_create . +.Pp +The .Fn sysctl_handle_domainset function is provided as a convenience for modifying or viewing domainsets that are not accessible via diff --git a/share/man/man9/mbuf.9 b/share/man/man9/mbuf.9 index 0262c598ed18..c05505716a30 100644 --- a/share/man/man9/mbuf.9 +++ b/share/man/man9/mbuf.9 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd December 28, 2023 +.Dd July 29, 2025 .Dt MBUF 9 .Os .\" @@ -1091,7 +1091,7 @@ network code, when data must be encrypted or otherwise altered prior to transmission. .El .Sh HARDWARE-ASSISTED CHECKSUM CALCULATION -This section currently applies to TCP/IP only. +This section currently applies to SCTP, TCP, and UDP over IP only. In order to save the host CPU resources, computing checksums is offloaded to the network interface hardware if possible. The @@ -1117,7 +1117,7 @@ in the .Vt mbuf chain containing the packet. .Pp -On output, checksum offloading is attempted after the outgoing +On output, the computation of the checksum is delayed until the outgoing interface has been determined for a packet. The interface-specific field .Va ifnet.if_data.ifi_hwassist @@ -1135,12 +1135,15 @@ such actions will never be requested through .Va csum_flags . .Pp The flags demanding a particular action from an interface are as follows: -.Bl -tag -width ".Dv CSUM_TCP" -offset indent +.Bl -tag -width ".Dv CSUM_SCTP" -offset indent .It Dv CSUM_IP The IP header checksum is to be computed and stored in the corresponding field of the packet. The hardware is expected to know the format of an IP header to determine the offset of the IP checksum field. +.It Dv CSUM_SCTP +The SCTP checksum is to be computed. +(See below.) .It Dv CSUM_TCP The TCP checksum is to be computed. (See below.) @@ -1149,14 +1152,16 @@ The UDP checksum is to be computed. (See below.) .El .Pp -Should a TCP or UDP checksum be offloaded to the hardware, +Should a SCTP, TCP, or UDP checksum be offloaded to the hardware, the field .Va csum_data will contain the byte offset of the checksum field relative to the end of the IP header. -In this case, the checksum field will be initially -set by the TCP/IP module to the checksum of the pseudo header +In the case of TCP or UDP, the checksum field will be initially +set by the TCP or UDP implementation to the checksum of the pseudo header defined by the TCP and UDP specifications. +In the case of SCTP, the checksum field will be initially +set by the SCTP implementation to 0. .Pp On input, an interface indicates the actions it has performed on a packet by setting one or more of the following flags in @@ -1187,13 +1192,13 @@ to obtain the final checksum to be used for TCP or UDP validation purposes. .El .Pp If a particular network interface just indicates success or -failure of TCP or UDP checksum validation without returning +failure of SCTP, TCP, or UDP checksum validation without returning the exact value of the checksum to the host CPU, its driver can mark .Dv CSUM_DATA_VALID -and -.Dv CSUM_PSEUDO_HDR in -.Va csum_flags , +.Va csum_flags +as well as, for TCP and UDP, +.Dv CSUM_PSEUDO_HDR and set .Va csum_data to @@ -1203,6 +1208,13 @@ It is a peculiarity of the algorithm used that the Internet checksum calculated over any valid packet will be .Li 0xFFFF as long as the original checksum field is included. +Note that for SCTP the value of +.Va csum_data +is not relevant and +.Dv CSUM_PSEUDO_HDR +in +.Va csum_flags +is not set, since SCTP does not use a pseudo header checksum. .Sh STRESS TESTING When running a kernel compiled with the option .Dv MBUF_STRESS_TEST , diff --git a/share/man/man9/style.9 b/share/man/man9/style.9 index 484b4f144b2e..e9f17392ae0c 100644 --- a/share/man/man9/style.9 +++ b/share/man/man9/style.9 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd March 27, 2025 +.Dd July 28, 2025 .Dt STYLE 9 .Os .Sh NAME @@ -766,8 +766,7 @@ to any pointer type. .Pp Values in .Ic return -statements should be enclosed in parentheses where possible. -For example, parentheses cannot be used if the value is a C++ braced-init-list. +statements should be enclosed in parentheses. .Pp Use .Xr err 3 @@ -918,6 +917,161 @@ Only use the annotation for the entire if statement, rather than individual clauses. Do not add these annotations without empirical evidence of the likelihood of the branch. +.Ss C++ +KNF style was originally defined as a style for C. +C++ introduces several new idioms which do not have an existing corollary +in KNF C such as inline function definitions in classes. +C++ is also not always compatible with some KNF guidelines such as +enclosing return values in parentheses. +For C++ code, FreeBSD aims to follow broadly accepted C++ practices while +also following the general shape of KNF. +This section enumerates C++ specific guidelines that differ from KNF C. +.Pp +The preferred suffixes for C++ source files are +.Dq .cc +and +.Dq .hh . +Header files should always use a suffix, +unlike headers from the C++ standard library. +.Pp +Return values should not be enclosed in parantheses. +When converting existing C code to C++, +existing return values may remain in parantheses. +.Pp +The opening curly brace for namespace declarations should be on the first line +similar to structure and class definitions. +Nested namespaces should be declared using a single namespace declaration. +.Bd -literal +namespace foo::bar { +} +.Ed +.Pp +Member function declarations should follow the same style used for standalone +function protoypes except that a space should be used between a function's +return type and name. +.Pp +Function definitions at the top level should use a newline after the function +type similar to C function definitions. +.Pp +Nested member function definitions inside of a class, structure, or union +should not use a newline after the function type. +Instead, these should follow the style of member function declarations. +This is more common C++ style and is more compact for small methods such as +getters and setters. +.Pp +Inline functions whose body consists of a single statement may use a single +line for the function body. +Inline functions with an empty body should always use a single line. +.Bd -literal +struct widget { + int foo() { return 4; } + int bar(); +}; + +int +widget::bar() +{ + return 6; +} +.Ed +.Pp +Default and deleted methods should be declared as a single line. +.Bd -literal +class box { + ~box() = default; +}; +.Ed +.Pp +In template declarations, the +.Ic template +keyword and list of template parameters should be followed by a newline +before the templated declaration. +.Bd -literal +template <typename T> +class box { + T data; +}; +.Ed +.Pp +The +.Ic & +for reference variables should be placed on the variable name rather +than the type similar to the style used with +.Ic * +for pointers. +.Bd -literal + int x; + int &xp = x; +.Ed +.Pp +Variables may be declared at any point within a function, +not just at the start of blocks. +.Pp +Standard library containers should be used in preference to +.Xr queue 3 +or +.Xr tree 3 +macros. +.Pp +.Ic nullptr +should be used instead of +.Dv NULL +or 0. +.Pp +Use standard library types for managing strings such as +.Vt std::string +and +.Vt std::string_view +rather than +.Vt "char *" +and +.Vt "const char *" . +C types may be used when interfacing with C code. +.Pp +The +.Ic auto +keyword can be used in various contexts which improve readability. +Examples include iterators, non-trivial types of ranged-for values, +and return values of obvious types, +such as +.Ic static_cast +or +.Fn std::make_unique . +Place any qualifiers before +.Ic auto , +for example: +.Ic const auto . +.Pp +Use the +.Vt std::unique_ptr +and +.Vt std::shared_ptr +smart pointers to manage the lifetime of dynamically allocated objects +instead of +.Ic new +and +.Ic delete . +Construct smart pointers with +.Fn std::make_unique +or +.Fn std::make_shared . +Do not use +.Xr malloc 3 +except when necessary to interface with C code. +.Pp +Do not import any namespaces with +.Ic using +at global scope in header files. +Namespaces other than the +.Ic std +namespace (for example, +.Ic std::literals ) +may be imported in source files and in function scope in header files. +.Pp +Define type aliases using +.Ic using +instead of +.Ic typedef . .Sh FILES .Bl -tag -width indent .It Pa /usr/src/tools/build/checkstyle9.pl diff --git a/share/misc/committers-ports.dot b/share/misc/committers-ports.dot index 7bb3d936e5e5..fb6c168f1425 100644 --- a/share/misc/committers-ports.dot +++ b/share/misc/committers-ports.dot @@ -153,6 +153,7 @@ ak [label="Alex Kozlov\nak@FreeBSD.org\n2012/02/29"] ale [label="Alex Dupre\nale@FreeBSD.org\n2004/01/12"] alepulver [label="Alejandro Pulver\nalepulver@FreeBSD.org\n2006/04/01"] alexey [label="Alexey Degtyarev\nalexey@FreeBSD.org\n2013/11/09"] +alven [label="Älven\nalven@FreeBSD.org\n2025/07/28"] amdmi3 [label="Dmitry Marakasov\namdmi3@FreeBSD.org\n2008/06/19"] antoine [label="Antoine Brodin\nantoine@FreeBSD.org\n2013/04/03"] arrowd [label="Gleb Popov\narrowd@FreeBSD.org\n2018/05/18"] @@ -420,6 +421,7 @@ culot -> marino culot -> pi culot -> wg +db -> alven db -> tj db -> shurd @@ -865,6 +867,7 @@ wxs -> zi ygy -> yasu +yuri -> alven yuri -> rea zirias -> jbo diff --git a/share/mk/Makefile b/share/mk/Makefile index 837f7da68b4b..4ab5c8cc314b 100644 --- a/share/mk/Makefile +++ b/share/mk/Makefile @@ -10,6 +10,7 @@ UPDATE_DEPENDFILE= no .include <src.opts.mk> +PACKAGE= bmake FILES= \ auto.obj.mk \ bsd.README \ diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk index f21d519160d2..88202aeb78fe 100644 --- a/share/mk/src.libnames.mk +++ b/share/mk/src.libnames.mk @@ -487,6 +487,7 @@ _DP_be= zfs spl nvpair zfsbootenv _DP_netmap= _DP_ifconfig= m _DP_pfctl= nv +_DP_krb5ss= edit # OFED support .if ${MK_OFED} != "no" diff --git a/share/mk/src.opts.mk b/share/mk/src.opts.mk index ef43d3c939b2..77923ae7b6d1 100644 --- a/share/mk/src.opts.mk +++ b/share/mk/src.opts.mk @@ -123,7 +123,6 @@ __DEFAULT_YES_OPTIONS = \ LEGACY_CONSOLE \ LLD \ LLD_BOOTSTRAP \ - LLVM_ASSERTIONS \ LLVM_BINUTILS \ LLVM_COV \ LLVM_CXXFILT \ @@ -210,6 +209,7 @@ __DEFAULT_NO_OPTIONS = \ HESIOD \ LOADER_VERBOSE \ LOADER_VERIEXEC_PASS_MANIFEST \ + LLVM_ASSERTIONS \ LLVM_FULL_DEBUGINFO \ MALLOC_PRODUCTION \ OFED_EXTRA \ diff --git a/stand/i386/Makefile b/stand/i386/Makefile index 768496598575..299e070d8cd5 100644 --- a/stand/i386/Makefile +++ b/stand/i386/Makefile @@ -18,7 +18,7 @@ SUBDIR.yes+= loader_simp # special boot programs, 'self-extracting boot2+loader' SUBDIR.${MK_LOADER_PXEBOOT}+= pxeldr -SUBDIR.${MK_LOADER_ZFS}+= zfsboot gptzfsboot +SUBDIR.${MK_LOADER_ZFS}+= gptzfsboot .if defined(PXEBOOT_DEFAULT_INTERP) L=${PXEBOOT_DEFAULT_INTERP} diff --git a/stand/i386/common/bootargs.h b/stand/i386/common/bootargs.h index dafcf6a55554..072f7ee505fd 100644 --- a/stand/i386/common/bootargs.h +++ b/stand/i386/common/bootargs.h @@ -88,7 +88,7 @@ struct bootargs /* * geli_boot_data is embedded in geli_boot_args (passed from gptboot to loader) - * and in zfs_boot_args (passed from zfsboot and gptzfsboot to loader). + * and in zfs_boot_args (passed from gptzfsboot to loader). */ struct geli_boot_data { diff --git a/stand/i386/gptboot/Makefile b/stand/i386/gptboot/Makefile index b91875d242f5..a829be6c745d 100644 --- a/stand/i386/gptboot/Makefile +++ b/stand/i386/gptboot/Makefile @@ -1,6 +1,6 @@ .include <bsd.init.mk> -.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${SASRC} +.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common FILES= gptboot MAN= gptboot.8 @@ -53,12 +53,12 @@ gptldr.out: gptldr.o ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o CLEANFILES+= gptboot.bin gptboot.out gptboot.o sio.o drv.o \ - cons.o ${OPENCRYPTO_XTS} + cons.o gptboot.bin: gptboot.out ${OBJCOPY} -S -O binary gptboot.out ${.TARGET} -gptboot.out: ${BTXCRT} gptboot.o sio.o drv.o cons.o ${OPENCRYPTO_XTS} +gptboot.out: ${BTXCRT} gptboot.o sio.o drv.o cons.o ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBSA32} .include <bsd.prog.mk> diff --git a/stand/i386/gptzfsboot/Makefile b/stand/i386/gptzfsboot/Makefile index 0d9fa8b043df..0b67ff8cdaf4 100644 --- a/stand/i386/gptzfsboot/Makefile +++ b/stand/i386/gptzfsboot/Makefile @@ -1,7 +1,7 @@ .include <bsd.init.mk> .PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/gptboot \ - ${BOOTSRC}/i386/zfsboot ${BOOTSRC}/i386/common \ + ${BOOTSRC}/i386/common \ ${BOOTSRC}/common FILES= gptzfsboot @@ -65,7 +65,7 @@ gptldr.out: gptldr.o ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o misc.o -CLEANFILES+= gptzfsboot.bin gptzfsboot.out ${OBJS} ${OPENCRYPTO_XTS} +CLEANFILES+= gptzfsboot.bin gptzfsboot.out ${OBJS} # i386 standalone support library LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a @@ -73,8 +73,7 @@ LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a gptzfsboot.bin: gptzfsboot.out ${OBJCOPY} -S -O binary gptzfsboot.out ${.TARGET} -gptzfsboot.out: ${BTXCRT} ${OBJS} \ - ${OPENCRYPTO_XTS} +gptzfsboot.out: ${BTXCRT} ${OBJS} ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32} zfsboot.o: ${ZFSSRC}/zfsimpl.c diff --git a/stand/i386/zfsboot/zfsboot.c b/stand/i386/gptzfsboot/zfsboot.c index 4c8eae9b65e5..4c8eae9b65e5 100644 --- a/stand/i386/zfsboot/zfsboot.c +++ b/stand/i386/gptzfsboot/zfsboot.c diff --git a/stand/i386/isoboot/Makefile b/stand/i386/isoboot/Makefile index 7973f8029aa0..0049e7fd3e0a 100644 --- a/stand/i386/isoboot/Makefile +++ b/stand/i386/isoboot/Makefile @@ -1,7 +1,7 @@ .include <bsd.init.mk> .PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/gptboot \ - ${BOOTSRC}/i386/common ${SASRC} + ${BOOTSRC}/i386/common FILES= isoboot MAN= isoboot.8 @@ -51,12 +51,12 @@ gptldr.out: gptldr.o ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o CLEANFILES+= isoboot.bin isoboot.out isoboot.o sio.o drv.o \ - cons.o ${OPENCRYPTO_XTS} + cons.o isoboot.bin: isoboot.out ${OBJCOPY} -S -O binary isoboot.out ${.TARGET} -isoboot.out: ${BTXCRT} isoboot.o sio.o drv.o cons.o ${OPENCRYPTO_XTS} +isoboot.out: ${BTXCRT} isoboot.o sio.o drv.o cons.o ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBSA32} .include <bsd.prog.mk> diff --git a/stand/i386/loader/main.c b/stand/i386/loader/main.c index fd95cf5243cf..a70b3a253b90 100644 --- a/stand/i386/loader/main.c +++ b/stand/i386/loader/main.c @@ -198,7 +198,7 @@ main(void) #ifdef LOADER_ZFS_SUPPORT /* - * zfsboot and gptzfsboot have always passed KARGS_FLAGS_ZFS, + * gptzfsboot has always passed KARGS_FLAGS_ZFS, * so if that is set along with KARGS_FLAGS_EXTARG we know we * can interpret the extarg data as a struct zfs_boot_args. */ diff --git a/stand/i386/zfsboot/Makefile b/stand/i386/zfsboot/Makefile deleted file mode 100644 index b619b84c368e..000000000000 --- a/stand/i386/zfsboot/Makefile +++ /dev/null @@ -1,92 +0,0 @@ -.include <bsd.init.mk> - -.PATH: ${BOOTSRC}/i386/boot2 ${BOOTSRC}/i386/common ${BOOTSRC}/common - -FILES= zfsboot -MAN= zfsboot.8 - -BOOT_COMCONSOLE_PORT?= 0x3f8 -BOOT_COMCONSOLE_SPEED?= 115200 -B2SIOFMT?= 0x3 - -REL1= 0x700 -ORG1= 0x7c00 -ORG2= 0x2000 - -CFLAGS+=-DBOOTPROG=\"zfsboot\" \ - -O1 \ - -DBOOT2 \ - -DLOADER_GPT_SUPPORT \ - -DLOADER_MBR_SUPPORT \ - -DLOADER_ZFS_SUPPORT \ - -DLOADER_UFS_SUPPORT \ - -DSIOPRT=${BOOT_COMCONSOLE_PORT} \ - -DSIOFMT=${B2SIOFMT} \ - -DSIOSPD=${BOOT_COMCONSOLE_SPEED} \ - -I${LDRSRC} \ - -I${BOOTSRC}/i386/common \ - -I${BOOTSRC}/i386/libi386 \ - -I${ZFSSRC} \ - -I${SYSDIR}/crypto/skein \ - -I${SYSDIR}/cddl/boot/zfs \ - -I${SYSDIR}/contrib/openzfs/include \ - -I${SYSDIR}/contrib/openzfs/include/os/freebsd/spl \ - -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs \ - -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 \ - -I${BOOTSRC}/i386/boot2 \ - -Wall -Waggregate-return -Wbad-function-cast -Wno-cast-align \ - -Wmissing-declarations -Wmissing-prototypes -Wnested-externs \ - -Wpointer-arith -Wshadow -Wstrict-prototypes -Wwrite-strings - -CFLAGS.part.c+= -DHAVE_MEMCPY -I${SRCTOP}/sys/contrib/zlib - -CFLAGS.gcc+= --param max-inline-insns-single=100 - -LD_FLAGS+=${LD_FLAGS_BIN} - -CLEANFILES+= zfsboot - -zfsboot: zfsboot1 zfsboot2 - cat zfsboot1 zfsboot2 > zfsboot - -CLEANFILES+= zfsboot1 zfsldr.out zfsldr.o - -zfsboot1: zfsldr.out - ${OBJCOPY} -S -O binary zfsldr.out ${.TARGET} - -zfsldr.out: zfsldr.o - ${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} zfsldr.o - -OBJS= zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o misc.o -CLEANFILES+= zfsboot2 zfsboot.ld zfsboot.ldr zfsboot.bin zfsboot.out \ - ${OBJS} - -# We currently allow 256k bytes for zfsboot - in practice it could be -# any size up to 3.5Mb but keeping it fixed size simplifies zfsldr. -# -BOOT2SIZE= 262144 - -# i386 standalone support library -LIBI386= ${BOOTOBJ}/i386/libi386/libi386.a - -zfsboot2: zfsboot.ld - @set -- `ls -l ${.ALLSRC}`; x=$$((${BOOT2SIZE}-$$5)); \ - echo "$$x bytes available"; test $$x -ge 0 - ${DD} if=${.ALLSRC} of=${.TARGET} bs=${BOOT2SIZE} conv=sync - -zfsboot.ld: zfsboot.ldr zfsboot.bin ${BTXKERN} - btxld -v -E ${ORG2} -f bin -b ${BTXKERN} -l zfsboot.ldr \ - -o ${.TARGET} -P 1 zfsboot.bin - -zfsboot.ldr: - :> ${.TARGET} - -zfsboot.bin: zfsboot.out - ${OBJCOPY} -S -O binary zfsboot.out ${.TARGET} - -zfsboot.out: ${BTXCRT} ${OBJS} - ${LD} ${LD_FLAGS} --defsym ORG=${ORG2} -T ${LDSCRIPT} -o ${.TARGET} ${.ALLSRC} ${LIBI386} ${LIBSA32} - -SRCS= zfsboot.c - -.include <bsd.prog.mk> diff --git a/stand/i386/zfsboot/Makefile.depend b/stand/i386/zfsboot/Makefile.depend deleted file mode 100644 index 92ab022283fd..000000000000 --- a/stand/i386/zfsboot/Makefile.depend +++ /dev/null @@ -1,17 +0,0 @@ -# Autogenerated - do NOT edit! - -DIRDEPS = \ - include \ - include/xlocale \ - lib/libmd \ - stand/i386/btx/btx \ - stand/i386/btx/lib \ - stand/libsa32 \ - stand/zfs32 \ - - -.include <dirdeps.mk> - -.if ${DEP_RELDIR} == ${_DEP_RELDIR} -# local dependencies - needed for -jN in clean tree -.endif diff --git a/stand/i386/zfsboot/zfsboot.8 b/stand/i386/zfsboot/zfsboot.8 deleted file mode 100644 index a8411bc065d0..000000000000 --- a/stand/i386/zfsboot/zfsboot.8 +++ /dev/null @@ -1,130 +0,0 @@ -.\" Copyright (c) 2014 Andriy Gapon <avg@FreeBSD.org> -.\" All rights reserved. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.Dd March 27, 2018 -.Dt ZFSBOOT 8 -.Os -.Sh NAME -.Nm zfsboot -.Nd bootcode for ZFS on BIOS-based computers -.Sh DESCRIPTION -.Nm -is used on BIOS-based computers to boot from a filesystem in -a ZFS pool. -.Nm -is installed in two parts on a disk or a partition used by a ZFS pool. -The first part, a single-sector starter boot block, is installed -at the beginning of the disk or partition. -The second part, a main boot block, is installed at a special offset -within the disk or partition. -Both areas are reserved by the ZFS on-disk specification for boot use. -If -.Nm -is installed in a partition, then that partition should be made -bootable using appropriate configuration and boot blocks described in -.Xr boot 8 . -.Sh BOOTING -The -.Nm -boot process is very similar to that of -.Xr gptzfsboot 8 . -One significant difference is that -.Nm -does not currently support the GPT partitioning scheme. -Thus only whole disks and MBR partitions, traditionally referred to as -slices, are probed for ZFS disk labels. -See the BUGS section in -.Xr gptzfsboot 8 -for some limitations of the MBR scheme support. -.Sh USAGE -.Nm -supports all the same prompt and configuration file arguments as -.Xr gptzfsboot 8 . -.Sh FILES -.Bl -tag -width /boot/zfsboot -compact -.It Pa /boot/zfsboot -boot code binary -.It Pa /boot.config -parameters for the boot block -.Pq optional -.It Pa /boot/config -alternative parameters for the boot block -.Pq optional -.El -.Sh EXAMPLES -.Nm -is typically installed using -.Xr dd 1 . -To install -.Nm -on the -.Pa ada0 -drive: -.Bd -literal -offset indent -dd if=/boot/zfsboot of=/dev/ada0 count=1 -dd if=/boot/zfsboot of=/dev/ada0 iseek=1 oseek=1024 -.Ed -.Pp -If the drive is currently in use, the GEOM safety will prevent writes -and must be disabled before running the above commands: -.Bd -literal -offset indent -sysctl kern.geom.debugflags=0x10 -.Ed -.Pp -.Nm -can also be installed in an MBR slice: -.Bd -literal -offset indent -gpart create -s mbr ada0 -gpart add -t freebsd ada0 -gpart bootcode -b /boot/boot0 ada0 -gpart set -a active -i 1 ada0 -dd if=/dev/zero of=/dev/ada0s1 count=2 -dd if=/boot/zfsboot of=/dev/ada0s1 count=1 -dd if=/boot/zfsboot of=/dev/ada0s1 iseek=1 oseek=1024 -.Ed -.Pp -Note that commands to create and populate a pool are not shown -in the example above. -.Sh SEE ALSO -.Xr dd 1 , -.Xr boot.config 5 , -.Xr boot 8 , -.Xr gptzfsboot 8 , -.Xr loader 8 , -.Xr zpool 8 -.Sh HISTORY -.Nm -appeared in FreeBSD 7.3. -.Sh AUTHORS -This manual page was written by -.An Andriy Gapon Aq avg@FreeBSD.org . -.Sh BUGS -Installing -.Nm -with -.Xr dd 1 -is a hack. -ZFS needs a command to properly install -.Nm -onto a ZFS-controlled disk or partition. diff --git a/stand/i386/zfsboot/zfsldr.S b/stand/i386/zfsboot/zfsldr.S deleted file mode 100644 index cd8289f952fd..000000000000 --- a/stand/i386/zfsboot/zfsldr.S +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright (c) 1998 Robert Nordier - * All rights reserved. - * - * Redistribution and use in source and binary forms are freely - * permitted provided that the above copyright notice and this - * paragraph and the following disclaimer are duplicated in all - * such forms. - * - * This software is provided "AS IS" and without any express or - * implied warranties, including, without limitation, the implied - * warranties of merchantability and fitness for a particular - * purpose. - */ - -/* Memory Locations */ - .set MEM_ARG,0x900 # Arguments - .set MEM_ORG,0x7c00 # Origin - .set MEM_BUF,0x8000 # Load area - .set MEM_BTX,0x9000 # BTX start - .set MEM_JMP,0x9010 # BTX entry point - .set MEM_USR,0xa000 # Client start - .set BDA_BOOT,0x472 # Boot howto flag - -/* Partition Constants */ - .set PRT_OFF,0x1be # Partition offset - .set PRT_NUM,0x4 # Partitions - .set PRT_BSD,0xa5 # Partition type - -/* Misc. Constants */ - .set SIZ_PAG,0x1000 # Page size - .set SIZ_SEC,0x200 # Sector size - .set COPY_BLKS,0x8 # Number of blocks - # to copy for boot2 (<= 15) - .set COPY_BLK_SZ,0x8000 # Copy in 32k blocks; must be - # a multiple of 16 bytes - .set NSECT,(COPY_BLK_SZ / SIZ_SEC * COPY_BLKS) - .globl start - .code16 - -/* - * Load the rest of zfsboot2 and BTX up, copy the parts to the right locations, - * and start it all up. - */ - -/* - * Setup the segment registers to flat addressing (segment 0) and setup the - * stack to end just below the start of our code. - */ -start: cld # String ops inc - xor %cx,%cx # Zero - mov %cx,%es # Address - mov %cx,%ds # data - mov %cx,%ss # Set up - mov $start,%sp # stack -/* - * Load the MBR and look for the first FreeBSD slice. We use the fake - * partition entry below that points to the MBR when we call read. - * The first pass looks for the first active FreeBSD slice. The - * second pass looks for the first non-active FreeBSD slice if the - * first one fails. - */ - call check_edd # Make sure EDD works - mov $part4,%si # Dummy partition - xor %eax,%eax # Read MBR - movl $MEM_BUF,%ebx # from first - call read # sector - mov $0x1,%cx # Two passes -main.1: mov $MEM_BUF+PRT_OFF,%si # Partition table - movb $0x1,%dh # Partition -main.2: cmpb $PRT_BSD,0x4(%si) # Our partition type? - jne main.3 # No - jcxz main.5 # If second pass - testb $0x80,(%si) # Active? - jnz main.5 # Yes -main.3: add $0x10,%si # Next entry - incb %dh # Partition - cmpb $0x1+PRT_NUM,%dh # In table? - jb main.2 # Yes - dec %cx # Do two - jcxz main.1 # passes -/* - * If we get here, we didn't find any FreeBSD slices at all, so print an - * error message and die. - */ - mov $msg_part,%si # Message - jmp error # Error - -/* - * Ok, we have a slice and drive in %dx now, so use that to locate and - * load boot2. %si references the start of the slice we are looking - * for, so go ahead and load up the COPY_BLKS*COPY_BLK_SZ/SIZ_SEC sectors - * starting at sector 1024 (i.e. after the two vdev labels). We don't - * have do anything fancy here to allow for an extra copy of boot1 and - * a partition table (compare to this section of the UFS bootstrap) so we - * just load it all at 0x9000. The first part of boot2 is BTX, which wants - * to run at 0x9000. The boot2.bin binary starts right after the end of BTX, - * so we have to figure out where the start of it is and then move the - * binary to 0xc000. Normally, BTX clients start at MEM_USR, or 0xa000, - * but when we use btxld to create zfsboot2, we use an entry point of - * 0x2000. That entry point is relative to MEM_USR; thus boot2.bin - * starts at 0xc000. - * - * The load area and the target area for the client overlap so we have - * to use a decrementing string move. We also play segment register - * games with the destination address for the move so that the client - * can be larger than 16k (which would overflow the zero segment since - * the client starts at 0xc000). - */ -main.5: mov %dx,MEM_ARG # Save args - mov $NSECT,%cx # Sector count - movl $1024,%eax # Offset to boot2 - mov $MEM_BTX,%ebx # Destination buffer -main.6: pushal # Save params - call read # Read disk - popal # Restore - incl %eax # Advance to - add $SIZ_SEC,%ebx # next sector - loop main.6 # If not last, read another - - mov $MEM_BTX,%bx # BTX - mov 0xa(%bx),%si # Get BTX length and set - add %bx,%si # %si to start of boot2 - dec %si # Set %ds:%si to point at the - mov %si,%ax # last byte we want to copy - shr $4,%ax # from boot2, with %si made as - add $(COPY_BLKS*COPY_BLK_SZ/16),%ax # small as possible. - and $0xf,%si # - mov %ax,%ds # - mov $(MEM_USR+2*SIZ_PAG)/16,%ax # Set %es:(-1) to point at - add $(COPY_BLKS*COPY_BLK_SZ/16),%ax # the last byte we - mov %ax,%es # want to copy boot2 into. - mov $COPY_BLKS,%bx # Copy COPY_BLKS 32k blocks -copyloop: - add $COPY_BLK_SZ,%si # Adjust %ds:%si to point at - mov %ds,%ax # the end of the next 32k to - sub $COPY_BLK_SZ/16,%ax # copy from boot2 - mov %ax,%ds - mov $COPY_BLK_SZ-1,%di # Adjust %es:%di to point at - mov %es,%ax # the end of the next 32k into - sub $COPY_BLK_SZ/16,%ax # which we want boot2 copied - mov %ax,%es - mov $COPY_BLK_SZ,%cx # Copy 32k - std - rep movsb - dec %bx - jnz copyloop - mov %cx,%ds # Reset %ds and %es - mov %cx,%es - cld # Back to increment - -/* - * Enable A20 so we can access memory above 1 meg. - * Use the zero-valued %cx as a timeout for embedded hardware which do not - * have a keyboard controller. - */ -seta20: cli # Disable interrupts -seta20.1: dec %cx # Timeout? - jz seta20.3 # Yes - inb $0x64,%al # Get status - testb $0x2,%al # Busy? - jnz seta20.1 # Yes - movb $0xd1,%al # Command: Write - outb %al,$0x64 # output port -seta20.2: inb $0x64,%al # Get status - testb $0x2,%al # Busy? - jnz seta20.2 # Yes - movb $0xdf,%al # Enable - outb %al,$0x60 # A20 -seta20.3: sti # Enable interrupts - - jmp start+MEM_JMP-MEM_ORG # Start BTX - - -/* - * Read a sector from the disk. Sets up an EDD packet on the stack - * and passes it to read. We assume that the destination address is - * always segment-aligned. - * - * %eax - int - LBA to read in relative to partition start - * %ebx - ptr - destination address - * %dl - byte - drive to read from - * %si - ptr - MBR partition entry - */ -read: xor %ecx,%ecx # Get - addl 0x8(%si),%eax # LBA - adc $0,%ecx - pushl %ecx # Starting absolute block - pushl %eax # block number - shr $4,%ebx # Convert to segment - push %bx # Address of - push $0 # transfer buffer - push $0x1 # Read 1 sector - push $0x10 # Size of packet - mov %sp,%si # Packet pointer - mov $0x42,%ah # BIOS: Extended - int $0x13 # read - jc read.1 # If error, fail - lea 0x10(%si),%sp # Clear stack - ret # If success, return -read.1: mov %ah,%al # Format - mov $read_err,%di # error - call hex8 # code - mov $msg_read,%si # Set the error message and - # fall through to the error - # routine -/* - * Print out the error message pointed to by %ds:(%si) followed - * by a prompt, wait for a keypress, and then reboot the machine. - */ -error: callw putstr # Display message - mov $prompt,%si # Display - callw putstr # prompt - xorb %ah,%ah # BIOS: Get - int $0x16 # keypress - movw $0x1234, BDA_BOOT # Do a warm boot - ljmp $0xffff,$0x0 # reboot the machine -/* - * Display a null-terminated string using the BIOS output. - */ -putstr.0: mov $0x7,%bx # Page:attribute - movb $0xe,%ah # BIOS: Display - int $0x10 # character -putstr: lodsb # Get char - testb %al,%al # End of string? - jne putstr.0 # No - ret # To caller -/* - * Check to see if the disk supports EDD. zfsboot requires EDD and does not - * support older C/H/S disk I/O. - */ -check_edd: cmpb $0x80,%dl # Hard drive? - jb check_edd.1 # No, fail to boot - mov $0x55aa,%bx # Magic - push %dx # Save - movb $0x41,%ah # BIOS: Check - int $0x13 # extensions present - pop %dx # Restore - jc check_edd.1 # If error, fail - cmp $0xaa55,%bx # Magic? - jne check_edd.1 # No, so fail - testb $0x1,%cl # Packet interface? - jz check_edd.1 # No, so fail - ret # EDD ok, keep booting -check_edd.1: mov $msg_chs,%si # Warn that CHS is - jmp error # unsupported and fail -/* - * AL to hex, saving the result to [EDI]. - */ -hex8: push %ax # Save - shrb $0x4,%al # Do upper - call hex8.1 # 4 - pop %ax # Restore -hex8.1: andb $0xf,%al # Get lower 4 - cmpb $0xa,%al # Convert - sbbb $0x69,%al # to hex - das # digit - orb $0x20,%al # To lower case - stosb # Save char - ret # (Recursive) - -/* Messages */ - -msg_chs: .asciz "CHS not supported" -msg_read: .ascii "Read error: " -read_err: .asciz "XX" -msg_part: .asciz "Boot error" - -prompt: .asciz "\r\n" - - .org PRT_OFF,0x90 - -/* Partition table */ - - .fill 0x30,0x1,0x0 -part4: .byte 0x80, 0x00, 0x01, 0x00 - .byte 0xa5, 0xfe, 0xff, 0xff - .byte 0x00, 0x00, 0x00, 0x00 - .byte 0x50, 0xc3, 0x00, 0x00 # 50000 sectors long, bleh - - .word 0xaa55 # Magic number diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index 1f86538ce5f3..441330fd57b8 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -29,6 +29,8 @@ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ +#include <sys/domainset.h> + #include <machine/vmm.h> #include <machine/vmm_snapshot.h> @@ -52,7 +54,10 @@ struct vm_munmap { struct vm_memseg { int segid; size_t len; - char name[VM_MAX_SUFFIXLEN + 1]; + char name[VM_MAX_SUFFIXLEN + 1]; + domainset_t *ds_mask; + size_t ds_mask_size; + int ds_policy; }; struct vm_register { diff --git a/sys/arm64/include/vmm_dev.h b/sys/arm64/include/vmm_dev.h index 938bea47c7f8..219f1116c728 100644 --- a/sys/arm64/include/vmm_dev.h +++ b/sys/arm64/include/vmm_dev.h @@ -27,6 +27,8 @@ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ +#include <sys/domainset.h> + #include <machine/vmm.h> struct vm_memmap { @@ -49,6 +51,9 @@ struct vm_memseg { int segid; size_t len; char name[VM_MAX_SUFFIXLEN + 1]; + domainset_t *ds_mask; + size_t ds_mask_size; + int ds_policy; }; struct vm_register { diff --git a/sys/cddl/dev/sdt/sdt.c b/sys/cddl/dev/sdt/sdt.c index a8da618204af..0a9059104671 100644 --- a/sys/cddl/dev/sdt/sdt.c +++ b/sys/cddl/dev/sdt/sdt.c @@ -72,6 +72,7 @@ static void sdt_load(void); static int sdt_unload(void); static void sdt_create_provider(struct sdt_provider *); static void sdt_create_probe(struct sdt_probe *); +static void sdt_init_probe(struct sdt_probe *, linker_file_t); static void sdt_kld_load(void *, struct linker_file *); static void sdt_kld_unload_try(void *, struct linker_file *, int *); @@ -204,6 +205,14 @@ sdt_create_probe(struct sdt_probe *probe) (void)dtrace_probe_create(prov->id, mod, func, name, aframes, probe); } +static void +sdt_init_probe(struct sdt_probe *probe, linker_file_t lf) +{ + probe->sdtp_lf = lf; + TAILQ_INIT(&probe->argtype_list); + STAILQ_INIT(&probe->tracepoint_list); +} + /* * Probes are created through the SDT module load/unload hook, so this function * has nothing to do. It only exists because the DTrace provider framework @@ -361,12 +370,19 @@ static void sdt_kld_load_providers(struct linker_file *lf) { struct sdt_provider **prov, **begin, **end; + struct sdt_probe **p_begin, **p_end; if (linker_file_lookup_set(lf, "sdt_providers_set", &begin, &end, NULL) == 0) { for (prov = begin; prov < end; prov++) sdt_create_provider(*prov); } + + if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end, + NULL) == 0) { + for (struct sdt_probe **probe = p_begin; probe < p_end; probe++) + sdt_init_probe(*probe, lf); + } } static void @@ -378,13 +394,8 @@ sdt_kld_load_probes(struct linker_file *lf) if (linker_file_lookup_set(lf, "sdt_probes_set", &p_begin, &p_end, NULL) == 0) { - for (struct sdt_probe **probe = p_begin; probe < p_end; - probe++) { - (*probe)->sdtp_lf = lf; + for (struct sdt_probe **probe = p_begin; probe < p_end; probe++) sdt_create_probe(*probe); - TAILQ_INIT(&(*probe)->argtype_list); - STAILQ_INIT(&(*probe)->tracepoint_list); - } } if (linker_file_lookup_set(lf, "sdt_argtypes_set", &a_begin, &a_end, diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index cfb054235489..1c6d64d6b8bc 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -1911,7 +1911,7 @@ linprocfs_doproclimits(PFS_FILL_ARGS) "kern.sigqueue.max_pending_per_proc", &res, &size, 0, 0, 0, 0); if (error != 0) - goto out; + continue; rl.rlim_cur = res; rl.rlim_max = res; break; @@ -1919,7 +1919,7 @@ linprocfs_doproclimits(PFS_FILL_ARGS) error = kernel_sysctlbyname(td, "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0); if (error != 0) - goto out; + continue; rl.rlim_cur = res; rl.rlim_max = res; break; @@ -1941,9 +1941,9 @@ linprocfs_doproclimits(PFS_FILL_ARGS) li->desc, (unsigned long long)rl.rlim_cur, (unsigned long long)rl.rlim_max, li->unit); } -out: + lim_free(limp); - return (error); + return (0); } /* diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index 86834a7ecea8..a4be5313aa96 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -1792,7 +1792,7 @@ linux_memfd_create(struct thread *td, struct linux_memfd_create_args *args) if ((flags & MFD_ALLOW_SEALING) != 0) shmflags |= SHM_ALLOW_SEALING; return (kern_shm_open2(td, SHM_ANON, oflags, 0, shmflags, NULL, - memfd_name)); + memfd_name, NULL)); } int diff --git a/sys/conf/files b/sys/conf/files index dd0d390962f2..b7c19fae0b8e 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3768,6 +3768,7 @@ gnu/gcov/gcov_subr.c optional gcov kern/bus_if.m standard kern/clock_if.m standard +kern/coredump_vnode.c standard kern/cpufreq_if.m standard kern/device_if.m standard kern/imgact_binmisc.c optional imgact_binmisc @@ -3856,6 +3857,7 @@ kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_tslog.c optional tslog kern/kern_ubsan.c optional kubsan +kern/kern_ucoredump.c standard kern/kern_umtx.c standard kern/kern_uuid.c standard kern/kern_vnodedumper.c standard diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index 901da27e63f2..641001efab5e 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -368,6 +368,10 @@ dev/ice/irdma_di_if.m optional ice pci \ compile-with "${NORMAL_M} -I$S/dev/ice" dev/ice/ice_ddp_common.c optional ice pci \ compile-with "${NORMAL_C} -I$S/dev/ice" +dev/ice/ice_iov.c optional ice pci pci_iov \ + compile-with "${NORMAL_C} -I$S/dev/ice" +dev/ice/ice_vf_mbx.c optional ice pci pci_iov \ + compile-with "${NORMAL_C} -I$S/dev/ice" ice_ddp.c optional ice_ddp \ compile-with "${AWK} -f $S/tools/fw_stub.awk ice_ddp.fw:ice_ddp:0x01032900 -mice_ddp -c${.TARGET}" \ no-ctfconvert no-implicit-rule before-depend local \ diff --git a/sys/conf/files.x86 b/sys/conf/files.x86 index df206b314b38..9976e9cfec5d 100644 --- a/sys/conf/files.x86 +++ b/sys/conf/files.x86 @@ -62,6 +62,7 @@ dev/acpi_support/acpi_wmi_if.m standard dev/agp/agp_amd64.c optional agp dev/agp/agp_i810.c optional agp dev/agp/agp_via.c optional agp +dev/amdsmu/amdsmu.c optional amdsmu pci dev/amdsbwd/amdsbwd.c optional amdsbwd dev/amdsmn/amdsmn.c optional amdsmn | amdtemp dev/amdtemp/amdtemp.c optional amdtemp diff --git a/sys/dev/amdsmu/amdsmu.c b/sys/dev/amdsmu/amdsmu.c new file mode 100644 index 000000000000..416f875c6176 --- /dev/null +++ b/sys/dev/amdsmu/amdsmu.c @@ -0,0 +1,466 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 The FreeBSD Foundation + * + * This software was developed by Aymeric Wibo <obiwac@freebsd.org> + * under sponsorship from the FreeBSD Foundation. + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/rman.h> +#include <sys/sysctl.h> + +#include <dev/pci/pcivar.h> +#include <dev/amdsmu/amdsmu.h> + +static bool +amdsmu_match(device_t dev, const struct amdsmu_product **product_out) +{ + const uint16_t vendorid = pci_get_vendor(dev); + const uint16_t deviceid = pci_get_device(dev); + + for (size_t i = 0; i < nitems(amdsmu_products); i++) { + const struct amdsmu_product *prod = &amdsmu_products[i]; + + if (vendorid == prod->amdsmu_vendorid && + deviceid == prod->amdsmu_deviceid) { + if (product_out != NULL) + *product_out = prod; + return (true); + } + } + return (false); +} + +static void +amdsmu_identify(driver_t *driver, device_t parent) +{ + if (device_find_child(parent, "amdsmu", -1) != NULL) + return; + + if (amdsmu_match(parent, NULL)) { + if (device_add_child(parent, "amdsmu", -1) == NULL) + device_printf(parent, "add amdsmu child failed\n"); + } +} + +static int +amdsmu_probe(device_t dev) +{ + if (resource_disabled("amdsmu", 0)) + return (ENXIO); + if (!amdsmu_match(device_get_parent(dev), NULL)) + return (ENXIO); + device_set_descf(dev, "AMD System Management Unit"); + + return (BUS_PROBE_GENERIC); +} + +static enum amdsmu_res +amdsmu_wait_res(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + enum amdsmu_res res; + + /* + * The SMU has a response ready for us when the response register is + * set. Otherwise, we must wait. + */ + for (size_t i = 0; i < SMU_RES_READ_MAX; i++) { + res = amdsmu_read4(sc, SMU_REG_RESPONSE); + if (res != SMU_RES_WAIT) + return (res); + pause_sbt("amdsmu", ustosbt(SMU_RES_READ_PERIOD_US), 0, + C_HARDCLOCK); + } + device_printf(dev, "timed out waiting for response from SMU\n"); + return (SMU_RES_WAIT); +} + +static int +amdsmu_cmd(device_t dev, enum amdsmu_msg msg, uint32_t arg, uint32_t *ret) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + enum amdsmu_res res; + + /* Wait for SMU to be ready. */ + if (amdsmu_wait_res(dev) == SMU_RES_WAIT) + return (ETIMEDOUT); + + /* Clear previous response. */ + amdsmu_write4(sc, SMU_REG_RESPONSE, SMU_RES_WAIT); + + /* Write out command to registers. */ + amdsmu_write4(sc, SMU_REG_MESSAGE, msg); + amdsmu_write4(sc, SMU_REG_ARGUMENT, arg); + + /* Wait for SMU response and handle it. */ + res = amdsmu_wait_res(dev); + + switch (res) { + case SMU_RES_WAIT: + return (ETIMEDOUT); + case SMU_RES_OK: + if (ret != NULL) + *ret = amdsmu_read4(sc, SMU_REG_ARGUMENT); + return (0); + case SMU_RES_REJECT_BUSY: + device_printf(dev, "SMU is busy\n"); + return (EBUSY); + case SMU_RES_REJECT_PREREQ: + case SMU_RES_UNKNOWN: + case SMU_RES_FAILED: + device_printf(dev, "SMU error: %02x\n", res); + return (EIO); + } + + return (EINVAL); +} + +static int +amdsmu_get_vers(device_t dev) +{ + int err; + uint32_t smu_vers; + struct amdsmu_softc *sc = device_get_softc(dev); + + err = amdsmu_cmd(dev, SMU_MSG_GETSMUVERSION, 0, &smu_vers); + if (err != 0) { + device_printf(dev, "failed to get SMU version\n"); + return (err); + } + sc->smu_program = (smu_vers >> 24) & 0xFF; + sc->smu_maj = (smu_vers >> 16) & 0xFF; + sc->smu_min = (smu_vers >> 8) & 0xFF; + sc->smu_rev = smu_vers & 0xFF; + device_printf(dev, "SMU version: %d.%d.%d (program %d)\n", + sc->smu_maj, sc->smu_min, sc->smu_rev, sc->smu_program); + + return (0); +} + +static int +amdsmu_get_ip_blocks(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + const uint16_t deviceid = pci_get_device(dev); + int err; + struct amdsmu_metrics *m = &sc->metrics; + bool active; + char sysctl_descr[32]; + + /* Get IP block count. */ + switch (deviceid) { + case PCI_DEVICEID_AMD_REMBRANDT_ROOT: + sc->ip_block_count = 12; + break; + case PCI_DEVICEID_AMD_PHOENIX_ROOT: + sc->ip_block_count = 21; + break; + /* TODO How many IP blocks does Strix Point (and the others) have? */ + case PCI_DEVICEID_AMD_STRIX_POINT_ROOT: + default: + sc->ip_block_count = nitems(amdsmu_ip_blocks_names); + } + KASSERT(sc->ip_block_count <= nitems(amdsmu_ip_blocks_names), + ("too many IP blocks for array")); + + /* Get and print out IP blocks. */ + err = amdsmu_cmd(dev, SMU_MSG_GET_SUP_CONSTRAINTS, 0, + &sc->active_ip_blocks); + if (err != 0) { + device_printf(dev, "failed to get IP blocks\n"); + return (err); + } + device_printf(dev, "Active IP blocks: "); + for (size_t i = 0; i < sc->ip_block_count; i++) { + active = (sc->active_ip_blocks & (1 << i)) != 0; + sc->ip_blocks_active[i] = active; + if (!active) + continue; + printf("%s%s", amdsmu_ip_blocks_names[i], + i + 1 < sc->ip_block_count ? " " : "\n"); + } + + /* Create a sysctl node for IP blocks. */ + sc->ip_blocks_sysctlnode = SYSCTL_ADD_NODE(sc->sysctlctx, + SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, "ip_blocks", + CTLFLAG_RD, NULL, "SMU metrics"); + if (sc->ip_blocks_sysctlnode == NULL) { + device_printf(dev, "could not add sysctl node for IP blocks\n"); + return (ENOMEM); + } + + /* Create a sysctl node for each IP block. */ + for (size_t i = 0; i < sc->ip_block_count; i++) { + /* Create the sysctl node itself for the IP block. */ + snprintf(sysctl_descr, sizeof sysctl_descr, + "Metrics about the %s AMD IP block", + amdsmu_ip_blocks_names[i]); + sc->ip_block_sysctlnodes[i] = SYSCTL_ADD_NODE(sc->sysctlctx, + SYSCTL_CHILDREN(sc->ip_blocks_sysctlnode), OID_AUTO, + amdsmu_ip_blocks_names[i], CTLFLAG_RD, NULL, sysctl_descr); + if (sc->ip_block_sysctlnodes[i] == NULL) { + device_printf(dev, + "could not add sysctl node for \"%s\"\n", sysctl_descr); + continue; + } + /* + * Create sysctls for if the IP block is currently active, last + * active time, and total active time. + */ + SYSCTL_ADD_BOOL(sc->sysctlctx, + SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO, + "active", CTLFLAG_RD, &sc->ip_blocks_active[i], 0, + "IP block is currently active"); + SYSCTL_ADD_U64(sc->sysctlctx, + SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO, + "last_time", CTLFLAG_RD, &m->ip_block_last_active_time[i], + 0, "How long the IP block was active for during the last" + " sleep (us)"); +#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME + SYSCTL_ADD_U64(sc->sysctlctx, + SYSCTL_CHILDREN(sc->ip_block_sysctlnodes[i]), OID_AUTO, + "total_time", CTLFLAG_RD, &m->ip_block_total_active_time[i], + 0, "How long the IP block was active for during sleep in" + " total (us)"); +#endif + } + return (0); +} + +static int +amdsmu_init_metrics(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + int err; + uint32_t metrics_addr_lo, metrics_addr_hi; + uint64_t metrics_addr; + + /* Get physical address of logging buffer. */ + err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_LO, 0, &metrics_addr_lo); + if (err != 0) + return (err); + err = amdsmu_cmd(dev, SMU_MSG_LOG_GETDRAM_ADDR_HI, 0, &metrics_addr_hi); + if (err != 0) + return (err); + metrics_addr = ((uint64_t) metrics_addr_hi << 32) | metrics_addr_lo; + + /* Map memory of logging buffer. */ + err = bus_space_map(sc->bus_tag, metrics_addr, + sizeof(struct amdsmu_metrics), 0, &sc->metrics_space); + if (err != 0) { + device_printf(dev, "could not map bus space for SMU metrics\n"); + return (err); + } + + /* Start logging for metrics. */ + amdsmu_cmd(dev, SMU_MSG_LOG_RESET, 0, NULL); + amdsmu_cmd(dev, SMU_MSG_LOG_START, 0, NULL); + return (0); +} + +static int +amdsmu_dump_metrics(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + int err; + + err = amdsmu_cmd(dev, SMU_MSG_LOG_DUMP_DATA, 0, NULL); + if (err != 0) { + device_printf(dev, "failed to dump metrics\n"); + return (err); + } + bus_space_read_region_4(sc->bus_tag, sc->metrics_space, 0, + (uint32_t *)&sc->metrics, sizeof(sc->metrics) / sizeof(uint32_t)); + + return (0); +} + +static void +amdsmu_fetch_idlemask(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + + sc->idlemask = amdsmu_read4(sc, SMU_REG_IDLEMASK); +} + +static int +amdsmu_attach(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + int err; + uint32_t physbase_addr_lo, physbase_addr_hi; + uint64_t physbase_addr; + int rid = 0; + struct sysctl_oid *node; + + /* + * Find physical base address for SMU. + * XXX I am a little confused about the masks here. I'm just copying + * what Linux does in the amd-pmc driver to get the base address. + */ + pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_LO, 4); + physbase_addr_lo = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0xFFF00000; + + pci_write_config(dev, SMU_INDEX_ADDRESS, SMU_PHYSBASE_ADDR_HI, 4); + physbase_addr_hi = pci_read_config(dev, SMU_INDEX_DATA, 4) & 0x0000FFFF; + + physbase_addr = (uint64_t)physbase_addr_hi << 32 | physbase_addr_lo; + + /* Map memory for SMU and its registers. */ + sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); + if (sc->res == NULL) { + device_printf(dev, "could not allocate resource\n"); + return (ENXIO); + } + + sc->bus_tag = rman_get_bustag(sc->res); + + if (bus_space_map(sc->bus_tag, physbase_addr, + SMU_MEM_SIZE, 0, &sc->smu_space) != 0) { + device_printf(dev, "could not map bus space for SMU\n"); + err = ENXIO; + goto err_smu_space; + } + if (bus_space_map(sc->bus_tag, physbase_addr + SMU_REG_SPACE_OFF, + SMU_MEM_SIZE, 0, &sc->reg_space) != 0) { + device_printf(dev, "could not map bus space for SMU regs\n"); + err = ENXIO; + goto err_reg_space; + } + + /* sysctl stuff. */ + sc->sysctlctx = device_get_sysctl_ctx(dev); + sc->sysctlnode = device_get_sysctl_tree(dev); + + /* Get version & add sysctls. */ + if ((err = amdsmu_get_vers(dev)) != 0) + goto err_dump; + + SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "program", CTLFLAG_RD, &sc->smu_program, 0, "SMU program number"); + SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "version_major", CTLFLAG_RD, &sc->smu_maj, 0, + "SMU firmware major version number"); + SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "version_minor", CTLFLAG_RD, &sc->smu_min, 0, + "SMU firmware minor version number"); + SYSCTL_ADD_U8(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "version_revision", CTLFLAG_RD, &sc->smu_rev, 0, + "SMU firmware revision number"); + + /* Set up for getting metrics & add sysctls. */ + if ((err = amdsmu_init_metrics(dev)) != 0) + goto err_dump; + if ((err = amdsmu_dump_metrics(dev)) != 0) + goto err_dump; + + node = SYSCTL_ADD_NODE(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), + OID_AUTO, "metrics", CTLFLAG_RD, NULL, "SMU metrics"); + if (node == NULL) { + device_printf(dev, "could not add sysctl node for metrics\n"); + err = ENOMEM; + goto err_dump; + } + + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "table_version", CTLFLAG_RD, &sc->metrics.table_version, 0, + "SMU metrics table version"); + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "hint_count", CTLFLAG_RD, &sc->metrics.hint_count, 0, + "How many times the sleep hint was set"); + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "s0i3_last_entry_status", CTLFLAG_RD, + &sc->metrics.s0i3_last_entry_status, 0, + "1 if last S0i3 entry was successful"); + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_in_s0i2", CTLFLAG_RD, &sc->metrics.time_last_in_s0i2, 0, + "Time spent in S0i2 during last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_entering_s0i3", CTLFLAG_RD, + &sc->metrics.time_last_entering_s0i3, 0, + "Time spent entering S0i3 during last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "total_time_entering_s0i3", CTLFLAG_RD, + &sc->metrics.total_time_entering_s0i3, 0, + "Total time spent entering S0i3 (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_resuming", CTLFLAG_RD, &sc->metrics.time_last_resuming, + 0, "Time spent resuming from last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "total_time_resuming", CTLFLAG_RD, &sc->metrics.total_time_resuming, + 0, "Total time spent resuming from sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_in_s0i3", CTLFLAG_RD, &sc->metrics.time_last_in_s0i3, 0, + "Time spent in S0i3 during last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "total_time_in_s0i3", CTLFLAG_RD, &sc->metrics.total_time_in_s0i3, + 0, "Total time spent in S0i3 (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "time_last_in_sw_drips", CTLFLAG_RD, + &sc->metrics.time_last_in_sw_drips, 0, + "Time spent in awake during last sleep (us)"); + SYSCTL_ADD_U64(sc->sysctlctx, SYSCTL_CHILDREN(node), OID_AUTO, + "total_time_in_sw_drips", CTLFLAG_RD, + &sc->metrics.total_time_in_sw_drips, 0, + "Total time spent awake (us)"); + + /* Get IP blocks & add sysctls. */ + err = amdsmu_get_ip_blocks(dev); + if (err != 0) + goto err_dump; + + /* Get idlemask & add sysctl. */ + amdsmu_fetch_idlemask(dev); + SYSCTL_ADD_U32(sc->sysctlctx, SYSCTL_CHILDREN(sc->sysctlnode), OID_AUTO, + "idlemask", CTLFLAG_RD, &sc->idlemask, 0, "SMU idlemask. This " + "value is not documented - only used to help AMD internally debug " + "issues"); + + return (0); +err_dump: + bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE); +err_reg_space: + bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE); +err_smu_space: + bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res); + return (err); +} + +static int +amdsmu_detach(device_t dev) +{ + struct amdsmu_softc *sc = device_get_softc(dev); + int rid = 0; + + bus_space_unmap(sc->bus_tag, sc->smu_space, SMU_MEM_SIZE); + bus_space_unmap(sc->bus_tag, sc->reg_space, SMU_MEM_SIZE); + + bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->res); + return (0); +} + +static device_method_t amdsmu_methods[] = { + DEVMETHOD(device_identify, amdsmu_identify), + DEVMETHOD(device_probe, amdsmu_probe), + DEVMETHOD(device_attach, amdsmu_attach), + DEVMETHOD(device_detach, amdsmu_detach), + DEVMETHOD_END +}; + +static driver_t amdsmu_driver = { + "amdsmu", + amdsmu_methods, + sizeof(struct amdsmu_softc), +}; + +DRIVER_MODULE(amdsmu, hostb, amdsmu_driver, NULL, NULL); +MODULE_VERSION(amdsmu, 1); +MODULE_DEPEND(amdsmu, amdsmn, 1, 1, 1); +MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmu, amdsmu_products, + nitems(amdsmu_products)); diff --git a/sys/dev/amdsmu/amdsmu.h b/sys/dev/amdsmu/amdsmu.h new file mode 100644 index 000000000000..025887f7fe5a --- /dev/null +++ b/sys/dev/amdsmu/amdsmu.h @@ -0,0 +1,95 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 The FreeBSD Foundation + * + * This software was developed by Aymeric Wibo <obiwac@freebsd.org> + * under sponsorship from the FreeBSD Foundation. + */ +#ifndef _AMDSMU_H_ +#define _AMDSMU_H_ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <machine/bus.h> +#include <x86/cputypes.h> + +#include <dev/amdsmu/amdsmu_reg.h> + +#define SMU_RES_READ_PERIOD_US 50 +#define SMU_RES_READ_MAX 20000 + +static const struct amdsmu_product { + uint16_t amdsmu_vendorid; + uint16_t amdsmu_deviceid; +} amdsmu_products[] = { + { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_REMBRANDT_ROOT }, + { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_PHOENIX_ROOT }, + { CPU_VENDOR_AMD, PCI_DEVICEID_AMD_STRIX_POINT_ROOT }, +}; + +static const char *const amdsmu_ip_blocks_names[] = { + "DISPLAY", + "CPU", + "GFX", + "VDD", + "ACP", + "VCN", + "ISP", + "NBIO", + "DF", + "USB3_0", + "USB3_1", + "LAPIC", + "USB3_2", + "USB3_3", + "USB3_4", + "USB4_0", + "USB4_1", + "MPM", + "JPEG", + "IPU", + "UMSCH", + "VPE", +}; + +CTASSERT(nitems(amdsmu_ip_blocks_names) <= 32); + +struct amdsmu_softc { + struct sysctl_ctx_list *sysctlctx; + struct sysctl_oid *sysctlnode; + + struct resource *res; + bus_space_tag_t bus_tag; + + bus_space_handle_t smu_space; + bus_space_handle_t reg_space; + + uint8_t smu_program; + uint8_t smu_maj, smu_min, smu_rev; + + uint32_t active_ip_blocks; + struct sysctl_oid *ip_blocks_sysctlnode; + size_t ip_block_count; + struct sysctl_oid *ip_block_sysctlnodes[nitems(amdsmu_ip_blocks_names)]; + bool ip_blocks_active[nitems(amdsmu_ip_blocks_names)]; + + bus_space_handle_t metrics_space; + struct amdsmu_metrics metrics; + uint32_t idlemask; +}; + +static inline uint32_t +amdsmu_read4(const struct amdsmu_softc *sc, bus_size_t reg) +{ + return (bus_space_read_4(sc->bus_tag, sc->reg_space, reg)); +} + +static inline void +amdsmu_write4(const struct amdsmu_softc *sc, bus_size_t reg, uint32_t val) +{ + bus_space_write_4(sc->bus_tag, sc->reg_space, reg, val); +} + +#endif /* _AMDSMU_H_ */ diff --git a/sys/dev/amdsmu/amdsmu_reg.h b/sys/dev/amdsmu/amdsmu_reg.h new file mode 100644 index 000000000000..e685b34e6883 --- /dev/null +++ b/sys/dev/amdsmu/amdsmu_reg.h @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 The FreeBSD Foundation + * + * This software was developed by Aymeric Wibo <obiwac@freebsd.org> + * under sponsorship from the FreeBSD Foundation. + */ +#ifndef _AMDSMU_REG_H_ +#define _AMDSMU_REG_H_ + +#include <sys/types.h> + +/* + * TODO These are in common with amdtemp; should we find a way to factor these + * out? Also, there are way more of these. I couldn't find a centralized place + * which lists them though. + */ +#define PCI_DEVICEID_AMD_REMBRANDT_ROOT 0x14B5 +#define PCI_DEVICEID_AMD_PHOENIX_ROOT 0x14E8 +#define PCI_DEVICEID_AMD_STRIX_POINT_ROOT 0x14A4 + +#define SMU_INDEX_ADDRESS 0xB8 +#define SMU_INDEX_DATA 0xBC + +#define SMU_PHYSBASE_ADDR_LO 0x13B102E8 +#define SMU_PHYSBASE_ADDR_HI 0x13B102EC + +#define SMU_MEM_SIZE 0x1000 +#define SMU_REG_SPACE_OFF 0x10000 + +#define SMU_REG_MESSAGE 0x538 +#define SMU_REG_RESPONSE 0x980 +#define SMU_REG_ARGUMENT 0x9BC +#define SMU_REG_IDLEMASK 0xD14 + +enum amdsmu_res { + SMU_RES_WAIT = 0x00, + SMU_RES_OK = 0x01, + SMU_RES_REJECT_BUSY = 0xFC, + SMU_RES_REJECT_PREREQ = 0xFD, + SMU_RES_UNKNOWN = 0xFE, + SMU_RES_FAILED = 0xFF, +}; + +enum amdsmu_msg { + SMU_MSG_GETSMUVERSION = 0x02, + SMU_MSG_LOG_GETDRAM_ADDR_HI = 0x04, + SMU_MSG_LOG_GETDRAM_ADDR_LO = 0x05, + SMU_MSG_LOG_START = 0x06, + SMU_MSG_LOG_RESET = 0x07, + SMU_MSG_LOG_DUMP_DATA = 0x08, + SMU_MSG_GET_SUP_CONSTRAINTS = 0x09, +}; + +/* XXX Copied from Linux struct smu_metrics. */ +struct amdsmu_metrics { + uint32_t table_version; + uint32_t hint_count; + uint32_t s0i3_last_entry_status; + uint32_t time_last_in_s0i2; + uint64_t time_last_entering_s0i3; + uint64_t total_time_entering_s0i3; + uint64_t time_last_resuming; + uint64_t total_time_resuming; + uint64_t time_last_in_s0i3; + uint64_t total_time_in_s0i3; + uint64_t time_last_in_sw_drips; + uint64_t total_time_in_sw_drips; + /* + * This is how long each IP block was active for (us), i.e., blocking + * entry to S0i3. In Linux, these are called "timecondition_notmet_*". + * + * XXX Total active time for IP blocks seems to be buggy and reporting + * garbage (at least on Phoenix), so it's disabled for now. The last + * active time for the USB4_0 IP block also seems to be buggy. + */ + uint64_t ip_block_last_active_time[32]; +#ifdef IP_BLOCK_TOTAL_ACTIVE_TIME + uint64_t ip_block_total_active_time[32]; +#endif +} __attribute__((packed)); + +#endif /* _AMDSMU_REG_H_ */ diff --git a/sys/dev/iicbus/iichid.c b/sys/dev/iicbus/iichid.c index 9c0324a24685..3f1d7a0cefba 100644 --- a/sys/dev/iicbus/iichid.c +++ b/sys/dev/iicbus/iichid.c @@ -275,62 +275,36 @@ iichid_cmd_read(struct iichid_softc* sc, void *buf, iichid_size_t maxlen, * 6.1.3 - Retrieval of Input Reports * DEVICE returns the length (2 Bytes) and the entire Input Report. */ - uint8_t actbuf[2] = { 0, 0 }; - /* Read actual input report length. */ + + memset(buf, 0xaa, 2); // In case nothing gets read struct iic_msg msgs[] = { - { sc->addr, IIC_M_RD | IIC_M_NOSTOP, sizeof(actbuf), actbuf }, + { sc->addr, IIC_M_RD, maxlen, buf }, }; - uint16_t actlen; int error; error = iicbus_transfer(sc->dev, msgs, nitems(msgs)); if (error != 0) return (error); - actlen = actbuf[0] | actbuf[1] << 8; -#ifdef IICHID_SAMPLING - if ((actlen == 0 && sc->sampling_rate_slow < 0) || - (maxlen == 0 && sc->sampling_rate_slow >= 0)) { -#else + DPRINTFN(sc, 5, "%*D\n", msgs[0].len, msgs[0].buf, " "); + + uint16_t actlen = le16dec(buf); + if (actlen == 0) { -#endif - /* Read and discard reset command response. */ - msgs[0] = (struct iic_msg) - { sc->addr, IIC_M_RD | IIC_M_NOSTART, - le16toh(sc->desc.wMaxInputLength) - 2, sc->intr_buf }; - actlen = 0; if (!sc->reset_acked) { mtx_lock(&sc->mtx); sc->reset_acked = true; wakeup(&sc->reset_acked); mtx_unlock(&sc->mtx); } -#ifdef IICHID_SAMPLING - } else if ((actlen <= 2 || actlen == 0xFFFF) && - sc->sampling_rate_slow >= 0) { - /* Read and discard 1 byte to send I2C STOP condition. */ - msgs[0] = (struct iic_msg) - { sc->addr, IIC_M_RD | IIC_M_NOSTART, 1, actbuf }; - actlen = 0; -#endif - } else { - actlen -= 2; - if (actlen > maxlen) { - DPRINTF(sc, "input report too big. requested=%d " - "received=%d\n", maxlen, actlen); - actlen = maxlen; - } - /* Read input report itself. */ - msgs[0] = (struct iic_msg) - { sc->addr, IIC_M_RD | IIC_M_NOSTART, actlen, buf }; } - error = iicbus_transfer(sc->dev, msgs, 1); - if (error == 0 && actual_len != NULL) + if (actlen <= 2 || actlen > maxlen) { + actlen = 0; + } + if (actual_len != NULL) { *actual_len = actlen; - - DPRINTFN(sc, 5, - "%*D - %*D\n", 2, actbuf, " ", msgs[0].len, msgs[0].buf, " "); + } return (error); } @@ -566,7 +540,7 @@ iichid_sampling_task(void *context, int pending) error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual); if (error == 0) { if (actual > 0) { - sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual); + sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual); sc->missing_samples = 0; if (sc->dup_size != actual || memcmp(sc->dup_buf, sc->intr_buf, actual) != 0) { @@ -577,7 +551,7 @@ iichid_sampling_task(void *context, int pending) ++sc->dup_samples; } else { if (++sc->missing_samples == 1) - sc->intr_handler(sc->intr_ctx, sc->intr_buf, 0); + sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, 0); sc->dup_samples = 0; } } else @@ -632,7 +606,7 @@ iichid_intr(void *context) if (error == 0) { if (sc->power_on && sc->open) { if (actual != 0) - sc->intr_handler(sc->intr_ctx, sc->intr_buf, + sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual); else DPRINTF(sc, "no data received\n"); @@ -842,11 +816,12 @@ iichid_intr_setup(device_t dev, device_t child __unused, hid_intr_t intr, sc = device_get_softc(dev); /* - * Do not rely on wMaxInputLength, as some devices may set it to - * a wrong length. Find the longest input report in report descriptor. + * Do not rely just on wMaxInputLength, as some devices (which?) + * may set it to a wrong length. Also find the longest input report + * in report descriptor, and add two for the length field. */ - rdesc->rdsize = - MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength) - 2); + rdesc->rdsize = 2 + + MAX(rdesc->isize, le16toh(sc->desc.wMaxInputLength)); /* Write and get/set_report sizes are limited by I2C-HID protocol. */ rdesc->grsize = rdesc->srsize = IICHID_SIZE_MAX; rdesc->wrsize = IICHID_SIZE_MAX; @@ -919,7 +894,7 @@ iichid_intr_poll(device_t dev, device_t child __unused) sc = device_get_softc(dev); error = iichid_cmd_read(sc, sc->intr_buf, sc->intr_bufsize, &actual); if (error == 0 && actual != 0) - sc->intr_handler(sc->intr_ctx, sc->intr_buf, actual); + sc->intr_handler(sc->intr_ctx, sc->intr_buf + 2, actual); } /* @@ -946,6 +921,7 @@ iichid_read(device_t dev, device_t child __unused, void *buf, { struct iichid_softc *sc; device_t parent; + uint8_t *tmpbuf; int error; if (maxlen > IICHID_SIZE_MAX) @@ -954,8 +930,12 @@ iichid_read(device_t dev, device_t child __unused, void *buf, parent = device_get_parent(sc->dev); error = iicbus_request_bus(parent, sc->dev, IIC_WAIT); if (error == 0) { - error = iichid_cmd_read(sc, buf, maxlen, actlen); + tmpbuf = malloc(maxlen + 2, M_DEVBUF, M_WAITOK | M_ZERO); + error = iichid_cmd_read(sc, tmpbuf, maxlen + 2, actlen); iicbus_release_bus(parent, sc->dev); + if (*actlen > 0) + memcpy(buf, tmpbuf + 2, *actlen); + free(tmpbuf, M_DEVBUF); } return (iic2errno(error)); } diff --git a/sys/dev/nvmf/controller/nvmft_subr.c b/sys/dev/nvmf/controller/nvmft_subr.c index bb2bc0988e81..245971813854 100644 --- a/sys/dev/nvmf/controller/nvmft_subr.c +++ b/sys/dev/nvmf/controller/nvmft_subr.c @@ -26,46 +26,6 @@ nvmf_nqn_valid(const char *nqn) len = strnlen(nqn, NVME_NQN_FIELD_SIZE); if (len == 0 || len > NVMF_NQN_MAX_LEN) return (false); - -#ifdef STRICT_CHECKS - /* - * Stricter checks from the spec. Linux does not seem to - * require these. - */ - - /* - * NVMF_NQN_MIN_LEN does not include '.', and require at least - * one character of a domain name. - */ - if (len < NVMF_NQN_MIN_LEN + 2) - return (false); - if (memcmp("nqn.", nqn, strlen("nqn.")) != 0) - return (false); - nqn += strlen("nqn."); - - /* Next 4 digits must be a year. */ - for (u_int i = 0; i < 4; i++) { - if (!isdigit(nqn[i])) - return (false); - } - nqn += 4; - - /* '-' between year and month. */ - if (nqn[0] != '-') - return (false); - nqn++; - - /* 2 digit month. */ - for (u_int i = 0; i < 2; i++) { - if (!isdigit(nqn[i])) - return (false); - } - nqn += 2; - - /* '.' between month and reverse domain name. */ - if (nqn[0] != '.') - return (false); -#endif return (true); } diff --git a/sys/dev/pci/pci_iov.c b/sys/dev/pci/pci_iov.c index 1f72391fb6b4..0efcfeac9eff 100644 --- a/sys/dev/pci/pci_iov.c +++ b/sys/dev/pci/pci_iov.c @@ -734,11 +734,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) first_rid = pci_get_rid(dev) + rid_off; last_rid = first_rid + (num_vfs - 1) * rid_stride; - /* We don't yet support allocating extra bus numbers for VFs. */ if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) { - device_printf(dev, "not enough PCIe bus numbers for VFs\n"); - error = ENOSPC; - goto out; + int rid = 0; + uint16_t last_rid_bus = PCI_RID2BUS(last_rid); + + iov->iov_bus_res = bus_alloc_resource(bus, PCI_RES_BUS, &rid, + last_rid_bus, last_rid_bus, 1, RF_ACTIVE); + if (iov->iov_bus_res == NULL) { + device_printf(dev, + "failed to allocate PCIe bus number for VFs\n"); + error = ENOSPC; + goto out; + } } if (!ari_enabled && PCI_RID2SLOT(last_rid) != 0) { @@ -786,6 +793,11 @@ out: } } + if (iov->iov_bus_res != NULL) { + bus_release_resource(bus, iov->iov_bus_res); + iov->iov_bus_res = NULL; + } + if (iov->iov_flags & IOV_RMAN_INITED) { rman_fini(&iov->rman); iov->iov_flags &= ~IOV_RMAN_INITED; @@ -896,6 +908,11 @@ pci_iov_delete_iov_children(struct pci_devinfo *dinfo) } } + if (iov->iov_bus_res != NULL) { + bus_release_resource(bus, iov->iov_bus_res); + iov->iov_bus_res = NULL; + } + if (iov->iov_flags & IOV_RMAN_INITED) { rman_fini(&iov->rman); iov->iov_flags &= ~IOV_RMAN_INITED; diff --git a/sys/dev/pci/pci_iov_private.h b/sys/dev/pci/pci_iov_private.h index 7ae2219b936d..ecf0a9b21be5 100644 --- a/sys/dev/pci/pci_iov_private.h +++ b/sys/dev/pci/pci_iov_private.h @@ -39,6 +39,8 @@ struct pcicfg_iov { struct cdev *iov_cdev; nvlist_t *iov_schema; + struct resource *iov_bus_res; + struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1]; struct rman rman; char rman_name[64]; diff --git a/sys/dev/usb/input/usbhid.c b/sys/dev/usb/input/usbhid.c index 3bb7d5e594e3..df810012b3f8 100644 --- a/sys/dev/usb/input/usbhid.c +++ b/sys/dev/usb/input/usbhid.c @@ -76,7 +76,7 @@ #include "hid_if.h" static SYSCTL_NODE(_hw_usb, OID_AUTO, usbhid, CTLFLAG_RW, 0, "USB usbhid"); -static int usbhid_enable = 0; +static int usbhid_enable = 1; SYSCTL_INT(_hw_usb_usbhid, OID_AUTO, enable, CTLFLAG_RWTUN, &usbhid_enable, 0, "Enable usbhid and prefer it to other USB HID drivers"); #ifdef USB_DEBUG diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 819debadd1ac..9f2b009d02ec 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -30,7 +30,8 @@ #include <dev/vmm/vmm_mem.h> #include <dev/vmm/vmm_stat.h> -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#ifdef COMPAT_FREEBSD12 struct vm_memseg_12 { int segid; size_t len; @@ -42,7 +43,22 @@ _Static_assert(sizeof(struct vm_memseg_12) == 80, "COMPAT_FREEBSD12 ABI"); _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_12) #define VM_GET_MEMSEG_12 \ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_12) -#endif +#endif /* COMPAT_FREEBSD12 */ +#ifdef COMPAT_FREEBSD14 +struct vm_memseg_14 { + int segid; + size_t len; + char name[VM_MAX_SUFFIXLEN + 1]; +}; +_Static_assert(sizeof(struct vm_memseg_14) == (VM_MAX_SUFFIXLEN + 1 + 16), + "COMPAT_FREEBSD14 ABI"); + +#define VM_ALLOC_MEMSEG_14 \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg_14) +#define VM_GET_MEMSEG_14 \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg_14) +#endif /* COMPAT_FREEBSD14 */ +#endif /* __amd64__ */ struct devmem_softc { int segid; @@ -257,7 +273,8 @@ get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) } static int -alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) +alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len, + struct domainset *domainset) { char *name; int error; @@ -278,8 +295,7 @@ alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg, size_t len) if (error) goto done; } - - error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); + error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem, domainset); if (error) goto done; @@ -295,6 +311,20 @@ done: return (error); } +#if defined(__amd64__) && \ + (defined(COMPAT_FREEBSD14) || defined(COMPAT_FREEBSD12)) +/* + * Translate pre-15.0 memory segment identifiers into their 15.0 counterparts. + */ +static void +adjust_segid(struct vm_memseg *mseg) +{ + if (mseg->segid != VM_SYSMEM) { + mseg->segid += (VM_BOOTROM - 1); + } +} +#endif + static int vm_get_register_set(struct vcpu *vcpu, unsigned int count, int *regnum, uint64_t *regval) @@ -353,10 +383,16 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = { VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU), VMMDEV_IOCTL(VM_STAT_DESC, 0), -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#ifdef COMPAT_FREEBSD12 VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), #endif +#ifdef COMPAT_FREEBSD14 + VMMDEV_IOCTL(VM_ALLOC_MEMSEG_14, + VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), +#endif +#endif /* __amd64__ */ VMMDEV_IOCTL(VM_ALLOC_MEMSEG, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), VMMDEV_IOCTL(VM_MMAP_MEMSEG, @@ -366,9 +402,14 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = { VMMDEV_IOCTL(VM_REINIT, VMMDEV_IOCTL_XLOCK_MEMSEGS | VMMDEV_IOCTL_LOCK_ALL_VCPUS), -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#if defined(COMPAT_FREEBSD12) VMMDEV_IOCTL(VM_GET_MEMSEG_12, VMMDEV_IOCTL_SLOCK_MEMSEGS), #endif +#ifdef COMPAT_FREEBSD14 + VMMDEV_IOCTL(VM_GET_MEMSEG_14, VMMDEV_IOCTL_SLOCK_MEMSEGS), +#endif +#endif /* __amd64__ */ VMMDEV_IOCTL(VM_GET_MEMSEG, VMMDEV_IOCTL_SLOCK_MEMSEGS), VMMDEV_IOCTL(VM_MMAP_GETNEXT, VMMDEV_IOCTL_SLOCK_MEMSEGS), @@ -388,6 +429,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vmmdev_softc *sc; struct vcpu *vcpu; const struct vmmdev_ioctl *ioctl; + struct vm_memseg *mseg; int error, vcpuid; sc = vmmdev_lookup2(cdev); @@ -499,20 +541,77 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = vm_munmap_memseg(sc->vm, mu->gpa, mu->len); break; } -#if defined(__amd64__) && defined(COMPAT_FREEBSD12) +#ifdef __amd64__ +#ifdef COMPAT_FREEBSD12 case VM_ALLOC_MEMSEG_12: - error = alloc_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg_12 *)0)->name)); + mseg = (struct vm_memseg *)data; + + adjust_segid(mseg); + error = alloc_memseg(sc, mseg, + sizeof(((struct vm_memseg_12 *)0)->name), NULL); break; case VM_GET_MEMSEG_12: - error = get_memseg(sc, (struct vm_memseg *)data, + mseg = (struct vm_memseg *)data; + + adjust_segid(mseg); + error = get_memseg(sc, mseg, sizeof(((struct vm_memseg_12 *)0)->name)); break; -#endif - case VM_ALLOC_MEMSEG: - error = alloc_memseg(sc, (struct vm_memseg *)data, - sizeof(((struct vm_memseg *)0)->name)); +#endif /* COMPAT_FREEBSD12 */ +#ifdef COMPAT_FREEBSD14 + case VM_ALLOC_MEMSEG_14: + mseg = (struct vm_memseg *)data; + + adjust_segid(mseg); + error = alloc_memseg(sc, mseg, + sizeof(((struct vm_memseg_14 *)0)->name), NULL); + break; + case VM_GET_MEMSEG_14: + mseg = (struct vm_memseg *)data; + + adjust_segid(mseg); + error = get_memseg(sc, mseg, + sizeof(((struct vm_memseg_14 *)0)->name)); + break; +#endif /* COMPAT_FREEBSD14 */ +#endif /* __amd64__ */ + case VM_ALLOC_MEMSEG: { + domainset_t *mask; + struct domainset *domainset, domain; + + domainset = NULL; + mseg = (struct vm_memseg *)data; + if (mseg->ds_policy != DOMAINSET_POLICY_INVALID && mseg->ds_mask != NULL) { + if (mseg->ds_mask_size < sizeof(domainset_t) || + mseg->ds_mask_size > DOMAINSET_MAXSIZE / NBBY) { + error = ERANGE; + break; + } + memset(&domain, 0, sizeof(domain)); + mask = malloc(mseg->ds_mask_size, M_VMMDEV, M_WAITOK); + error = copyin(mseg->ds_mask, mask, mseg->ds_mask_size); + if (error) { + free(mask, M_VMMDEV); + break; + } + error = domainset_populate(&domain, mask, mseg->ds_policy, + mseg->ds_mask_size); + if (error) { + free(mask, M_VMMDEV); + break; + } + domainset = domainset_create(&domain); + if (domainset == NULL) { + error = EINVAL; + free(mask, M_VMMDEV); + break; + } + free(mask, M_VMMDEV); + } + error = alloc_memseg(sc, mseg, sizeof(mseg->name), domainset); + break; + } case VM_GET_MEMSEG: error = get_memseg(sc, (struct vm_memseg *)data, sizeof(((struct vm_memseg *)0)->name)); @@ -820,7 +919,6 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) buflen = VM_MAX_NAMELEN + 1; buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); - strlcpy(buf, "beavis", buflen); error = sysctl_handle_string(oidp, buf, buflen, req); if (error == 0 && req->newptr != NULL) error = vmmdev_lookup_and_destroy(buf, req->td->td_ucred); @@ -830,7 +928,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_destroy, "A", - NULL); + "Destroy a vmm(4) instance (legacy interface)"); static struct cdevsw vmmdevsw = { .d_name = "vmmdev", @@ -909,7 +1007,6 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS) buflen = VM_MAX_NAMELEN + 1; buf = malloc(buflen, M_VMMDEV, M_WAITOK | M_ZERO); - strlcpy(buf, "beavis", buflen); error = sysctl_handle_string(oidp, buf, buflen, req); if (error == 0 && req->newptr != NULL) error = vmmdev_create(buf, req->td->td_ucred); @@ -919,7 +1016,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_hw_vmm, OID_AUTO, create, CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_PRISON | CTLFLAG_MPSAFE, NULL, 0, sysctl_vmm_create, "A", - NULL); + "Create a vmm(4) instance (legacy interface)"); static int vmmctl_open(struct cdev *cdev, int flags, int fmt, struct thread *td) diff --git a/sys/dev/vmm/vmm_mem.c b/sys/dev/vmm/vmm_mem.c index c61ae2d44b96..be59e37de33d 100644 --- a/sys/dev/vmm/vmm_mem.c +++ b/sys/dev/vmm/vmm_mem.c @@ -7,6 +7,7 @@ #include <sys/types.h> #include <sys/lock.h> +#include <sys/malloc.h> #include <sys/sx.h> #include <sys/systm.h> @@ -156,10 +157,11 @@ vm_mem_allocated(struct vcpu *vcpu, vm_paddr_t gpa) } int -vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) +vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem, + struct domainset *obj_domainset) { - struct vm_mem *mem; struct vm_mem_seg *seg; + struct vm_mem *mem; vm_object_t obj; mem = vm_mem(vm); @@ -179,13 +181,22 @@ vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) return (EINVAL); } + /* + * When given an impossible policy, signal an + * error to the user. + */ + if (obj_domainset != NULL && domainset_empty_vm(obj_domainset)) + return (EINVAL); obj = vm_object_allocate(OBJT_SWAP, len >> PAGE_SHIFT); if (obj == NULL) return (ENOMEM); seg->len = len; seg->object = obj; + if (obj_domainset != NULL) + seg->object->domain.dr_policy = obj_domainset; seg->sysmem = sysmem; + return (0); } diff --git a/sys/dev/vmm/vmm_mem.h b/sys/dev/vmm/vmm_mem.h index a4be4c1c57aa..856470cf2590 100644 --- a/sys/dev/vmm/vmm_mem.h +++ b/sys/dev/vmm/vmm_mem.h @@ -8,6 +8,27 @@ #ifndef _DEV_VMM_MEM_H_ #define _DEV_VMM_MEM_H_ +/* Maximum number of NUMA domains in a guest. */ +#define VM_MAXMEMDOM 8 +#define VM_MAXSYSMEM VM_MAXMEMDOM + +/* + * Identifiers for memory segments. + * Each guest NUMA domain is represented by a single system + * memory segment from [VM_SYSMEM, VM_MAXSYSMEM). + * The remaining identifiers can be used to create devmem segments. + */ +enum { + VM_SYSMEM = 0, + VM_BOOTROM = VM_MAXSYSMEM, + VM_FRAMEBUFFER, + VM_PCIROM, + VM_MEMSEG_END +}; + +#define VM_MAX_MEMSEGS VM_MEMSEG_END +#define VM_MAX_MEMMAPS (VM_MAX_MEMSEGS * 2) + #ifdef _KERNEL #include <sys/types.h> @@ -31,9 +52,6 @@ struct vm_mem_map { int flags; }; -#define VM_MAX_MEMSEGS 4 -#define VM_MAX_MEMMAPS 8 - struct vm_mem { struct vm_mem_map mem_maps[VM_MAX_MEMMAPS]; struct vm_mem_seg mem_segs[VM_MAX_MEMSEGS]; @@ -55,7 +73,8 @@ void vm_assert_memseg_xlocked(struct vm *vm); int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, size_t len, int prot, int flags); int vm_munmap_memseg(struct vm *vm, vm_paddr_t gpa, size_t len); -int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); +int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem, + struct domainset *obj_domainset); void vm_free_memseg(struct vm *vm, int ident); /* diff --git a/sys/geom/geom.h b/sys/geom/geom.h index dcd6f793f9f7..908ce86f03a6 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -282,7 +282,7 @@ void g_detach(struct g_consumer *cp); void g_error_provider(struct g_provider *pp, int error); struct g_provider *g_provider_by_name(char const *arg); int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len); -#define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof *(v)) +#define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof(*(v))) int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len); int g_handleattr_int(struct bio *bp, const char *attribute, int val); diff --git a/sys/geom/geom_ccd.c b/sys/geom/geom_ccd.c index 5700399ee5d1..2140d005160e 100644 --- a/sys/geom/geom_ccd.c +++ b/sys/geom/geom_ccd.c @@ -730,17 +730,17 @@ g_ccd_create(struct gctl_req *req, struct g_class *mp) int i, error; g_topology_assert(); - unit = gctl_get_paraml(req, "unit", sizeof (*unit)); + unit = gctl_get_paraml(req, "unit", sizeof(*unit)); if (unit == NULL) { gctl_error(req, "unit parameter not given"); return; } - ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave)); + ileave = gctl_get_paraml(req, "ileave", sizeof(*ileave)); if (ileave == NULL) { gctl_error(req, "ileave parameter not given"); return; } - nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider)); + nprovider = gctl_get_paraml(req, "nprovider", sizeof(*nprovider)); if (nprovider == NULL) { gctl_error(req, "nprovider parameter not given"); return; @@ -769,7 +769,7 @@ g_ccd_create(struct gctl_req *req, struct g_class *mp) } gp = g_new_geomf(mp, "ccd%d", *unit); - sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO); + sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); gp->softc = sc; sc->sc_ndisks = *nprovider; @@ -872,7 +872,7 @@ g_ccd_list(struct gctl_req *req, struct g_class *mp) struct g_geom *gp; int i, unit, *up; - up = gctl_get_paraml(req, "unit", sizeof (*up)); + up = gctl_get_paraml(req, "unit", sizeof(*up)); if (up == NULL) { gctl_error(req, "unit parameter not given"); return; diff --git a/sys/geom/geom_event.c b/sys/geom/geom_event.c index 0a76fd6c6f57..341233a6ef47 100644 --- a/sys/geom/geom_event.c +++ b/sys/geom/geom_event.c @@ -145,7 +145,7 @@ g_attr_changed(struct g_provider *pp, const char *attr, int flag) struct g_attrchanged_args *args; int error; - args = g_malloc(sizeof *args, flag); + args = g_malloc(sizeof(*args), flag); if (args == NULL) return (ENOMEM); args->pp = pp; diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index 8d6b9a926e1d..247a623bf1bf 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -278,7 +278,7 @@ g_io_init(void) g_bioq_init(&g_bio_run_down); g_bioq_init(&g_bio_run_up); - biozone = uma_zcreate("g_bio", sizeof (struct bio), + biozone = uma_zcreate("g_bio", sizeof(struct bio), NULL, NULL, NULL, NULL, 0, 0); diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c index 8cfffc478849..0491b0069be4 100644 --- a/sys/geom/geom_slice.c +++ b/sys/geom/geom_slice.c @@ -57,7 +57,7 @@ g_slice_alloc(unsigned nslice, unsigned scsize) { struct g_slicer *gsp; - gsp = g_malloc(sizeof *gsp, M_WAITOK | M_ZERO); + gsp = g_malloc(sizeof(*gsp), M_WAITOK | M_ZERO); if (scsize > 0) gsp->softc = g_malloc(scsize, M_WAITOK | M_ZERO); else @@ -463,9 +463,9 @@ g_slice_conf_hot(struct g_geom *gp, u_int idx, off_t offset, off_t length, int r } gsl = gsp->hotspot; if(idx >= gsp->nhotspot) { - gsl2 = g_malloc((idx + 1) * sizeof *gsl2, M_WAITOK | M_ZERO); + gsl2 = g_malloc((idx + 1) * sizeof(*gsl2), M_WAITOK | M_ZERO); if (gsp->hotspot != NULL) - bcopy(gsp->hotspot, gsl2, gsp->nhotspot * sizeof *gsl2); + bcopy(gsp->hotspot, gsl2, gsp->nhotspot * sizeof(*gsl2)); gsp->hotspot = gsl2; if (gsp->hotspot != NULL) g_free(gsl); diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index aba4bf7c44c4..1429c84942ed 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -267,7 +267,7 @@ g_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: g_trace(G_T_TOPOLOGY, "g_modevent(%s, LOAD)", mp->name); - hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); + hh = g_malloc(sizeof(*hh), M_WAITOK | M_ZERO); hh->mp = mp; /* * Once the system is not cold, MOD_LOAD calls will be @@ -351,7 +351,7 @@ g_retaste(struct g_class *mp) if (mp->taste == NULL) return (EINVAL); - hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); + hh = g_malloc(sizeof(*hh), M_WAITOK | M_ZERO); hh->mp = mp; if (cold) { @@ -381,7 +381,7 @@ g_new_geomf(struct g_class *mp, const char *fmt, ...) sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); - gp = g_malloc(sizeof *gp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); + gp = g_malloc(sizeof(*gp) + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); gp->name = (char *)(gp + 1); gp->class = mp; gp->rank = 1; @@ -527,7 +527,7 @@ g_new_consumer(struct g_geom *gp) ("g_new_consumer on geom(%s) (class %s) without orphan", gp->name, gp->class->name)); - cp = g_malloc(sizeof *cp, M_WAITOK | M_ZERO); + cp = g_malloc(sizeof(*cp), M_WAITOK | M_ZERO); cp->geom = gp; cp->stat = devstat_new_entry(cp, -1, 0, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); @@ -616,7 +616,7 @@ g_new_providerf(struct g_geom *gp, const char *fmt, ...) sbuf_vprintf(sb, fmt, ap); va_end(ap); sbuf_finish(sb); - pp = g_malloc(sizeof *pp + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); + pp = g_malloc(sizeof(*pp) + sbuf_len(sb) + 1, M_WAITOK | M_ZERO); pp->name = (char *)(pp + 1); strcpy(pp->name, sbuf_data(sb)); sbuf_delete(sb); @@ -748,7 +748,7 @@ g_resize_provider(struct g_provider *pp, off_t size) if (size == pp->mediasize) return; - hh = g_malloc(sizeof *hh, M_WAITOK | M_ZERO); + hh = g_malloc(sizeof(*hh), M_WAITOK | M_ZERO); hh->pp = pp; hh->size = size; g_post_event(g_resize_provider_event, hh, M_WAITOK, NULL); @@ -1082,21 +1082,21 @@ int g_handleattr_int(struct bio *bp, const char *attribute, int val) { - return (g_handleattr(bp, attribute, &val, sizeof val)); + return (g_handleattr(bp, attribute, &val, sizeof(val))); } int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val) { - return (g_handleattr(bp, attribute, &val, sizeof val)); + return (g_handleattr(bp, attribute, &val, sizeof(val))); } int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val) { - return (g_handleattr(bp, attribute, &val, sizeof val)); + return (g_handleattr(bp, attribute, &val, sizeof(val))); } int diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c index 23088c895541..a4935df7eaa1 100644 --- a/sys/geom/multipath/g_multipath.c +++ b/sys/geom/multipath/g_multipath.c @@ -321,7 +321,7 @@ g_multipath_resize(struct g_consumer *cp) if (sc->sc_uuid[0] != 0) { pp = cp->provider; strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic)); - memcpy(md.md_uuid, sc->sc_uuid, sizeof (sc->sc_uuid)); + memcpy(md.md_uuid, sc->sc_uuid, sizeof(sc->sc_uuid)); strlcpy(md.md_name, sc->sc_name, sizeof(md.md_name)); md.md_version = G_MULTIPATH_VERSION; md.md_size = size; @@ -552,8 +552,8 @@ g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md) gp = g_new_geomf(mp, "%s", md->md_name); sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); mtx_init(&sc->sc_mtx, "multipath", NULL, MTX_DEF); - memcpy(sc->sc_uuid, md->md_uuid, sizeof (sc->sc_uuid)); - memcpy(sc->sc_name, md->md_name, sizeof (sc->sc_name)); + memcpy(sc->sc_uuid, md->md_uuid, sizeof(sc->sc_uuid)); + memcpy(sc->sc_name, md->md_name, sizeof(sc->sc_name)); sc->sc_active_active = md->md_active_active; sc->sc_size = md->md_size; gp->softc = sc; @@ -906,7 +906,7 @@ g_multipath_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) char buf[16]; u_long rand = random(); - snprintf(buf, sizeof (buf), "%s-%lu", md.md_name, rand); + snprintf(buf, sizeof(buf), "%s-%lu", md.md_name, rand); printf("GEOM_MULTIPATH: geom %s/%s exists already\n", sc->sc_name, sc->sc_uuid); printf("GEOM_MULTIPATH: %s will be (temporarily) %s\n", @@ -1200,7 +1200,7 @@ g_multipath_ctl_configure(struct gctl_req *req, struct g_class *mp) cp = sc->sc_active; pp = cp->provider; strlcpy(md.md_magic, G_MULTIPATH_MAGIC, sizeof(md.md_magic)); - memcpy(md.md_uuid, sc->sc_uuid, sizeof (sc->sc_uuid)); + memcpy(md.md_uuid, sc->sc_uuid, sizeof(sc->sc_uuid)); strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_version = G_MULTIPATH_VERSION; md.md_size = pp->mediasize; diff --git a/sys/geom/virstor/g_virstor.c b/sys/geom/virstor/g_virstor.c index 73bd9f73055a..c7d737493f11 100644 --- a/sys/geom/virstor/g_virstor.c +++ b/sys/geom/virstor/g_virstor.c @@ -202,7 +202,7 @@ virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) int *force, *nargs; int i; - nargs = gctl_get_paraml(req, "nargs", sizeof *nargs); + nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); if (nargs == NULL) { gctl_error(req, "Error fetching argument '%s'", "nargs"); return; @@ -211,7 +211,7 @@ virstor_ctl_stop(struct gctl_req *req, struct g_class *cp) gctl_error(req, "Invalid number of arguments"); return; } - force = gctl_get_paraml(req, "force", sizeof *force); + force = gctl_get_paraml(req, "force", sizeof(*force)); if (force == NULL) { gctl_error(req, "Error fetching argument '%s'", "force"); return; @@ -315,7 +315,7 @@ virstor_ctl_add(struct gctl_req *req, struct g_class *cp) u_int nc; u_int j; - snprintf(aname, sizeof aname, "arg%d", i); + snprintf(aname, sizeof(aname), "arg%d", i); pp = gctl_get_provider(req, aname); if (pp == NULL) { /* This is the most common error so be verbose about it */ @@ -487,12 +487,12 @@ fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, { struct g_virstor_component *c; - bzero(md, sizeof *md); + bzero(md, sizeof(*md)); c = &sc->components[nc]; - strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof md->md_magic); + strncpy(md->md_magic, G_VIRSTOR_MAGIC, sizeof(md->md_magic)); md->md_version = G_VIRSTOR_VERSION; - strncpy(md->md_name, sc->geom->name, sizeof md->md_name); + strncpy(md->md_name, sc->geom->name, sizeof(md->md_name)); md->md_id = sc->id; md->md_virsize = sc->virsize; md->md_chunk_size = sc->chunk_size; @@ -500,7 +500,7 @@ fill_metadata(struct g_virstor_softc *sc, struct g_virstor_metadata *md, if (hardcode) { strncpy(md->provider, c->gcons->provider->name, - sizeof md->provider); + sizeof(md->provider)); } md->no = nc; md->provsize = c->gcons->provider->mediasize; @@ -959,7 +959,7 @@ virstor_geom_destroy(struct g_virstor_softc *sc, boolean_t force, free(sc->map, M_GVIRSTOR); free(sc->components, M_GVIRSTOR); - bzero(sc, sizeof *sc); + bzero(sc, sizeof(*sc)); free(sc, M_GVIRSTOR); pp = LIST_FIRST(&gp->provider); /* We only offer one provider */ @@ -1213,7 +1213,7 @@ virstor_check_and_run(struct g_virstor_softc *sc) sc->provider->name, sc->chunk_count * (off_t)sc->chunk_size); } - sc->map_size = sc->chunk_count * sizeof *(sc->map); + sc->map_size = sc->chunk_count * sizeof(*(sc->map)); /* The following allocation is in order of 4MB - 8MB */ sc->map = malloc(sc->map_size, M_GVIRSTOR, M_WAITOK); KASSERT(sc->map != NULL, ("%s: Memory allocation error (%zu bytes) for %s", @@ -1267,7 +1267,7 @@ virstor_check_and_run(struct g_virstor_softc *sc) bcopy(mapbuf, &sc->map[n], bs); off += bs; count += bs; - n += bs / sizeof *(sc->map); + n += bs / sizeof(*(sc->map)); g_free(mapbuf); } g_access(sc->components[0].gcons, -1, 0, 0); @@ -1306,8 +1306,8 @@ virstor_check_and_run(struct g_virstor_softc *sc) sc->components[index].chunk_next); } - sc->me_per_sector = sc->sectorsize / sizeof *(sc->map); - if (sc->sectorsize % sizeof *(sc->map) != 0) { + sc->me_per_sector = sc->sectorsize / sizeof(*(sc->map)); + if (sc->sectorsize % sizeof(*(sc->map)) != 0) { LOG_MSG(LVL_ERROR, "%s: Map entries don't fit exactly in a sector (%s)", __func__, sc->geom->name); @@ -1653,7 +1653,7 @@ g_virstor_start(struct bio *b) * XXX: this will prevent the fs from * being umounted! */ struct g_virstor_bio_q *biq; - biq = malloc(sizeof *biq, M_GVIRSTOR, + biq = malloc(sizeof(*biq), M_GVIRSTOR, M_NOWAIT); if (biq == NULL) { bioq_dismantle(&bq); @@ -1703,7 +1703,7 @@ g_virstor_start(struct bio *b) * map array. * sc_offset will end up pointing to the drive * sector. */ - s_offset = chunk_index * sizeof *me; + s_offset = chunk_index * sizeof(*me); s_offset = rounddown(s_offset, sc->sectorsize); /* data_me points to map entry sector diff --git a/sys/kern/coredump_vnode.c b/sys/kern/coredump_vnode.c new file mode 100644 index 000000000000..8b857e9aa4a2 --- /dev/null +++ b/sys/kern/coredump_vnode.c @@ -0,0 +1,562 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause + * + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * - kern_sig.c + */ +/* + * Copyright (c) 1993, David Greenman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * -kern_exec.c + */ + +#include <sys/systm.h> +#include <sys/acct.h> +#include <sys/compressor.h> +#include <sys/devctl.h> +#include <sys/fcntl.h> +#include <sys/jail.h> +#include <sys/limits.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/sbuf.h> +#include <sys/stat.h> +#include <sys/sysctl.h> +#include <sys/sysent.h> +#include <sys/syslog.h> +#include <sys/ucoredump.h> +#include <sys/unistd.h> +#include <sys/vnode.h> + +#include <security/audit/audit.h> + +#define GZIP_SUFFIX ".gz" +#define ZSTD_SUFFIX ".zst" + +#define MAX_NUM_CORE_FILES 100000 +#ifndef NUM_CORE_FILES +#define NUM_CORE_FILES 5 +#endif + +static coredumper_handle_fn coredump_vnode; +static struct coredumper vnode_coredumper = { + .cd_name = "vnode_coredumper", + .cd_handle = coredump_vnode, +}; + +SYSINIT(vnode_coredumper_register, SI_SUB_EXEC, SI_ORDER_ANY, + coredumper_register, &vnode_coredumper); + +_Static_assert(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES, + "NUM_CORE_FILES is out of range (0 to " __STRING(MAX_NUM_CORE_FILES) ")"); +static int num_cores = NUM_CORE_FILES; + +static int capmode_coredump; +SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, + &capmode_coredump, 0, "Allow processes in capability mode to dump core"); + +static int set_core_nodump_flag = 0; +SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, + 0, "Enable setting the NODUMP flag on coredump files"); + +static int coredump_devctl = 0; +SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, + 0, "Generate a devctl notification when processes coredump"); + +/* + * corefilename[] is protected by the allproc_lock. + */ +static char corefilename[MAXPATHLEN] = { "%N.core" }; +TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); + +static int +sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) +{ + int error; + + sx_xlock(&allproc_lock); + error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), + req); + sx_xunlock(&allproc_lock); + + return (error); +} +SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | + CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", + "Process corefile name format string"); + +static int +sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) +{ + int error; + int new_val; + + new_val = num_cores; + error = sysctl_handle_int(oidp, &new_val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (new_val > MAX_NUM_CORE_FILES) + new_val = MAX_NUM_CORE_FILES; + if (new_val < 0) + new_val = 0; + num_cores = new_val; + return (0); +} +SYSCTL_PROC(_debug, OID_AUTO, ncores, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), + sysctl_debug_num_cores_check, "I", + "Maximum number of generated process corefiles while using index format"); + +static void +vnode_close_locked(struct thread *td, struct vnode *vp) +{ + + VOP_UNLOCK(vp); + vn_close(vp, FWRITE, td->td_ucred, td); +} + +int +core_vn_write(const struct coredump_writer *cdw, const void *base, size_t len, + off_t offset, enum uio_seg seg, struct ucred *cred, size_t *resid, + struct thread *td) +{ + struct coredump_vnode_ctx *ctx = cdw->ctx; + + return (vn_rdwr_inchunks(UIO_WRITE, ctx->vp, __DECONST(void *, base), + len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, + cred, ctx->fcred, resid, td)); +} + +int +core_vn_extend(const struct coredump_writer *cdw, off_t newsz, + struct ucred *cred) +{ + struct coredump_vnode_ctx *ctx = cdw->ctx; + struct mount *mp; + int error; + + error = vn_start_write(ctx->vp, &mp, V_WAIT); + if (error != 0) + return (error); + vn_lock(ctx->vp, LK_EXCLUSIVE | LK_RETRY); + error = vn_truncate_locked(ctx->vp, newsz, false, cred); + VOP_UNLOCK(ctx->vp); + vn_finished_write(mp); + return (error); +} + +/* + * If the core format has a %I in it, then we need to check + * for existing corefiles before defining a name. + * To do this we iterate over 0..ncores to find a + * non-existing core file name to use. If all core files are + * already used we choose the oldest one. + */ +static int +corefile_open_last(struct thread *td, char *name, int indexpos, + int indexlen, int ncores, struct vnode **vpp) +{ + struct vnode *oldvp, *nextvp, *vp; + struct vattr vattr; + struct nameidata nd; + int error, i, flags, oflags, cmode; + char ch; + struct timespec lasttime; + + nextvp = oldvp = NULL; + cmode = S_IRUSR | S_IWUSR; + oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | + (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); + + for (i = 0; i < ncores; i++) { + flags = O_CREAT | FWRITE | O_NOFOLLOW; + + ch = name[indexpos + indexlen]; + (void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen, + i); + name[indexpos + indexlen] = ch; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); + error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, + NULL); + if (error != 0) + break; + + vp = nd.ni_vp; + NDFREE_PNBUF(&nd); + if ((flags & O_CREAT) == O_CREAT) { + nextvp = vp; + break; + } + + error = VOP_GETATTR(vp, &vattr, td->td_ucred); + if (error != 0) { + vnode_close_locked(td, vp); + break; + } + + if (oldvp == NULL || + lasttime.tv_sec > vattr.va_mtime.tv_sec || + (lasttime.tv_sec == vattr.va_mtime.tv_sec && + lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) { + if (oldvp != NULL) + vn_close(oldvp, FWRITE, td->td_ucred, td); + oldvp = vp; + VOP_UNLOCK(oldvp); + lasttime = vattr.va_mtime; + } else { + vnode_close_locked(td, vp); + } + } + + if (oldvp != NULL) { + if (nextvp == NULL) { + if ((td->td_proc->p_flag & P_SUGID) != 0) { + error = EFAULT; + vn_close(oldvp, FWRITE, td->td_ucred, td); + } else { + nextvp = oldvp; + error = vn_lock(nextvp, LK_EXCLUSIVE); + if (error != 0) { + vn_close(nextvp, FWRITE, td->td_ucred, + td); + nextvp = NULL; + } + } + } else { + vn_close(oldvp, FWRITE, td->td_ucred, td); + } + } + if (error != 0) { + if (nextvp != NULL) + vnode_close_locked(td, oldvp); + } else { + *vpp = nextvp; + } + + return (error); +} + +/* + * corefile_open(comm, uid, pid, td, compress, vpp, namep) + * Expand the name described in corefilename, using name, uid, and pid + * and open/create core file. + * corefilename is a printf-like string, with three format specifiers: + * %N name of process ("name") + * %P process id (pid) + * %U user id (uid) + * For example, "%N.core" is the default; they can be disabled completely + * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". + * This is controlled by the sysctl variable kern.corefile (see above). + */ +static int +corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, + int compress, int signum, struct vnode **vpp, char **namep) +{ + struct sbuf sb; + struct nameidata nd; + const char *format; + char *hostname, *name; + int cmode, error, flags, i, indexpos, indexlen, oflags, ncores; + + hostname = NULL; + format = corefilename; + name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); + indexlen = 0; + indexpos = -1; + ncores = num_cores; + (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); + sx_slock(&allproc_lock); + for (i = 0; format[i] != '\0'; i++) { + switch (format[i]) { + case '%': /* Format character */ + i++; + switch (format[i]) { + case '%': + sbuf_putc(&sb, '%'); + break; + case 'H': /* hostname */ + if (hostname == NULL) { + hostname = malloc(MAXHOSTNAMELEN, + M_TEMP, M_WAITOK); + } + getcredhostname(td->td_ucred, hostname, + MAXHOSTNAMELEN); + sbuf_cat(&sb, hostname); + break; + case 'I': /* autoincrementing index */ + if (indexpos != -1) { + sbuf_printf(&sb, "%%I"); + break; + } + + indexpos = sbuf_len(&sb); + sbuf_printf(&sb, "%u", ncores - 1); + indexlen = sbuf_len(&sb) - indexpos; + break; + case 'N': /* process name */ + sbuf_printf(&sb, "%s", comm); + break; + case 'P': /* process id */ + sbuf_printf(&sb, "%u", pid); + break; + case 'S': /* signal number */ + sbuf_printf(&sb, "%i", signum); + break; + case 'U': /* user id */ + sbuf_printf(&sb, "%u", uid); + break; + default: + log(LOG_ERR, + "Unknown format character %c in " + "corename `%s'\n", format[i], format); + break; + } + break; + default: + sbuf_putc(&sb, format[i]); + break; + } + } + sx_sunlock(&allproc_lock); + free(hostname, M_TEMP); + if (compress == COMPRESS_GZIP) + sbuf_cat(&sb, GZIP_SUFFIX); + else if (compress == COMPRESS_ZSTD) + sbuf_cat(&sb, ZSTD_SUFFIX); + if (sbuf_error(&sb) != 0) { + log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " + "long\n", (long)pid, comm, (u_long)uid); + sbuf_delete(&sb); + free(name, M_TEMP); + return (ENOMEM); + } + sbuf_finish(&sb); + sbuf_delete(&sb); + + if (indexpos != -1) { + error = corefile_open_last(td, name, indexpos, indexlen, ncores, + vpp); + if (error != 0) { + log(LOG_ERR, + "pid %d (%s), uid (%u): Path `%s' failed " + "on initial open test, error = %d\n", + pid, comm, uid, name, error); + } + } else { + cmode = S_IRUSR | S_IWUSR; + oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | + (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); + flags = O_CREAT | FWRITE | O_NOFOLLOW; + if ((td->td_proc->p_flag & P_SUGID) != 0) + flags |= O_EXCL; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); + error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, + NULL); + if (error == 0) { + *vpp = nd.ni_vp; + NDFREE_PNBUF(&nd); + } + } + + if (error != 0) { +#ifdef AUDIT + audit_proc_coredump(td, name, error); +#endif + free(name, M_TEMP); + return (error); + } + *namep = name; + return (0); +} + +/* + * The vnode dumper is the traditional coredump handler. Our policy and limits + * are generally checked already, so it creates the coredump name and passes on + * a vnode and a size limit to the process-specific coredump routine if there is + * one. If there _is not_ one, it returns ENOSYS; otherwise it returns the + * error from the process-specific routine. + */ +static int +coredump_vnode(struct thread *td, off_t limit) +{ + struct proc *p = td->td_proc; + struct ucred *cred = td->td_ucred; + struct vnode *vp; + struct coredump_vnode_ctx wctx; + struct coredump_writer cdw = { }; + struct flock lf; + struct vattr vattr; + size_t fullpathsize; + int error, error1, jid, locked, ppid, sig; + char *name; /* name of corefile */ + void *rl_cookie; + char *fullpath, *freepath = NULL; + struct sbuf *sb; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + ppid = p->p_oppid; + sig = p->p_sig; + jid = p->p_ucred->cr_prison->pr_id; + PROC_UNLOCK(p); + + error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, + compress_user_cores, sig, &vp, &name); + if (error != 0) + return (error); + + /* + * Don't dump to non-regular files or files with links. + * Do not dump into system files. Effective user must own the corefile. + */ + if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || + vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 || + vattr.va_uid != cred->cr_uid) { + VOP_UNLOCK(vp); + error = EFAULT; + goto out; + } + + VOP_UNLOCK(vp); + + /* Postpone other writers, including core dumps of other processes. */ + rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); + + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + lf.l_type = F_WRLCK; + locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); + + VATTR_NULL(&vattr); + vattr.va_size = 0; + if (set_core_nodump_flag) + vattr.va_flags = UF_NODUMP; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + VOP_SETATTR(vp, &vattr, cred); + VOP_UNLOCK(vp); + PROC_LOCK(p); + p->p_acflag |= ACORE; + PROC_UNLOCK(p); + + wctx.vp = vp; + wctx.fcred = NOCRED; + + cdw.ctx = &wctx; + cdw.write_fn = core_vn_write; + cdw.extend_fn = core_vn_extend; + + if (p->p_sysent->sv_coredump != NULL) { + error = p->p_sysent->sv_coredump(td, &cdw, limit, 0); + } else { + error = ENOSYS; + } + + if (locked) { + lf.l_type = F_UNLCK; + VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); + } + vn_rangelock_unlock(vp, rl_cookie); + + /* + * Notify the userland helper that a process triggered a core dump. + * This allows the helper to run an automated debugging session. + */ + if (error != 0 || coredump_devctl == 0) + goto out; + sb = sbuf_new_auto(); + if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0) + goto out2; + sbuf_cat(sb, "comm=\""); + devctl_safe_quote_sb(sb, fullpath); + free(freepath, M_TEMP); + sbuf_cat(sb, "\" core=\""); + + /* + * We can't lookup core file vp directly. When we're replacing a core, and + * other random times, we flush the name cache, so it will fail. Instead, + * if the path of the core is relative, add the current dir in front if it. + */ + if (name[0] != '/') { + fullpathsize = MAXPATHLEN; + freepath = malloc(fullpathsize, M_TEMP, M_WAITOK); + if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) { + free(freepath, M_TEMP); + goto out2; + } + devctl_safe_quote_sb(sb, fullpath); + free(freepath, M_TEMP); + sbuf_putc(sb, '/'); + } + devctl_safe_quote_sb(sb, name); + sbuf_putc(sb, '"'); + + sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", + jid, p->p_pid, ppid, sig); + if (sbuf_finish(sb) == 0) + devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); +out2: + sbuf_delete(sb); +out: + error1 = vn_close(vp, FWRITE, cred, td); + if (error == 0) + error = error1; +#ifdef AUDIT + audit_proc_coredump(td, name, error); +#endif + free(name, M_TEMP); + return (error); +} diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index b7ffbe68b483..2690ad3b2679 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -64,6 +64,7 @@ #include <sys/syscall.h> #include <sys/sysctl.h> #include <sys/sysent.h> +#include <sys/ucoredump.h> #include <sys/vnode.h> #include <sys/syslog.h> #include <sys/eventhandler.h> @@ -1562,9 +1563,6 @@ struct note_info { TAILQ_HEAD(note_info_list, note_info); -extern int compress_user_cores; -extern int compress_user_cores_level; - static void cb_put_phdr(vm_map_entry_t, void *); static void cb_size_segment(vm_map_entry_t, void *); static void each_dumpable_segment(struct thread *, segment_callback, void *, @@ -1595,7 +1593,7 @@ core_compressed_write(void *base, size_t len, off_t offset, void *arg) } int -__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) +__elfN(coredump)(struct thread *td, struct coredump_writer *cdw, off_t limit, int flags) { struct ucred *cred = td->td_ucred; int compm, error = 0; @@ -1625,9 +1623,8 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) /* Set up core dump parameters. */ params.offset = 0; params.active_cred = cred; - params.file_cred = NOCRED; params.td = td; - params.vp = vp; + params.cdw = cdw; params.comp = NULL; #ifdef RACCT @@ -1662,6 +1659,12 @@ __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); } + if (cdw->init_fn != NULL) { + error = (*cdw->init_fn)(cdw, ¶ms); + if (error != 0) + goto done; + } + /* * Allocate memory for building the header, fill it up, * and write it out following the notes. diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index 5d9e2f2f326b..d7eb82d5f259 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -530,7 +530,7 @@ _domainset_create(struct domainset *domain, struct domainlist *freelist) * remove them and update the domainset accordingly. If only empty * domains are present, we must return failure. */ -static bool +bool domainset_empty_vm(struct domainset *domain) { domainset_t empty; @@ -2409,82 +2409,92 @@ sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap) } int -kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, - id_t id, size_t domainsetsize, const domainset_t *maskp, int policy, - const struct cpuset_copy_cb *cb) +domainset_populate(struct domainset *domain, const domainset_t *mask, int policy, + size_t mask_size) { - struct cpuset *nset; - struct cpuset *set; - struct thread *ttd; - struct proc *p; - struct domainset domain; - domainset_t *mask; - int error; - if (domainsetsize < sizeof(domainset_t) || - domainsetsize > DOMAINSET_MAXSIZE / NBBY) - return (ERANGE); if (policy <= DOMAINSET_POLICY_INVALID || - policy > DOMAINSET_POLICY_MAX) + policy > DOMAINSET_POLICY_MAX) { return (EINVAL); - error = cpuset_check_capabilities(td, level, which, id); - if (error != 0) - return (error); - memset(&domain, 0, sizeof(domain)); - mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); - error = cb->cpuset_copyin(maskp, mask, domainsetsize); - if (error) - goto out; + } + /* * Verify that no high bits are set. */ - if (domainsetsize > sizeof(domainset_t)) { - char *end; - char *cp; + if (mask_size > sizeof(domainset_t)) { + const char *end; + const char *cp; - end = cp = (char *)&mask->__bits; - end += domainsetsize; + end = cp = (const char *)&mask->__bits; + end += mask_size; cp += sizeof(domainset_t); - while (cp != end) + while (cp != end) { if (*cp++ != 0) { - error = EINVAL; - goto out; + return (EINVAL); } + } } if (DOMAINSET_EMPTY(mask)) { - error = EDEADLK; - goto out; + return (EDEADLK); } - DOMAINSET_COPY(mask, &domain.ds_mask); - domain.ds_policy = policy; + DOMAINSET_COPY(mask, &domain->ds_mask); + domain->ds_policy = policy; /* * Sanitize the provided mask. */ - if (!DOMAINSET_SUBSET(&all_domains, &domain.ds_mask)) { - error = EINVAL; - goto out; + if (!DOMAINSET_SUBSET(&all_domains, &domain->ds_mask)) { + return (EINVAL); } /* Translate preferred policy into a mask and fallback. */ if (policy == DOMAINSET_POLICY_PREFER) { /* Only support a single preferred domain. */ - if (DOMAINSET_COUNT(&domain.ds_mask) != 1) { - error = EINVAL; - goto out; + if (DOMAINSET_COUNT(&domain->ds_mask) != 1) { + return (EINVAL); } - domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1; + domain->ds_prefer = DOMAINSET_FFS(&domain->ds_mask) - 1; /* This will be constrained by domainset_shadow(). */ - DOMAINSET_COPY(&all_domains, &domain.ds_mask); + DOMAINSET_COPY(&all_domains, &domain->ds_mask); } + return (0); +} + +int +kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which, + id_t id, size_t domainsetsize, const domainset_t *maskp, int policy, + const struct cpuset_copy_cb *cb) +{ + struct cpuset *nset; + struct cpuset *set; + struct thread *ttd; + struct proc *p; + struct domainset domain; + domainset_t *mask; + int error; + + error = cpuset_check_capabilities(td, level, which, id); + if (error != 0) + return (error); + if (domainsetsize < sizeof(domainset_t) || + domainsetsize > DOMAINSET_MAXSIZE / NBBY) + return (ERANGE); + memset(&domain, 0, sizeof(domain)); + mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); + error = cb->cpuset_copyin(maskp, mask, domainsetsize); + if (error) + goto out; + error = domainset_populate(&domain, mask, policy, domainsetsize); + if (error) + goto out; + /* * When given an impossible policy, fall back to interleaving * across all domains. */ if (domainset_empty_vm(&domain)) domainset_copy(domainset2, &domain); - switch (level) { case CPU_LEVEL_ROOT: case CPU_LEVEL_CPUSET: diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 03268365891e..0fc2d0e7f1bc 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -70,6 +70,7 @@ #include <sys/sysent.h> #include <sys/sysproto.h> #include <sys/timers.h> +#include <sys/ucoredump.h> #include <sys/umtxvar.h> #include <sys/vnode.h> #include <sys/wait.h> @@ -2002,10 +2003,14 @@ int core_write(struct coredump_params *cp, const void *base, size_t len, off_t offset, enum uio_seg seg, size_t *resid) { + return ((*cp->cdw->write_fn)(cp->cdw, base, len, offset, seg, + cp->active_cred, resid, cp->td)); +} - return (vn_rdwr_inchunks(UIO_WRITE, cp->vp, __DECONST(void *, base), - len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED, - cp->active_cred, cp->file_cred, resid, cp->td)); +static int +core_extend(struct coredump_params *cp, off_t newsz) +{ + return ((*cp->cdw->extend_fn)(cp->cdw, newsz, cp->active_cred)); } int @@ -2013,7 +2018,6 @@ core_output(char *base, size_t len, off_t offset, struct coredump_params *cp, void *tmpbuf) { vm_map_t map; - struct mount *mp; size_t resid, runlen; int error; bool success; @@ -2068,14 +2072,7 @@ core_output(char *base, size_t len, off_t offset, struct coredump_params *cp, } } if (!success) { - error = vn_start_write(cp->vp, &mp, V_WAIT); - if (error != 0) - break; - vn_lock(cp->vp, LK_EXCLUSIVE | LK_RETRY); - error = vn_truncate_locked(cp->vp, offset + runlen, - false, cp->td->td_ucred); - VOP_UNLOCK(cp->vp); - vn_finished_write(mp); + error = core_extend(cp, offset + runlen); if (error != 0) break; } diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index d4529e096929..7ef1d19f0ea8 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -3466,7 +3466,7 @@ prison_check_af(struct ucred *cred, int af) pr = cred->cr_prison; #ifdef VIMAGE /* Prisons with their own network stack are not limited. */ - if (prison_owns_vnet(cred)) + if (prison_owns_vnet(pr)) return (0); #endif @@ -3531,7 +3531,7 @@ prison_if(struct ucred *cred, const struct sockaddr *sa) KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); #ifdef VIMAGE - if (prison_owns_vnet(cred)) + if (prison_owns_vnet(cred->cr_prison)) return (0); #endif @@ -3648,7 +3648,7 @@ jailed_without_vnet(struct ucred *cred) if (!jailed(cred)) return (false); #ifdef VIMAGE - if (prison_owns_vnet(cred)) + if (prison_owns_vnet(cred->cr_prison)) return (false); #endif @@ -3711,20 +3711,17 @@ getjailname(struct ucred *cred, char *name, size_t len) #ifdef VIMAGE /* - * Determine whether the prison represented by cred owns - * its vnet rather than having it inherited. - * - * Returns true in case the prison owns the vnet, false otherwise. + * Determine whether the prison owns its VNET. */ bool -prison_owns_vnet(struct ucred *cred) +prison_owns_vnet(struct prison *pr) { /* * vnets cannot be added/removed after jail creation, * so no need to lock here. */ - return ((cred->cr_prison->pr_flags & PR_VNET) != 0); + return ((pr->pr_flags & PR_VNET) != 0); } #endif @@ -4425,7 +4422,7 @@ sysctl_jail_vnet(SYSCTL_HANDLER_ARGS) #ifdef VIMAGE struct ucred *cred = req->td->td_ucred; - havevnet = jailed(cred) && prison_owns_vnet(cred); + havevnet = jailed(cred) && prison_owns_vnet(cred->cr_prison); #else havevnet = 0; #endif diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 5d51aa675cb7..da0efac0598d 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -45,10 +45,10 @@ #include <sys/vnode.h> #include <sys/acct.h> #include <sys/capsicum.h> -#include <sys/compressor.h> #include <sys/condvar.h> #include <sys/devctl.h> #include <sys/event.h> +#include <sys/exec.h> #include <sys/fcntl.h> #include <sys/imgact.h> #include <sys/jail.h> @@ -80,6 +80,7 @@ #include <sys/syslog.h> #include <sys/sysproto.h> #include <sys/timers.h> +#include <sys/ucoredump.h> #include <sys/unistd.h> #include <sys/vmmeter.h> #include <sys/wait.h> @@ -101,7 +102,6 @@ SDT_PROBE_DEFINE2(proc, , , signal__clear, SDT_PROBE_DEFINE3(proc, , , signal__discard, "struct thread *", "struct proc *", "int"); -static int coredump(struct thread *); static int killpg1(struct thread *td, int sig, int pgid, int all, ksiginfo_t *ksi); static int issignal(struct thread *td); @@ -126,11 +126,6 @@ const struct filterops sig_filtops = { .f_event = filt_signal, }; -static int kern_logsigexit = 1; -SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, - &kern_logsigexit, 0, - "Log processes quitting on abnormal signals to syslog(3)"); - static int kern_forcesigexit = 1; SYSCTL_INT(_kern, OID_AUTO, forcesigexit, CTLFLAG_RW, &kern_forcesigexit, 0, "Force trap signal to be handled"); @@ -193,26 +188,6 @@ SYSINIT(signal, SI_SUB_P1003_1B, SI_ORDER_FIRST+3, sigqueue_start, NULL); (cr1)->cr_ruid == (cr2)->cr_uid || \ (cr1)->cr_uid == (cr2)->cr_uid) -static int sugid_coredump; -SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, - &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); - -static int capmode_coredump; -SYSCTL_INT(_kern, OID_AUTO, capmode_coredump, CTLFLAG_RWTUN, - &capmode_coredump, 0, "Allow processes in capability mode to dump core"); - -static int do_coredump = 1; -SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, - &do_coredump, 0, "Enable/Disable coredumps"); - -static int set_core_nodump_flag = 0; -SYSCTL_INT(_kern, OID_AUTO, nodump_coredump, CTLFLAG_RW, &set_core_nodump_flag, - 0, "Enable setting the NODUMP flag on coredump files"); - -static int coredump_devctl = 0; -SYSCTL_INT(_kern, OID_AUTO, coredump_devctl, CTLFLAG_RW, &coredump_devctl, - 0, "Generate a devctl notification when processes coredump"); - /* * Signal properties and actions. * The array below categorizes the signals and their default actions @@ -784,6 +759,13 @@ sigprop(int sig) return (0); } +bool +sig_do_core(int sig) +{ + + return ((sigprop(sig) & SIGPROP_CORE) != 0); +} + static bool sigact_flag_test(const struct sigaction *act, int flag) { @@ -2665,6 +2647,8 @@ static void ptrace_coredumpreq(struct thread *td, struct proc *p, struct thr_coredump_req *tcq) { + struct coredump_vnode_ctx wctx; + struct coredump_writer cdw; void *rl_cookie; if (p->p_sysent->sv_coredump == NULL) { @@ -2672,8 +2656,15 @@ ptrace_coredumpreq(struct thread *td, struct proc *p, return; } + wctx.vp = tcq->tc_vp; + wctx.fcred = NOCRED; + + cdw.ctx = &wctx; + cdw.write_fn = core_vn_write; + cdw.extend_fn = core_vn_extend; + rl_cookie = vn_rangelock_wlock(tcq->tc_vp, 0, OFF_MAX); - tcq->tc_error = p->p_sysent->sv_coredump(td, tcq->tc_vp, + tcq->tc_error = p->p_sysent->sv_coredump(td, &cdw, tcq->tc_limit, tcq->tc_flags); vn_rangelock_unlock(tcq->tc_vp, rl_cookie); } @@ -3635,82 +3626,6 @@ killproc(struct proc *p, const char *why) } /* - * Force the current process to exit with the specified signal, dumping core - * if appropriate. We bypass the normal tests for masked and caught signals, - * allowing unrecoverable failures to terminate the process without changing - * signal state. Mark the accounting record with the signal termination. - * If dumping core, save the signal number for the debugger. Calls exit and - * does not return. - */ -void -sigexit(struct thread *td, int sig) -{ - struct proc *p = td->td_proc; - const char *coreinfo; - int rv; - bool logexit; - - PROC_LOCK_ASSERT(p, MA_OWNED); - proc_set_p2_wexit(p); - - p->p_acflag |= AXSIG; - if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0) - logexit = kern_logsigexit != 0; - else - logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0; - - /* - * We must be single-threading to generate a core dump. This - * ensures that the registers in the core file are up-to-date. - * Also, the ELF dump handler assumes that the thread list doesn't - * change out from under it. - * - * XXX If another thread attempts to single-thread before us - * (e.g. via fork()), we won't get a dump at all. - */ - if ((sigprop(sig) & SIGPROP_CORE) && - thread_single(p, SINGLE_NO_EXIT) == 0) { - p->p_sig = sig; - /* - * Log signals which would cause core dumps - * (Log as LOG_INFO to appease those who don't want - * these messages.) - * XXX : Todo, as well as euid, write out ruid too - * Note that coredump() drops proc lock. - */ - rv = coredump(td); - switch (rv) { - case 0: - sig |= WCOREFLAG; - coreinfo = " (core dumped)"; - break; - case EFAULT: - coreinfo = " (no core dump - bad address)"; - break; - case EINVAL: - coreinfo = " (no core dump - invalid argument)"; - break; - case EFBIG: - coreinfo = " (no core dump - too large)"; - break; - default: - coreinfo = " (no core dump - other error)"; - break; - } - if (logexit) - log(LOG_INFO, - "pid %d (%s), jid %d, uid %d: exited on " - "signal %d%s\n", p->p_pid, p->p_comm, - p->p_ucred->cr_prison->pr_id, - td->td_ucred->cr_uid, - sig &~ WCOREFLAG, coreinfo); - } else - PROC_UNLOCK(p); - exit1(td, 0, sig); - /* NOTREACHED */ -} - -/* * Send queued SIGCHLD to parent when child process's state * is changed. */ @@ -3803,477 +3718,6 @@ childproc_exited(struct proc *p) sigparent(p, reason, status); } -#define MAX_NUM_CORE_FILES 100000 -#ifndef NUM_CORE_FILES -#define NUM_CORE_FILES 5 -#endif -CTASSERT(NUM_CORE_FILES >= 0 && NUM_CORE_FILES <= MAX_NUM_CORE_FILES); -static int num_cores = NUM_CORE_FILES; - -static int -sysctl_debug_num_cores_check (SYSCTL_HANDLER_ARGS) -{ - int error; - int new_val; - - new_val = num_cores; - error = sysctl_handle_int(oidp, &new_val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (new_val > MAX_NUM_CORE_FILES) - new_val = MAX_NUM_CORE_FILES; - if (new_val < 0) - new_val = 0; - num_cores = new_val; - return (0); -} -SYSCTL_PROC(_debug, OID_AUTO, ncores, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, sizeof(int), - sysctl_debug_num_cores_check, "I", - "Maximum number of generated process corefiles while using index format"); - -#define GZIP_SUFFIX ".gz" -#define ZSTD_SUFFIX ".zst" - -int compress_user_cores = 0; - -static int -sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS) -{ - int error, val; - - val = compress_user_cores; - error = sysctl_handle_int(oidp, &val, 0, req); - if (error != 0 || req->newptr == NULL) - return (error); - if (val != 0 && !compressor_avail(val)) - return (EINVAL); - compress_user_cores = val; - return (error); -} -SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, - CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), - sysctl_compress_user_cores, "I", - "Enable compression of user corefiles (" - __XSTRING(COMPRESS_GZIP) " = gzip, " - __XSTRING(COMPRESS_ZSTD) " = zstd)"); - -int compress_user_cores_level = 6; -SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN, - &compress_user_cores_level, 0, - "Corefile compression level"); - -/* - * Protect the access to corefilename[] by allproc_lock. - */ -#define corefilename_lock allproc_lock - -static char corefilename[MAXPATHLEN] = {"%N.core"}; -TUNABLE_STR("kern.corefile", corefilename, sizeof(corefilename)); - -static int -sysctl_kern_corefile(SYSCTL_HANDLER_ARGS) -{ - int error; - - sx_xlock(&corefilename_lock); - error = sysctl_handle_string(oidp, corefilename, sizeof(corefilename), - req); - sx_xunlock(&corefilename_lock); - - return (error); -} -SYSCTL_PROC(_kern, OID_AUTO, corefile, CTLTYPE_STRING | CTLFLAG_RW | - CTLFLAG_MPSAFE, 0, 0, sysctl_kern_corefile, "A", - "Process corefile name format string"); - -static void -vnode_close_locked(struct thread *td, struct vnode *vp) -{ - - VOP_UNLOCK(vp); - vn_close(vp, FWRITE, td->td_ucred, td); -} - -/* - * If the core format has a %I in it, then we need to check - * for existing corefiles before defining a name. - * To do this we iterate over 0..ncores to find a - * non-existing core file name to use. If all core files are - * already used we choose the oldest one. - */ -static int -corefile_open_last(struct thread *td, char *name, int indexpos, - int indexlen, int ncores, struct vnode **vpp) -{ - struct vnode *oldvp, *nextvp, *vp; - struct vattr vattr; - struct nameidata nd; - int error, i, flags, oflags, cmode; - char ch; - struct timespec lasttime; - - nextvp = oldvp = NULL; - cmode = S_IRUSR | S_IWUSR; - oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | - (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); - - for (i = 0; i < ncores; i++) { - flags = O_CREAT | FWRITE | O_NOFOLLOW; - - ch = name[indexpos + indexlen]; - (void)snprintf(name + indexpos, indexlen + 1, "%.*u", indexlen, - i); - name[indexpos + indexlen] = ch; - - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); - error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, - NULL); - if (error != 0) - break; - - vp = nd.ni_vp; - NDFREE_PNBUF(&nd); - if ((flags & O_CREAT) == O_CREAT) { - nextvp = vp; - break; - } - - error = VOP_GETATTR(vp, &vattr, td->td_ucred); - if (error != 0) { - vnode_close_locked(td, vp); - break; - } - - if (oldvp == NULL || - lasttime.tv_sec > vattr.va_mtime.tv_sec || - (lasttime.tv_sec == vattr.va_mtime.tv_sec && - lasttime.tv_nsec >= vattr.va_mtime.tv_nsec)) { - if (oldvp != NULL) - vn_close(oldvp, FWRITE, td->td_ucred, td); - oldvp = vp; - VOP_UNLOCK(oldvp); - lasttime = vattr.va_mtime; - } else { - vnode_close_locked(td, vp); - } - } - - if (oldvp != NULL) { - if (nextvp == NULL) { - if ((td->td_proc->p_flag & P_SUGID) != 0) { - error = EFAULT; - vn_close(oldvp, FWRITE, td->td_ucred, td); - } else { - nextvp = oldvp; - error = vn_lock(nextvp, LK_EXCLUSIVE); - if (error != 0) { - vn_close(nextvp, FWRITE, td->td_ucred, - td); - nextvp = NULL; - } - } - } else { - vn_close(oldvp, FWRITE, td->td_ucred, td); - } - } - if (error != 0) { - if (nextvp != NULL) - vnode_close_locked(td, oldvp); - } else { - *vpp = nextvp; - } - - return (error); -} - -/* - * corefile_open(comm, uid, pid, td, compress, vpp, namep) - * Expand the name described in corefilename, using name, uid, and pid - * and open/create core file. - * corefilename is a printf-like string, with three format specifiers: - * %N name of process ("name") - * %P process id (pid) - * %U user id (uid) - * For example, "%N.core" is the default; they can be disabled completely - * by using "/dev/null", or all core files can be stored in "/cores/%U/%N-%P". - * This is controlled by the sysctl variable kern.corefile (see above). - */ -static int -corefile_open(const char *comm, uid_t uid, pid_t pid, struct thread *td, - int compress, int signum, struct vnode **vpp, char **namep) -{ - struct sbuf sb; - struct nameidata nd; - const char *format; - char *hostname, *name; - int cmode, error, flags, i, indexpos, indexlen, oflags, ncores; - - hostname = NULL; - format = corefilename; - name = malloc(MAXPATHLEN, M_TEMP, M_WAITOK | M_ZERO); - indexlen = 0; - indexpos = -1; - ncores = num_cores; - (void)sbuf_new(&sb, name, MAXPATHLEN, SBUF_FIXEDLEN); - sx_slock(&corefilename_lock); - for (i = 0; format[i] != '\0'; i++) { - switch (format[i]) { - case '%': /* Format character */ - i++; - switch (format[i]) { - case '%': - sbuf_putc(&sb, '%'); - break; - case 'H': /* hostname */ - if (hostname == NULL) { - hostname = malloc(MAXHOSTNAMELEN, - M_TEMP, M_WAITOK); - } - getcredhostname(td->td_ucred, hostname, - MAXHOSTNAMELEN); - sbuf_cat(&sb, hostname); - break; - case 'I': /* autoincrementing index */ - if (indexpos != -1) { - sbuf_printf(&sb, "%%I"); - break; - } - - indexpos = sbuf_len(&sb); - sbuf_printf(&sb, "%u", ncores - 1); - indexlen = sbuf_len(&sb) - indexpos; - break; - case 'N': /* process name */ - sbuf_printf(&sb, "%s", comm); - break; - case 'P': /* process id */ - sbuf_printf(&sb, "%u", pid); - break; - case 'S': /* signal number */ - sbuf_printf(&sb, "%i", signum); - break; - case 'U': /* user id */ - sbuf_printf(&sb, "%u", uid); - break; - default: - log(LOG_ERR, - "Unknown format character %c in " - "corename `%s'\n", format[i], format); - break; - } - break; - default: - sbuf_putc(&sb, format[i]); - break; - } - } - sx_sunlock(&corefilename_lock); - free(hostname, M_TEMP); - if (compress == COMPRESS_GZIP) - sbuf_cat(&sb, GZIP_SUFFIX); - else if (compress == COMPRESS_ZSTD) - sbuf_cat(&sb, ZSTD_SUFFIX); - if (sbuf_error(&sb) != 0) { - log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too " - "long\n", (long)pid, comm, (u_long)uid); - sbuf_delete(&sb); - free(name, M_TEMP); - return (ENOMEM); - } - sbuf_finish(&sb); - sbuf_delete(&sb); - - if (indexpos != -1) { - error = corefile_open_last(td, name, indexpos, indexlen, ncores, - vpp); - if (error != 0) { - log(LOG_ERR, - "pid %d (%s), uid (%u): Path `%s' failed " - "on initial open test, error = %d\n", - pid, comm, uid, name, error); - } - } else { - cmode = S_IRUSR | S_IWUSR; - oflags = VN_OPEN_NOAUDIT | VN_OPEN_NAMECACHE | - (capmode_coredump ? VN_OPEN_NOCAPCHECK : 0); - flags = O_CREAT | FWRITE | O_NOFOLLOW; - if ((td->td_proc->p_flag & P_SUGID) != 0) - flags |= O_EXCL; - - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name); - error = vn_open_cred(&nd, &flags, cmode, oflags, td->td_ucred, - NULL); - if (error == 0) { - *vpp = nd.ni_vp; - NDFREE_PNBUF(&nd); - } - } - - if (error != 0) { -#ifdef AUDIT - audit_proc_coredump(td, name, error); -#endif - free(name, M_TEMP); - return (error); - } - *namep = name; - return (0); -} - -/* - * Dump a process' core. The main routine does some - * policy checking, and creates the name of the coredump; - * then it passes on a vnode and a size limit to the process-specific - * coredump routine if there is one; if there _is not_ one, it returns - * ENOSYS; otherwise it returns the error from the process-specific routine. - */ - -static int -coredump(struct thread *td) -{ - struct proc *p = td->td_proc; - struct ucred *cred = td->td_ucred; - struct vnode *vp; - struct flock lf; - struct vattr vattr; - size_t fullpathsize; - int error, error1, jid, locked, ppid, sig; - char *name; /* name of corefile */ - void *rl_cookie; - off_t limit; - char *fullpath, *freepath = NULL; - struct sbuf *sb; - - PROC_LOCK_ASSERT(p, MA_OWNED); - MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); - - if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) || - (p->p_flag2 & P2_NOTRACE) != 0) { - PROC_UNLOCK(p); - return (EFAULT); - } - - /* - * Note that the bulk of limit checking is done after - * the corefile is created. The exception is if the limit - * for corefiles is 0, in which case we don't bother - * creating the corefile at all. This layout means that - * a corefile is truncated instead of not being created, - * if it is larger than the limit. - */ - limit = (off_t)lim_cur(td, RLIMIT_CORE); - if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { - PROC_UNLOCK(p); - return (EFBIG); - } - - ppid = p->p_oppid; - sig = p->p_sig; - jid = p->p_ucred->cr_prison->pr_id; - PROC_UNLOCK(p); - - error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, - compress_user_cores, p->p_sig, &vp, &name); - if (error != 0) - return (error); - - /* - * Don't dump to non-regular files or files with links. - * Do not dump into system files. Effective user must own the corefile. - */ - if (vp->v_type != VREG || VOP_GETATTR(vp, &vattr, cred) != 0 || - vattr.va_nlink != 1 || (vp->v_vflag & VV_SYSTEM) != 0 || - vattr.va_uid != cred->cr_uid) { - VOP_UNLOCK(vp); - error = EFAULT; - goto out; - } - - VOP_UNLOCK(vp); - - /* Postpone other writers, including core dumps of other processes. */ - rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); - - lf.l_whence = SEEK_SET; - lf.l_start = 0; - lf.l_len = 0; - lf.l_type = F_WRLCK; - locked = (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &lf, F_FLOCK) == 0); - - VATTR_NULL(&vattr); - vattr.va_size = 0; - if (set_core_nodump_flag) - vattr.va_flags = UF_NODUMP; - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - VOP_SETATTR(vp, &vattr, cred); - VOP_UNLOCK(vp); - PROC_LOCK(p); - p->p_acflag |= ACORE; - PROC_UNLOCK(p); - - if (p->p_sysent->sv_coredump != NULL) { - error = p->p_sysent->sv_coredump(td, vp, limit, 0); - } else { - error = ENOSYS; - } - - if (locked) { - lf.l_type = F_UNLCK; - VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_FLOCK); - } - vn_rangelock_unlock(vp, rl_cookie); - - /* - * Notify the userland helper that a process triggered a core dump. - * This allows the helper to run an automated debugging session. - */ - if (error != 0 || coredump_devctl == 0) - goto out; - sb = sbuf_new_auto(); - if (vn_fullpath_global(p->p_textvp, &fullpath, &freepath) != 0) - goto out2; - sbuf_cat(sb, "comm=\""); - devctl_safe_quote_sb(sb, fullpath); - free(freepath, M_TEMP); - sbuf_cat(sb, "\" core=\""); - - /* - * We can't lookup core file vp directly. When we're replacing a core, and - * other random times, we flush the name cache, so it will fail. Instead, - * if the path of the core is relative, add the current dir in front if it. - */ - if (name[0] != '/') { - fullpathsize = MAXPATHLEN; - freepath = malloc(fullpathsize, M_TEMP, M_WAITOK); - if (vn_getcwd(freepath, &fullpath, &fullpathsize) != 0) { - free(freepath, M_TEMP); - goto out2; - } - devctl_safe_quote_sb(sb, fullpath); - free(freepath, M_TEMP); - sbuf_putc(sb, '/'); - } - devctl_safe_quote_sb(sb, name); - sbuf_putc(sb, '"'); - - sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", - jid, p->p_pid, ppid, sig); - if (sbuf_finish(sb) == 0) - devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); -out2: - sbuf_delete(sb); -out: - error1 = vn_close(vp, FWRITE, cred, td); - if (error == 0) - error = error1; -#ifdef AUDIT - audit_proc_coredump(td, name, error); -#endif - free(name, M_TEMP); - return (error); -} - /* * Nonexistent system call-- signal process (may want to handle it). Flag * error in case process won't see signal immediately (blocked or ignored). diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 46226cc31980..25da134661e9 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -2368,7 +2368,7 @@ sysctl_root(SYSCTL_HANDLER_ARGS) priv = PRIV_SYSCTL_WRITEJAIL; #ifdef VIMAGE else if ((oid->oid_kind & CTLFLAG_VNET) && - prison_owns_vnet(req->td->td_ucred)) + prison_owns_vnet(req->td->td_ucred->cr_prison)) priv = PRIV_SYSCTL_WRITEJAIL; #endif else diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index f853af193016..50b040132396 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -571,7 +571,7 @@ threadinit(void) /* * Thread structures are specially aligned so that (at least) the - * 5 lower bits of a pointer to 'struct thead' must be 0. These bits + * 5 lower bits of a pointer to 'struct thread' must be 0. These bits * are used by synchronization primitives to store flags in pointers to * such structures. */ diff --git a/sys/kern/kern_ucoredump.c b/sys/kern/kern_ucoredump.c new file mode 100644 index 000000000000..d425596b5f24 --- /dev/null +++ b/sys/kern/kern_ucoredump.c @@ -0,0 +1,299 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1982, 1986, 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/acct.h> +#include <sys/compressor.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/signalvar.h> +#include <sys/racct.h> +#include <sys/resourcevar.h> +#include <sys/rmlock.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/ucoredump.h> +#include <sys/wait.h> + +static int coredump(struct thread *td, const char **); + +int compress_user_cores = 0; + +static SLIST_HEAD(, coredumper) coredumpers = + SLIST_HEAD_INITIALIZER(coredumpers); +static struct rmlock coredump_rmlock; +RM_SYSINIT(coredump_lock, &coredump_rmlock, "coredump_lock"); + +static int kern_logsigexit = 1; +SYSCTL_INT(_kern, KERN_LOGSIGEXIT, logsigexit, CTLFLAG_RW, + &kern_logsigexit, 0, + "Log processes quitting on abnormal signals to syslog(3)"); + +static int sugid_coredump; +SYSCTL_INT(_kern, OID_AUTO, sugid_coredump, CTLFLAG_RWTUN, + &sugid_coredump, 0, "Allow setuid and setgid processes to dump core"); + +static int do_coredump = 1; +SYSCTL_INT(_kern, OID_AUTO, coredump, CTLFLAG_RW, + &do_coredump, 0, "Enable/Disable coredumps"); + +static int +sysctl_compress_user_cores(SYSCTL_HANDLER_ARGS) +{ + int error, val; + + val = compress_user_cores; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (val != 0 && !compressor_avail(val)) + return (EINVAL); + compress_user_cores = val; + return (error); +} +SYSCTL_PROC(_kern, OID_AUTO, compress_user_cores, + CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int), + sysctl_compress_user_cores, "I", + "Enable compression of user corefiles (" + __XSTRING(COMPRESS_GZIP) " = gzip, " + __XSTRING(COMPRESS_ZSTD) " = zstd)"); + +int compress_user_cores_level = 6; +SYSCTL_INT(_kern, OID_AUTO, compress_user_cores_level, CTLFLAG_RWTUN, + &compress_user_cores_level, 0, + "Corefile compression level"); + +void +coredumper_register(struct coredumper *cd) +{ + + blockcount_init(&cd->cd_refcount); + rm_wlock(&coredump_rmlock); + SLIST_INSERT_HEAD(&coredumpers, cd, cd_entry); + rm_wunlock(&coredump_rmlock); +} + +void +coredumper_unregister(struct coredumper *cd) +{ + + rm_wlock(&coredump_rmlock); + SLIST_REMOVE(&coredumpers, cd, coredumper, cd_entry); + rm_wunlock(&coredump_rmlock); + + /* + * Wait for any in-process coredumps to finish before returning. + */ + blockcount_wait(&cd->cd_refcount, NULL, "dumpwait", 0); +} + +/* + * Force the current process to exit with the specified signal, dumping core + * if appropriate. We bypass the normal tests for masked and caught signals, + * allowing unrecoverable failures to terminate the process without changing + * signal state. Mark the accounting record with the signal termination. + * If dumping core, save the signal number for the debugger. Calls exit and + * does not return. + */ +void +sigexit(struct thread *td, int sig) +{ + struct proc *p = td->td_proc; + int rv; + bool logexit; + + PROC_LOCK_ASSERT(p, MA_OWNED); + proc_set_p2_wexit(p); + + p->p_acflag |= AXSIG; + if ((p->p_flag2 & P2_LOGSIGEXIT_CTL) == 0) + logexit = kern_logsigexit != 0; + else + logexit = (p->p_flag2 & P2_LOGSIGEXIT_ENABLE) != 0; + + /* + * We must be single-threading to generate a core dump. This + * ensures that the registers in the core file are up-to-date. + * Also, the ELF dump handler assumes that the thread list doesn't + * change out from under it. + * + * XXX If another thread attempts to single-thread before us + * (e.g. via fork()), we won't get a dump at all. + */ + if (sig_do_core(sig) && thread_single(p, SINGLE_NO_EXIT) == 0) { + const char *err = NULL; + + p->p_sig = sig; + /* + * Log signals which would cause core dumps + * (Log as LOG_INFO to appease those who don't want + * these messages.) + * XXX : Todo, as well as euid, write out ruid too + * Note that coredump() drops proc lock. + */ + rv = coredump(td, &err); + if (rv == 0) { + MPASS(err == NULL); + sig |= WCOREFLAG; + } else if (err == NULL) { + switch (rv) { + case EFAULT: + err = "bad address"; + break; + case EINVAL: + err = "invalild argument"; + break; + case EFBIG: + err = "too large"; + break; + default: + err = "other error"; + break; + } + } + if (logexit) + log(LOG_INFO, + "pid %d (%s), jid %d, uid %d: exited on " + "signal %d (%s%s)\n", p->p_pid, p->p_comm, + p->p_ucred->cr_prison->pr_id, + td->td_ucred->cr_uid, sig &~ WCOREFLAG, + err != NULL ? "no core dump - " : "core dumped", + err != NULL ? err : ""); + } else + PROC_UNLOCK(p); + exit1(td, 0, sig); + /* NOTREACHED */ +} + + +/* + * Dump a process' core. The main routine does some + * policy checking, and creates the name of the coredump; + * then it passes on a vnode and a size limit to the process-specific + * coredump routine if there is one; if there _is not_ one, it returns + * ENOSYS; otherwise it returns the error from the process-specific routine. + */ +static int +coredump(struct thread *td, const char **errmsg) +{ + struct coredumper *iter, *chosen; + struct proc *p = td->td_proc; + struct rm_priotracker tracker; + off_t limit; + int error, priority; + + PROC_LOCK_ASSERT(p, MA_OWNED); + MPASS((p->p_flag & P_HADTHREADS) == 0 || p->p_singlethread == td); + + if (!do_coredump || (!sugid_coredump && (p->p_flag & P_SUGID) != 0) || + (p->p_flag2 & P2_NOTRACE) != 0) { + PROC_UNLOCK(p); + + if (!do_coredump) + *errmsg = "denied by kern.coredump"; + else if ((p->p_flag2 & P2_NOTRACE) != 0) + *errmsg = "process has trace disabled"; + else + *errmsg = "sugid process denied by kern.sugid_coredump"; + return (EFAULT); + } + + /* + * Note that the bulk of limit checking is done after + * the corefile is created. The exception is if the limit + * for corefiles is 0, in which case we don't bother + * creating the corefile at all. This layout means that + * a corefile is truncated instead of not being created, + * if it is larger than the limit. + */ + limit = (off_t)lim_cur(td, RLIMIT_CORE); + if (limit == 0 || racct_get_available(p, RACCT_CORE) == 0) { + PROC_UNLOCK(p); + *errmsg = "coredumpsize limit is 0"; + return (EFBIG); + } + + rm_rlock(&coredump_rmlock, &tracker); + priority = -1; + chosen = NULL; + SLIST_FOREACH(iter, &coredumpers, cd_entry) { + if (iter->cd_probe == NULL) { + /* + * If we haven't found anything of a higher priority + * yet, we'll call this a GENERIC. Ideally, we want + * coredumper modules to include a probe function. + */ + if (priority < 0) { + priority = COREDUMPER_GENERIC; + chosen = iter; + } + + continue; + } + + error = (*iter->cd_probe)(td); + if (error < 0) + continue; + + /* + * Higher priority than previous options. + */ + if (error > priority) { + priority = error; + chosen = iter; + } + } + + /* + * Acquire our refcount before we drop the lock so that + * coredumper_unregister() can safely assume that the refcount will only + * go down once it's dropped the rmlock. + */ + blockcount_acquire(&chosen->cd_refcount, 1); + rm_runlock(&coredump_rmlock, &tracker); + + /* Currently, we always have the vnode dumper built in. */ + MPASS(chosen != NULL); + error = ((*chosen->cd_handle)(td, limit)); + PROC_LOCK_ASSERT(p, MA_NOTOWNED); + + blockcount_release(&chosen->cd_refcount, 1); + + return (error); +} diff --git a/sys/kern/subr_compressor.c b/sys/kern/subr_compressor.c index 280264881241..5d59622e0455 100644 --- a/sys/kern/subr_compressor.c +++ b/sys/kern/subr_compressor.c @@ -538,6 +538,12 @@ compressor_init(compressor_cb_t cb, int format, size_t maxiosize, int level, return (s); } +int +compressor_format(const struct compressor *stream) +{ + return (stream->methods->format); +} + void compressor_reset(struct compressor *stream) { diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index b472aaea89e6..5606b36f772f 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -2269,6 +2269,7 @@ exterr_copyout(struct thread *td) ue.error = 0; sz = sizeof(ue.error); } else { + ktrexterr(td); sz = sizeof(ue) - __offsetof(struct uexterror, error); } error = copyout(&ue.error, uloc, sz); @@ -2335,7 +2336,6 @@ exterr_set(int eerror, int category, const char *mmsg, uintptr_t pp1, td->td_kexterr.p1 = pp1; td->td_kexterr.p2 = pp2; td->td_kexterr.src_line = line; - ktrexterr(td); } return (eerror); } diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 6f83b875a6b6..85fe48ddd466 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -1134,10 +1134,10 @@ shm_doremove(struct shm_mapping *map) int kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, - int shmflags, struct filecaps *fcaps, const char *name __unused) + int shmflags, struct filecaps *fcaps, const char *name __unused, + struct shmfd *shmfd) { struct pwddesc *pdp; - struct shmfd *shmfd; struct file *fp; char *path; void *rl_cookie; @@ -1214,23 +1214,41 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, if (error != 0) goto outnofp; - /* A SHM_ANON path pointer creates an anonymous object. */ + /* + * A SHM_ANON path pointer creates an anonymous object. We allow other + * parts of the kernel to pre-populate a shmfd and then materialize an + * fd for it here as a means to pass data back up to userland. This + * doesn't really make sense for named shm objects, but it makes plenty + * of sense for anonymous objects. + */ if (userpath == SHM_ANON) { - /* A read-only anonymous object is pointless. */ - if ((flags & O_ACCMODE) == O_RDONLY) { - error = EINVAL; - goto out; - } - shmfd = shm_alloc(td->td_ucred, cmode, largepage); - if (shmfd == NULL) { - error = ENOMEM; - goto out; + if (shmfd != NULL) { + shm_hold(shmfd); + } else { + /* + * A read-only anonymous object is pointless, unless it + * was pre-populated by the kernel with the expectation + * that a shmfd would later be created for userland to + * access it through. + */ + if ((flags & O_ACCMODE) == O_RDONLY) { + error = EINVAL; + goto out; + } + shmfd = shm_alloc(td->td_ucred, cmode, largepage); + if (shmfd == NULL) { + error = ENOMEM; + goto out; + } + + shmfd->shm_seals = initial_seals; + shmfd->shm_flags = shmflags; } - shmfd->shm_seals = initial_seals; - shmfd->shm_flags = shmflags; } else { fnv = fnv_32_str(path, FNV1_32_INIT); sx_xlock(&shm_dict_lock); + + MPASS(shmfd == NULL); shmfd = shm_lookup(path, fnv); if (shmfd == NULL) { /* Object does not yet exist, create it if requested. */ @@ -2173,7 +2191,7 @@ kern_shm_open(struct thread *td, const char *path, int flags, mode_t mode, struct filecaps *caps) { - return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL)); + return (kern_shm_open2(td, path, flags, mode, 0, caps, NULL, NULL)); } /* @@ -2191,7 +2209,7 @@ sys_shm_open2(struct thread *td, struct shm_open2_args *uap) { return (kern_shm_open2(td, uap->path, uap->flags, uap->mode, - uap->shmflags, NULL, uap->name)); + uap->shmflags, NULL, uap->name, NULL)); } int diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 7cb6e2124326..99c9ec9dcd01 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -34,6 +34,7 @@ SUBDIR= \ alq \ ${_amd_ecc_inject} \ ${_amdgpio} \ + ${_amdsmu} \ ${_amdsbwd} \ ${_amdsmn} \ ${_amdtemp} \ @@ -772,6 +773,7 @@ _acpi= acpi _aesni= aesni .endif _amd_ecc_inject=amd_ecc_inject +_amdsmu= amdsmu _amdsbwd= amdsbwd _amdsmn= amdsmn _amdtemp= amdtemp diff --git a/sys/modules/amdsmu/Makefile b/sys/modules/amdsmu/Makefile new file mode 100644 index 000000000000..752f57173d61 --- /dev/null +++ b/sys/modules/amdsmu/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2025 The FreeBSD Foundation +# +# This software was developed by Aymeric Wibo <obiwac@freebsd.org> +# under sponsorship from the FreeBSD Foundation. + +.PATH: ${SRCTOP}/sys/dev/amdsmu + +KMOD= amdsmu +SRCS= amdsmu.c +SRCS+= bus_if.h device_if.h pci_if.h + +.include <bsd.kmod.mk> diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index 5b3ee740d75e..0a35fb4095fb 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -76,31 +76,34 @@ * heterogeneous bridges). */ -#include <sys/cdefs.h> #include "opt_inet.h" #include "opt_inet6.h" +#define EXTERR_CATEGORY EXTERR_CAT_BRIDGE + #include <sys/param.h> +#include <sys/ctype.h> /* string functions */ #include <sys/eventhandler.h> -#include <sys/mbuf.h> +#include <sys/exterrvar.h> +#include <sys/jail.h> +#include <sys/kernel.h> +#include <sys/lock.h> #include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/priv.h> +#include <sys/proc.h> #include <sys/protosw.h> +#include <sys/random.h> #include <sys/systm.h> -#include <sys/jail.h> -#include <sys/time.h> #include <sys/socket.h> /* for net/if.h */ #include <sys/sockio.h> -#include <sys/ctype.h> /* string functions */ -#include <sys/kernel.h> -#include <sys/random.h> #include <sys/syslog.h> #include <sys/sysctl.h> +#include <sys/time.h> + #include <vm/uma.h> -#include <sys/module.h> -#include <sys/priv.h> -#include <sys/proc.h> -#include <sys/lock.h> -#include <sys/mutex.h> #include <net/bpf.h> #include <net/if.h> @@ -254,8 +257,8 @@ struct bridge_iflist { uint32_t bif_addrcnt; /* cur. # of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; - ether_vlanid_t bif_untagged; /* untagged vlan id */ - ifbvlan_set_t bif_vlan_set; /* allowed tagged vlans */ + ether_vlanid_t bif_pvid; /* port vlan id */ + ifbvlan_set_t bif_vlan_set; /* if allowed tagged vlans */ }; /* @@ -404,7 +407,7 @@ static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); -static int bridge_ioctl_sifuntagged(struct bridge_softc *, void *); +static int bridge_ioctl_sifpvid(struct bridge_softc *, void *); static int bridge_ioctl_sifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_gifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); @@ -625,7 +628,7 @@ static const struct bridge_control bridge_control_table[] = { { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, - { bridge_ioctl_sifuntagged, sizeof(struct ifbreq), + { bridge_ioctl_sifpvid, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, { bridge_ioctl_sifvlanset, sizeof(struct ifbif_vlan_req), @@ -986,31 +989,37 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCGDRVSPEC: case SIOCSDRVSPEC: if (ifd->ifd_cmd >= bridge_control_table_size) { - error = EINVAL; + error = EXTERROR(EINVAL, "Invalid control command"); break; } bc = &bridge_control_table[ifd->ifd_cmd]; if (cmd == SIOCGDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) == 0) { - error = EINVAL; + error = EXTERROR(EINVAL, + "Inappropriate ioctl for command " + "(expected SIOCSDRVSPEC)"); break; } else if (cmd == SIOCSDRVSPEC && (bc->bc_flags & BC_F_COPYOUT) != 0) { - error = EINVAL; + error = EXTERROR(EINVAL, + "Inappropriate ioctl for command " + "(expected SIOCGDRVSPEC)"); break; } if (bc->bc_flags & BC_F_SUSER) { error = priv_check(td, PRIV_NET_BRIDGE); - if (error) + if (error) { + EXTERROR(error, "PRIV_NET_BRIDGE required"); break; + } } if (ifd->ifd_len != bc->bc_argsize || ifd->ifd_len > sizeof(args)) { - error = EINVAL; + error = EXTERROR(EINVAL, "Invalid argument size"); break; } @@ -1062,7 +1071,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) oldmtu = sc->sc_ifp->if_mtu; if (ifr->ifr_mtu < IF_MINMTU) { - error = EINVAL; + error = EXTERROR(EINVAL, + "Requested MTU is lower than IF_MINMTU"); break; } if (CK_LIST_EMPTY(&sc->sc_iflist)) { @@ -1088,6 +1098,8 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) (*bif->bif_ifp->if_ioctl)(bif->bif_ifp, SIOCSIFMTU, (caddr_t)ifr); } + EXTERROR(error, + "Failed to set MTU on member interface"); } else { sc->sc_ifp->if_mtu = ifr->ifr_mtu; } @@ -1125,14 +1137,14 @@ bridge_mutecaps(struct bridge_softc *sc) mask = BRIDGE_IFCAPS_MASK; CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { - /* Every member must support it or its disabled */ + /* Every member must support it or it's disabled */ mask &= bif->bif_savedcaps; } CK_LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { enabled = bif->bif_ifp->if_capenable; enabled &= ~BRIDGE_IFCAPS_STRIP; - /* strip off mask bits and enable them again if allowed */ + /* Strip off mask bits and enable them again if allowed */ enabled &= ~BRIDGE_IFCAPS_MASK; enabled |= mask; bridge_set_ifcap(sc, bif, enabled); @@ -1282,7 +1294,7 @@ bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif, #endif break; } - /* reneable any interface capabilities */ + /* Re-enable any interface capabilities */ bridge_set_ifcap(sc, bif, bif->bif_savedcaps); } bstp_destroy(&bif->bif_stp); /* prepare to free */ @@ -1318,21 +1330,48 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "No such interface", + req->ifbr_ifsname)); if (ifs->if_ioctl == NULL) /* must be supported */ - return (EINVAL); + return (EXTERROR(EINVAL, "Interface must support ioctl(2)")); + + /* + * If the new interface is a vlan(4), it could be a bridge SVI. + * Don't allow such things to be added to bridges. + */ + if (ifs->if_type == IFT_L2VLAN) { + struct ifnet *parent; + struct epoch_tracker et; + bool is_bridge; + + /* + * Entering NET_EPOCH with BRIDGE_LOCK held, but this is okay + * since we don't sleep here. + */ + NET_EPOCH_ENTER(et); + parent = VLAN_TRUNKDEV(ifs); + is_bridge = (parent != NULL && parent->if_type == IFT_BRIDGE); + NET_EPOCH_EXIT(et); + + if (is_bridge) + return (EXTERROR(EINVAL, + "Bridge SVI cannot be added to a bridge")); + } /* If it's in the span list, it can't be a member. */ CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) - return (EBUSY); + return (EXTERROR(EBUSY, + "Span interface cannot be a member")); if (ifs->if_bridge) { struct bridge_iflist *sbif = ifs->if_bridge; if (sbif->bif_sc == sc) - return (EEXIST); + return (EXTERROR(EEXIST, + "Interface is already a member of this bridge")); - return (EBUSY); + return (EXTERROR(EBUSY, + "Interface is already a member of another bridge")); } switch (ifs->if_type) { @@ -1342,7 +1381,7 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) /* permitted interface types */ break; default: - return (EINVAL); + return (EXTERROR(EINVAL, "Unsupported interface type")); } #ifdef INET6 @@ -1394,11 +1433,15 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) CK_STAILQ_FOREACH(ifa, &ifs->if_addrhead, ifa_link) { #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) - return (EINVAL); + return (EXTERROR(EINVAL, + "Member interface may not have " + "an IPv4 address configured")); #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) - return (EINVAL); + return (EXTERROR(EINVAL, + "Member interface may not have " + "an IPv6 address configured")); #endif } } @@ -1420,7 +1463,8 @@ bridge_ioctl_add(struct bridge_softc *sc, void *arg) " new member %s\n", sc->sc_ifp->if_xname, ifr.ifr_mtu, ifs->if_xname); - return (EINVAL); + return (EXTERROR(EINVAL, + "Failed to set MTU on new member")); } } @@ -1482,7 +1526,7 @@ bridge_ioctl_del(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); bridge_delete_member(sc, bif, 0); @@ -1498,7 +1542,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); bp = &bif->bif_stp; req->ifbr_ifsflags = bif->bif_flags; @@ -1512,7 +1556,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; - req->ifbr_untagged = bif->bif_untagged; + req->ifbr_pvid = bif->bif_pvid; /* Copy STP state options as flags */ if (bp->bp_operedge) @@ -1541,12 +1585,12 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); bp = &bif->bif_stp; if (req->ifbr_ifsflags & IFBIF_SPAN) /* SPAN is readonly */ - return (EINVAL); + return (EXTERROR(EINVAL, "Span interface cannot be modified")); NET_EPOCH_ENTER(et); @@ -1555,7 +1599,8 @@ bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg) error = bstp_enable(&bif->bif_stp); if (error) { NET_EPOCH_EXIT(et); - return (error); + return (EXTERROR(error, + "Failed to enable STP")); } } } else { @@ -1724,7 +1769,7 @@ bridge_ioctl_saddr(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifba_ifsname); if (bif == NULL) { NET_EPOCH_EXIT(et); - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); } /* bridge_rtupdate() may acquire the lock. */ @@ -1858,7 +1903,7 @@ bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); return (bstp_set_port_priority(&bif->bif_stp, req->ifbr_priority)); } @@ -1871,7 +1916,7 @@ bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); return (bstp_set_path_cost(&bif->bif_stp, req->ifbr_path_cost)); } @@ -1884,28 +1929,28 @@ bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); bif->bif_addrmax = req->ifbr_addrmax; return (0); } static int -bridge_ioctl_sifuntagged(struct bridge_softc *sc, void *arg) +bridge_ioctl_sifpvid(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; struct bridge_iflist *bif; bif = bridge_lookup_member(sc, req->ifbr_ifsname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); - if (req->ifbr_untagged > DOT1Q_VID_MAX) - return (EINVAL); + if (req->ifbr_pvid > DOT1Q_VID_MAX) + return (EXTERROR(EINVAL, "Invalid VLAN ID")); - if (req->ifbr_untagged != DOT1Q_VID_NULL) + if (req->ifbr_pvid != DOT1Q_VID_NULL) bif->bif_flags |= IFBIF_VLANFILTER; - bif->bif_untagged = req->ifbr_untagged; + bif->bif_pvid = req->ifbr_pvid; return (0); } @@ -1917,12 +1962,12 @@ bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->bv_ifname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); /* Reject invalid VIDs. */ if (BRVLAN_TEST(&req->bv_set, DOT1Q_VID_NULL) || BRVLAN_TEST(&req->bv_set, DOT1Q_VID_RSVD_IMPL)) - return (EINVAL); + return (EXTERROR(EINVAL, "Invalid VLAN ID in set")); switch (req->bv_op) { /* Replace the existing vlan set with the new set */ @@ -1942,7 +1987,8 @@ bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg) /* Invalid or unknown operation */ default: - return (EINVAL); + return (EXTERROR(EINVAL, + "Unsupported BRDGSIFVLANSET operation")); } /* @@ -1962,7 +2008,7 @@ bridge_ioctl_gifvlanset(struct bridge_softc *sc, void *arg) bif = bridge_lookup_member(sc, req->bv_ifname); if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a bridge member")); BIT_COPY(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); return (0); @@ -1977,14 +2023,16 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "No such interface")); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) - return (EBUSY); + return (EXTERROR(EBUSY, + "Interface is already a span port")); if (ifs->if_bridge != NULL) - return (EBUSY); + return (EXTERROR(EEXIST, + "Interface is already a bridge member")); switch (ifs->if_type) { case IFT_ETHER: @@ -1992,7 +2040,7 @@ bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) case IFT_L2VLAN: break; default: - return (EINVAL); + return (EXTERROR(EINVAL, "Unsupported interface type")); } bif = malloc(sizeof(*bif), M_DEVBUF, M_NOWAIT|M_ZERO); @@ -2016,14 +2064,14 @@ bridge_ioctl_delspan(struct bridge_softc *sc, void *arg) ifs = ifunit(req->ifbr_ifsname); if (ifs == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "No such interface")); CK_LIST_FOREACH(bif, &sc->sc_spanlist, bif_next) if (ifs == bif->bif_ifp) break; if (bif == NULL) - return (ENOENT); + return (EXTERROR(ENOENT, "Interface is not a span port")); bridge_delete_span(sc, bif); @@ -2278,8 +2326,8 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, * the VLAN header. */ if ((bif->bif_flags & IFBIF_VLANFILTER) && - bif->bif_untagged != DOT1Q_VID_NULL && - VLANTAGOF(m) == bif->bif_untagged) { + bif->bif_pvid != DOT1Q_VID_NULL && + VLANTAGOF(m) == bif->bif_pvid) { m->m_flags &= ~M_VLANTAG; m->m_pkthdr.ether_vtag = 0; } @@ -3145,14 +3193,14 @@ bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) * The frame doesn't have a tag. If the interface does not * have an untagged vlan configured, drop the frame. */ - if (sbif->bif_untagged == DOT1Q_VID_NULL) + if (sbif->bif_pvid == DOT1Q_VID_NULL) return (false); /* * Otherwise, insert a new tag based on the interface's * untagged vlan id. */ - m->m_pkthdr.ether_vtag = sbif->bif_untagged; + m->m_pkthdr.ether_vtag = sbif->bif_pvid; m->m_flags |= M_VLANTAG; } else { /* @@ -3213,7 +3261,7 @@ bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m) * If the frame's vlan matches the interfaces's untagged vlan, * allow it. */ - if (vlan == dbif->bif_untagged) + if (vlan == dbif->bif_pvid) return (true); /* @@ -3244,10 +3292,11 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, BRIDGE_LOCK_OR_NET_EPOCH_ASSERT(sc); /* Check the source address is valid and not multicast. */ - if (ETHER_IS_MULTICAST(dst) || - (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 && - dst[3] == 0 && dst[4] == 0 && dst[5] == 0) != 0) - return (EINVAL); + if (ETHER_IS_MULTICAST(dst)) + return (EXTERROR(EINVAL, "Multicast address not permitted")); + if (dst[0] == 0 && dst[1] == 0 && dst[2] == 0 && + dst[3] == 0 && dst[4] == 0 && dst[5] == 0) + return (EXTERROR(EINVAL, "Zero address not permitted")); /* * A route for this destination might already exist. If so, @@ -3266,13 +3315,14 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, if (sc->sc_brtcnt >= sc->sc_brtmax) { sc->sc_brtexceeded++; BRIDGE_RT_UNLOCK(sc); - return (ENOSPC); + return (EXTERROR(ENOSPC, "Address table is full")); } /* Check per interface address limits (if enabled) */ if (bif->bif_addrmax && bif->bif_addrcnt >= bif->bif_addrmax) { bif->bif_addrexceeded++; BRIDGE_RT_UNLOCK(sc); - return (ENOSPC); + return (EXTERROR(ENOSPC, + "Interface address limit exceeded")); } /* @@ -3283,7 +3333,8 @@ bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst, brt = uma_zalloc(V_bridge_rtnode_zone, M_NOWAIT | M_ZERO); if (brt == NULL) { BRIDGE_RT_UNLOCK(sc); - return (ENOMEM); + return (EXTERROR(ENOMEM, + "Cannot allocate address node")); } brt->brt_vnet = curvnet; @@ -3631,7 +3682,7 @@ bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt) do { dir = bridge_rtnode_addr_cmp(brt->brt_addr, lbrt->brt_addr); if (dir == 0 && brt->brt_vlan == lbrt->brt_vlan) - return (EEXIST); + return (EXTERROR(EEXIST, "Address already exists")); if (dir > 0) { CK_LIST_INSERT_BEFORE(lbrt, brt, brt_hash); goto out; diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h index 97b63e3d4416..c458dcc152a0 100644 --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -124,7 +124,7 @@ #define BRDGSPROTO 28 /* set protocol (ifbrparam) */ #define BRDGSTXHC 29 /* set tx hold count (ifbrparam) */ #define BRDGSIFAMAX 30 /* set max interface addrs (ifbreq) */ -#define BRDGSIFUNTAGGED 31 /* set if untagged vlan */ +#define BRDGSIFPVID 31 /* set if PVID */ #define BRDGSIFVLANSET 32 /* set if vlan set */ #define BRDGGIFVLANSET 33 /* get if vlan set */ @@ -144,7 +144,7 @@ struct ifbreq { uint32_t ifbr_addrcnt; /* member if addr number */ uint32_t ifbr_addrmax; /* member if addr max */ uint32_t ifbr_addrexceeded; /* member if addr violations */ - ether_vlanid_t ifbr_untagged; /* member if untagged vlan */ + ether_vlanid_t ifbr_pvid; /* member if PVID */ uint8_t pad[32]; }; diff --git a/sys/net/if_ovpn.c b/sys/net/if_ovpn.c index 853a0556a080..fe3e7bbd7fff 100644 --- a/sys/net/if_ovpn.c +++ b/sys/net/if_ovpn.c @@ -34,11 +34,13 @@ #include <sys/epoch.h> #include <sys/file.h> #include <sys/filedesc.h> +#include <sys/jail.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/module.h> #include <sys/nv.h> +#include <sys/osd.h> #include <sys/priv.h> #include <sys/protosw.h> #include <sys/rmlock.h> @@ -131,6 +133,9 @@ struct ovpn_notification { /* Delete notification */ enum ovpn_del_reason del_reason; struct ovpn_peer_counters counters; + + /* Float notification */ + struct sockaddr_storage address; }; struct ovpn_softc; @@ -195,6 +200,10 @@ struct ovpn_softc { struct epoch_context epoch_ctx; }; +struct ovpn_mtag { + struct sockaddr_storage addr; +}; + static struct ovpn_kpeer *ovpn_find_peer(struct ovpn_softc *, uint32_t); static bool ovpn_udp_input(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void *); @@ -206,6 +215,8 @@ static void ovpn_free_kkey_dir(struct ovpn_kkey_dir *); static bool ovpn_check_replay(struct ovpn_kkey_dir *, uint32_t); static int ovpn_peer_compare(const struct ovpn_kpeer *, const struct ovpn_kpeer *); +static bool ovpn_sockaddr_compare(const struct sockaddr *, + const struct sockaddr *); static RB_PROTOTYPE(ovpn_kpeers, ovpn_kpeer, tree, ovpn_peer_compare); static RB_GENERATE(ovpn_kpeers, ovpn_kpeer, tree, ovpn_peer_compare); @@ -283,6 +294,43 @@ ovpn_peer_compare(const struct ovpn_kpeer *a, const struct ovpn_kpeer *b) return (a->peerid - b->peerid); } +static bool +ovpn_sockaddr_compare(const struct sockaddr *a, + const struct sockaddr *b) +{ + if (a->sa_family != b->sa_family) + return (false); + MPASS(a->sa_len == b->sa_len); + + switch (a->sa_family) { + case AF_INET: { + const struct sockaddr_in *a4, *b4; + + a4 = (const struct sockaddr_in *)a; + b4 = (const struct sockaddr_in *)b; + + if (a4->sin_port != b4->sin_port) + return (false); + + return (a4->sin_addr.s_addr == b4->sin_addr.s_addr); + } + case AF_INET6: { + const struct sockaddr_in6 *a6, *b6; + + a6 = (const struct sockaddr_in6 *)a; + b6 = (const struct sockaddr_in6 *)b; + + if (a6->sin6_port != b6->sin6_port) + return (false); + + return (memcmp(&a6->sin6_addr, &b6->sin6_addr, + sizeof(a6->sin6_addr)) == 0); + } + default: + panic("Unknown address family %d", a->sa_family); + } +} + static struct ovpn_kpeer * ovpn_find_peer(struct ovpn_softc *sc, uint32_t peerid) { @@ -394,6 +442,44 @@ ovpn_nvlist_to_sockaddr(const nvlist_t *nvl, struct sockaddr_storage *sa) return (0); } +static int +ovpn_add_sockaddr(nvlist_t *parent, const char *name, const struct sockaddr *s) +{ + nvlist_t *nvl; + + nvl = nvlist_create(0); + if (nvl == NULL) + return (ENOMEM); + + nvlist_add_number(nvl, "af", s->sa_family); + + switch (s->sa_family) { + case AF_INET: { + const struct sockaddr_in *s4 = (const struct sockaddr_in *)s; + + nvlist_add_number(nvl, "port", s4->sin_port); + nvlist_add_binary(nvl, "address", &s4->sin_addr, + sizeof(s4->sin_addr)); + break; + } + case AF_INET6: { + const struct sockaddr_in6 *s6 = (const struct sockaddr_in6 *)s; + + nvlist_add_number(nvl, "port", s6->sin6_port); + nvlist_add_binary(nvl, "address", &s6->sin6_addr, + sizeof(s6->sin6_addr)); + break; + } + default: + nvlist_destroy(nvl); + return (EINVAL); + } + + nvlist_move_nvlist(parent, name, nvl); + + return (0); +} + static void ovpn_notify_del_peer(struct ovpn_softc *sc, struct ovpn_kpeer *peer) { @@ -446,6 +532,33 @@ ovpn_notify_key_rotation(struct ovpn_softc *sc, struct ovpn_kpeer *peer) } } +static int +ovpn_notify_float(struct ovpn_softc *sc, uint32_t peerid, + const struct sockaddr_storage *remote) +{ + struct ovpn_notification *n; + + n = malloc(sizeof(*n), M_OVPN, M_NOWAIT | M_ZERO); + if (n == NULL) + return (ENOMEM); + + n->peerid = peerid; + n->type = OVPN_NOTIF_FLOAT; + memcpy(&n->address, remote, sizeof(n->address)); + + if (buf_ring_enqueue(sc->notifring, n) != 0) { + free(n, M_OVPN); + return (ENOMEM); + } else if (sc->so != NULL) { + /* Wake up userspace */ + sc->so->so_error = EAGAIN; + sorwakeup(sc->so); + sowwakeup(sc->so); + } + + return (0); +} + static void ovpn_peer_release_ref(struct ovpn_kpeer *peer, bool locked) { @@ -1377,12 +1490,36 @@ opvn_get_pkt(struct ovpn_softc *sc, nvlist_t **onvl) } nvlist_add_number(nvl, "peerid", n->peerid); nvlist_add_number(nvl, "notification", n->type); - if (n->type == OVPN_NOTIF_DEL_PEER) { + switch (n->type) { + case OVPN_NOTIF_DEL_PEER: { nvlist_add_number(nvl, "del_reason", n->del_reason); /* No error handling, because we want to send the notification * even if we can't attach the counters. */ ovpn_notif_add_counters(nvl, n); + break; + } + case OVPN_NOTIF_FLOAT: { + int ret; + + ret = ovpn_add_sockaddr(nvl, "address", + (struct sockaddr *)&n->address); + + if (ret) { + /* + * Try to re-enqueue the notification. Maybe we'll + * have better luck next time. No error handling, + * because if we fail to re-enqueue there's nothing we can do. + */ + (void)ovpn_notify_float(sc, n->peerid, &n->address); + nvlist_destroy(nvl); + free(n, M_OVPN); + return (ret); + } + break; + } + default: + break; } free(n, M_OVPN); @@ -1538,6 +1675,7 @@ ovpn_finish_rx(struct ovpn_softc *sc, struct mbuf *m, struct rm_priotracker *_ovpn_lock_trackerp) { uint32_t af; + struct m_tag *mtag; OVPN_RASSERT(sc); NET_EPOCH_ASSERT(); @@ -1556,6 +1694,38 @@ ovpn_finish_rx(struct ovpn_softc *sc, struct mbuf *m, OVPN_RUNLOCK(sc); + /* Check if the peer changed to a new source address. */ + mtag = m_tag_find(m, PACKET_TAG_OVPN, NULL); + if (mtag != NULL) { + struct ovpn_mtag *ot = (struct ovpn_mtag *)(mtag + 1); + + OVPN_WLOCK(sc); + + /* + * Check the address against the peer's remote again, because we may race + * against ourselves (i.e. we may have tagged multiple packets to indicate we + * floated). + */ + if (ovpn_sockaddr_compare((struct sockaddr *)&ot->addr, + (struct sockaddr *)&peer->remote)) { + OVPN_WUNLOCK(sc); + goto skip_float; + } + + /* And notify userspace. */ + if (ovpn_notify_float(sc, peer->peerid, &ot->addr) == 0) { + /* + * Update the 'remote' for this peer, but only if + * we've actually enqueued the notification. + * Otherwise we can try again later. + */ + memcpy(&peer->remote, &ot->addr, sizeof(peer->remote)); + } + + OVPN_WUNLOCK(sc); + } + +skip_float: OVPN_COUNTER_ADD(sc, received_data_pkts, 1); OVPN_COUNTER_ADD(sc, tunnel_bytes_received, m->m_pkthdr.len); OVPN_PEER_COUNTER_ADD(peer, pkt_in, 1); @@ -2318,6 +2488,29 @@ ovpn_udp_input(struct mbuf *m, int off, struct inpcb *inp, return (true); } + /* + * If we got this from a different address than we expected tag the packet. + * We'll deal with notifiying userspace later, after we've decrypted and + * verified. + */ + if (! ovpn_sockaddr_compare((struct sockaddr *)&peer->remote, sa)) { + struct m_tag *mt; + struct ovpn_mtag *ot; + + MPASS(sa->sa_len <= sizeof(ot->addr)); + mt = m_tag_get(PACKET_TAG_OVPN, sizeof(*ot), M_NOWAIT); + /* + * If we fail to allocate here we'll just try again on the next + * packet. + */ + if (mt != NULL) { + ot = (struct ovpn_mtag *)(mt + 1); + memcpy(&ot->addr, sa, sa->sa_len); + + m_tag_prepend(m, mt); + } + } + if (key->decrypt->cipher == OVPN_CIPHER_ALG_NONE) { /* Now remove the outer headers */ m_adj_decap(m, sizeof(struct udphdr) + ohdrlen); @@ -2593,23 +2786,53 @@ vnet_ovpn_init(const void *unused __unused) VNET_SYSINIT(vnet_ovpn_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_ovpn_init, NULL); -static void -vnet_ovpn_uninit(const void *unused __unused) +static int +ovpn_prison_remove(void *obj, void *data __unused) { - if_clone_detach(V_ovpn_cloner); +#ifdef VIMAGE + struct prison *pr; + + pr = obj; + if (prison_owns_vnet(pr)) { + CURVNET_SET(pr->pr_vnet); + if (V_ovpn_cloner != NULL) { + ifc_detach_cloner(V_ovpn_cloner); + V_ovpn_cloner = NULL; + } + CURVNET_RESTORE(); + } +#endif + return (0); } -VNET_SYSUNINIT(vnet_ovpn_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, - vnet_ovpn_uninit, NULL); static int ovpnmodevent(module_t mod, int type, void *data) { + static int ovpn_osd_jail_slot; + switch (type) { - case MOD_LOAD: - /* Done in vnet_ovpn_init() */ + case MOD_LOAD: { + /* + * Registration is handled in vnet_ovpn_init(), but cloned + * interfaces must be destroyed via PR_METHOD_REMOVE since they + * hold a reference to the prison via the UDP socket, which + * prevents the prison from being destroyed. + */ + osd_method_t methods[PR_MAXMETHOD] = { + [PR_METHOD_REMOVE] = ovpn_prison_remove, + }; + ovpn_osd_jail_slot = osd_jail_register(NULL, methods); break; + } case MOD_UNLOAD: - /* Done in vnet_ovpn_uninit() */ + if (ovpn_osd_jail_slot != 0) + osd_jail_deregister(ovpn_osd_jail_slot); + CURVNET_SET(vnet0); + if (V_ovpn_cloner != NULL) { + ifc_detach_cloner(V_ovpn_cloner); + V_ovpn_cloner = NULL; + } + CURVNET_RESTORE(); break; default: return (EOPNOTSUPP); diff --git a/sys/net/if_ovpn.h b/sys/net/if_ovpn.h index 2d6b8c1e7eff..2a24c35788a9 100644 --- a/sys/net/if_ovpn.h +++ b/sys/net/if_ovpn.h @@ -37,6 +37,7 @@ enum ovpn_notif_type { OVPN_NOTIF_DEL_PEER, OVPN_NOTIF_ROTATE_KEY, + OVPN_NOTIF_FLOAT, }; enum ovpn_del_reason { diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 3bab04aa4d38..5e6f65c04b2f 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -74,6 +74,7 @@ #include <sys/malloc.h> #include <sys/random.h> #include <sys/ctype.h> +#include <sys/osd.h> #include <net/ethernet.h> #include <net/if.h> @@ -178,6 +179,7 @@ struct tuntap_softc { static struct mtx tunmtx; static eventhandler_tag arrival_tag; static eventhandler_tag clone_tag; +static int tuntap_osd_jail_slot; static const char tunname[] = "tun"; static const char tapname[] = "tap"; static const char vmnetname[] = "vmnet"; @@ -497,6 +499,10 @@ vmnet_clone_match(struct if_clone *ifc, const char *name) return (0); } +/* + * Create a clone via the ifnet cloning mechanism. Note that this is invoked + * indirectly by tunclone() below. + */ static int tun_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) @@ -532,15 +538,19 @@ tun_clone_create(struct if_clone *ifc, char *name, size_t len, if (i != 0) i = tun_create_device(drv, unit, NULL, &dev, name); if (i == 0) { - dev_ref(dev); + struct tuntap_softc *tp; + tuncreate(dev); - struct tuntap_softc *tp = dev->si_drv1; + tp = dev->si_drv1; *ifpp = tp->tun_ifp; } return (i); } +/* + * Create a clone via devfs access. + */ static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) @@ -595,11 +605,12 @@ tunclone(void *arg, struct ucred *cred, char *name, int namelen, } i = tun_create_device(drv, u, cred, dev, name); - } - if (i == 0) { + } else { + /* Consumed by the dev_clone invoker. */ dev_ref(*dev); - if_clone_create(name, namelen, NULL); } + if (i == 0) + if_clone_create(name, namelen, NULL); out: CURVNET_RESTORE(); } @@ -670,16 +681,6 @@ VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_tun_init, NULL); static void -vnet_tun_uninit(const void *unused __unused) -{ - - for (u_int i = 0; i < NDRV; ++i) - if_clone_detach(V_tuntap_driver_cloners[i]); -} -VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, - vnet_tun_uninit, NULL); - -static void tun_uninit(const void *unused __unused) { struct tuntap_driver *drv; @@ -689,6 +690,16 @@ tun_uninit(const void *unused __unused) EVENTHANDLER_DEREGISTER(ifnet_arrival_event, arrival_tag); EVENTHANDLER_DEREGISTER(dev_clone, clone_tag); + CURVNET_SET(vnet0); + for (u_int i = 0; i < NDRV; i++) { + if_clone_detach(V_tuntap_driver_cloners[i]); + V_tuntap_driver_cloners[i] = NULL; + } + CURVNET_RESTORE(); + + if (tuntap_osd_jail_slot != 0) + osd_jail_deregister(tuntap_osd_jail_slot); + mtx_lock(&tunmtx); while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { TAILQ_REMOVE(&tunhead, tp, tun_list); @@ -724,6 +735,30 @@ tuntap_driver_from_ifnet(const struct ifnet *ifp) return (NULL); } +/* + * Remove devices that were created by devfs cloning, as they hold references + * which prevent the prison from collapsing, in which state VNET sysuninits will + * not be invoked. + */ +static int +tuntap_prison_remove(void *obj, void *data __unused) +{ +#ifdef VIMAGE + struct prison *pr; + + pr = obj; + if (prison_owns_vnet(pr)) { + CURVNET_SET(pr->pr_vnet); + for (u_int i = 0; i < NDRV; i++) { + if_clone_detach(V_tuntap_driver_cloners[i]); + V_tuntap_driver_cloners[i] = NULL; + } + CURVNET_RESTORE(); + } +#endif + return (0); +} + static int tuntapmodevent(module_t mod, int type, void *data) { @@ -738,8 +773,12 @@ tuntapmodevent(module_t mod, int type, void *data) clone_setup(&drv->clones); drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx); } + osd_method_t methods[PR_MAXMETHOD] = { + [PR_METHOD_REMOVE] = tuntap_prison_remove, + }; + tuntap_osd_jail_slot = osd_jail_register(NULL, methods); arrival_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event, - tunrename, 0, 1000); + tunrename, 0, 1000); if (arrival_tag == NULL) return (ENOMEM); clone_tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); @@ -747,7 +786,7 @@ tuntapmodevent(module_t mod, int type, void *data) return (ENOMEM); break; case MOD_UNLOAD: - /* See tun_uninit, so it's done after the vnet_sysuninit() */ + /* See tun_uninit(). */ break; default: return EOPNOTSUPP; @@ -798,6 +837,8 @@ tun_create_device(struct tuntap_driver *drv, int unit, struct ucred *cr, args.mda_si_drv1 = tp; error = make_dev_s(&args, dev, "%s", name); if (error != 0) { + mtx_destroy(&tp->tun_mtx); + cv_destroy(&tp->tun_cv); free(tp, M_TUN); return (error); } @@ -914,7 +955,6 @@ tap_transmit(struct ifnet *ifp, struct mbuf *m) return (error); } -/* XXX: should return an error code so it can fail. */ static void tuncreate(struct cdev *dev) { diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index 22fcb7bf7c64..61000018e5a4 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -2336,6 +2336,18 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = ENOENT; break; } + + /* + * If the ifp is in a bridge, do not allow setting the device + * to a bridge; this prevents having a bridge SVI as a bridge + * member (which is not permitted). + */ + if (ifp->if_bridge != NULL && p->if_type == IFT_BRIDGE) { + if_rele(p); + error = EINVAL; + break; + } + if (vlr.vlr_proto == 0) vlr.vlr_proto = ETHERTYPE_VLAN; oldmtu = ifp->if_mtu; diff --git a/sys/netinet/sctp_timer.c b/sys/netinet/sctp_timer.c index 66af716eea52..7d8cb965ab09 100644 --- a/sys/netinet/sctp_timer.c +++ b/sys/netinet/sctp_timer.c @@ -35,7 +35,6 @@ #define _IP_VHL #include <netinet/sctp_os.h> #include <netinet/sctp_pcb.h> - #include <netinet/sctp_var.h> #include <netinet/sctp_sysctl.h> #include <netinet/sctp_timer.h> diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c index 0987ea7e99ad..08702a2e81ab 100644 --- a/sys/netinet6/scope6.c +++ b/sys/netinet6/scope6.c @@ -505,8 +505,23 @@ in6_set_unicast_scopeid(struct in6_addr *in6, uint32_t scopeid) struct ifnet* in6_getlinkifnet(uint32_t zoneid) { + struct ifnet *ifp; - return (ifnet_byindex((u_short)zoneid)); + ifp = ifnet_byindex((u_short)zoneid); + + if (ifp == NULL) + return (NULL); + + /* An interface might not be IPv6 capable. */ + if (ifp->if_afdata[AF_INET6] == NULL) { + log(LOG_NOTICE, + "%s: embedded scope points to an interface without " + "IPv6: %s%%%d.\n", __func__, + if_name(ifp), zoneid); + return (NULL); + } + + return (ifp); } /* diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 41e9ca27912d..79c298c18b46 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -7399,7 +7399,7 @@ pf_sctp_multihome_delayed(struct pf_pdesc *pd, struct pfi_kkif *kif, { struct pf_sctp_multihome_job *j, *tmp; struct pf_sctp_source *i; - int ret __unused; + int ret; struct pf_kstate *sm = NULL; struct pf_krule *ra = NULL; struct pf_krule *r = &V_pf_default_rule; diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index ea9f7fe441c6..9abc07c36788 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -2092,19 +2092,18 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, int rs_num; int error = 0; - if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) { - error = EINVAL; - goto errout_unlocked; - } +#define ERROUT(x) ERROUT_FUNCTION(errout, x) +#define ERROUT_UNLOCKED(x) ERROUT_FUNCTION(errout_unlocked, x) -#define ERROUT(x) ERROUT_FUNCTION(errout, x) + if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) + ERROUT_UNLOCKED(EINVAL); if ((error = pf_rule_checkaf(rule))) - ERROUT(error); + ERROUT_UNLOCKED(error); if (pf_validate_range(rule->src.port_op, rule->src.port)) - ERROUT(EINVAL); + ERROUT_UNLOCKED(EINVAL); if (pf_validate_range(rule->dst.port_op, rule->dst.port)) - ERROUT(EINVAL); + ERROUT_UNLOCKED(EINVAL); if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); @@ -2294,6 +2293,7 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, return (0); #undef ERROUT +#undef ERROUT_UNLOCKED errout: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); diff --git a/sys/riscv/include/vmm_dev.h b/sys/riscv/include/vmm_dev.h index 856ff0778b95..4d30d5a1c35b 100644 --- a/sys/riscv/include/vmm_dev.h +++ b/sys/riscv/include/vmm_dev.h @@ -34,6 +34,8 @@ #ifndef _VMM_DEV_H_ #define _VMM_DEV_H_ +#include <sys/domainset.h> + #include <machine/vmm.h> struct vm_memmap { @@ -56,6 +58,9 @@ struct vm_memseg { int segid; size_t len; char name[VM_MAX_SUFFIXLEN + 1]; + domainset_t *ds_mask; + size_t ds_mask_size; + int ds_policy; }; struct vm_register { diff --git a/sys/sys/compressor.h b/sys/sys/compressor.h index cad9080b46ff..e59eeabec2cd 100644 --- a/sys/sys/compressor.h +++ b/sys/sys/compressor.h @@ -42,6 +42,7 @@ struct compressor; bool compressor_avail(int format); struct compressor *compressor_init(compressor_cb_t cb, int format, size_t maxiosize, int level, void *arg); +int compressor_format(const struct compressor *stream); void compressor_reset(struct compressor *stream); int compressor_write(struct compressor *stream, void *data, size_t len); diff --git a/sys/sys/domainset.h b/sys/sys/domainset.h index f98b175e9bc8..f3dc92ec6383 100644 --- a/sys/sys/domainset.h +++ b/sys/sys/domainset.h @@ -113,6 +113,20 @@ void domainset_zero(void); * returned value will not match the key pointer. */ struct domainset *domainset_create(const struct domainset *); + +/* + * Remove empty domains from a given domainset. + * Returns 'false' if the domainset consists entirely of empty domains. + */ +bool domainset_empty_vm(struct domainset *domain); + +/* + * Validate and populate a domainset structure according to the specified + * policy and mask. + */ +int domainset_populate(struct domainset *domain, const domainset_t *mask, int policy, + size_t mask_size); + #ifdef _SYS_SYSCTL_H_ int sysctl_handle_domainset(SYSCTL_HANDLER_ARGS); #endif diff --git a/sys/sys/exec.h b/sys/sys/exec.h index 4bf114a7c698..580a5372c4db 100644 --- a/sys/sys/exec.h +++ b/sys/sys/exec.h @@ -57,16 +57,6 @@ struct ps_strings { unsigned int ps_nenvstr; /* the number of environment strings */ }; -/* Coredump output parameters. */ -struct coredump_params { - off_t offset; - struct ucred *active_cred; - struct ucred *file_cred; - struct thread *td; - struct vnode *vp; - struct compressor *comp; -}; - struct image_params; struct execsw { @@ -105,16 +95,6 @@ int exec_unregister(const struct execsw *); enum uio_seg; -#define CORE_BUF_SIZE (16 * 1024) - -int core_write(struct coredump_params *, const void *, size_t, off_t, - enum uio_seg, size_t *); -int core_output(char *, size_t, off_t, struct coredump_params *, void *); -int sbuf_drain_core_output(void *, const char *, int); - -extern int coredump_pack_fileinfo; -extern int coredump_pack_vmmapinfo; - /* * note: name##_mod cannot be const storage because the * linker_file_sysinit() function modifies _file in the diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h index cab94ac511a5..80cff53b3576 100644 --- a/sys/sys/exterr_cat.h +++ b/sys/sys/exterr_cat.h @@ -18,6 +18,8 @@ #define EXTERR_CAT_FUSE 4 #define EXTERR_CAT_INOTIFY 5 #define EXTERR_CAT_GENIO 6 +#define EXTERR_CAT_BRIDGE 7 +#define EXTERR_CAT_SWAP 8 #endif diff --git a/sys/sys/imgact_elf.h b/sys/sys/imgact_elf.h index c9444e5aec41..2845a9dbc1e2 100644 --- a/sys/sys/imgact_elf.h +++ b/sys/sys/imgact_elf.h @@ -45,6 +45,7 @@ {(pos)->a_type = (id); (pos)->a_un.a_ptr = (ptr); (pos)++;} #endif +struct coredump_writer; struct image_params; struct thread; struct vnode; @@ -114,7 +115,7 @@ bool __elfN(brand_inuse)(Elf_Brandinfo *entry); int __elfN(insert_brand_entry)(Elf_Brandinfo *entry); int __elfN(remove_brand_entry)(Elf_Brandinfo *entry); int __elfN(freebsd_fixup)(uintptr_t *, struct image_params *); -int __elfN(coredump)(struct thread *, struct vnode *, off_t, int); +int __elfN(coredump)(struct thread *, struct coredump_writer *, off_t, int); size_t __elfN(populate_note)(int, void *, void *, size_t, void **); int __elfN(freebsd_copyout_auxargs)(struct image_params *, uintptr_t); void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t, int); diff --git a/sys/sys/jail.h b/sys/sys/jail.h index 08caa9f49270..24c420e2c976 100644 --- a/sys/sys/jail.h +++ b/sys/sys/jail.h @@ -435,7 +435,7 @@ void prison0_init(void); bool prison_allow(struct ucred *, unsigned); int prison_check(struct ucred *cred1, struct ucred *cred2); bool prison_check_nfsd(struct ucred *cred); -bool prison_owns_vnet(struct ucred *); +bool prison_owns_vnet(struct prison *pr); int prison_canseemount(struct ucred *cred, struct mount *mp); void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index c75094aea450..304bd019c9fc 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1391,6 +1391,7 @@ extern bool mb_use_ext_pgs; /* Use ext_pgs for sendfile */ #define PACKET_TAG_PF_REASSEMBLED 31 #define PACKET_TAG_IPSEC_ACCEL_OUT 32 /* IPSEC accel out */ #define PACKET_TAG_IPSEC_ACCEL_IN 33 /* IPSEC accel in */ +#define PACKET_TAG_OVPN 34 /* if_ovpn */ /* Specific cookies and tags. */ diff --git a/sys/sys/param.h b/sys/sys/param.h index f941f021a423..33d61e8a1619 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1500054 +#define __FreeBSD_version 1500055 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index 23e8426b26ee..8f181b7beee6 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -403,6 +403,7 @@ int sigev_findtd(struct proc *p, struct sigevent *sigev, struct thread **); void sigfastblock_clear(struct thread *td); void sigfastblock_fetch(struct thread *td); int sig_intr(void); +bool sig_do_core(int); void siginit(struct proc *p); void signotify(struct thread *td); void sigqueue_delete(struct sigqueue *queue, int sig); diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index fd183ffbc7a4..8237165b84ce 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -60,6 +60,7 @@ struct rusage; struct sched_param; struct sembuf; union semun; +struct shmfd; struct sockaddr; struct spacectl_range; struct stat; @@ -337,7 +338,7 @@ int kern_shm_open(struct thread *td, const char *userpath, int flags, mode_t mode, struct filecaps *fcaps); int kern_shm_open2(struct thread *td, const char *path, int flags, mode_t mode, int shmflags, struct filecaps *fcaps, - const char *name); + const char *name, struct shmfd *shmfd); int kern_shmat(struct thread *td, int shmid, const void *shmaddr, int shmflg); int kern_shmctl(struct thread *td, int shmid, int cmd, void *buf, diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 4ddfc8516053..1714fa5a7416 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -90,6 +90,7 @@ struct sysent { /* system call table */ #define SY_THR_STATIC_KLD SY_THR_STATIC #endif +struct coredump_writer; struct image_params; struct proc; struct __sigset; @@ -108,7 +109,8 @@ struct sysentvec { int *sv_szsigcode; /* size of sigtramp code */ int sv_sigcodeoff; char *sv_name; /* name of binary type */ - int (*sv_coredump)(struct thread *, struct vnode *, off_t, int); + int (*sv_coredump)(struct thread *, struct coredump_writer *, + off_t, int); /* function to dump core, or NULL */ int sv_elf_core_osabi; const char *sv_elf_core_abi_vendor; diff --git a/sys/sys/ucoredump.h b/sys/sys/ucoredump.h new file mode 100644 index 000000000000..0a51ee7f50c8 --- /dev/null +++ b/sys/sys/ucoredump.h @@ -0,0 +1,99 @@ +/* + * + * Copyright (c) 2015 Mark Johnston <markj@FreeBSD.org> + * Copyright (c) 2025 Kyle Evans <kevans@FreeBSD.org> + * + * SPDX-License-Identifier: BSD-2-Clause + * + */ + +#ifndef _SYS_UCOREDUMP_H_ +#define _SYS_UCOREDUMP_H_ + +#ifdef _KERNEL + +#include <sys/_uio.h> +#include <sys/blockcount.h> +#include <sys/queue.h> + +/* Coredump output parameters. */ +struct coredump_params; +struct coredump_writer; +struct thread; +struct ucred; + +typedef int coredump_init_fn(const struct coredump_writer *, + const struct coredump_params *); +typedef int coredump_write_fn(const struct coredump_writer *, const void *, size_t, + off_t, enum uio_seg, struct ucred *, size_t *, struct thread *); +typedef int coredump_extend_fn(const struct coredump_writer *, off_t, + struct ucred *); + +struct coredump_vnode_ctx { + struct vnode *vp; + struct ucred *fcred; +}; + +coredump_write_fn core_vn_write; +coredump_extend_fn core_vn_extend; + +struct coredump_writer { + void *ctx; + coredump_init_fn *init_fn; + coredump_write_fn *write_fn; + coredump_extend_fn *extend_fn; +}; + +struct coredump_params { + off_t offset; + struct ucred *active_cred; + struct thread *td; + const struct coredump_writer *cdw; + struct compressor *comp; +}; + +#define CORE_BUF_SIZE (16 * 1024) + +int core_write(struct coredump_params *, const void *, size_t, off_t, + enum uio_seg, size_t *); +int core_output(char *, size_t, off_t, struct coredump_params *, void *); +int sbuf_drain_core_output(void *, const char *, int); + +extern int coredump_pack_fileinfo; +extern int coredump_pack_vmmapinfo; + +extern int compress_user_cores; +extern int compress_user_cores_level; + +typedef int coredumper_probe_fn(struct thread *); + +/* + * Some arbitrary values for coredumper probes to return. The highest priority + * we can find wins. It's somewhat expected that a coredumper may want to bid + * differently based on the process in question. Note that probe functions will + * be called with the proc lock held, so they must not sleep. + */ +#define COREDUMPER_NOMATCH (-1) /* Decline to touch it */ +#define COREDUMPER_GENERIC (0) /* I handle coredumps */ +#define COREDUMPER_SPECIAL (50) /* Special handler */ +#define COREDUMPER_HIGH_PRIORITY (100) /* High-priority handler */ + +/* + * The handle functions will be called with the proc lock held, and should + * return with the proc lock dropped. + */ +typedef int coredumper_handle_fn(struct thread *, off_t); + +struct coredumper { + SLIST_ENTRY(coredumper) cd_entry; + const char *cd_name; + coredumper_probe_fn *cd_probe; + coredumper_handle_fn *cd_handle; + blockcount_t cd_refcount; +}; + +void coredumper_register(struct coredumper *); +void coredumper_unregister(struct coredumper *); + +#endif /* _KERNEL */ +#endif /* _SYS_UCOREDUMP_H_ */ diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index c291c1dc2b95..85ed93fd359d 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -156,6 +156,8 @@ #define _PC_DEALLOC_PRESENT 65 #define _PC_NAMEDATTR_ENABLED 66 #define _PC_HAS_NAMEDATTR 67 +#define _PC_XATTR_ENABLED _PC_NAMEDATTR_ENABLED /* Solaris Compatible */ +#define _PC_XATTR_EXISTS _PC_HAS_NAMEDATTR /* Solaris Compatible */ #define _PC_HAS_HIDDENSYSTEM 68 #endif diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 17308706c3f4..b7453db9013c 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1268,7 +1268,8 @@ ufs_rename( struct inode *fip, *tip, *tdp, *fdp; struct direct newdir; off_t endoff; - int doingdirectory, newparent; + int doingdirectory; + u_int newparent; int error = 0; struct mount *mp; ino_t ino; @@ -1475,7 +1476,7 @@ relock: * the user must have write permission in the source so * as to be able to change "..". */ - if (doingdirectory && newparent) { + if (doingdirectory && newparent != 0) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread); if (error) goto unlockout; @@ -1538,7 +1539,7 @@ relock: if (tip == NULL) { if (ITODEV(tdp) != ITODEV(fip)) panic("ufs_rename: EXDEV"); - if (doingdirectory && newparent) { + if (doingdirectory && newparent != 0) { /* * Account for ".." in new directory. * When source and destination have the same @@ -1631,7 +1632,7 @@ relock: goto bad; } if (doingdirectory) { - if (!newparent) { + if (newparent == 0) { tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); @@ -1641,11 +1642,11 @@ relock: softdep_change_linkcnt(tip); } error = ufs_dirrewrite(tdp, tip, fip->i_number, - IFTODT(fip->i_mode), - (doingdirectory && newparent) ? newparent : doingdirectory); + IFTODT(fip->i_mode), (doingdirectory && newparent != 0) ? + newparent : doingdirectory); if (error) { if (doingdirectory) { - if (!newparent) { + if (newparent == 0) { tdp->i_effnlink++; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); @@ -1668,7 +1669,7 @@ relock: * disk, so when running with that code we avoid doing * them now. */ - if (!newparent) { + if (newparent == 0) { tdp->i_nlink--; DIP_SET_NLINK(tdp, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); @@ -1697,7 +1698,7 @@ relock: * parent directory must be decremented * and ".." set to point to the new parent. */ - if (doingdirectory && newparent) { + if (doingdirectory && newparent != 0) { /* * Set the directory depth based on its new parent. */ diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index d6bd06226d04..327cac661044 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -65,9 +65,9 @@ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ */ -#include <sys/cdefs.h> #include "opt_vm.h" +#define EXTERR_CATEGORY EXTERR_CAT_SWAP #include <sys/param.h> #include <sys/bio.h> #include <sys/blist.h> @@ -76,6 +76,7 @@ #include <sys/disk.h> #include <sys/disklabel.h> #include <sys/eventhandler.h> +#include <sys/exterrvar.h> #include <sys/fcntl.h> #include <sys/limits.h> #include <sys/lock.h> @@ -2686,7 +2687,7 @@ swapon_check_swzone(void) } } -static void +static int swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strategy, sw_close_t *close, dev_t dev, int flags) { @@ -2701,6 +2702,8 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, */ nblks &= ~(ctodb(1) - 1); nblks = dbtoc(nblks); + if (nblks == 0) + return (EXTERROR(EINVAL, "swap device too small")); sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO); sp->sw_blist = blist_create(nblks, M_WAITOK); @@ -2742,6 +2745,8 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, swp_sizecheck(); mtx_unlock(&sw_dev_mtx); EVENTHANDLER_INVOKE(swapon, sp); + + return (0); } /* @@ -3286,10 +3291,10 @@ swapongeom_locked(struct cdev *dev, struct vnode *vp) return (error); } nblks = pp->mediasize / DEV_BSIZE; - swaponsomething(vp, cp, nblks, swapgeom_strategy, + error = swaponsomething(vp, cp, nblks, swapgeom_strategy, swapgeom_close, dev2udev(dev), (pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 ? SW_UNMAPPED : 0); - return (0); + return (error); } static int @@ -3378,9 +3383,9 @@ swaponvp(struct thread *td, struct vnode *vp, u_long nblks) if (error != 0) return (error); - swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, + error = swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, NODEV, 0); - return (0); + return (error); } static int diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index bbae55895c2c..b239a6ffb4ce 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -396,7 +396,7 @@ vm_page_blacklist_load(char **list, char **end) } *list = ptr; if (ptr != NULL) - *end = ptr + len; + *end = ptr + len - 1; else *end = NULL; return; diff --git a/targets/pseudo/userland/misc/Makefile.depend b/targets/pseudo/userland/misc/Makefile.depend index d3c97fc56b40..546800004d11 100644 --- a/targets/pseudo/userland/misc/Makefile.depend +++ b/targets/pseudo/userland/misc/Makefile.depend @@ -54,7 +54,6 @@ DIRDEPS.x86sys= \ .if ${MK_ZFS} != "no" DIRDEPS.x86sys+= \ stand/i386/gptzfsboot \ - stand/i386/zfsboot \ stand/i386/zfsloader \ DIRDEPS+= \ diff --git a/tests/sys/net/if_bridge_test.sh b/tests/sys/net/if_bridge_test.sh index cd38adea28ad..c0c085f22273 100755 --- a/tests/sys/net/if_bridge_test.sh +++ b/tests/sys/net/if_bridge_test.sh @@ -1221,6 +1221,29 @@ vlan_qinq_cleanup() vnet_cleanup } +# Adding a bridge SVI to a bridge should not be allowed. +atf_test_case "bridge_svi_in_bridge" "cleanup" +bridge_svi_in_bridge_head() +{ + atf_set descr 'adding a bridge SVI to a bridge is not allowed (1)' + atf_set require.user root +} + +bridge_svi_in_bridge_body() +{ + vnet_init + vnet_init_bridge + + bridge=$(vnet_mkbridge) + atf_check -s exit:0 ifconfig ${bridge}.1 create + atf_check -s exit:1 -e ignore ifconfig ${bridge} addm ${bridge}.1 +} + +bridge_svi_in_bridge_cleanup() +{ + vnet_cleanup +} + atf_init_test_cases() { atf_add_test_case "bridge_transmit_ipv4_unicast" @@ -1247,4 +1270,5 @@ atf_init_test_cases() atf_add_test_case "vlan_ifconfig_tagged" atf_add_test_case "vlan_svi" atf_add_test_case "vlan_qinq" + atf_add_test_case "bridge_svi_in_bridge" } diff --git a/tests/sys/net/if_ovpn/if_ovpn.sh b/tests/sys/net/if_ovpn/if_ovpn.sh index 26807a095455..c42344da1a3b 100644 --- a/tests/sys/net/if_ovpn/if_ovpn.sh +++ b/tests/sys/net/if_ovpn/if_ovpn.sh @@ -1314,6 +1314,96 @@ multihome6_cleanup() ovpn_cleanup } +atf_test_case "float" "cleanup" +float_head() +{ + atf_set descr 'Test peer float notification' + atf_set require.user root +} + +float_body() +{ + ovpn_init + + l=$(vnet_mkepair) + + vnet_mkjail a ${l}a + jexec a ifconfig ${l}a 192.0.2.1/24 up + jexec a ifconfig lo0 127.0.0.1/8 up + vnet_mkjail b ${l}b + jexec b ifconfig ${l}b 192.0.2.2/24 up + + # Sanity check + atf_check -s exit:0 -o ignore jexec a ping -c 1 192.0.2.2 + + ovpn_start a " + dev ovpn0 + dev-type tun + proto udp4 + + cipher AES-256-GCM + auth SHA256 + + local 192.0.2.1 + server 198.51.100.0 255.255.255.0 + ca $(atf_get_srcdir)/ca.crt + cert $(atf_get_srcdir)/server.crt + key $(atf_get_srcdir)/server.key + dh $(atf_get_srcdir)/dh.pem + + mode server + script-security 2 + auth-user-pass-verify /usr/bin/true via-env + topology subnet + + keepalive 2 10 + + management 192.0.2.1 1234 + " + ovpn_start b " + dev tun0 + dev-type tun + + client + + remote 192.0.2.1 + auth-user-pass $(atf_get_srcdir)/user.pass + + ca $(atf_get_srcdir)/ca.crt + cert $(atf_get_srcdir)/client.crt + key $(atf_get_srcdir)/client.key + dh $(atf_get_srcdir)/dh.pem + + keepalive 2 10 + " + + # Give the tunnel time to come up + sleep 10 + + atf_check -s exit:0 -o ignore jexec b ping -c 3 198.51.100.1 + + # We expect the client on 192.0.2.2 + if ! echo "status" | jexec a nc -N 192.0.2.1 1234 | grep 192.0.2.2; then + atf_fail "Client not found in status list!" + fi + + # Now change the client IP + jexec b ifconfig ${l}b 192.0.2.3/24 up + + # And wait for keepalives to trigger the float notification + sleep 5 + + # So the client now has the new address in userspace + if ! echo "status" | jexec a nc -N 192.0.2.1 1234 | grep 192.0.2.3; then + atf_fail "Client not found in status list!" + fi +} + +float_cleanup() +{ + ovpn_cleanup +} + atf_init_test_cases() { atf_add_test_case "4in4" @@ -1332,4 +1422,5 @@ atf_init_test_cases() atf_add_test_case "destroy_unused" atf_add_test_case "multihome4" atf_add_test_case "multihome6" + atf_add_test_case "float" } diff --git a/tests/sys/net/if_vlan.sh b/tests/sys/net/if_vlan.sh index 424eac705b94..8122203337e2 100755 --- a/tests/sys/net/if_vlan.sh +++ b/tests/sys/net/if_vlan.sh @@ -333,6 +333,32 @@ conflict_id_cleanup() } +# If a vlan interface is in a bridge, changing the vlandev to refer to +# a bridge should not be allowed. +atf_test_case "bridge_vlandev" "cleanup" +bridge_vlandev_head() +{ + atf_set descr 'transforming a bridge member vlan into an SVI is not allowed' + atf_set require.user root +} + +bridge_vlandev_body() +{ + vnet_init + vnet_init_bridge + + bridge=$(vnet_mkbridge) + vlan=$(vnet_mkvlan) + + atf_check -s exit:0 ifconfig ${bridge} addm ${vlan} + atf_check -s exit:1 -e ignore ifconfig ${vlan} vlan 1 vlandev ${bridge} +} + +bridge_vlandev_cleanup() +{ + vnet_cleanup +} + atf_init_test_cases() { atf_add_test_case "basic" @@ -343,4 +369,5 @@ atf_init_test_cases() atf_add_test_case "qinq_setflags" atf_add_test_case "bpf_pcp" atf_add_test_case "conflict_id" + atf_add_test_case "bridge_vlandev" } diff --git a/tests/sys/netpfil/pf/nat64.py b/tests/sys/netpfil/pf/nat64.py index 5cc4713a16cc..a5890fc4a161 100644 --- a/tests/sys/netpfil/pf/nat64.py +++ b/tests/sys/netpfil/pf/nat64.py @@ -33,7 +33,7 @@ from atf_python.sys.net.tools import ToolsHelper from atf_python.sys.net.vnet import VnetTestTemplate class TestNAT64(VnetTestTemplate): - REQUIRED_MODULES = [ "pf" ] + REQUIRED_MODULES = [ "pf", "pflog" ] TOPOLOGY = { "vnet1": {"ifaces": ["if1"]}, "vnet2": {"ifaces": ["if1", "if2"]}, @@ -92,12 +92,15 @@ class TestNAT64(VnetTestTemplate): def vnet2_handler(self, vnet): ifname = vnet.iface_alias_map["if1"].name + ToolsHelper.print_output("/sbin/sysctl net.inet6.ip6.forwarding=1") ToolsHelper.print_output("/sbin/route add default 192.0.2.2") ToolsHelper.print_output("/sbin/pfctl -e") ToolsHelper.pf_rules([ "pass inet6 proto icmp6", "pass in on %s inet6 af-to inet from 192.0.2.1" % ifname]) + vnet.pipe.send(socket.if_nametoindex("pflog0")) + @pytest.mark.require_user("root") @pytest.mark.require_progs(["scapy"]) def test_tcp_rst(self): @@ -287,3 +290,39 @@ class TestNAT64(VnetTestTemplate): reply = sp.sr1(packet, timeout=3) # We don't expect a reply to a corrupted packet assert not reply + + @pytest.mark.require_user("root") + @pytest.mark.require_progs(["scapy"]) + def test_noip6(self): + """ + PR 288263: link-local target address in icmp6 ADVERT can cause NULL deref + """ + ifname = self.vnet.iface_alias_map["if1"].name + gw_mac = self.vnet.iface_alias_map["if1"].epairb.ether + scopeid = self.wait_object(self.vnet_map["vnet2"].pipe) + ToolsHelper.print_output("/sbin/route -6 add default 2001:db8::1") + + import scapy.all as sp + + pkt = sp.Ether(dst=gw_mac) \ + / sp.IPv6(dst="64:ff9b::203.0.113.2") \ + / sp.ICMPv6ND_NA(tgt="FFA2:%x:2821:125F:1D27:B3B2:3F6F:C43C" % scopeid) + pkt.show() + sp.hexdump(pkt) + s = DelayedSend(pkt, sendif=ifname) + + packets = sp.sniff(iface=ifname, timeout=5) + for r in packets: + r.show() + + # Try scope id that likely doesn't have an interface at all + pkt = sp.Ether(dst=gw_mac) \ + / sp.IPv6(dst="64:ff9b::203.0.113.2") \ + / sp.ICMPv6ND_NA(tgt="FFA2:%x:2821:125F:1D27:B3B2:3F6F:C43C" % 255) + pkt.show() + sp.hexdump(pkt) + s = DelayedSend(pkt, sendif=ifname) + + packets = sp.sniff(iface=ifname, timeout=5) + for r in packets: + r.show() diff --git a/tools/boot/install-boot.sh b/tools/boot/install-boot.sh index 217bf0ff1457..10e62dd32ba8 100755 --- a/tools/boot/install-boot.sh +++ b/tools/boot/install-boot.sh @@ -294,27 +294,9 @@ boot_nogeli_mbr_ufs_both() { boot_nogeli_mbr_ufs_uefi $1 $2 $3 } +# ZFS+MBR+BIOS is not a supported configuration boot_nogeli_mbr_zfs_legacy() { - dev=$1 - dst=$2 - - # search to find the BSD slice - s=$(find_part $dev "freebsd") - if [ -z "$s" ] ; then - die "No BSD slice found" - fi - idx=$(find_part ${dev}s${s} "freebsd-zfs") - if [ -z "$idx" ] ; then - die "No freebsd-zfs slice found" - fi - # search to find the freebsd-zfs partition within the slice - # Or just assume it is 'a' because it has to be since it fails otherwise - doit gpart bootcode -b ${dst}/boot/mbr ${dev} - dd if=${dst}/boot/zfsboot of=/tmp/zfsboot1 count=1 - doit gpart bootcode -b /tmp/zfsboot1 ${dev}s${s} # Put boot1 into the start of part - sysctl kern.geom.debugflags=0x10 # Put boot2 into ZFS boot slot - doit dd if=${dst}/boot/zfsboot of=/dev/${dev}s${s}a skip=1 seek=1024 - sysctl kern.geom.debugflags=0x0 + exit 1 } boot_nogeli_mbr_zfs_uefi() { @@ -322,7 +304,6 @@ boot_nogeli_mbr_zfs_uefi() { } boot_nogeli_mbr_zfs_both() { - boot_nogeli_mbr_zfs_legacy $1 $2 $3 boot_nogeli_mbr_zfs_uefi $1 $2 $3 } diff --git a/tools/boot/rootgen.sh b/tools/boot/rootgen.sh index d87eb481e2c1..2cd65bdd180d 100755 --- a/tools/boot/rootgen.sh +++ b/tools/boot/rootgen.sh @@ -202,33 +202,6 @@ mk_nogeli_mbr_ufs_both() { rm -f ${src}/etc/fstab } -mk_nogeli_mbr_zfs_legacy() { - src=$1 - img=$2 - mntpt=$3 - geli=$4 - scheme=$5 - fs=$6 - bios=$7 - pool=nogeli-mbr-zfs-legacy - - zfs_extra $src $dst - makefs -t zfs -s 200m \ - -o poolname=${pool} -o bootfs=${pool} -o rootpath=/ \ - ${img}.s1a ${src} ${dst} - # The old boot1/boot2 boot split is also used by zfs. We need to extract zfsboot1 - # from this image. Since there's no room in the mbr format for the rest of the loader, - # it will load the zfsboot loader from the reserved for bootloader area of the ZFS volume - # being booted, hence the need to dd it into the raw img later. - # Please note: zfsboot only works with partition 'a' which must be the root - # partition / zfs volume - dd if=${src}/boot/zfsboot of=${dst}/zfsboot1 count=1 - mkimg -s bsd -b ${dst}zfsboot1 -p freebsd-zfs:=${img}.s1a -o ${img}.s1 - dd if=${src}/boot/zfsboot of=${img}.s1a skip=1 seek=1024 - mkimg -a 1 -s mbr -b ${src}/boot/mbr -p freebsd:=${img}.s1 -o ${img} - rm -rf ${dst} -} - mk_nogeli_mbr_zfs_uefi() { src=$1 img=$2 @@ -244,38 +217,11 @@ mk_nogeli_mbr_zfs_uefi() { makefs -t zfs -s 200m \ -o poolname=${pool} -o bootfs=${pool} -o rootpath=/ \ ${img}.s2a ${src} ${dst} - mkimg -s bsd -b ${dst}zfsboot1 -p freebsd-zfs:=${img}.s2a -o ${img}.s2 + mkimg -s bsd -p freebsd-zfs:=${img}.s2a -o ${img}.s2 mkimg -a 1 -s mbr -b ${src}/boot/mbr -p efi:=${img}.s1 -p freebsd:=${img}.s2 -o ${img} rm -rf ${dst} } -mk_nogeli_mbr_zfs_both() { - src=$1 - img=$2 - mntpt=$3 - geli=$4 - scheme=$5 - fs=$6 - bios=$7 - pool=nogeli-mbr-zfs-both - - zfs_extra $src $dst - make_esp_file ${img}.s1 ${espsize} ${src}/boot/loader.efi - makefs -t zfs -s 200m \ - -o poolname=${pool} -o bootfs=${pool} -o rootpath=/ \ - ${img}.s2a ${src} ${dst} - # The old boot1/boot2 boot split is also used by zfs. We need to extract zfsboot1 - # from this image. Since there's no room in the mbr format for the rest of the loader, - # it will load the zfsboot loader from the reserved for bootloader area of the ZFS volume - # being booted, hence the need to dd it into the raw img later. - # Please note: zfsboot only works with partition 'a' which must be the root - # partition / zfs volume - dd if=${src}/boot/zfsboot of=${dst}/zfsboot1 count=1 - mkimg -s bsd -b ${dst}zfsboot1 -p freebsd-zfs:=${img}.s2a -o ${img}.s2 - dd if=${src}/boot/zfsboot of=${img}.s1a skip=1 seek=1024 - mkimg -a 1 -s mbr -b ${src}/boot/mbr -p efi:=${img}.s1 -p freebsd:=${img}.s2 -o ${img} -} - mk_geli_gpt_ufs_legacy() { src=$1 img=$2 @@ -728,6 +674,10 @@ for arch in amd64; do for scheme in gpt mbr; do for fs in ufs zfs; do for bios in legacy uefi both; do + # ZFS+MBR+BIOS is not supported + if [ "$scheme" = "mbr" -a "$fs" = "zfs" -a "$bios" != "uefi" ]; then + continue + fi make_one_image ${arch} ${geli} ${scheme} ${fs} ${bios} done done @@ -750,6 +700,11 @@ for arch in i386; do for bios in legacy; do # The legacy boot is shared with amd64 so those routines could # likely be used here. + + # ZFS+MBR+BIOS is not supported + if [ "$scheme" = "mbr" -a "$fs" = "zfs" -a "$bios" != "uefi" ]; then + continue + fi make_one_image ${arch} ${geli} ${scheme} ${fs} ${bios} done done diff --git a/tools/build/cross-build/include/mac/endian.h b/tools/build/cross-build/include/mac/endian.h new file mode 100644 index 000000000000..11788044f05a --- /dev/null +++ b/tools/build/cross-build/include/mac/endian.h @@ -0,0 +1,7 @@ +/* + * Copyright (c) 2025 John Baldwin <jhb@FreeBSD.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/endian.h> diff --git a/tools/build/mk/OptionalObsoleteFiles.inc b/tools/build/mk/OptionalObsoleteFiles.inc index 3183c5f0b45c..580be4362a18 100644 --- a/tools/build/mk/OptionalObsoleteFiles.inc +++ b/tools/build/mk/OptionalObsoleteFiles.inc @@ -357,7 +357,6 @@ OLD_FILES+=boot/shortcuts.4th OLD_FILES+=boot/support.4th OLD_FILES+=boot/userboot.so OLD_FILES+=boot/version.4th -OLD_FILES+=boot/zfsboot OLD_FILES+=boot/zfsloader OLD_FILES+=usr/lib/kgzldr.o OLD_FILES+=usr/share/man/man5/loader.conf.5.gz @@ -374,7 +373,6 @@ OLD_FILES+=usr/share/man/man8/menu.4th.8.gz OLD_FILES+=usr/share/man/man8/menusets.4th.8.gz OLD_FILES+=usr/share/man/man8/pxeboot.8.gz OLD_FILES+=usr/share/man/man8/version.4th.8.gz -OLD_FILES+=usr/share/man/man8/zfsboot.8.gz OLD_FILES+=usr/share/man/man8/zfsloader.8.gz .endif @@ -1473,12 +1471,12 @@ OLD_DIRS+=usr/share/dict .endif .if ${MK_DMAGENT} == no +OLD_FILES+=etc/dma/auth.conf OLD_FILES+=etc/dma/dma.conf OLD_DIRS+=etc/dma OLD_FILES+=usr/libexec/dma OLD_FILES+=usr/libexec/dma-mbox-create OLD_FILES+=usr/share/man/man8/dma.8.gz -OLD_FILES+=usr/share/examples/dma/auth.conf OLD_FILES+=usr/share/examples/dma/mailer.conf OLD_DIRS+=usr/share/examples/dma .endif @@ -3713,33 +3711,33 @@ OLD_FILES+=usr/lib/krb5/plugins/preauth/test.so OLD_FILES+=usr/lib/krb5/plugins/tls/k5tls.so OLD_FILES+=usr/lib/libcom_err.a OLD_LIBS+=usr/lib/libcom_err.so -OLD_LIBS+=usr/lib/libcom_err.so.121 +OLD_LIBS+=usr/lib/libcom_err.so.122 OLD_FILES+=usr/lib/libgssapi_krb5.so -OLD_LIBS+=usr/lib/libgssapi_krb5.so.121 +OLD_LIBS+=usr/lib/libgssapi_krb5.so.122 OLD_FILES+=usr/lib/libgssrpc.so -OLD_LIBS+=usr/lib/libgssrpc.so.121 +OLD_LIBS+=usr/lib/libgssrpc.so.122 OLD_FILES+=usr/lib/libk5crypto.so -OLD_LIBS+=usr/lib/libk5crypto.so.121 +OLD_LIBS+=usr/lib/libk5crypto.so.122 OLD_FILES+=usr/lib/libkadm5clnt.so OLD_FILES+=usr/lib/libkadm5clnt_mit.so -OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.121 +OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.122 OLD_FILES+=usr/lib/libkadm5srv.so OLD_FILES+=usr/lib/libkadm5srv_mit.so -OLD_LIBS+=usr/lib/libkadm5srv_mit.so.121 +OLD_LIBS+=usr/lib/libkadm5srv_mit.so.122 OLD_FILES+=usr/lib/libkdb5.so -OLD_LIBS+=usr/lib/libkdb5.so.121 +OLD_LIBS+=usr/lib/libkdb5.so.122 OLD_FILES+=usr/lib/libkrad.so -OLD_LIBS+=usr/lib/libkrad.so.121 +OLD_LIBS+=usr/lib/libkrad.so.122 OLD_FILES+=usr/lib/libkrb5.so -OLD_LIBS+=usr/lib/libkrb5.so.121 +OLD_LIBS+=usr/lib/libkrb5.so.122 OLD_FILES+=usr/lib/libkrb5profile.a OLD_FILES+=usr/lib/libkrb5profile.so -OLD_LIBS+=usr/lib/libkrb5profile.so.121 +OLD_LIBS+=usr/lib/libkrb5profile.so.122 OLD_FILES+=usr/lib/libkrb5support.a OLD_FILES+=usr/lib/libkrb5support.so -OLD_LIBS+=usr/lib/libkrb5support.so.121 +OLD_LIBS+=usr/lib/libkrb5support.so.122 OLD_FILES+=usr/lib/libverto.so -OLD_LIBS+=usr/lib/libverto.so.121 +OLD_LIBS+=usr/lib/libverto.so.122 OLD_FILES+=usr/libdata/pkgconfig/gssrpc.pc OLD_FILES+=usr/libdata/pkgconfig/kadm-client.pc OLD_FILES+=usr/libdata/pkgconfig/kadm-server.pc @@ -5770,36 +5768,36 @@ OLD_FILES+=usr/lib/krb5/plugins/preauth/pkinit.so OLD_FILES+=usr/lib/krb5/plugins/preauth/spake.so OLD_FILES+=usr/lib/krb5/plugins/preauth/test.so OLD_FILES+=usr/lib/krb5/plugins/tls/k5tls.so -OLD_LIBS+=usr/lib/libcom_err.so.121 -OLD_LIBS+=usr/lib/libgssapi_krb5.so.121 +OLD_LIBS+=usr/lib/libcom_err.so.122 +OLD_LIBS+=usr/lib/libgssapi_krb5.so.122 OLD_FILES+=usr/lib/libgssrpc.a OLD_FILES+=usr/lib/libgssrpc.so -OLD_LIBS+=usr/lib/libgssrpc.so.121 +OLD_LIBS+=usr/lib/libgssrpc.so.122 OLD_FILES+=usr/lib/libk5crypto.a OLD_FILES+=usr/lib/libk5crypto.so -OLD_LIBS+=usr/lib/libk5crypto.so.121 +OLD_LIBS+=usr/lib/libk5crypto.so.122 OLD_FILES+=usr/lib/libkadm5clnt_mit.a OLD_FILES+=usr/lib/libkadm5clnt_mit.so -OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.121 +OLD_LIBS+=usr/lib/libkadm5clnt_mit.so.122 OLD_FILES+=usr/lib/libkadm5srv_mit.a OLD_FILES+=usr/lib/libkadm5srv_mit.so -OLD_LIBS+=usr/lib/libkadm5srv_mit.so.121 +OLD_LIBS+=usr/lib/libkadm5srv_mit.so.122 OLD_FILES+=usr/lib/libkdb5.a OLD_FILES+=usr/lib/libkdb5.so -OLD_LIBS+=usr/lib/libkdb5.so.121 +OLD_LIBS+=usr/lib/libkdb5.so.122 OLD_FILES+=usr/lib/libkrad.so OLD_FILES+=usr/lib/libkrad.a -OLD_LIBS+=usr/lib/libkrad.so.121 -OLD_LIBS+=usr/lib/libkrb5.so.121 +OLD_LIBS+=usr/lib/libkrad.so.122 +OLD_LIBS+=usr/lib/libkrb5.so.122 OLD_FILES+=usr/lib/libkrb5profile.a OLD_FILES+=usr/lib/libkrb5profile.so -OLD_LIBS+=usr/lib/libkrb5profile.so.121 +OLD_LIBS+=usr/lib/libkrb5profile.so.122 OLD_FILES+=usr/lib/libkrb5support.a OLD_FILES+=usr/lib/libkrb5support.so -OLD_LIBS+=usr/lib/libkrb5support.so.121 +OLD_LIBS+=usr/lib/libkrb5support.so.122 OLD_FILES+=usr/lib/libverto.a OLD_FILES+=usr/lib/libverto.so -OLD_LIBS+=usr/lib/libverto.so.121 +OLD_LIBS+=usr/lib/libverto.so.122 OLD_FILES+=usr/libdata/pkgconfig/gssrpc.pc OLD_FILES+=usr/libdata/pkgconfig/kadm-client.pc OLD_FILES+=usr/libdata/pkgconfig/kadm-server.pc @@ -12277,7 +12275,6 @@ OLD_FILES+=usr/share/snmp/mibs/BEGEMOT-WIRELESS-MIB.txt .if ${MK_ZFS} == no OLD_FILES+=boot/gptzfsboot -OLD_FILES+=boot/zfsboot OLD_FILES+=boot/zfsloader OLD_FILES+=etc/rc.d/zfs OLD_FILES+=etc/rc.d/zfsbe @@ -12380,7 +12377,6 @@ OLD_FILES+=usr/share/man/man8/gptzfsboot.8.gz OLD_FILES+=usr/share/man/man8/zdb.8.gz OLD_FILES+=usr/share/man/man8/zfs-program.8.gz OLD_FILES+=usr/share/man/man8/zfs.8.gz -OLD_FILES+=usr/share/man/man8/zfsboot.8.gz OLD_FILES+=usr/share/man/man8/zfsbootcfg.8.gz OLD_FILES+=usr/share/man/man8/zfsd.8.gz OLD_FILES+=usr/share/man/man8/zfsloader.8.gz diff --git a/tools/build/options/WITH_LLVM_ASSERTIONS b/tools/build/options/WITH_LLVM_ASSERTIONS index 0e7fbfbda0a3..6af75221a206 100644 --- a/tools/build/options/WITH_LLVM_ASSERTIONS +++ b/tools/build/options/WITH_LLVM_ASSERTIONS @@ -1 +1,2 @@ Enable debugging assertions in LLVM. +Use when working on or requesting help with LLVM components. diff --git a/tools/tools/vt/mkkfont/Makefile b/tools/tools/vt/mkkfont/Makefile index f9758be0ef15..1e8a9bcdafd7 100644 --- a/tools/tools/vt/mkkfont/Makefile +++ b/tools/tools/vt/mkkfont/Makefile @@ -1,4 +1,4 @@ PROG= mkkfont -MAN1= +MAN= .include <bsd.prog.mk> diff --git a/usr.bin/bmake/Makefile b/usr.bin/bmake/Makefile index a8bcdfd9f859..bbceea3ae8c2 100644 --- a/usr.bin/bmake/Makefile +++ b/usr.bin/bmake/Makefile @@ -99,8 +99,6 @@ COPTS.filemon_ktrace.c+= -Wno-error=unused-parameter SUBDIR.${MK_TESTS}+= unit-tests .endif -MAN1= ${MAN} - .if ${MK_GEN_MAN:Uno} == "yes" # we use this to generate ${MAN} diff --git a/usr.bin/bmake/Makefile.inc b/usr.bin/bmake/Makefile.inc index 5140bd18bb37..a064563a2283 100644 --- a/usr.bin/bmake/Makefile.inc +++ b/usr.bin/bmake/Makefile.inc @@ -3,6 +3,8 @@ MK_host_egacy= no .sinclude <src.opts.mk> +PACKAGE?= bmake + .if defined(.PARSEDIR) # make sure this is available to unit-tests/Makefile .export SRCTOP diff --git a/usr.bin/clang/clang-scan-deps/Makefile b/usr.bin/clang/clang-scan-deps/Makefile index 16fecdb88867..8da12faccc45 100644 --- a/usr.bin/clang/clang-scan-deps/Makefile +++ b/usr.bin/clang/clang-scan-deps/Makefile @@ -10,13 +10,14 @@ SRCS+= ClangScanDeps.cpp \ .include "${SRCTOP}/lib/clang/clang.pre.mk" CFLAGS+= -I${.OBJDIR} -TDFILE= Opts.td -INCFILE= ${TDFILE:.td=.inc} + +INCFILE= Opts.inc +TDFILE= ${LLVM_BASE}/${SRCDIR}/Opts.td GENOPT= -gen-opt-parser-defs ${INCFILE}: ${TDFILE} ${LLVM_TBLGEN} ${GENOPT} -I ${LLVM_SRCS}/include -d ${.TARGET:C/$/.d/} \ - -o ${.TARGET} ${.ALLSRC} + -o ${.TARGET} ${TDFILE} TGHDRS+= ${INCFILE} DEPENDFILES+= ${TGHDRS:C/$/.d/} diff --git a/usr.bin/clang/clang.prog.mk b/usr.bin/clang/clang.prog.mk index 36c601bcbe36..3baf3d0baf0f 100644 --- a/usr.bin/clang/clang.prog.mk +++ b/usr.bin/clang/clang.prog.mk @@ -31,7 +31,7 @@ DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${LIBPRIV}${lib}.${LIBEXT} LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${LIBPRIV}${lib}.${LIBEXT} .endfor -PACKAGE= clang +PACKAGE?= clang .if ${.MAKE.OS} == "FreeBSD" || !defined(BOOTSTRAPPING) LIBADD+= execinfo diff --git a/usr.bin/clang/llvm-ar/Makefile b/usr.bin/clang/llvm-ar/Makefile index fd12b1ddef57..e019c89b3581 100644 --- a/usr.bin/clang/llvm-ar/Makefile +++ b/usr.bin/clang/llvm-ar/Makefile @@ -1,5 +1,6 @@ .include <src.opts.mk> +PACKAGE= toolchain PROG_CXX= llvm-ar MAN= llvm-ar.1 llvm-ranlib.1 diff --git a/usr.bin/clang/llvm-nm/Makefile b/usr.bin/clang/llvm-nm/Makefile index 825faf74719b..7e089d1b408d 100644 --- a/usr.bin/clang/llvm-nm/Makefile +++ b/usr.bin/clang/llvm-nm/Makefile @@ -1,5 +1,6 @@ .include <src.opts.mk> +PACKAGE= toolchain PROG_CXX= llvm-nm SRCDIR= llvm/tools/llvm-nm diff --git a/usr.bin/clang/llvm-size/Makefile b/usr.bin/clang/llvm-size/Makefile index 2860a0069538..9d3505cdd319 100644 --- a/usr.bin/clang/llvm-size/Makefile +++ b/usr.bin/clang/llvm-size/Makefile @@ -1,5 +1,6 @@ .include <src.opts.mk> +PACKAGE= toolchain PROG_CXX= llvm-size SRCDIR= llvm/tools/llvm-size diff --git a/usr.bin/clang/llvm.prog.mk b/usr.bin/clang/llvm.prog.mk index f702082e31bd..c369fe8d5944 100644 --- a/usr.bin/clang/llvm.prog.mk +++ b/usr.bin/clang/llvm.prog.mk @@ -25,7 +25,7 @@ DPADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${LIBPRIV}${lib}.${LIBEXT} LDADD+= ${OBJTOP}/lib/clang/lib${lib}/lib${LIBPRIV}${lib}.${LIBEXT} .endfor -PACKAGE= clang +PACKAGE?= clang .if ${.MAKE.OS} == "FreeBSD" || !defined(BOOTSTRAPPING) LIBADD+= execinfo diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1 index 3012ae472015..b16c4bcc95a2 100644 --- a/usr.bin/find/find.1 +++ b/usr.bin/find/find.1 @@ -28,7 +28,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd November 23, 2024 +.Dd July 26, 2025 .Dt FIND 1 .Os .Sh NAME @@ -1096,7 +1096,77 @@ as zero; It is not yet implemented. .It Format: One or two characters, described below, which indicates the information to display. -XXX need to write this. +.Bl -tag -width Ds +.It p +Path to file +.It f +Filename without directories. +.It h +Path relative to the starting point, or '.' if that's empty for some reason. +.It P +Unimplemented -- File with command line arg. +.It H +Unimplemented -- Command line arg. +.It g +gid in human readable form. +.It G +gid as a number. +.It h +uid in human readable form. +.It U +uid as a number. +.It m +File permission mode in octal. +.It M +File mode in +.Xr ls 1 +standard form. +.It k +File size in KiB (units of 1024 bytes). +.It b +File size in blocks (Always 512 byte units, even if underlying storage +size differs). +.It s +Size in bytes of the file. +.It S +Sparseness of the file. +The blocks the file occupies times 512 divided by the file size. +.It d +Depth in the tree +.It D +Device number for the file. +.It F +Unimplemented -- Filesystem type where the file resides. +.It l +Object of the symbolic link. +.It i +Inode of the file. +.It n +Number of hard links. +.It y +Unimplemented -- Type of the file +.It Y +Unimplemented -- Type of the file with loop detection +.It a +Access time of the file. +.It A +Access time of the file in strftime format. +Takes an additional argument. +.It B +Birth time of the file in strftime format. +Takes an additional argument. +.It c +Creation time of the file. +.It C +Creation time of the file in strftime format. +Takes an additional argument. +.It t +Modification time of the file. +.It T +Modification time of the file in strftime format. +Takes an additional argument. +.El +Any format not listed is not supported, though the error changes. .El .El .Sh ENVIRONMENT diff --git a/usr.bin/find/function.c b/usr.bin/find/function.c index 11455b395022..b260a71ef4a9 100644 --- a/usr.bin/find/function.c +++ b/usr.bin/find/function.c @@ -1449,11 +1449,12 @@ c_printf(OPTION *option, char ***argvp) { PLAN *new; - isoutput = 1; /* * XXX We could scan the format looking for stat-dependent formats, and - * turn off the stat if there's none: `%p`/`%f`/`%h` don't need a stat. + * turn off the nostat bit for trival cases: `%p`/`%f`/`%h`. */ + isoutput = 1; + ftsoptions &= ~FTS_NOSTAT; new = palloc(option); new->c_data = nextarg(option, argvp); diff --git a/usr.bin/fstat/Makefile b/usr.bin/fstat/Makefile index fa51a92eb52f..f8617fd0c6a4 100644 --- a/usr.bin/fstat/Makefile +++ b/usr.bin/fstat/Makefile @@ -3,6 +3,6 @@ SRCS= fstat.c fuser.c main.c LINKS= ${BINDIR}/fstat ${BINDIR}/fuser LIBADD= procstat -MAN1= fuser.1 fstat.1 +MAN= fuser.1 fstat.1 .include <bsd.prog.mk> diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile index 2204758ece5a..c72b86656148 100644 --- a/usr.bin/grep/Makefile +++ b/usr.bin/grep/Makefile @@ -6,7 +6,7 @@ PACKAGE= runtime PROG= grep -MAN1= grep.1 zgrep.1 +MAN= grep.1 zgrep.1 SRCS= file.c grep.c queue.c util.c diff --git a/usr.bin/pom/pom.c b/usr.bin/pom/pom.c index db0033373b47..bcfbcadc8238 100644 --- a/usr.bin/pom/pom.c +++ b/usr.bin/pom/pom.c @@ -83,6 +83,7 @@ main(int argc, char **argv) err(1, "unable to limit capabitilities for stdio"); caph_cache_catpages(); + caph_cache_tzdata(); if (caph_enter() < 0) err(1, "unable to enter capability mode"); diff --git a/usr.bin/sdiff/Makefile b/usr.bin/sdiff/Makefile index 03587f373098..af9a037e9a58 100644 --- a/usr.bin/sdiff/Makefile +++ b/usr.bin/sdiff/Makefile @@ -3,7 +3,7 @@ PROG= sdiff SRCS= edit.c sdiff.c -MAN1= sdiff.1 +MAN= sdiff.1 HAS_TESTS= SUBDIR.${MK_TESTS}+= tests diff --git a/usr.bin/strings/Makefile b/usr.bin/strings/Makefile index 8e2572810947..c01e775b0b89 100644 --- a/usr.bin/strings/Makefile +++ b/usr.bin/strings/Makefile @@ -1,5 +1,7 @@ .include <src.opts.mk> +PACKAGE= toolchain + ELFTCDIR= ${SRCTOP}/contrib/elftoolchain .PATH: ${ELFTCDIR}/strings diff --git a/usr.bin/vtfontcvt/Makefile b/usr.bin/vtfontcvt/Makefile index de011660ca28..13e60c406b26 100644 --- a/usr.bin/vtfontcvt/Makefile +++ b/usr.bin/vtfontcvt/Makefile @@ -1,6 +1,6 @@ PROG= vtfontcvt SRCS= vtfontcvt.c lz4.c -MAN8= vtfontcvt.8 +MAN= vtfontcvt.8 # lz4 compression functionality .PATH: ${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 diff --git a/usr.bin/xargs/tests/Makefile b/usr.bin/xargs/tests/Makefile index b1e6782069de..9fa8ff11fac2 100644 --- a/usr.bin/xargs/tests/Makefile +++ b/usr.bin/xargs/tests/Makefile @@ -1,6 +1,6 @@ PACKAGE= tests -TAP_TESTS_SH= legacy_test +ATF_TESTS_SH= xargs_test ${PACKAGE}FILES+= regress.0.in ${PACKAGE}FILES+= regress.0.out @@ -17,12 +17,11 @@ ${PACKAGE}FILES+= regress.R-1.out ${PACKAGE}FILES+= regress.in ${PACKAGE}FILES+= regress.n1.out ${PACKAGE}FILES+= regress.n2.out -${PACKAGE}FILES+= regress.n2147483647.out +${PACKAGE}FILES+= regress.nargmax.out ${PACKAGE}FILES+= regress.n2P0.out ${PACKAGE}FILES+= regress.n3.out ${PACKAGE}FILES+= regress.normal.out ${PACKAGE}FILES+= regress.quotes.in ${PACKAGE}FILES+= regress.quotes.out -${PACKAGE}FILES+= regress.sh .include <bsd.test.mk> diff --git a/usr.bin/xargs/tests/legacy_test.sh b/usr.bin/xargs/tests/legacy_test.sh deleted file mode 100644 index 3c7842d07bf0..000000000000 --- a/usr.bin/xargs/tests/legacy_test.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -SRCDIR="$(dirname "${0}")"; export SRCDIR - -m4 "${SRCDIR}/../regress.m4" "${SRCDIR}/regress.sh" | sh diff --git a/usr.bin/xargs/tests/regress.n2147483647.out b/usr.bin/xargs/tests/regress.nargmax.out index cc32a92a2199..cc32a92a2199 100644 --- a/usr.bin/xargs/tests/regress.n2147483647.out +++ b/usr.bin/xargs/tests/regress.nargmax.out diff --git a/usr.bin/xargs/tests/regress.sh b/usr.bin/xargs/tests/regress.sh deleted file mode 100644 index 9b4839d2a8ec..000000000000 --- a/usr.bin/xargs/tests/regress.sh +++ /dev/null @@ -1,32 +0,0 @@ - -echo 1..21 - -REGRESSION_START($1) - -REGRESSION_TEST(`normal', `xargs echo The <${SRCDIR}/regress.in') -REGRESSION_TEST(`I', `xargs -I% echo The % % % %% % % <${SRCDIR}/regress.in') -REGRESSION_TEST(`J', `xargs -J% echo The % again. <${SRCDIR}/regress.in') -REGRESSION_TEST(`L', `xargs -L3 echo <${SRCDIR}/regress.in') -REGRESSION_TEST(`P1', `xargs -P1 echo <${SRCDIR}/regress.in') -REGRESSION_TEST(`R', `xargs -I% -R1 echo The % % % %% % % <${SRCDIR}/regress.in') -REGRESSION_TEST(`R-1', `xargs -I% -R-1 echo The % % % %% % % <${SRCDIR}/regress.in') -REGRESSION_TEST(`n1', `xargs -n1 echo <${SRCDIR}/regress.in') -REGRESSION_TEST(`n2', `xargs -n2 echo <${SRCDIR}/regress.in') -# This test may consume a large amount of memory, making it unsuited to CI -# environments. Disable it for now. -#REGRESSION_TEST(`n2147483647', `xargs -n2147483647 <${SRCDIR}/regress.in') -REGRESSION_TEST(`n2P0',`xargs -n2 -P0 echo <${SRCDIR}/regress.in | sort') -REGRESSION_TEST(`n3', `xargs -n3 echo <${SRCDIR}/regress.in') -REGRESSION_TEST(`0', `xargs -0 -n1 echo <${SRCDIR}/regress.0.in') -REGRESSION_TEST(`0I', `xargs -0 -I% echo The % %% % <${SRCDIR}/regress.0.in') -REGRESSION_TEST(`0J', `xargs -0 -J% echo The % again. <${SRCDIR}/regress.0.in') -REGRESSION_TEST(`0L', `xargs -0 -L2 echo <${SRCDIR}/regress.0.in') -REGRESSION_TEST(`0P1', `xargs -0 -P1 echo <${SRCDIR}/regress.0.in') -REGRESSION_TEST(`quotes', `xargs -n1 echo <${SRCDIR}/regress.quotes.in') - -REGRESSION_TEST_FREEFORM(`parallel1', `echo /var/empty /var/empty | xargs -n1 -P2 test -d; [ $? = 0 ]') -REGRESSION_TEST_FREEFORM(`parallel2', `echo /var/empty /var/empty/nodir | xargs -n1 -P2 test -d; [ $? = 1 ]') -REGRESSION_TEST_FREEFORM(`parallel3', `echo /var/empty/nodir /var/empty | xargs -n1 -P2 test -d; [ $? = 1 ]') -REGRESSION_TEST_FREEFORM(`parallel4', `echo /var/empty/nodir /var/empty/nodir | xargs -n1 -P2 test -d; [ $? = 1 ]') - -REGRESSION_END() diff --git a/usr.bin/xargs/tests/xargs_test.sh b/usr.bin/xargs/tests/xargs_test.sh new file mode 100755 index 000000000000..12c9407a7e45 --- /dev/null +++ b/usr.bin/xargs/tests/xargs_test.sh @@ -0,0 +1,193 @@ +# +# Copyright (c) 2002 Juli Mallett <jmallett@FreeBSD.org> +# Copyright (c) 2025 Dag-Erling Smørgrav <des@FreeBSD.org> +# +# SPDX-License-Identifier: BSD-2-Clause +# + +SRCDIR=$(atf_get_srcdir) + +atf_test_case xargs_normal +xargs_normal_body() +{ + atf_check -o file:${SRCDIR}/regress.normal.out \ + xargs echo The <${SRCDIR}/regress.in +} + +atf_test_case xargs_I +xargs_I_body() +{ + atf_check -o file:${SRCDIR}/regress.I.out \ + xargs -I% echo The % % % %% % % <${SRCDIR}/regress.in +} + +atf_test_case xargs_J +xargs_J_body() +{ + atf_check -o file:${SRCDIR}/regress.J.out \ + xargs -J% echo The % again. <${SRCDIR}/regress.in +} + +atf_test_case xargs_L +xargs_L_body() +{ + atf_check -o file:${SRCDIR}/regress.L.out \ + xargs -L3 echo <${SRCDIR}/regress.in +} + +atf_test_case xargs_P1 +xargs_P1_body() +{ + atf_check -o file:${SRCDIR}/regress.P1.out \ + xargs -P1 echo <${SRCDIR}/regress.in +} + +atf_test_case xargs_R +xargs_R_body() +{ + atf_check -o file:${SRCDIR}/regress.R.out \ + xargs -I% -R1 echo The % % % %% % % <${SRCDIR}/regress.in +} + +atf_test_case xargs_R_1 +xargs_R_1_body() +{ + atf_check -o file:${SRCDIR}/regress.R-1.out \ + xargs -I% -R-1 echo The % % % %% % % <${SRCDIR}/regress.in +} + +atf_test_case xargs_n1 +xargs_n1_body() +{ + atf_check -o file:${SRCDIR}/regress.n1.out \ + xargs -n1 echo <${SRCDIR}/regress.in +} + +atf_test_case xargs_n2 +xargs_n2_body() +{ + atf_check -o file:${SRCDIR}/regress.n2.out \ + xargs -n2 echo <${SRCDIR}/regress.in +} + +atf_test_case xargs_nargmax +xargs_nargmax_body() +{ + argmax=$(sysctl -n kern.argmax) + atf_check -o file:${SRCDIR}/regress.nargmax.out \ + xargs -n$((argmax)) <${SRCDIR}/regress.in + atf_check -s exit:1 -e match:"too large" \ + xargs -n$((argmax+1)) <${SRCDIR}/regress.in +} + +atf_test_case xargs_n2P0 +xargs_n2P0_body() +{ + atf_check -o save:regress.out \ + xargs -n2 -P0 echo <${SRCDIR}/regress.in + atf_check -o file:${SRCDIR}/regress.n2P0.out \ + sort regress.out +} + +atf_test_case xargs_n3 +xargs_n3_body() +{ + atf_check -o file:${SRCDIR}/regress.n3.out \ + xargs -n3 echo <${SRCDIR}/regress.in +} + +atf_test_case xargs_0 +xargs_0_body() +{ + atf_check -o file:${SRCDIR}/regress.0.out \ + xargs -0 -n1 echo <${SRCDIR}/regress.0.in +} + +atf_test_case xargs_0I +xargs_0I_body() +{ + atf_check -o file:${SRCDIR}/regress.0I.out \ + xargs -0 -I% echo The % %% % <${SRCDIR}/regress.0.in +} + +atf_test_case xargs_0J +xargs_0J_body() +{ + atf_check -o file:${SRCDIR}/regress.0J.out \ + xargs -0 -J% echo The % again. <${SRCDIR}/regress.0.in +} + +atf_test_case xargs_0L +xargs_0L_body() +{ + atf_check -o file:${SRCDIR}/regress.0L.out \ + xargs -0 -L2 echo <${SRCDIR}/regress.0.in +} + +atf_test_case xargs_0P1 +xargs_0P1_body() +{ + atf_check -o file:${SRCDIR}/regress.0P1.out \ + xargs -0 -P1 echo <${SRCDIR}/regress.0.in +} + +atf_test_case xargs_quotes +xargs_quotes_body() +{ + atf_check -o file:${SRCDIR}/regress.quotes.out \ + xargs -n1 echo <${SRCDIR}/regress.quotes.in +} + +atf_test_case xargs_parallel1 +xargs_parallel1_body() +{ + echo /var/empty /var/empty >input + atf_check xargs -n1 -P2 test -d <input +} + +atf_test_case xargs_parallel2 +xargs_parallel2_body() +{ + echo /var/empty /var/empty/nodir >input + atf_check -s exit:1 xargs -n1 -P2 test -d <input +} + +atf_test_case xargs_parallel3 +xargs_parallel3_body() +{ + echo /var/empty/nodir /var/empty >input + atf_check -s exit:1 xargs -n1 -P2 test -d <input +} + +atf_test_case xargs_parallel4 +xargs_parallel4_body() +{ + echo /var/empty/nodir /var/empty/nodir >input + atf_check -s exit:1 xargs -n1 -P2 test -d <input +} + +atf_init_test_cases() +{ + atf_add_test_case xargs_normal + atf_add_test_case xargs_I + atf_add_test_case xargs_J + atf_add_test_case xargs_L + atf_add_test_case xargs_P1 + atf_add_test_case xargs_R + atf_add_test_case xargs_R_1 + atf_add_test_case xargs_n1 + atf_add_test_case xargs_n2 + atf_add_test_case xargs_nargmax + atf_add_test_case xargs_n2P0 + atf_add_test_case xargs_n3 + atf_add_test_case xargs_0 + atf_add_test_case xargs_0I + atf_add_test_case xargs_0J + atf_add_test_case xargs_0L + atf_add_test_case xargs_0P1 + atf_add_test_case xargs_quotes + atf_add_test_case xargs_parallel1 + atf_add_test_case xargs_parallel2 + atf_add_test_case xargs_parallel3 + atf_add_test_case xargs_parallel4 +} diff --git a/usr.bin/xargs/xargs.c b/usr.bin/xargs/xargs.c index 237beff26504..2a7f026e5066 100644 --- a/usr.bin/xargs/xargs.c +++ b/usr.bin/xargs/xargs.c @@ -166,7 +166,7 @@ main(int argc, char *argv[]) break; case 'n': nflag = 1; - nargs = (int)strtonum(optarg, 1, INT_MAX, &errstr); + nargs = (int)strtonum(optarg, 1, arg_max, &errstr); if (errstr) errx(1, "-%c %s: %s", ch, optarg, errstr); break; diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index 85864da57af2..6ff8dd8e273b 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -37,9 +37,12 @@ */ #include <sys/param.h> +#include <sys/cpuset.h> +#include <sys/domainset.h> #include <sys/endian.h> #include <sys/errno.h> #include <sys/stat.h> +#include <sys/tree.h> #include <err.h> #include <paths.h> @@ -50,7 +53,9 @@ #include <string.h> #include <unistd.h> +#include <dev/vmm/vmm_mem.h> #include <machine/vmm.h> +#include <machine/vmm_dev.h> #include <vmmapi.h> #include "bhyverun.h" @@ -79,6 +84,22 @@ static char basl_template[MAXPATHLEN]; static char basl_stemplate[MAXPATHLEN]; /* + * SRAT vCPU affinity info. + */ +struct acpi_vcpu_affinity_entry { + RB_ENTRY(acpi_vcpu_affinity_entry) entry; + int vcpuid; + int domain; +}; + +static int vcpu_affinity_cmp(struct acpi_vcpu_affinity_entry *const a1, + struct acpi_vcpu_affinity_entry *const a2); +static RB_HEAD(vcpu_affinities, + acpi_vcpu_affinity_entry) aff_head = RB_INITIALIZER(&aff_head); +RB_GENERATE_STATIC(vcpu_affinities, acpi_vcpu_affinity_entry, entry, + vcpu_affinity_cmp); + +/* * State for dsdt_line(), dsdt_indent(), and dsdt_unindent(). */ static FILE *dsdt_fp; @@ -121,6 +142,31 @@ acpi_tables_add_device(const struct acpi_device *const dev) return (0); } +static int +vcpu_affinity_cmp(struct acpi_vcpu_affinity_entry *a1, + struct acpi_vcpu_affinity_entry *a2) +{ + return (a1->vcpuid < a2->vcpuid ? -1 : a1->vcpuid > a2->vcpuid); +} + +int +acpi_add_vcpu_affinity(int vcpuid, int domain) +{ + struct acpi_vcpu_affinity_entry *entry = calloc(1, sizeof(*entry)); + if (entry == NULL) { + return (ENOMEM); + } + + entry->vcpuid = vcpuid; + entry->domain = domain; + if (RB_INSERT(vcpu_affinities, &aff_head, entry) != NULL) { + free(entry); + return (EEXIST); + } + + return (0); +} + /* * Helper routines for writing to the DSDT from other modules. */ @@ -726,6 +772,83 @@ build_spcr(struct vmctx *const ctx) return (0); } +static int +build_srat(struct vmctx *const ctx) +{ + ACPI_TABLE_SRAT srat; + ACPI_SRAT_MEM_AFFINITY srat_mem_affinity; + ACPI_SRAT_CPU_AFFINITY srat_cpu_affinity; + + struct acpi_vcpu_affinity_entry *ep; + struct basl_table *table; + int segid, domain; + int _flags, _prot; + vm_ooffset_t _off; + size_t maplen; + uint64_t gpa; + int ret; + + if (RB_EMPTY(&aff_head)) + return (0); + + memset(&srat, 0, sizeof(srat)); + BASL_EXEC(basl_table_create(&table, ctx, ACPI_SIG_SRAT, + BASL_TABLE_ALIGNMENT)); + BASL_EXEC(basl_table_append_header(table, ACPI_SIG_SRAT, 1, 1)); + srat.TableRevision = 1; + BASL_EXEC(basl_table_append_content(table, &srat, sizeof(srat))); + + /* + * Iterate over the VM's memory maps and add + * a 'Memory Affinity Structure' for each mapping. + */ + gpa = 0; + while (1) { + ret = vm_mmap_getnext(ctx, &gpa, &segid, &_off, &maplen, &_prot, + &_flags); + if (ret) { + break; + } + + if (segid >= VM_SYSMEM && segid < VM_BOOTROM) { + domain = segid - VM_SYSMEM; + } else { + /* Treat devmem segs as domain 0. */ + domain = 0; + } + memset(&srat_mem_affinity, 0, sizeof(srat_mem_affinity)); + srat_mem_affinity.Header.Type = ACPI_SRAT_TYPE_MEMORY_AFFINITY; + srat_mem_affinity.Header.Length = sizeof(srat_mem_affinity); + srat_mem_affinity.Flags |= ACPI_SRAT_MEM_ENABLED; + srat_mem_affinity.ProximityDomain = htole32(domain); + srat_mem_affinity.BaseAddress = htole64(gpa); + srat_mem_affinity.Length = htole64(maplen); + srat_mem_affinity.Flags = htole32(ACPI_SRAT_MEM_ENABLED); + BASL_EXEC(basl_table_append_bytes(table, &srat_mem_affinity, + sizeof(srat_mem_affinity))); + gpa += maplen; + } + + /* + * Iterate over each "vCPUid to domain id" mapping and emit a + * 'Processor Local APIC/SAPIC Affinity Structure' for each entry. + */ + RB_FOREACH(ep, vcpu_affinities, &aff_head) { + memset(&srat_cpu_affinity, 0, sizeof(srat_cpu_affinity)); + srat_cpu_affinity.Header.Type = ACPI_SRAT_TYPE_CPU_AFFINITY; + srat_cpu_affinity.Header.Length = sizeof(srat_cpu_affinity); + srat_cpu_affinity.ProximityDomainLo = (uint8_t)ep->domain; + srat_cpu_affinity.ApicId = (uint8_t)ep->vcpuid; + srat_cpu_affinity.Flags = htole32(ACPI_SRAT_CPU_USE_AFFINITY); + BASL_EXEC(basl_table_append_bytes(table, &srat_cpu_affinity, + sizeof(srat_cpu_affinity))); + } + + BASL_EXEC(basl_table_register_to_rsdt(table)); + + return (0); +} + int acpi_build(struct vmctx *ctx, int ncpu) { @@ -765,6 +888,7 @@ acpi_build(struct vmctx *ctx, int ncpu) BASL_EXEC(build_mcfg(ctx)); BASL_EXEC(build_facs(ctx)); BASL_EXEC(build_spcr(ctx)); + BASL_EXEC(build_srat(ctx)); /* Build ACPI device-specific tables such as a TPM2 table. */ const struct acpi_device_list_entry *entry; diff --git a/usr.sbin/bhyve/acpi.h b/usr.sbin/bhyve/acpi.h index 4b557993d67f..f4d24d63800e 100644 --- a/usr.sbin/bhyve/acpi.h +++ b/usr.sbin/bhyve/acpi.h @@ -56,7 +56,8 @@ struct vmctx; int acpi_build(struct vmctx *ctx, int ncpu); void acpi_raise_gpe(struct vmctx *ctx, unsigned bit); int acpi_tables_add_device(const struct acpi_device *const dev); -void dsdt_line(const char *fmt, ...); +int acpi_add_vcpu_affinity(int vcpuid, int domain); +void dsdt_line(const char *fmt, ...) __printflike(1, 2); void dsdt_fixed_ioport(uint16_t iobase, uint16_t length); void dsdt_fixed_irq(uint8_t irq); void dsdt_fixed_mem32(uint32_t base, uint32_t length); diff --git a/usr.sbin/bhyve/amd64/bhyverun_machdep.c b/usr.sbin/bhyve/amd64/bhyverun_machdep.c index 85af124b5536..dad8f1e52e4e 100644 --- a/usr.sbin/bhyve/amd64/bhyverun_machdep.c +++ b/usr.sbin/bhyve/amd64/bhyverun_machdep.c @@ -91,6 +91,7 @@ bhyve_usage(int code) " -K: PS2 keyboard layout\n" " -l: LPC device configuration\n" " -m: memory size\n" + " -n: NUMA domain specification\n" " -o: set config 'var' to 'value'\n" " -P: vmexit from the guest on pause\n" " -p: pin 'vcpu' to 'hostcpu'\n" @@ -117,9 +118,9 @@ bhyve_optparse(int argc, char **argv) int c; #ifdef BHYVE_SNAPSHOT - optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:r:"; + optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:n:l:K:U:r:"; #else - optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:l:K:U:"; + optstr = "aehuwxACDHIPSWYk:f:o:p:G:c:s:m:n:l:K:U:"; #endif while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { @@ -194,6 +195,15 @@ bhyve_optparse(int argc, char **argv) case 'm': set_config_value("memory.size", optarg); break; + case 'n': + if (bhyve_numa_parse(optarg) != 0) + errx(EX_USAGE, + "invalid NUMA configuration " + "'%s'", + optarg); + if (!get_config_bool("acpi_tables")) + errx(EX_USAGE, "NUMA emulation requires ACPI"); + break; case 'o': if (!bhyve_parse_config_option(optarg)) { errx(EX_USAGE, diff --git a/usr.sbin/bhyve/amd64/xmsr.c b/usr.sbin/bhyve/amd64/xmsr.c index cd80e4ef782e..7c174728f4fa 100644 --- a/usr.sbin/bhyve/amd64/xmsr.c +++ b/usr.sbin/bhyve/amd64/xmsr.c @@ -204,6 +204,15 @@ emulate_rdmsr(struct vcpu *vcpu __unused, uint32_t num, uint64_t *val) *val = 1; break; + case MSR_VM_CR: + /* + * We currently don't support nested virt. + * Windows seems to ignore the cpuid bits and reads this + * MSR anyways. + */ + *val = VM_CR_SVMDIS; + break; + default: error = -1; break; diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index 62e567fd359d..89c0b23961a8 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -269,8 +269,56 @@ or (either upper or lower case) to indicate a multiple of kilobytes, megabytes, gigabytes, or terabytes. If no suffix is given, the value is assumed to be in megabytes. -.Pp The default is 256M. +.Pp +.It Fl n Ar id Ns Cm \&, Ns Ar size Ns Cm \&, Ns Ar cpus Ns Op Cm \&, Ns Ar domain_policy +Configure guest NUMA domains. +This option applies only to the amd64 platform. +.Pp +The +.Fl n +option allows the guest physical address space to be partitioned into domains. +The layout of each domain is encoded in an ACPI table +visible to the guest operating system. +The +.Fl n +option also allows the specification of a +.Xr domainset 9 +memory allocation policy for the host memory backing a given NUMA domain. +A guest can have up to 8 NUMA domains. +This feature requires that the guest use a boot ROM, and in +particular cannot be used if the guest was initialized using +.Xr bhyveload 8 . +.Pp +Each domain is identified by a numerical +.Em id . +The domain memory +.Em size +is specified using the same format as the +.Fl m +flag. +The sum of all +.Em size +parameters overrides the total VM memory size specified by the +.Fl m +flag. +However, if at least one domain memory size parameter is +missing, the total VM memory size will be equally distributed across +all emulated domains. +The +.Em cpuset +parameter specifies the set of CPUs that are part of the domain. +The +.Em domain_policy +parameter may be optionally used to configure the +.Xr domainset 9 +host NUMA memory allocation policy for an emulated +domain. +See the +.Ar -n +flag in +.Xr cpuset 1 +for a list of valid NUMA memory allocation policies and their formats. .It Fl o Ar var Ns Cm = Ns Ar value Set the configuration variable .Ar var @@ -1202,6 +1250,33 @@ using this configuration file, use flag .Bd -literal -offset indent /usr/sbin/bhyve -k configfile vm0 .Ed +.Pp +Run a UEFI virtual machine with four CPUs and two emulated NUMA domains: +.Bd -literal -offset indent +bhyve -c 4 -w -H \\ + -s 0,hostbridge \\ + -s 4,ahci-hd,disk.img \\ + -s 31,lpc -l com1,stdio \\ + -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \\ + -n id=0,size=4G,cpus=0-1 \\ + -n id=1,size=4G,cpus=2-3 \\ + numavm +.Ed +.Pp +Assuming a host machine with two NUMA domains, +run a UEFI virtual machine with four CPUs using a +.Ar prefer +.Xr domainset 9 +policy to allocate guest memory from the first host NUMA domain only. +.Bd -literal -offset indent +bhyve -c 2 -w -H \\ + -s 0,hostbridge \\ + -s 4,ahci-hd,disk.img \\ + -s 31,lpc -l com1,stdio \\ + -l bootrom,/usr/local/share/uefi-firmware/BHYVE_UEFI.fd \\ + -n id=0,size=4G,cpus=0-1,domain_policy=prefer:0 \\ + numavm +.Ed .Sh SEE ALSO .Xr bhyve 4 , .Xr netgraph 4 , @@ -1211,7 +1286,8 @@ using this configuration file, use flag .Xr bhyve_config 5 , .Xr ethers 5 , .Xr bhyvectl 8 , -.Xr bhyveload 8 +.Xr bhyveload 8 , +.Xr domainset 9 .Pp .Rs .%A Intel diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index be9cd1611700..9ead49582a7d 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -30,6 +30,8 @@ #ifndef WITHOUT_CAPSICUM #include <sys/capsicum.h> #endif +#include <sys/cpuset.h> +#include <sys/domainset.h> #include <sys/mman.h> #ifdef BHYVE_SNAPSHOT #include <sys/socket.h> @@ -54,6 +56,7 @@ #include <fcntl.h> #endif #include <libgen.h> +#include <libutil.h> #include <unistd.h> #include <assert.h> #include <pthread.h> @@ -68,6 +71,7 @@ #include <libxo/xo.h> #endif +#include <dev/vmm/vmm_mem.h> #include <vmmapi.h> #include "acpi.h" @@ -108,6 +112,9 @@ static const int BSP = 0; static cpuset_t cpumask; +static struct vm_mem_domain guest_domains[VM_MAXMEMDOM]; +static int guest_ndomains = 0; + static void vm_loop(struct vmctx *ctx, struct vcpu *vcpu); static struct vcpu_info { @@ -179,6 +186,118 @@ parse_int_value(const char *key, const char *value, int minval, int maxval) return (lval); } +int +bhyve_numa_parse(const char *opt) +{ + int id = -1; + nvlist_t *nvl; + char *cp, *str, *tofree; + char pathbuf[64] = { 0 }; + char *size = NULL, *cpus = NULL, *domain_policy = NULL; + + if (*opt == '\0') { + return (-1); + } + + tofree = str = strdup(opt); + if (str == NULL) + errx(4, "Failed to allocate memory"); + + while ((cp = strsep(&str, ",")) != NULL) { + if (strncmp(cp, "id=", strlen("id=")) == 0) + id = parse_int_value("id", cp + strlen("id="), 0, + UINT8_MAX); + else if (strncmp(cp, "size=", strlen("size=")) == 0) + size = cp + strlen("size="); + else if (strncmp(cp, + "domain_policy=", strlen("domain_policy=")) == 0) + domain_policy = cp + strlen("domain_policy="); + else if (strncmp(cp, "cpus=", strlen("cpus=")) == 0) + cpus = cp + strlen("cpus="); + } + + if (id == -1) { + EPRINTLN("Missing NUMA domain ID in '%s'", opt); + goto out; + } + + snprintf(pathbuf, sizeof(pathbuf), "domains.%d", id); + nvl = find_config_node(pathbuf); + if (nvl == NULL) + nvl = create_config_node(pathbuf); + if (size != NULL) + set_config_value_node(nvl, "size", size); + if (domain_policy != NULL) + set_config_value_node(nvl, "domain_policy", domain_policy); + if (cpus != NULL) + set_config_value_node(nvl, "cpus", cpus); + + free(tofree); + return (0); + +out: + free(tofree); + return (-1); +} + +static void +calc_mem_affinity(size_t vm_memsize) +{ + int i; + nvlist_t *nvl; + bool need_recalc; + const char *value; + struct vm_mem_domain *dom; + char pathbuf[64] = { 0 }; + + need_recalc = false; + for (i = 0; i < VM_MAXMEMDOM; i++) { + dom = &guest_domains[i]; + snprintf(pathbuf, sizeof(pathbuf), "domains.%d", i); + nvl = find_config_node(pathbuf); + if (nvl == NULL) { + break; + } + + value = get_config_value_node(nvl, "size"); + need_recalc |= value == NULL; + if (value != NULL && vm_parse_memsize(value, &dom->size)) { + errx(EX_USAGE, "invalid memsize for domain %d: '%s'", i, + value); + } + + dom->ds_mask = calloc(1, sizeof(domainset_t)); + if (dom->ds_mask == NULL) { + errx(EX_OSERR, "Failed to allocate domainset mask"); + } + dom->ds_size = sizeof(domainset_t); + value = get_config_value_node(nvl, "domain_policy"); + if (value == NULL) { + dom->ds_policy = DOMAINSET_POLICY_INVALID; + DOMAINSET_ZERO(dom->ds_mask); + } else if (domainset_parselist(value, dom->ds_mask, &dom->ds_policy) != + CPUSET_PARSE_OK) { + errx(EX_USAGE, "failed to parse domain policy '%s'", value); + } + } + + guest_ndomains = i; + if (guest_ndomains == 0) { + /* + * No domains were specified - create domain + * 0 holding all CPUs and memory. + */ + guest_ndomains = 1; + guest_domains[0].size = vm_memsize; + } else if (need_recalc) { + warnx("At least one domain memory size was not specified, distributing" + " total VM memory size across all domains"); + for (i = 0; i < guest_ndomains; i++) { + guest_domains[i].size = vm_memsize / guest_ndomains; + } + } +} + /* * Set the sockets, cores, threads, and guest_cpus variables based on * the configured topology. @@ -340,6 +459,56 @@ build_vcpumaps(void) } } +static void +set_vcpu_affinities(void) +{ + int cpu, error; + nvlist_t *nvl = NULL; + cpuset_t cpus; + const char *value; + char pathbuf[64] = { 0 }; + + for (int dom = 0; dom < guest_ndomains; dom++) { + snprintf(pathbuf, sizeof(pathbuf), "domains.%d", dom); + nvl = find_config_node(pathbuf); + if (nvl == NULL) + break; + + value = get_config_value_node(nvl, "cpus"); + if (value == NULL) { + EPRINTLN("Missing CPU set for domain %d", dom); + exit(4); + } + + parse_cpuset(dom, value, &cpus); + CPU_FOREACH_ISSET(cpu, &cpus) { + error = acpi_add_vcpu_affinity(cpu, dom); + if (error) { + EPRINTLN( + "Unable to set vCPU %d affinity for domain %d: %s", + cpu, dom, strerror(errno)); + exit(4); + } + } + } + if (guest_ndomains > 1 || nvl != NULL) + return; + + /* + * If we're dealing with one domain and no cpuset was provided, create a + * default one holding all cpus. + */ + for (cpu = 0; cpu < guest_ncpus; cpu++) { + error = acpi_add_vcpu_affinity(cpu, 0); + if (error) { + EPRINTLN( + "Unable to set vCPU %d affinity for domain %d: %s", + cpu, 0, strerror(errno)); + exit(4); + } + } +} + void * paddr_guest2host(struct vmctx *ctx, uintptr_t gaddr, size_t len) { @@ -713,18 +882,21 @@ main(int argc, char *argv[]) vcpu_info[vcpuid].vcpu = vm_vcpu_open(ctx, vcpuid); } + calc_mem_affinity(memsize); memflags = 0; if (get_config_bool_default("memory.wired", false)) memflags |= VM_MEM_F_WIRED; if (get_config_bool_default("memory.guest_in_core", false)) memflags |= VM_MEM_F_INCORE; vm_set_memflags(ctx, memflags); - error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); + error = vm_setup_memory_domains(ctx, VM_MMAP_ALL, guest_domains, + guest_ndomains); if (error) { fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(4); } + set_vcpu_affinities(); init_mem(guest_ncpus); init_bootrom(ctx); if (bhyve_init_platform(ctx, bsp) != 0) diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h index 005de6dc5410..0a7bbd72a19c 100644 --- a/usr.sbin/bhyve/bhyverun.h +++ b/usr.sbin/bhyve/bhyverun.h @@ -73,6 +73,7 @@ void bhyve_parse_gdb_options(const char *opt); #endif int bhyve_pincpu_parse(const char *opt); int bhyve_topology_parse(const char *opt); +int bhyve_numa_parse(const char *opt); void bhyve_init_vcpu(struct vcpu *vcpu); void bhyve_start_vcpu(struct vcpu *vcpu, bool bsp); diff --git a/usr.sbin/bhyve/bootrom.c b/usr.sbin/bhyve/bootrom.c index e4adaca55947..339974cb2017 100644 --- a/usr.sbin/bhyve/bootrom.c +++ b/usr.sbin/bhyve/bootrom.c @@ -31,6 +31,7 @@ #include <sys/mman.h> #include <sys/stat.h> +#include <dev/vmm/vmm_mem.h> #include <machine/vmm.h> #include <err.h> diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 2f04a488d9c1..9d6060e3e254 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -42,6 +42,7 @@ #include <stdbool.h> #include <sysexits.h> +#include <dev/vmm/vmm_mem.h> #include <machine/vmm.h> #include <machine/vmm_snapshot.h> #include <vmmapi.h> diff --git a/usr.sbin/bhyve/pci_fbuf.c b/usr.sbin/bhyve/pci_fbuf.c index 125428e0b772..1e3ec77c15b0 100644 --- a/usr.sbin/bhyve/pci_fbuf.c +++ b/usr.sbin/bhyve/pci_fbuf.c @@ -29,6 +29,7 @@ #include <sys/types.h> #include <sys/mman.h> +#include <dev/vmm/vmm_mem.h> #include <machine/vmm.h> #include <machine/vmm_snapshot.h> #include <vmmapi.h> diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index 9d38ae9168a1..a82078f6e036 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -38,6 +38,7 @@ #include <dev/io/iodev.h> #include <dev/pci/pcireg.h> +#include <dev/vmm/vmm_mem.h> #include <vm/vm.h> diff --git a/usr.sbin/bhyve/tpm_ppi_qemu.c b/usr.sbin/bhyve/tpm_ppi_qemu.c index 01b8493e7273..6974b574b983 100644 --- a/usr.sbin/bhyve/tpm_ppi_qemu.c +++ b/usr.sbin/bhyve/tpm_ppi_qemu.c @@ -207,7 +207,7 @@ tpm_ppi_write_dsdt_regions(void *sc __unused) * Used for TCG Platform Reset Attack Mitigation */ dsdt_line("OperationRegion(TPP3, SystemMemory, 0x%8x, 1)", - TPM_PPI_ADDRESS + sizeof(struct tpm_ppi_qemu)); + TPM_PPI_ADDRESS + (uint32_t)sizeof(struct tpm_ppi_qemu)); dsdt_line("Field(TPP3, ByteAcc, NoLock, Preserve)"); dsdt_line("{"); dsdt_line(" MOVV, 8,"); diff --git a/usr.sbin/bsdinstall/bsdinstall.8 b/usr.sbin/bsdinstall/bsdinstall.8 index 8fadacab9189..181abdcf9d05 100644 --- a/usr.sbin/bsdinstall/bsdinstall.8 +++ b/usr.sbin/bsdinstall/bsdinstall.8 @@ -451,7 +451,7 @@ Each option must be preceded by the -O flag to be taken into consideration or the pool will not be created due to errors using the command .Cm zpool . Default: -.Dq Li "-O compress=lz4 -O atime=off" +.Dq Li "-O compression=on -O atime=off" .It Ev ZFSBOOT_BEROOT_NAME Name for the boot environment parent dataset. This is a non-mountable dataset meant to be a parent dataset where different diff --git a/usr.sbin/bsdinstall/scripts/bootconfig b/usr.sbin/bsdinstall/scripts/bootconfig index 9b330801e409..41243ad14b9b 100755 --- a/usr.sbin/bsdinstall/scripts/bootconfig +++ b/usr.sbin/bsdinstall/scripts/bootconfig @@ -74,7 +74,7 @@ update_uefi_bootentry() fi $DIALOG --backtitle "$OSNAME Installer" --title 'Boot Configuration' \ - --yesno "There are multiple \"$OSNAME\" EFI boot entries. Would you like to remove them all and add a new one?" 0 0 + --yesno "One or more \"$OSNAME\" EFI boot manager entries already exist. Would you like to remove them all and add a new one?" 0 0 if [ $? -eq $DIALOG_OK ]; then for entry in $(efibootmgr | awk "\$NF == \"$EFI_LABEL_NAME\" { sub(/.*Boot/,\"\", \$1); sub(/\*/,\"\", \$1); print \$1 }"); do efibootmgr -B -b ${entry} diff --git a/usr.sbin/bsdinstall/scripts/zfsboot b/usr.sbin/bsdinstall/scripts/zfsboot index 60feec28e888..a3c1e2ddb89f 100755 --- a/usr.sbin/bsdinstall/scripts/zfsboot +++ b/usr.sbin/bsdinstall/scripts/zfsboot @@ -51,7 +51,7 @@ f_include $BSDCFG_SHARE/variable.subr # # Default options to use when creating zroot pool # -: ${ZFSBOOT_POOL_CREATE_OPTIONS:=-O compress=lz4 -O atime=off} +: ${ZFSBOOT_POOL_CREATE_OPTIONS:=-O compression=on -O atime=off} # # Default name for the boot environment parent dataset diff --git a/usr.sbin/chroot/chroot.8 b/usr.sbin/chroot/chroot.8 index f26b7e937da9..4a1a5a396631 100644 --- a/usr.sbin/chroot/chroot.8 +++ b/usr.sbin/chroot/chroot.8 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd July 20, 2021 +.Dd July 25, 2025 .Dt CHROOT 8 .Os .Sh NAME @@ -52,13 +52,15 @@ or an interactive copy of the user's login shell. The options are as follows: .Bl -tag -width "-G group[,group ...]" .It Fl G Ar group Ns Op Cm \&, Ns Ar group ... -Run the command with the permissions of the specified groups. +Run the command with the specified groups as supplementary groups. .It Fl g Ar group -Run the command with the permissions of the specified -.Ar group . +Run the command with the specified +.Ar group +as the real, effective and saved groups. .It Fl u Ar user -Run the command as the -.Ar user . +Run the command with the specified +.Ar user +as the real, effective and saved users. .It Fl n Use the .Dv PROC_NO_NEW_PRIVS_CTL diff --git a/usr.sbin/makefs/zfs/dsl.c b/usr.sbin/makefs/zfs/dsl.c index 8a8cee7c82b2..1977521d7f92 100644 --- a/usr.sbin/makefs/zfs/dsl.c +++ b/usr.sbin/makefs/zfs/dsl.c @@ -119,7 +119,7 @@ dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) if (nvlist_find_string(pdir->propsnv, "mountpoint", &tmp) == 0) { - easprintf(&mountpoint, "%s%s%s", tmp, + (void)easprintf(&mountpoint, "%s%s%s", tmp, tmp[strlen(tmp) - 1] == '/' ? "" : "/", origmountpoint); free(tmp); @@ -127,7 +127,7 @@ dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) break; } - easprintf(&mountpoint, "%s/%s", pdir->name, + (void)easprintf(&mountpoint, "%s/%s", pdir->name, origmountpoint); free(origmountpoint); } @@ -175,22 +175,22 @@ dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key, "the root path `%s'", val, zfs->rootpath); } } - nvlist_add_string(nvl, key, val); + (void)nvlist_add_string(nvl, key, val); } else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 || strcmp(key, "setuid") == 0) { if (strcmp(val, "on") == 0) - nvlist_add_uint64(nvl, key, 1); + (void)nvlist_add_uint64(nvl, key, 1); else if (strcmp(val, "off") == 0) - nvlist_add_uint64(nvl, key, 0); + (void)nvlist_add_uint64(nvl, key, 0); else errx(1, "invalid value `%s' for %s", val, key); } else if (strcmp(key, "canmount") == 0) { if (strcmp(val, "noauto") == 0) - nvlist_add_uint64(nvl, key, 2); + (void)nvlist_add_uint64(nvl, key, 2); else if (strcmp(val, "on") == 0) - nvlist_add_uint64(nvl, key, 1); + (void)nvlist_add_uint64(nvl, key, 1); else if (strcmp(val, "off") == 0) - nvlist_add_uint64(nvl, key, 0); + (void)nvlist_add_uint64(nvl, key, 0); else errx(1, "invalid value `%s' for %s", val, key); } else if (strcmp(key, "compression") == 0) { @@ -237,7 +237,7 @@ dsl_metadir_alloc(zfs_opt_t *zfs, const char *name) zfs_dsl_dir_t *dir; char *path; - easprintf(&path, "%s/%s", zfs->poolname, name); + (void)easprintf(&path, "%s/%s", zfs->poolname, name); dir = dsl_dir_alloc(zfs, path); free(path); return (dir); @@ -322,11 +322,11 @@ dsl_init(zfs_opt_t *zfs) * user didn't override the defaults. */ if (nvpair_find(zfs->rootdsldir->propsnv, "compression") == NULL) { - nvlist_add_uint64(zfs->rootdsldir->propsnv, "compression", - ZIO_COMPRESS_OFF); + (void)nvlist_add_uint64(zfs->rootdsldir->propsnv, + "compression", ZIO_COMPRESS_OFF); } if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) { - nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", + (void)nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", zfs->rootpath); } } @@ -431,6 +431,7 @@ dsl_dir_alloc(zfs_opt_t *zfs, const char *name) STAILQ_INIT(&l); STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next); origname = dirname = nextdir = estrdup(name); + parent = NULL; for (lp = &l;; lp = &parent->children) { dirname = strsep(&nextdir, "/"); if (nextdir == NULL) diff --git a/usr.sbin/makefs/zfs/fs.c b/usr.sbin/makefs/zfs/fs.c index 073dce3ce697..75f6e30e1500 100644 --- a/usr.sbin/makefs/zfs/fs.c +++ b/usr.sbin/makefs/zfs/fs.c @@ -28,6 +28,7 @@ * SUCH DAMAGE. */ +#include <sys/param.h> #include <sys/stat.h> #include <assert.h> @@ -383,22 +384,34 @@ fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur, links = 1; /* .. */ objsize = 1; /* .. */ - /* - * The size of a ZPL directory is the number of entries - * (including "." and ".."), and the link count is the number of - * entries which are directories (including "." and ".."). - */ - for (fsnode *c = fsnode_isroot(cur) ? cur->next : cur->child; - c != NULL; c = c->next) { - switch (c->type) { - case S_IFDIR: - links++; - /* FALLTHROUGH */ - case S_IFREG: - case S_IFLNK: - objsize++; - break; + if ((cur->inode->flags & FI_ROOT) == 0 ) { + /* + * The size of a ZPL directory is the number of entries + * (including "." and ".."), and the link count is the + * number of entries which are directories + * (including "." and ".."). + */ + for (fsnode *c = + fsnode_isroot(cur) ? cur->next : cur->child; + c != NULL; c = c->next) { + switch (c->type) { + case S_IFDIR: + links++; + /* FALLTHROUGH */ + case S_IFREG: + case S_IFLNK: + objsize++; + break; + } } + } else { + /* + * Root directory children do belong to + * different dataset and this directory is + * empty in the current objset. + */ + links++; /* . */ + objsize++; /* . */ } /* The root directory is its own parent. */ @@ -734,7 +747,7 @@ fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index, assert(sizeof(layout[0]) == 2); - snprintf(ti, sizeof(ti), "%u", index); + (void)snprintf(ti, sizeof(ti), "%u", index); zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt, (const uint8_t *)layout); } diff --git a/usr.sbin/makefs/zfs/objset.c b/usr.sbin/makefs/zfs/objset.c index 6be732db477a..f47953ac4339 100644 --- a/usr.sbin/makefs/zfs/objset.c +++ b/usr.sbin/makefs/zfs/objset.c @@ -28,6 +28,7 @@ * SUCH DAMAGE. */ +#include <sys/param.h> #include <assert.h> #include <stdlib.h> #include <string.h> diff --git a/usr.sbin/makefs/zfs/vdev.c b/usr.sbin/makefs/zfs/vdev.c index ef9e681af2da..afcce402cb13 100644 --- a/usr.sbin/makefs/zfs/vdev.c +++ b/usr.sbin/makefs/zfs/vdev.c @@ -28,6 +28,7 @@ * SUCH DAMAGE. */ +#include <sys/param.h> #include <assert.h> #include <fcntl.h> #include <stdlib.h> diff --git a/usr.sbin/makefs/zfs/zap.c b/usr.sbin/makefs/zfs/zap.c index decf5fc6a473..316d1446cecf 100644 --- a/usr.sbin/makefs/zfs/zap.c +++ b/usr.sbin/makefs/zfs/zap.c @@ -28,7 +28,7 @@ * SUCH DAMAGE. */ -#include <sys/types.h> +#include <sys/param.h> #include <sys/endian.h> #include <assert.h> @@ -172,14 +172,14 @@ zap_add_uint64_self(zfs_zap_t *zap, uint64_t val) { char name[32]; - snprintf(name, sizeof(name), "%jx", (uintmax_t)val); + (void)snprintf(name, sizeof(name), "%jx", (uintmax_t)val); zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val); } void zap_add_string(zfs_zap_t *zap, const char *name, const char *val) { - zap_add(zap, name, 1, strlen(val) + 1, val); + zap_add(zap, name, 1, strlen(val) + 1, (const uint8_t *)val); } bool @@ -221,7 +221,8 @@ zap_micro_write(zfs_opt_t *zfs, zfs_zap_t *zap) STAILQ_FOREACH(ent, &zap->kvps, next) { memcpy(&ment->mze_value, ent->valp, ent->intsz * ent->intcnt); ment->mze_cd = cd++; - strlcpy(ment->mze_name, ent->name, sizeof(ment->mze_name)); + (void)strlcpy(ment->mze_name, ent->name, + sizeof(ment->mze_name)); ment++; } @@ -247,6 +248,7 @@ zap_fat_write_array_chunk(zap_leaf_t *l, uint16_t li, size_t sz, struct zap_leaf_array *la; assert(sz <= ZAP_MAXVALUELEN); + assert(sz > 0); for (uint16_t n, resid = sz; resid > 0; resid -= n, val += n, li++) { n = MIN(resid, ZAP_LEAF_ARRAY_BYTES); @@ -503,7 +505,8 @@ zap_fat_write(zfs_opt_t *zfs, zfs_zap_t *zap) le->le_value_intlen = ent->intsz; le->le_value_numints = ent->intcnt; le->le_hash = ent->hash; - zap_fat_write_array_chunk(&l, *lptr + 1, namelen, ent->name); + zap_fat_write_array_chunk(&l, *lptr + 1, namelen, + (uint8_t *)ent->name); zap_fat_write_array_chunk(&l, *lptr + 1 + nnamechunks, ent->intcnt * ent->intsz, ent->valp); } diff --git a/usr.sbin/mfiutil/Makefile b/usr.sbin/mfiutil/Makefile index 85b66d4b6f49..49c0e688e8e2 100644 --- a/usr.sbin/mfiutil/Makefile +++ b/usr.sbin/mfiutil/Makefile @@ -4,7 +4,7 @@ LINKS= ${BINDIR}/mfiutil ${BINDIR}/mrsasutil SRCS= mfiutil.c mfi_bbu.c mfi_cmd.c mfi_config.c mfi_drive.c mfi_evt.c \ mfi_flash.c mfi_patrol.c mfi_show.c mfi_volume.c mfi_foreign.c \ mfi_properties.c -MAN8= mfiutil.8 +MAN= mfiutil.8 MLINKS= mfiutil.8 mrsasutil.8 CFLAGS.gcc+= -fno-builtin-strftime diff --git a/usr.sbin/rwhod/rwhod.c b/usr.sbin/rwhod/rwhod.c index 237663eef74d..b99e4ea74b5a 100644 --- a/usr.sbin/rwhod/rwhod.c +++ b/usr.sbin/rwhod/rwhod.c @@ -246,12 +246,12 @@ main(int argc, char *argv[]) syslog(LOG_ERR, "bind: %m"); exit(1); } - if (setgid(unpriv_gid) != 0) { - syslog(LOG_ERR, "setgid: %m"); + if (setgroups(0, NULL) != 0) { + syslog(LOG_ERR, "setgroups: %m"); exit(1); } - if (setgroups(1, &unpriv_gid) != 0) { /* XXX BOGUS groups[0] = egid */ - syslog(LOG_ERR, "setgroups: %m"); + if (setgid(unpriv_gid) != 0) { + syslog(LOG_ERR, "setgid: %m"); exit(1); } if (setuid(unpriv_uid) != 0) { diff --git a/usr.sbin/spi/Makefile b/usr.sbin/spi/Makefile index 73f5af6fc3cc..fee967f6a234 100644 --- a/usr.sbin/spi/Makefile +++ b/usr.sbin/spi/Makefile @@ -2,6 +2,6 @@ PROG= spi -MAN8= spi.8 +MAN= spi.8 .include <bsd.prog.mk> diff --git a/usr.sbin/ypldap/ldapclient.c b/usr.sbin/ypldap/ldapclient.c index acd4410d939f..a246a25a9605 100644 --- a/usr.sbin/ypldap/ldapclient.c +++ b/usr.sbin/ypldap/ldapclient.c @@ -385,7 +385,7 @@ ldapclient(int pipe_main2client[2]) ypldap_process = PROC_CLIENT; #ifndef DEBUG - if (setgroups(1, &pw->pw_gid) || + if (setgroups(0, NULL) || setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) fatal("cannot drop privileges"); diff --git a/usr.sbin/ypldap/ypldap.c b/usr.sbin/ypldap/ypldap.c index 01b5955aa822..b9e938227831 100644 --- a/usr.sbin/ypldap/ypldap.c +++ b/usr.sbin/ypldap/ypldap.c @@ -602,7 +602,7 @@ main(int argc, char *argv[]) fatal("getpwnam"); #ifndef DEBUG - if (setgroups(1, &pw->pw_gid) || + if (setgroups(0, NULL) || setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) fatal("cannot drop privileges"); diff --git a/usr.sbin/ypldap/ypldap_dns.c b/usr.sbin/ypldap/ypldap_dns.c index 09ce636ebdc8..9dbbf26d237b 100644 --- a/usr.sbin/ypldap/ypldap_dns.c +++ b/usr.sbin/ypldap/ypldap_dns.c @@ -91,7 +91,7 @@ ypldap_dns(int pipe_ntp[2], struct passwd *pw) setproctitle("dns engine"); close(pipe_ntp[0]); - if (setgroups(1, &pw->pw_gid) || + if (setgroups(0, NULL) || setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) || setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid)) fatal("can't drop privileges"); |