diff options
75 files changed, 1537 insertions, 330 deletions
@@ -103,7 +103,7 @@ # # See src/UPDATING `COMMON ITEMS' for more complete information. # -# If TARGET=machine (e.g. powerpc, arm64, ...) is specified you can +# If TARGET=machine (e.g. powerpc64, arm64, ...) is specified you can # cross build world for other machine types using the buildworld target, # and once the world is built you can cross build a kernel using the # buildkernel target. @@ -530,8 +530,7 @@ worlds: .PHONY # Don't build rarely used, semi-supported architectures unless requested. # .if defined(EXTRA_TARGETS) -# powerpcspe excluded from main list until clang fixed -EXTRA_ARCHES_powerpc= powerpcspe +EXTRA_ARCHES_powerpc= powerpc powerpcspe .endif TARGETS?= ${TARGET_MACHINE_LIST} _UNIVERSE_TARGETS= ${TARGETS} @@ -546,8 +545,7 @@ TOOLCHAINS_amd64= amd64-${_GCC_VERSION} TOOLCHAINS_arm= armv7-${_GCC_VERSION} TOOLCHAINS_arm64= aarch64-${_GCC_VERSION} TOOLCHAINS_i386= i386-${_GCC_VERSION} -TOOLCHAINS_powerpc= powerpc-${_GCC_VERSION} powerpc64-${_GCC_VERSION} -TOOLCHAIN_powerpc64= powerpc64-${_GCC_VERSION} +TOOLCHAINS_powerpc= powerpc64-${_GCC_VERSION} TOOLCHAINS_riscv= riscv64-${_GCC_VERSION} .endif diff --git a/cddl/contrib/opensolaris/cmd/dtrace/dtrace.1 b/cddl/contrib/opensolaris/cmd/dtrace/dtrace.1 index 7a98baf39776..d44ee5023e78 100644 --- a/cddl/contrib/opensolaris/cmd/dtrace/dtrace.1 +++ b/cddl/contrib/opensolaris/cmd/dtrace/dtrace.1 @@ -20,7 +20,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 12, 2025 +.Dd July 14, 2025 .Dt DTRACE 1 .Os .Sh NAME @@ -617,6 +617,52 @@ Same as the flag. .It Sy dynvarsize Ns = Ns Ar size Size of the dynamic variable space. +.Sm off +.It Sy evaltime = Cm exec | preinit | postinit | main +.Sm on +Process create mode. +When using +.Fl c Ar cmd +to start a command, +.Nm +will first stop the newly started +.Ar cmd , +evaluate the +.Xr d 7 +program, +and then resume the +.Ar cmd . +The +.Cm evaltime +option controls the exact moment when this happens. +.Pp +The following table describes supported modes. +.Bl -column -offset indent "postinit" "D Program Evaluation Time" +.It Sy Mode Ta Sy D Program Evaluation Time +.It Cm exec Ta +Right at the first instruction of the command +.Ar cmd +execution. +.It Cm preinit Ta +Before +.Xr elf 5 Ap s +.Dq .init +sections. +.It Cm postinit Ta +After +.Xr elf 5 Ap s +.Dq .init +sections. +Default on +.Fx . +.It Cm main Ta +Before the first instruction of the +.Fn main +function. +.El +.Pp +Usually, there is no reason to change the default mode, +but it might be handy in situations such as shared library tracing. .It Sy flowindent Turn on flow indentation. Same as the @@ -1223,17 +1269,20 @@ Invalid command line options or arguments were specified. .Xr cpp 1 , .Xr dwatch 1 , .Xr dtrace_audit 4 , +.Xr dtrace_dtrace 4 , .Xr dtrace_io 4 , .Xr dtrace_ip 4 , .Xr dtrace_kinst 4 , .Xr dtrace_lockstat 4 , .Xr dtrace_proc 4 , +.Xr dtrace_profile 4 , .Xr dtrace_sched 4 , .Xr dtrace_sctp 4 , .Xr dtrace_tcp 4 , .Xr dtrace_udp 4 , .Xr dtrace_udplite 4 , .Xr elf 5 , +.Xr d 7 , .Xr tracing 7 , .Xr SDT 9 .Rs diff --git a/lib/libc/powerpc64/gen/_ctx_start.S b/lib/libc/powerpc64/gen/_ctx_start.S index c2f8abfd6486..98225f9c1138 100644 --- a/lib/libc/powerpc64/gen/_ctx_start.S +++ b/lib/libc/powerpc64/gen/_ctx_start.S @@ -34,6 +34,16 @@ ld %r2,8(%r14) ld %r14,0(%r14) #else + /* + * The stack frame was already set up in makecontext(), + * so we can safely use the guaranteed fields here. + * + * Note we do step on the allocated stack frame's TOC, + * but since we never return from this function (i.e. + * never restore the stack frame) this should be safe. + */ + std %r2,24(%r1) /* save TOC */ + /* Load global entry point */ mr %r12,%r14 #endif @@ -41,6 +51,10 @@ blrl /* branch to start function */ mr %r3,%r15 /* pass pointer to ucontext as argument */ nop +#if defined(_CALL_ELF) && _CALL_ELF != 1 + /* Restore TOC */ + ld %r2,24(%r1) +#endif bl CNAME(_ctx_done) /* branch to ctxt completion func */ /* * we should never return from the diff --git a/lib/libc/powerpc64/gen/makecontext.c b/lib/libc/powerpc64/gen/makecontext.c index 75c2d40bdd60..9e3a976fa1bd 100644 --- a/lib/libc/powerpc64/gen/makecontext.c +++ b/lib/libc/powerpc64/gen/makecontext.c @@ -78,7 +78,7 @@ __makecontext(ucontext_t *ucp, void (*start)(void), int argc, ...) */ stackargs = (argc > 8) ? argc - 8 : 0; sp = (char *) ucp->uc_stack.ss_sp + ucp->uc_stack.ss_size - - sizeof(uintptr_t)*(stackargs + 2); + - sizeof(uintptr_t)*(stackargs + 6); sp = (char *)((uintptr_t)sp & ~0x1f); mc = &ucp->uc_mcontext; @@ -119,6 +119,7 @@ __makecontext(ucontext_t *ucp, void (*start)(void), int argc, ...) mc->mc_srr0 = *(uintptr_t *)_ctx_start; #else mc->mc_srr0 = (uintptr_t) _ctx_start; + mc->mc_gpr[12] = (uintptr_t) _ctx_start;/* required for prologue */ #endif mc->mc_gpr[1] = (uintptr_t) sp; /* new stack pointer */ mc->mc_gpr[14] = (uintptr_t) start; /* r14 <- start */ diff --git a/lib/libc/tests/sys/Makefile b/lib/libc/tests/sys/Makefile index 89d341ff400a..88f8191a16eb 100644 --- a/lib/libc/tests/sys/Makefile +++ b/lib/libc/tests/sys/Makefile @@ -7,11 +7,11 @@ ATF_TESTS_C+= brk_test .endif ATF_TESTS_C+= cpuset_test ATF_TESTS_C+= errno_test +ATF_TESTS_C+= swapcontext_test ATF_TESTS_C+= queue_test ATF_TESTS_C+= sendfile_test -# TODO: clone, lwp_create, lwp_ctl, posix_fadvise, recvmmsg, -# swapcontext +# TODO: clone, lwp_create, lwp_ctl, posix_fadvise, recvmmsg NETBSD_ATF_TESTS_C+= access_test NETBSD_ATF_TESTS_C+= bind_test NETBSD_ATF_TESTS_C+= chroot_test diff --git a/lib/libc/tests/sys/swapcontext_test.c b/lib/libc/tests/sys/swapcontext_test.c new file mode 100644 index 000000000000..f341a746e515 --- /dev/null +++ b/lib/libc/tests/sys/swapcontext_test.c @@ -0,0 +1,63 @@ +/*- + * Copyright (c) 2025 Raptor Computing Systems, LLC + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <stdio.h> +#include <stdlib.h> +#include <ucontext.h> +#include <errno.h> + +#include <atf-c.h> + +#define STACK_SIZE (64ull << 10) + +static volatile int callback_reached = 0; + +static ucontext_t uctx_save, uctx_switch; + +static void swapcontext_callback() +{ + // Increment callback reached variable + // If this is called multiple times, we will fail the test + // If this is not called at all, we will fail the test + callback_reached++; +} + +ATF_TC(swapcontext_basic); +ATF_TC_HEAD(swapcontext_basic, tc) +{ + atf_tc_set_md_var(tc, "descr", + "Verify basic functionality of swapcontext"); +} + +ATF_TC_BODY(swapcontext_basic, tc) +{ + char *stack; + int res; + + stack = malloc(STACK_SIZE); + ATF_REQUIRE_MSG(stack != NULL, "malloc failed: %s", strerror(errno)); + res = getcontext(&uctx_switch); + ATF_REQUIRE_MSG(res == 0, "getcontext failed: %s", strerror(errno)); + + uctx_switch.uc_stack.ss_sp = stack; + uctx_switch.uc_stack.ss_size = STACK_SIZE; + uctx_switch.uc_link = &uctx_save; + makecontext(&uctx_switch, swapcontext_callback, 0); + + res = swapcontext(&uctx_save, &uctx_switch); + + ATF_REQUIRE_MSG(res == 0, "swapcontext failed: %s", strerror(errno)); + ATF_REQUIRE_MSG(callback_reached == 1, + "callback failed, reached %d times", callback_reached); +} + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, swapcontext_basic); + + return (atf_no_error()); +} + diff --git a/lib/ncurses/tinfo/Makefile b/lib/ncurses/tinfo/Makefile index 920bd6e5559c..476df54bb72a 100644 --- a/lib/ncurses/tinfo/Makefile +++ b/lib/ncurses/tinfo/Makefile @@ -259,10 +259,18 @@ term.h: MKterm.h.awk edit_cfg.sh Caps Caps-ncurses sh ${NCURSES_DIR}/include/edit_cfg.sh ${NCURSES_CFG_H} $@.new mv -f $@.new $@ +# Avoid hard-coding absolute source paths if requested. +.if ${MK_REPRODUCIBLE_BUILD} != "no" +NCURSES_SRCTOP=/usr/src +.else +NCURSES_SRCTOP=${SRCTOP} +.endif + curses.h: curses.head MKkey_defs.sh Caps Caps-ncurses cat curses.head > $@.new AWK=${AWK} _POSIX2_VERSION=199209 sh ${NCURSES_DIR}/include/MKkey_defs.sh \ ${NCURSES_DIR}/include/Caps ${NCURSES_DIR}/include/Caps-ncurses >> $@.new + sed -i '' 's|${SRCTOP}|${NCURSES_SRCTOP}|g' $@.new cat ${NCURSES_DIR}/include/curses.wide >> $@.new cat ${NCURSES_DIR}/include/curses.tail >> $@.new mv -f $@.new $@ @@ -387,6 +395,7 @@ unctrl.h: unctrl.h.in terminfo.5: MKterminfo.sh terminfo.head Caps sh ${NCURSES_DIR}/man/MKterminfo.sh ${NCURSES_DIR}/man/terminfo.head \ ${NCURSES_DIR}/include/Caps ${NCURSES_DIR}/man/terminfo.tail >$@ + sed -i '' 's|${SRCTOP}|${NCURSES_SRCTOP}|g' $@ CLEANFILES+= terminfo.5 diff --git a/libexec/rc/rc b/libexec/rc/rc index 5ed47d6eac20..db3c3e20ab44 100644 --- a/libexec/rc/rc +++ b/libexec/rc/rc @@ -83,9 +83,9 @@ fi trap "_rc_conf_loaded=false; load_rc_config" ALRM skip="-s nostart" -if [ `/sbin/sysctl -n security.jail.jailed` -eq 1 ]; then +if check_jail jailed; then skip="$skip -s nojail" - if [ `/sbin/sysctl -n security.jail.vnet` -ne 1 ]; then + if ! check_jail vnet; then skip="$skip -s nojailvnet" fi fi diff --git a/libexec/rc/rc.d/hostname b/libexec/rc/rc.d/hostname index 8b26c4f60633..0bc31ccd787e 100755 --- a/libexec/rc/rc.d/hostname +++ b/libexec/rc/rc.d/hostname @@ -42,8 +42,8 @@ hostname_start() # If we are not inside a jail, set the host name. # If we are inside a jail, set the host name if it is permitted. # - if [ `$SYSCTL_N security.jail.jailed` -eq 1 ]; then - if [ `$SYSCTL_N security.jail.set_hostname_allowed` -eq 0 ]; then + if check_jail jailed; then + if ! check_jail set_hostname_allowed; then return fi else diff --git a/libexec/rc/rc.d/routing b/libexec/rc/rc.d/routing index 893acb83cf4a..dd75604125a3 100755 --- a/libexec/rc/rc.d/routing +++ b/libexec/rc/rc.d/routing @@ -331,7 +331,7 @@ _check_dynamicrouting() # copied from /etc/rc skip="-s nostart" - if [ `/sbin/sysctl -n security.jail.jailed` -eq 1 ]; then + if check_jail jailed; then skip="$skip -s nojail" fi [ -n "$local_startup" ] && find_local_scripts_new diff --git a/libexec/rc/rc.d/zfs b/libexec/rc/rc.d/zfs index 26bf3046444b..f88f65c2ec18 100755 --- a/libexec/rc/rc.d/zfs +++ b/libexec/rc/rc.d/zfs @@ -18,7 +18,7 @@ required_modules="zfs" zfs_start_jail() { - if [ `$SYSCTL_N security.jail.mount_allowed` -eq 1 ]; then + if check_jail mount_allowed; then zfs mount -a fi } @@ -34,7 +34,7 @@ zfs_start_main() zfs_start() { - if [ `$SYSCTL_N security.jail.jailed` -eq 1 ]; then + if check_jail jailed; then zfs_start_jail else zfs_start_main @@ -54,7 +54,7 @@ zfs_poststart() zfs_stop_jail() { - if [ `$SYSCTL_N security.jail.mount_allowed` -eq 1 ]; then + if check_jail mount_allowed; then zfs unmount -a fi } @@ -67,7 +67,7 @@ zfs_stop_main() zfs_stop() { - if [ `$SYSCTL_N security.jail.jailed` -eq 1 ]; then + if check_jail jailed; then zfs_stop_jail else zfs_stop_main diff --git a/libexec/rc/rc.d/zfsbe b/libexec/rc/rc.d/zfsbe index f61f3bf097f0..22d53f219679 100755 --- a/libexec/rc/rc.d/zfsbe +++ b/libexec/rc/rc.d/zfsbe @@ -64,7 +64,7 @@ activate_bootonce() be_start() { - if [ `$SYSCTL_N security.jail.jailed` -eq 1 ]; then + if check_jail jailed; then : else mount -p | while read _dev _mp _type _rest; do diff --git a/libexec/rc/rc.shutdown b/libexec/rc/rc.shutdown index 18f67f5ca124..3dfd7a7e0936 100644 --- a/libexec/rc/rc.shutdown +++ b/libexec/rc/rc.shutdown @@ -83,9 +83,9 @@ fi # and perform the operation # rcorder_opts="-k shutdown" -if [ `/sbin/sysctl -n security.jail.jailed` -eq 1 ]; then +if check_jail jailed; then rcorder_opts="$rcorder_opts -s nojail" - if [ `/sbin/sysctl -n security.jail.vnet` -ne 1 ]; then + if ! check_jail vnet; then rcorder_opts="$rcorder_opts -s nojailvnet" fi fi diff --git a/libexec/rc/rc.subr b/libexec/rc/rc.subr index 2eaf336b5220..a2e2e98a5087 100644 --- a/libexec/rc/rc.subr +++ b/libexec/rc/rc.subr @@ -1689,7 +1689,7 @@ $_cpusetcmd $command $rc_flags $command_args" start) # We cannot use protect(1) inside jails. if [ -n "$_oomprotect" ] && [ -f "${PROTECT}" ] && - [ "$(sysctl -n security.jail.jailed)" -eq 0 ]; then + ! check_jail jailed; then [ -z "${rc_pid}" ] && eval $_pidcmd case $_oomprotect in [Aa][Ll][Ll]) @@ -2671,7 +2671,7 @@ check_required_after() } # check_jail mib -# Return true if security.jail.$mib exists and set to 1. +# Return true if security.jail.$mib exists and is set to 1. check_jail() { diff --git a/libexec/rc/tests/rc_subr_test.sh b/libexec/rc/tests/rc_subr_test.sh index 60f77c2c2de3..9931389e7a02 100644 --- a/libexec/rc/tests/rc_subr_test.sh +++ b/libexec/rc/tests/rc_subr_test.sh @@ -52,7 +52,7 @@ oomprotect_all_body() _rc_arg="$4" setvar "${name}_oomprotect" all command="/usr/sbin/daemon" - command_args="-P $pidfile -p $_childpidfile -- /bin/sleep 5" + command_args="-P $pidfile -p $_childpidfile -- /bin/sleep 60" run_rc_command "$_rc_arg" LITERAL @@ -92,7 +92,7 @@ oomprotect_yes_body() setvar "${name}_oomprotect" yes procname="/bin/sleep" command="/usr/sbin/daemon" - command_args="-p $pidfile -- $procname 5" + command_args="-p $pidfile -- $procname 60" run_rc_command "$_rc_arg" LITERAL diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 7c8a8f3afc45..5e60f00bc09f 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -980,11 +980,13 @@ _ccd.4= ccd.4 .if ${MK_CDDL} != "no" _dtrace_provs= dtrace_audit.4 \ + dtrace_dtrace.4 \ dtrace_io.4 \ dtrace_ip.4 \ dtrace_kinst.4 \ dtrace_lockstat.4 \ dtrace_proc.4 \ + dtrace_profile.4 \ dtrace_sched.4 \ dtrace_sctp.4 \ dtrace_tcp.4 \ diff --git a/share/man/man4/dtrace_dtrace.4 b/share/man/man4/dtrace_dtrace.4 new file mode 100644 index 000000000000..b8c31005b47e --- /dev/null +++ b/share/man/man4/dtrace_dtrace.4 @@ -0,0 +1,191 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Mateusz Piotrowski <0mp@FreeBSD.org> +.\" +.Dd July 14, 2025 +.Dt DTRACE_DTRACE 4 +.Os +.Sh NAME +.Nm dtrace_dtrace +.Nd a DTrace provider for BEGIN, END, and ERROR probes +.Sh SYNOPSIS +.Nm dtrace Ns Cm :::BEGIN +.Nm dtrace Ns Cm :::END +.Nm dtrace Ns Cm :::ERROR +.Sh DESCRIPTION +The +.Nm dtrace +provider implements three special probes related to the life cycle of the +DTrace program itself. +.Ss dtrace:::BEGIN +The +.Nm dtrace Ns Cm :::BEGIN +probe fires at the beginning of a +.Xr dtrace 1 , +program before tracing has begun. +It provides a convenient place for initializing variables +and printing column headers. +.Pp +Variables such as +.Va stack +or +.Va execname +cannot be relied upon in the execution context of the +.Nm dtrace Ns Cm :::BEGIN +probe. +.Ss dtrace:::END +The +.Nm dtrace Ns Cm :::END +probe fires at the end of a +.Xr dtrace 1 +program, when all tracing has stopped. +.Ss dtrace:::ERROR +The +.Nm dtrace Ns Cm :::ERROR +probe fires when an unexpected runtime error occurs in another probe. +.Pp +The following table describes the arguments to +.Nm dtrace Ns Cm :::ERROR . +.Bl -column -offset indent "Argument" "Definition" +.It Sy Argument Ta Sy Definition +.It Fa arg1 Ta Enabled probe identifier (EPID) +of the probe where the runtime error occurred +.It Fa arg2 Ta Index of the action statement that caused the error +.It Fa arg3 Ta DIF offset into the action if available (otherwise -1) +.It Fa arg4 Ta Fault type +.It Fa arg5 Ta Accessed address (or 0 if not applicable) when +.Va arg4 +is of fault type +.Dv DTRACEFLT_BADADDR , DTRACEFLT_BADALIGN , DTRACEFLT_KPRIV , +or +.Dv DTRACEFLT_UPRIV +.El +.Pp +The fault types are: +.Bl -tag -offset indent -width "DTRACEFLT_NOSCRATCH" -compact +.It Dv DTRACEFLT_UNKNOWN +Unknown fault +.It Dv DTRACEFLT_BADADDR +Bad address +.It Dv DTRACEFLT_BADALIGN +Bad alignment +.It Dv DTRACEFLT_ILLOP +Illegal operation +.It Dv DTRACEFLT_DIVZERO +Divide-by-zero +.It Dv DTRACEFLT_NOSCRATCH +Out of scratch space +.It Dv DTRACEFLT_KPRIV +Illegal kernel access +.It Dv DTRACEFLT_UPRIV +Illegal user access +.It Dv DTRACEFLT_TUPOFLOW +Tuple stack overflow +.It Dv DTRACEFLT_BADSTACK +Bad stack +.El +.Sh FILES +.Bl -tag -width '<sys/dtrace.h>' +.It In sys/dtrace.h +The header file containing the definitions of DTrace fault types. +.El +.Sh EXAMPLES +.Ss Example 1 : Custom Column Headers +The following script uses the +.Nm dtrace Ns Cm :::BEGIN +probe to print column headers. +Note the pragma line setting the +.Ql quiet +option to disable the default column headers. +.Bd -literal -offset 2n +#pragma D option quiet + +dtrace:::BEGIN +{ + printf(" %12s %-20s %-20s %s\en", + "DELTA(us)", "OLD", "NEW", "TIMESTAMP"); +} +.Ed +.Ss Example 2 : Handling Runtime Errors with dtrace:::ERROR +The following script causes a runtime error by dereferencing a pointer +on address +.Ad 19930908 +in the +.Cm BEGIN +probe. +As a result, the +.Cm ERROR +probe fires and prints out +.Dq Oops +along with the probe arguments. +At that point, the program ends and fires the +.Cm END +probe. +.\" It might look weird to define ERROR first, but that is on purpose. +.\" This way the probe IDs and EPIDs are a bit more mixed up +.\" and are easier to understand. +.Bd -literal -offset 2n +ERROR +{ + printf("Oops\en"); + printf("EPID (arg1): %d\en", arg1); + printf("Action index (arg2): %d\en", arg2); + printf("DIF offset (arg3): %d\en", arg3); + printf("Fault type (arg4): %d\en", arg4); + printf("Accessed address (arg5): %X\en", arg5); + exit(1); +} +BEGIN +{ + *(int *)0x19931101; +} +END { + printf("Bye"); +} +.Ed +.Pp +This script will result in the following output: +.Bd -literal -offset 2n +CPU ID FUNCTION:NAME + 2 3 :ERROR Oops +EPID (arg1): 2 +Action index (arg2): 1 +DIF offset (arg3): 16 +Fault type: 1 +arg5: 19931101 + +dtrace: error on enabled probe ID 2 (ID 1: dtrace:::BEGIN): invalid address (0x19931101) in action #1 at DIF offset 16 + 2 2 :END Bye +.Ed +.Sh SEE ALSO +.Xr dtrace 1 , +.Xr tracing 7 +.Rs +.%B The illumos Dynamic Tracing Guide +.%O Chapter dtrace Provider +.%D 2008 +.%U https://illumos.org/books/dtrace/chp-dtrace.html +.Re +.Sh AUTHORS +This manual page was written by +.An Mateusz Piotrowski Aq Mt 0mp@FreeBSD.org . +.Sh CAVEATS +The +.Nm dtrace Ns Cm :::ERROR +probe arguments cannot be accessed through the typed +.Va args[] +array. +.Pp +.Xr dtrace 1 +will not fire the +.Nm dtrace Ns Cm :::ERROR +probe recursively. +If an error occurs in one of the action statements of the +.Nm dtrace Ns Cm :::ERROR , +then +.Xr dtrace 1 +will abort further processing of +the +.Nm dtrace Ns Cm :::ERROR +probe's actions. diff --git a/share/man/man4/dtrace_profile.4 b/share/man/man4/dtrace_profile.4 new file mode 100644 index 000000000000..07f86663d60a --- /dev/null +++ b/share/man/man4/dtrace_profile.4 @@ -0,0 +1,129 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Mateusz Piotrowski <0mp@FreeBSD.org> +.\" +.Dd July 14, 2025 +.Dt DTRACE_PROFILE 4 +.Os +.Sh NAME +.Nm dtrace_profile +.Nd a DTrace provider for firing probes at a given time interval +.Sh SYNOPSIS +.Nm profile Ns Cm :::profile- Ns Ar rate Ns Op Ar unit +.Nm profile Ns Cm :::tick- Ns Ar rate Ns Op Ar unit +.Sh DESCRIPTION +The +.Nm profile +provider implements three special probes related to the life cycle of the +DTrace program itself. +.Ss Probes +The +.Nm profile Ns Cm :::profile +probes fire on all CPUs and are suitable for measuring the whole system +periodically. +.Pp +The +.Nm profile Ns Cm :::tick +probes fire on a single CPU, potentially a different one every time. +They are useful, e.g., for printing partial results periodically. +.Ss Rate and Time Units +The +.Nm profile +provider probes will fire at the specified +.Ar rate . +.Pp +The default unit is +.Cm hz . +The +.Nm profile +provider supports the following time units: +.Bl -column -offset indent "ns, nsec" "Definition" +.It Sy Time Unit Ta Sy Definition +.It Cm ns , nsec Ta nanoseconds +.It Cm us , usec Ta microseconds +.It Cm ms , msec Ta milliseconds +.It Cm s , sec Ta seconds +.It Cm m , min Ta minutes +.It Cm h , hour Ta hours +.It Cm d , day Ta days +.It Cm hz Ta Hertz (frequency per second) +.El +.Ss Probe Arguments +The arguments of the +.Nm profile +provider probes +are: +.Bl -tag -width arg0 +.It Va arg0 +The PC (program counter) in the kernel when the probe triggered, +or 0 if the process was not in the kernel at that time. +.It Va arg1 +The PC in the user process when the probe triggered, +or 0 if the process was in the kernel when the probe triggered. +.El +.Pp +Use arguments +.Va arg0 +and +.Va arg1 +to tell if the +.Nm profile +provider probe fired in the kernel or in the userspace context. +.Sh IMPLEMENTATION NOTES +The +.Xr sysctl 8 +variable +.Va kern.dtrace.profile.aframes +controls the number of skipped artificial frames for +the +.Nm profile +provider. +.Sh EXAMPLES +.Ss Example 1 : Profiling On-CPU Kernel Stack Traces +The following DTrace one-liner uses the +.Nm profile +provider to collect stack traces over 60 seconds. +.\" XXX: Keep on one line for easier copy-pasting. +.Bd -literal -offset indent +dtrace -x stackframes=100 -n 'profile-197 /arg0/ {@[stack()] = count();} tick-60s {exit(0);} +.Ed +.Pp +The system is profiled at the 197 Hz to avoid sampling in lockstep +with other periodic activities. +This unnatural frequency minimizes the chance of overlapping with other events. +.Pp +Option +.Fl x Cm stackframes=100 +increases the maximum number of kernel stack frames to unwind during +.Fn stack . +.Pp +Checking if +.Ar arg0 +is not zero makes sure that profiling happens +when the program is in the kernel context. +.Pp +Refer to +.Lk https://www.brendangregg.com/flamegraphs.html +to learn about generating flame graphs from the obtained stack traces. +.Sh SEE ALSO +.Xr dtrace 1 , +.Xr tracing 7 +.Rs +.%B The illumos Dynamic Tracing Guide +.%O Chapter profile Provider +.%D 2008 +.%U https://www.illumos.org/books/dtrace/chp-profile.html +.Re +.Rs +.%A Brendan Gregg +.%A Jim Mauro +.%B DTrace: Dynamic Tracing in Oracle Solaris, Mac OS X and FreeBSD +.%I Prentice Hall +.%P pp. 24\(en25 +.%D 2011 +.%U https://www.brendangregg.com/dtracebook/ +.Re +.Sh AUTHORS +This manual page was written by +.An Mateusz Piotrowski Aq Mt 0mp@FreeBSD.org . diff --git a/share/man/man4/hwt.4 b/share/man/man4/hwt.4 index df02a4672f36..299332c72542 100644 --- a/share/man/man4/hwt.4 +++ b/share/man/man4/hwt.4 @@ -3,7 +3,7 @@ .\" .\" SPDX-License-Identifier: BSD-2-Clause .\" -.Dd July 7, 2025 +.Dd July 12, 2025 .Dt HWT 4 .Os .Sh NAME @@ -131,6 +131,7 @@ For SPE-only, the kernel is waiting for userspace to notify that it has copied out a buffer to avoid data loss/overwriting buffers. .El .Sh SEE ALSO +.Xr tracing 7 , .Xr hwt 8 .Sh HISTORY The diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index 63e9f471f1f1..a3db00aed42f 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,5 +1,5 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. -.Dd July 5, 2025 +.Dd July 14, 2025 .Dt SRC.CONF 5 .Os .Sh NAME @@ -493,7 +493,7 @@ Do not build .Xr cxgbetool 8 .Pp This is a default setting on -arm/armv7, powerpc/powerpc and riscv/riscv64. +arm/armv7 and riscv/riscv64. .It Va WITH_CXGBETOOL Build .Xr cxgbetool 8 @@ -655,7 +655,7 @@ and .Xr efivar 8 . .Pp This is a default setting on -i386/i386, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpc64le. +i386/i386, powerpc/powerpc64 and powerpc/powerpc64le. .It Va WITH_EFI Build .Xr efivar 3 @@ -687,7 +687,7 @@ Build Flattened Device Tree support as part of the base system. This includes the device tree compiler (dtc) and libfdt support library. .Pp This is a default setting on -arm/armv7, arm64/aarch64, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpc64le and riscv/riscv64. +arm/armv7, arm64/aarch64, powerpc/powerpc64, powerpc/powerpc64le and riscv/riscv64. .It Va WITHOUT_FILE Do not build .Xr file 1 @@ -750,7 +750,7 @@ Do not build HTML docs. Do not build or install HyperV utilities. .Pp This is a default setting on -arm/armv7, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpc64le and riscv/riscv64. +arm/armv7, powerpc/powerpc64, powerpc/powerpc64le and riscv/riscv64. .It Va WITH_HYPERV Build or install HyperV utilities. .Pp @@ -916,7 +916,7 @@ On 64-bit platforms, do not build 32-bit library set and a runtime linker. .Pp This is a default setting on -arm/armv7, i386/i386, powerpc/powerpc, powerpc/powerpc64le and riscv/riscv64. +arm/armv7, i386/i386, powerpc/powerpc64le and riscv/riscv64. .It Va WITH_LIB32 On 64-bit platforms, build the 32-bit library set and a .Nm ld-elf32.so.1 @@ -935,7 +935,7 @@ arm/armv7 and riscv/riscv64. Build the LLDB debugger. .Pp This is a default setting on -amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpc64le. +amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64 and powerpc/powerpc64le. .It Va WITHOUT_LLD_BOOTSTRAP Do not build the LLD linker during the bootstrap phase of the build. @@ -1038,7 +1038,7 @@ with support for verification based on certificates obtained from UEFI. Disable inclusion of GELI crypto support in the boot chain binaries. .Pp This is a default setting on -powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpc64le. +powerpc/powerpc64 and powerpc/powerpc64le. .It Va WITH_LOADER_GELI Build GELI bootloader support. .Pp @@ -1048,7 +1048,7 @@ amd64/amd64, arm/armv7, arm64/aarch64, i386/i386 and riscv/riscv64. Do not build the 32-bit UEFI loader. .Pp This is a default setting on -arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpc64le and riscv/riscv64. +arm/armv7, arm64/aarch64, i386/i386, powerpc/powerpc64, powerpc/powerpc64le and riscv/riscv64. .It Va WITH_LOADER_IA32 Build the 32-bit UEFI loader. .Pp @@ -1058,7 +1058,7 @@ amd64/amd64. Do not build kboot, a linuxboot environment loader .Pp This is a default setting on -arm/armv7, i386/i386, powerpc/powerpc, powerpc/powerpc64le and riscv/riscv64. +arm/armv7, i386/i386, powerpc/powerpc64le and riscv/riscv64. .It Va WITH_LOADER_KBOOT Build kboot, a linuxboot environment loader .Pp @@ -1068,7 +1068,7 @@ amd64/amd64, arm64/aarch64 and powerpc/powerpc64. Do not build LUA bindings for the boot loader. .Pp This is a default setting on -powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpc64le. +powerpc/powerpc64 and powerpc/powerpc64le. .It Va WITH_LOADER_LUA Build LUA bindings for the boot loader. .Pp @@ -1083,7 +1083,7 @@ amd64/amd64, arm/armv7, arm64/aarch64, i386/i386 and riscv/riscv64. Build openfirmware bootloader components. .Pp This is a default setting on -powerpc/powerpc, powerpc/powerpc64 and powerpc/powerpc64le. +powerpc/powerpc64 and powerpc/powerpc64le. .It Va WITHOUT_LOADER_PXEBOOT Do not build pxeboot on i386/amd64. When the pxeboot is too large, or unneeded, it may be disabled with this option. @@ -1104,7 +1104,7 @@ amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64le and riscv/riscv64. Build ubldr. .Pp This is a default setting on -arm/armv7, powerpc/powerpc and powerpc/powerpc64. +arm/armv7 and powerpc/powerpc64. .It Va WITH_LOADER_VERBOSE Build with extra verbose debugging in the loader. May explode already nearly too large loader over the limit. @@ -1309,7 +1309,7 @@ Do not build .Xr mlx5tool 8 .Pp This is a default setting on -arm/armv7, powerpc/powerpc and riscv/riscv64. +arm/armv7 and riscv/riscv64. .It Va WITH_MLX5TOOL Build .Xr mlx5tool 8 @@ -1401,7 +1401,7 @@ Build the InfiniBand software stack, including kernel modules and userspace libraries. .Pp This is a default setting on -amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc, powerpc/powerpc64, powerpc/powerpc64le and riscv/riscv64. +amd64/amd64, arm64/aarch64, i386/i386, powerpc/powerpc64, powerpc/powerpc64le and riscv/riscv64. .It Va WITH_OFED_EXTRA Build the non-essential components of the .Dq "OpenFabrics Enterprise Distribution" @@ -1412,7 +1412,7 @@ Enable building LDAP support for kerberos using an openldap client from ports. Do not build LLVM's OpenMP runtime. .Pp This is a default setting on -arm/armv7 and powerpc/powerpc. +arm/armv7. .It Va WITH_OPENMP Build LLVM's OpenMP runtime. .Pp @@ -1465,7 +1465,7 @@ is set explicitly) Do not include kernel TLS support in OpenSSL. .Pp This is a default setting on -arm/armv7, i386/i386, powerpc/powerpc and riscv/riscv64. +arm/armv7, i386/i386 and riscv/riscv64. .It Va WITH_OPENSSL_KTLS Include kernel TLS support in OpenSSL. .Pp @@ -1502,7 +1502,7 @@ Do not build dynamically linked binaries as Position-Independent Executable (PIE). .Pp This is a default setting on -arm/armv7, i386/i386 and powerpc/powerpc. +arm/armv7 and i386/i386. .It Va WITH_PIE Build dynamically linked binaries as Position-Independent Executable (PIE). diff --git a/share/man/man5/style.Makefile.5 b/share/man/man5/style.Makefile.5 index cc5d2f6bb28a..fe8754924575 100644 --- a/share/man/man5/style.Makefile.5 +++ b/share/man/man5/style.Makefile.5 @@ -1,3 +1,6 @@ +.\" +.\" SPDX-License-Identifier: BSD-3-Clause +.\" .\" Copyright (c) 2002-2003, 2023 David O'Brien <obrien@FreeBSD.org> .\" All rights reserved. .\" @@ -30,10 +33,7 @@ .Os .Sh NAME .Nm style.Makefile -.Nd -.Fx -.Pa Makefile -file style guide +.Nd FreeBSD Makefile style guide .Sh DESCRIPTION This file specifies the preferred style for makefiles in the .Fx diff --git a/share/man/man7/Makefile b/share/man/man7/Makefile index 7daa0ffed8ea..1e50242a1754 100644 --- a/share/man/man7/Makefile +++ b/share/man/man7/Makefile @@ -6,6 +6,7 @@ MAN= arch.7 \ bsd.snmpmod.mk.7 \ build.7 \ c.7 \ + d.7 \ clocks.7 \ crypto.7 \ development.7 \ diff --git a/share/man/man7/arch.7 b/share/man/man7/arch.7 index 91f6953370d9..918f9058c7aa 100644 --- a/share/man/man7/arch.7 +++ b/share/man/man7/arch.7 @@ -67,8 +67,7 @@ and should be avoided. .Pp On some architectures, e.g., -.Dv powerpc -and AIM variants of +AIM variants of .Dv powerpc64 , the kernel uses a separate address space. On other architectures, kernel and a user mode process share a @@ -88,9 +87,6 @@ release to support each architecture. .It aarch64 Ta 11.0 .It amd64 Ta 5.1 .It armv7 Ta 12.0 -.It i386 Ta 1.0 -.It powerpc Ta 6.0 -.It powerpcspe Ta 12.0 .It powerpc64 Ta 9.0 .It powerpc64le Ta 13.0 .It riscv64 Ta 12.0 @@ -104,6 +100,7 @@ Discontinued architectures are shown in the following table. .It armeb Ta 8.0 Ta 11.4 .It armv6 Ta 10.0 Ta 14.x .It ia64 Ta 5.0 Ta 10.4 +.It i386 Ta 1.0 Ta 14.x .It mips Ta 8.0 Ta 13.5 .It mipsel Ta 9.0 Ta 13.5 .It mipselhf Ta 12.0 Ta 13.5 @@ -114,6 +111,8 @@ Discontinued architectures are shown in the following table. .It mips64elhf Ta 12.0 Ta 13.5 .It mips64hf Ta 12.0 Ta 13.5 .It pc98 Ta 2.2 Ta 11.4 +.It powerpc Ta 6.0 Ta 14.x +.It powerpcspe Ta 12.0 Ta 14.x .It riscv64sf Ta 12.0 Ta 13.5 .It sparc64 Ta 5.0 Ta 12.4 .El diff --git a/share/man/man7/d.7 b/share/man/man7/d.7 new file mode 100644 index 000000000000..f4686d98b1d1 --- /dev/null +++ b/share/man/man7/d.7 @@ -0,0 +1,287 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Mateusz Piotrowski <0mp@FreeBSD.org> +.\" +.Dd June 14, 2025 +.Dt D 7 +.Os +.Sh NAME +.Nm D +.Nd DTrace scripting language overview +.Sh SYNOPSIS +.Sm off +.Ar provider Cm \&: +.Ar module Cm \&: +.Ar function Cm \&: +.Ar name +.Sm on +.Sm off +.Oo +.Cm / +.Ar predicate +.Cm / +.Sm on +.Oc +.Op Cm \&{ Ns Ar action Ns Cm \&} +.Sh DESCRIPTION +.Nm D +is the +.Xr dtrace 1 +scripting language. +This manual provides a brief reference of the +.Nm +language and scripting. +.Pp +This manual page serves as a short reference of the language. +Refer to books listed in +.Sx SEE ALSO +for a complete reference. +.Sh PROBE'S DESCRIPTION +A probe's description consists of four elements: +.Sm off +.D1 Ar provider Ns Cm \&: Ns Ar module Cm \&: Ar function Cm \&: Ar name +.Sm on +.Pp +The exact meaning of +.Ar module , +.Ar function , +and +.Ar name +depends on +.Ar provider . +.Sh USER-DEFINED VARIABLE TYPES +.Bl -column "thread-local" "Syntax" +.It Sy Type Ta Sy Syntax +.It global Ta Va variable_name +.It thread-local Ta Sy self-> Ns Va variable_name +.It clause-local Ta Sy this-> Ns Va variable_name +.It aggregate Ta Sy @ Ns Va variable_name +.El +.Pp +.Em Tips : +.Bl -dash -compact +.It +Always use the variable type with the smallest scope +to minimize processing overhead. +.It +Use aggregate variables instead of global variables when possible. +Aggregate variables are multi-CPU safe in contrast to global variables. +.El +.Sh BUILT-IN VARIABLES +.Ss Probe Arguments +.Bl -tag -width "arg0, ..., arg9" +.It Va args[] +The array of typed probe arguments. +.It Va arg0 , ... , arg9 +The untyped probe arguments represented as 64-bit unsigned integers. +Only the first ten arguments are available this way. +.El +.Ss Probe Information +.Bl -tag -width probeprov +.It Va epid +The enabled probe ID which uniquely identifies an enabled probe. +An enabled probe is defined by its probe ID, its predicates, and its actions. +.It Va id +The probe ID which uniquely identifies a probe available to DTrace. +.It Va probeprov +The +.Ar provider +in the probe's description +.Sm off +.Pq Ar provider Cm \&: Ar module Cm \&: Ar function Cm \&: Ar name +.Sm on . +.It Va probemod +The +.Ar module +in the probe's description +.Sm off +.Pq Ar provider Cm \&: Ar module Cm \&: Ar function Cm \&: Ar name +.Sm on . +.It Va probefunc +The +.Ar function +in the probe's description +.Sm off +.Pq Ar provider Cm \&: Ar module Cm \&: Ar function Cm \&: Ar name +.Sm on . +.It Va probename +The +.Ar name +in the probe's description +.Sm off +.Pq Ar provider Cm \&: Ar module Cm \&: Ar function Cm \&: Ar name +.Sm on . +.El +.Ss Process Information +.Bl -tag -width execname +.It Va execargs +The process arguments. +Effectively, +.Ql curthread->td_proc->p_args . +.It Va execname +The name of the current process. +Effectively, +.Ql curthread->td_proc->p_comm . +.It Va gid +The group ID of the current process. +.It Va pid +The process ID of the current process. +.It Va ppid +The parent process ID of the current process. +.It Va uid +The user ID of the current process. +.El +.Ss Thread Information +.Bl -tag -width curlwpsinfo +.It Va uregs[] +The saved user-mode register values. +.It Va cpu +The ID of the current CPU. +.It Va stackdepth +The kernel stack frame depth. +.It Va ustackdepth +The userspace counterpart of +.Va stackdepth . +.It Va tid +The thread ID. +Depending on the context, +this can be either the ID of a kernel thread or a thread in a user process. +.It Va errno +The +.Xr errno 2 +value of the last system call performed by the current thread. +.It Va curlwpsinfo +A pointer to the +.Vt lwpsinfo_t +representation of the current thread. +Refer to +.Xr dtrace_proc 4 +for more details. +.It Va curpsinfo +A pointer to the +.Vt psinfo_t +representation of the current process. +Refer to +.Xr dtrace_proc 4 +for more details. +.It Va curthread +A pointer to the thread struct that is currently on-CPU. +E.g., +.Ql curthread->td_name +returns the thread name. +The +.In sys/proc.h +header documents all members of +.Vt struct thread . +.It Va caller +The address of the kernel thread instruction at the time of execution +of the current probe. +.It Va ucaller +The userspace counterpart of +.Va caller . +.El +.Ss Timestamps +.Bl -tag -width walltimestamp +.It Va timestamp +The number of nanoseconds since boot. +Suitable for calculating relative time differences of elapsed time and latency. +.It Va vtimestamp +The number of nanoseconds that the current thread spent on CPU. +The counter is not increased during handling of a fired DTrace probe. +Suitable for calculating relative time differences of on-CPU time. +.It Va walltimestamp +The number of nanoseconds since the Epoch +.Pq 1970-01-01T00+00:00 . +Suitable for timestamping logs. +.El +.Sh BUILT-IN FUNCTIONS +.Ss Aggregation Functions +.Bl -tag -compact -width "llquantize(value, factor, low, high, nsteps)" +.It Fn avg value +Average +.It Fn count +Count +.It Fn llquantize value factor low high nsteps +Log-linear quantization +.It Fn lquantize value low high nsteps +Linear quantization +.It Fn max value +Maximum +.It Fn min value +Minimum +.It Fn quantize value +Power-of-two frequency distribution +.It Fn stddev value +Standard deviation +.It Fn sum value +Sum +.El +.Ss Kernel Destructive Functions +By default, +.Xr dtrace 1 +does not permit the use of destructive actions. +.Bl -tag -width "chill(nanoseconds)" +.It Fn breakpoint +Set a kernel breakpoint and transfer control to +the +.Xr ddb 4 +kernel debugger. +.It Fn chill nanoseconds +Spin on the CPU for the specified number of +.Fa nanoseconds . +.It Fn panic +Panic the kernel. +.El +.Sh FILES +.Bl -tag -width /usr/share/dtrace +.It Pa /usr/share/dtrace +DTrace scripts shipped with +.Fx +base. +.El +.Sh SEE ALSO +.Xr awk 1 , +.Xr dtrace 1 , +.Xr tracing 7 +.Rs +.%B The illumos Dynamic Tracing Guide +.%D 2008 +.%U https://illumos.org/books/dtrace/ +.Re +.Rs +.%A Brendan Gregg +.%A Jim Mauro +.%B DTrace: Dynamic Tracing in Oracle Solaris, Mac OS X and FreeBSD +.%I Prentice Hall +.%D 2011 +.%U https://www.brendangregg.com/dtracebook/ +.Re +.Rs +.%A George Neville-Neil +.%A Jonathan Anderson +.%A Graeme Jenkinson +.%A Brian Kidney +.%A Domagoj Stolfa +.%A Arun Thomas +.%A Robert N. M. Watson +.%C Cambridge, United Kingdom +.%D August 2018 +.%T Univeristy of Cambridge Computer Laboratory +.%R OpenDTrace Specification version 1.0 +.%U https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-924.pdf +.Re +.Sh HISTORY +This manual page first appeared in +.Fx 15.0 . +.Sh AUTHORS +.An -nosplit +This manual page was written by +.An Mateusz Piotrowski Aq Mt 0mp@FreeBSD.org . +.Sh BUGS +The +.Va cwd +variable which typically provides the current working directory is +not supported on +.Fx +at the moment. diff --git a/share/man/man7/intro.7 b/share/man/man7/intro.7 index d889c2dd299f..43e48de87bc5 100644 --- a/share/man/man7/intro.7 +++ b/share/man/man7/intro.7 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd June 23, 2025 +.Dd July 14, 2025 .Dt INTRO 7 .Os .Sh NAME @@ -49,6 +49,9 @@ system timekeeping clocks available in .It Xr crypto 7 cryptographic algorithms provided by OpenCrypto in .Fx +.It Xr d 7 +.Xr dtrace 1 +scripting language overview .It Xr development 7 development introduction to .Fx diff --git a/share/man/man7/tracing.7 b/share/man/man7/tracing.7 index 7f21bfbbd2a3..7085bac78385 100644 --- a/share/man/man7/tracing.7 +++ b/share/man/man7/tracing.7 @@ -8,7 +8,7 @@ .Os .Sh NAME .Nm tracing -.Nd introduction to tracing and performance monitoring facilities +.Nd introduction to FreeBSD tracing and performance monitoring .Sh DESCRIPTION .Fx features a large variety of tracing and performance monitoring facilities. @@ -73,6 +73,10 @@ It comes in handy for some niche purposes during kernel development. It lets kernel programmers log events to a global ring buffer, which can later be dumped using .Xr ktrdump 8 . +.Ss Hardware-Accelerated Tracing +.Xr hwt 4 +is a kernel trace framework providing infrastructure +for hardware-assisted tracing. .Ss Hardware Counters .Xr pmcstat 8 , and its kernel counterpart, diff --git a/share/man/man8/nanobsd.8 b/share/man/man8/nanobsd.8 index 2ba072541ada..838f9ddc9afa 100644 --- a/share/man/man8/nanobsd.8 +++ b/share/man/man8/nanobsd.8 @@ -1,3 +1,6 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" .\" Copyright (c) 2006 Daniel Gerzo <danger@FreeBSD.org> .\" All rights reserved. .\" @@ -22,13 +25,12 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd November 10, 2024 +.Dd July 14, 2025 .Dt NANOBSD 8 .Os .Sh NAME .Nm nanobsd.sh -.Nd utility used to create a FreeBSD system image suitable for embedded -applications +.Nd create an embedded FreeBSD system image .Sh SYNOPSIS .Nm .Op Fl BbfhIiKknqvWwX diff --git a/share/man/man9/vnode.9 b/share/man/man9/vnode.9 index 5dd087725e92..d17492668298 100644 --- a/share/man/man9/vnode.9 +++ b/share/man/man9/vnode.9 @@ -24,7 +24,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd October 9, 2024 +.Dd July 15, 2025 .Dt VNODE 9 .Os .Sh NAME @@ -113,7 +113,7 @@ The function declarations and definitions are generated from .Pa sys/kern/vnode_if.src by the -.Pa sys/tools/vndoe_if.awk +.Pa sys/tools/vnode_if.awk script. The interfaces are documented in their respective manual pages like .Xr VOP_READ 9 diff --git a/share/mk/local.sys.machine.mk b/share/mk/local.sys.machine.mk index 5e40dfe805f9..961362cb048a 100644 --- a/share/mk/local.sys.machine.mk +++ b/share/mk/local.sys.machine.mk @@ -7,9 +7,9 @@ TARGET_MACHINE_LIST?= amd64 arm arm64 i386 powerpc riscv MACHINE_ARCH_host?= ${_HOST_ARCH} MACHINE_ARCH_host32?= ${_HOST_ARCH32} -MACHINE_ARCH_LIST_arm?= armv7 ${EXTRA_ARCHES_arm} +MACHINE_ARCH_LIST_arm?= armv7 MACHINE_ARCH_LIST_arm64?= aarch64 -MACHINE_ARCH_LIST_powerpc?= powerpc powerpc64 powerpc64le ${EXTRA_ARCHES_powerpc} +MACHINE_ARCH_LIST_powerpc?= powerpc64 powerpc64le ${EXTRA_ARCHES_powerpc} MACHINE_ARCH_LIST_riscv?= riscv64 .for m in ${TARGET_MACHINE_LIST} diff --git a/share/termcap/termcap b/share/termcap/termcap index 9704d85c942f..46b89d0b3ddf 100644 --- a/share/termcap/termcap +++ b/share/termcap/termcap @@ -3549,8 +3549,7 @@ ti931|ti 931:\ # using \EPC\\ and \EPD\\, but I don't think there is a # capability for that. ti703|ti707|Texas Instruments Silent 703/707, 80 cols:\ - :am:hc:os:xn:\ - :co#80:it#8:\ + :am:hc:os:xn:co#80:\ :do=\n:le=\b:cr=\r:nd= :bl=^G:ta=\t:is=\EPC\\: ti703-w|ti707-w|Texas Instruments Silent 703/707, 132 cols:\ :co#132:is=\EPD\\:tc=ti703: @@ -4808,6 +4807,26 @@ alacritty+common|base fragment for alacritty:\ :te=\E[?1049l\E[23;0;0t:ti=\E[?1049h\E[22;0;0t:\ :ts=\E]2;:ue=\E[24m:up=\E[A:us=\E[4m:vb=\E[?5h\E[?5l:\ :ve=\E[?12l\E[?25h:vi=\E[?25l:vs=\E[?12;25h: + +# From Tim Culverhouse <tim@timculverhouse.com> +xterm-ghostty|ghostty|Ghostty:\ + :am:hs:km:mi:ms:xn:\ + :co#80:it#8:li#24:\ + :AL=\E[%dL:DC=\E[%dP:DL=\E[%dM:DO=\E[%dB:IC=\E[%d@:\ + :LE=\E[%dD:RI=\E[%dC:SF=\E[%dS:SR=\E[%dT:UP=\E[%dA:\ + :ae=\E(B:al=\E[L:as=\E(0:bl=^G:bt=\E[Z:cd=\E[J:ce=\E[K:\ + :cl=\E[H\E[2J:cm=\E[%i%d;%dH:cr=\r:cs=\E[%i%d;%dr:\ + :ct=\E[3g:dc=\E[P:dl=\E[M:do=\n:ds=\E]2;\007:ec=\E[%dX:\ + :ei=\E[4l:fs=^G:ho=\E[H:ic=\E[@:im=\E[4h:k1=\EOP:k2=\EOQ:\ + :k3=\EOR:k4=\EOS:k5=\E[15~:k6=\E[17~:k7=\E[18~:k8=\E[19~:\ + :k9=\E[20~:kD=\E[3~:kI=\E[2~:kN=\E[6~:kP=\E[5~:kb=\177:\ + :kd=\EOB:ke=\E[?1l\E>:kh=\EOH:kl=\EOD:kr=\EOC:\ + :ks=\E[?1h\E=:ku=\EOA:le=^H:mb=\E[5m:md=\E[1m:me=\E[0m:\ + :mh=\E[2m:mr=\E[7m:nd=\E[C:rc=\E8:sc=\E7:se=\E[27m:sf=\n:\ + :so=\E[7m:sr=\EM:st=\EH:ta=^I:te=\E[?1049l:ti=\E[?1049h:\ + :ts=\E]2;:ue=\E[24m:up=\E[A:us=\E[4m:vb=\E[?5h\E[?5l:\ + :ve=\E[?12l\E[?25h:vi=\E[?25l:vs=\E[?12;25h: + # # END OF TERMCAP # ------------------------ diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c index 413b7c74890e..851f2df0e6e1 100644 --- a/sys/amd64/amd64/mem.c +++ b/sys/amd64/amd64/mem.c @@ -105,8 +105,8 @@ memrw(struct cdev *dev, struct uio *uio, int flags) * PAGE_SIZE, the uiomove() call does not * access past the end of the direct map. */ - if (v >= DMAP_MIN_ADDRESS && - v < DMAP_MIN_ADDRESS + dmaplimit) { + if (v >= kva_layout.dmap_low && + v < kva_layout.dmap_high) { error = uiomove((void *)v, c, uio); break; } diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c index 6d0917e16099..43bf81a991bf 100644 --- a/sys/amd64/amd64/minidump_machdep.c +++ b/sys/amd64/amd64/minidump_machdep.c @@ -186,7 +186,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) * tables, so care must be taken to read each entry only once. */ pmapsize = 0; - for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; ) { + for (va = kva_layout.km_low; va < kva_end; ) { /* * We always write a page, even if it is zero. Each * page written corresponds to 1GB of space @@ -279,9 +279,9 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) mdhdr.msgbufsize = mbp->msg_size; mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages)); mdhdr.pmapsize = pmapsize; - mdhdr.kernbase = VM_MIN_KERNEL_ADDRESS; - mdhdr.dmapbase = DMAP_MIN_ADDRESS; - mdhdr.dmapend = DMAP_MAX_ADDRESS; + mdhdr.kernbase = kva_layout.km_low; + mdhdr.dmapbase = kva_layout.dmap_low; + mdhdr.dmapend = kva_layout.dmap_high; mdhdr.dumpavailsize = round_page(sizeof(dump_avail)); dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, @@ -323,7 +323,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) /* Dump kernel page directory pages */ bzero(fakepd, sizeof(fakepd)); - for (va = VM_MIN_KERNEL_ADDRESS; va < kva_end; va += NBPDP) { + for (va = kva_layout.km_low; va < kva_end; va += NBPDP) { ii = pmap_pml4e_index(va); pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 9c985df13ddf..2c7777e608b9 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -415,7 +415,7 @@ SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0, static int ndmpdp; vm_paddr_t dmaplimit; -vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; +vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS_LA48; pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, @@ -475,11 +475,36 @@ _Static_assert(DMPML4I + NDMPML4E <= KMSANSHADPML4I, "direct map overflow"); static pml4_entry_t *kernel_pml4; static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ +static u_int64_t DMPML4phys; /* ... level 4, for la57 */ static int ndmpdpphys; /* number of DMPDPphys pages */ vm_paddr_t kernphys; /* phys addr of start of bootstrap data */ vm_paddr_t KERNend; /* and the end */ +struct kva_layout_s kva_layout = { + .kva_min = KV4ADDR(PML4PML4I, 0, 0, 0), + .dmap_low = KV4ADDR(DMPML4I, 0, 0, 0), + .dmap_high = KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0), + .lm_low = KV4ADDR(LMSPML4I, 0, 0, 0), + .lm_high = KV4ADDR(LMEPML4I + 1, 0, 0, 0), + .km_low = KV4ADDR(KPML4BASE, 0, 0, 0), + .km_high = KV4ADDR(KPML4BASE + NKPML4E - 1, NPDPEPG - 1, + NPDEPG - 1, NPTEPG - 1), + .rec_pt = KV4ADDR(PML4PML4I, 0, 0, 0), +}; + +struct kva_layout_s kva_layout_la57 = { + .kva_min = KV5ADDR(NPML5EPG / 2, 0, 0, 0, 0), /* == rec_pt */ + .dmap_low = KV5ADDR(DMPML5I, 0, 0, 0, 0), + .dmap_high = KV5ADDR(DMPML5I + NDMPML5E, 0, 0, 0, 0), + .lm_low = KV4ADDR(LMSPML4I, 0, 0, 0), + .lm_high = KV4ADDR(LMEPML4I + 1, 0, 0, 0), + .km_low = KV4ADDR(KPML4BASE, 0, 0, 0), + .km_high = KV4ADDR(KPML4BASE + NKPML4E - 1, NPDPEPG - 1, + NPDEPG - 1, NPTEPG - 1), + .rec_pt = KV5ADDR(PML5PML5I, 0, 0, 0, 0), +}; + /* * pmap_mapdev support pre initialization (i.e. console) */ @@ -549,8 +574,8 @@ static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */ static vmem_t *large_vmem; static u_int lm_ents; -#define PMAP_ADDRESS_IN_LARGEMAP(va) ((va) >= LARGEMAP_MIN_ADDRESS && \ - (va) < LARGEMAP_MIN_ADDRESS + NBPML4 * (u_long)lm_ents) +#define PMAP_ADDRESS_IN_LARGEMAP(va) ((va) >= kva_layout.lm_low && \ + (va) < kva_layout.lm_high) int pmap_pcid_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, @@ -1336,7 +1361,7 @@ static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - bool remove_pt, struct spglist *free, struct rwlock **lockp); + bool demote_kpde, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); @@ -1722,7 +1747,7 @@ create_pagetables(vm_paddr_t *firstaddr) { pd_entry_t *pd_p; pdp_entry_t *pdp_p; - pml4_entry_t *p4_p; + pml4_entry_t *p4_p, *p4d_p; pml5_entry_t *p5_p; uint64_t DMPDkernphys; vm_paddr_t pax; @@ -1732,7 +1757,7 @@ create_pagetables(vm_paddr_t *firstaddr) vm_offset_t kasankernbase; int kasankpdpi, kasankpdi, nkasanpte; #endif - int i, j, ndm1g, nkpdpe, nkdmpde; + int i, j, ndm1g, nkpdpe, nkdmpde, ndmpml4phys; TSENTER(); /* Allocate page table pages for the direct map */ @@ -1740,15 +1765,30 @@ create_pagetables(vm_paddr_t *firstaddr) if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; ndmpdpphys = howmany(ndmpdp, NPDPEPG); - if (ndmpdpphys > NDMPML4E) { - /* - * Each NDMPML4E allows 512 GB, so limit to that, - * and then readjust ndmpdp and ndmpdpphys. - */ - printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512); - Maxmem = atop(NDMPML4E * NBPML4); - ndmpdpphys = NDMPML4E; - ndmpdp = NDMPML4E * NPDEPG; + if (la57) { + ndmpml4phys = howmany(ndmpdpphys, NPML4EPG); + if (ndmpml4phys > NDMPML5E) { + printf("NDMPML5E limits system to %ld GB\n", + (u_long)NDMPML5E * NBPML5 / 1024 / 1024 / 1024); + Maxmem = atop(NDMPML5E * NBPML5); + ndmpml4phys = NDMPML5E; + ndmpdpphys = ndmpml4phys * NPML4EPG; + ndmpdp = ndmpdpphys * NPDEPG; + } + DMPML4phys = allocpages(firstaddr, ndmpml4phys); + } else { + if (ndmpdpphys > NDMPML4E) { + /* + * Each NDMPML4E allows 512 GB, so limit to + * that, and then readjust ndmpdp and + * ndmpdpphys. + */ + printf("NDMPML4E limits system to %d GB\n", + NDMPML4E * 512); + Maxmem = atop(NDMPML4E * NBPML4); + ndmpdpphys = NDMPML4E; + ndmpdp = NDMPML4E * NPDEPG; + } } DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; @@ -1773,7 +1813,13 @@ create_pagetables(vm_paddr_t *firstaddr) dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; /* Allocate pages. */ + if (la57) { + KPML5phys = allocpages(firstaddr, 1); + p5_p = (pml5_entry_t *)KPML5phys; + } KPML4phys = allocpages(firstaddr, 1); + p4_p = (pml4_entry_t *)KPML4phys; + KPDPphys = allocpages(firstaddr, NKPML4E); #ifdef KASAN KASANPDPphys = allocpages(firstaddr, NKASANPML4E); @@ -1893,6 +1939,16 @@ create_pagetables(vm_paddr_t *firstaddr) } /* + * Connect the Direct Map slots up to the PML4. + * pml5 entries for DMAP are handled below in global pml5 loop. + */ + p4d_p = la57 ? (pml4_entry_t *)DMPML4phys : &p4_p[DMPML4I]; + for (i = 0; i < ndmpdpphys; i++) { + p4d_p[i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | + pg_nx; + } + + /* * Instead of using a 1G page for the memory containing the kernel, * use 2M pages with read-only and no-execute permissions. (If using 1G * pages, this will partially overwrite the PDPEs above.) @@ -1911,11 +1967,6 @@ create_pagetables(vm_paddr_t *firstaddr) } } - /* And recursively map PML4 to itself in order to get PTmap */ - p4_p = (pml4_entry_t *)KPML4phys; - p4_p[PML4PML4I] = KPML4phys; - p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; - #ifdef KASAN /* Connect the KASAN shadow map slots up to the PML4. */ for (i = 0; i < NKASANPML4E; i++) { @@ -1938,25 +1989,15 @@ create_pagetables(vm_paddr_t *firstaddr) } #endif - /* Connect the Direct Map slots up to the PML4. */ - for (i = 0; i < ndmpdpphys; i++) { - p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); - p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; - } - /* Connect the KVA slots up to the PML4 */ for (i = 0; i < NKPML4E; i++) { p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V; } - kernel_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); - if (la57) { /* XXXKIB bootstrap KPML5phys page is lost */ - KPML5phys = allocpages(firstaddr, 1); - for (i = 0, p5_p = (pml5_entry_t *)KPML5phys; i < NPML5EPG; - i++) { + for (i = 0; i < NPML5EPG; i++) { if (i == PML5PML5I) { /* * Recursively map PML5 to itself in @@ -1964,6 +2005,10 @@ create_pagetables(vm_paddr_t *firstaddr) */ p5_p[i] = KPML5phys | X86_PG_RW | X86_PG_A | X86_PG_M | X86_PG_V | pg_nx; + } else if (i >= DMPML5I && i < DMPML5I + NDMPML5E) { + /* Connect DMAP pml4 pages to PML5. */ + p5_p[i] = (DMPML4phys + ptoa(i - DMPML5I)) | + X86_PG_RW | X86_PG_V | pg_nx; } else if (i == pmap_pml5e_index(UPT_MAX_ADDRESS)) { p5_p[i] = KPML4phys | X86_PG_RW | X86_PG_A | X86_PG_M | X86_PG_V; @@ -1971,6 +2016,10 @@ create_pagetables(vm_paddr_t *firstaddr) p5_p[i] = 0; } } + } else { + /* Recursively map PML4 to itself in order to get PTmap */ + p4_p[PML4PML4I] = KPML4phys; + p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; } TSEXIT(); } @@ -2024,7 +2073,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) */ virtual_avail = (vm_offset_t)KERNSTART + round_2mpage(KERNend - (vm_paddr_t)kernphys); - virtual_end = VM_MAX_KERNEL_ADDRESS; + virtual_end = kva_layout.km_high; /* * Enable PG_G global pages, then switch to the kernel page @@ -2046,9 +2095,13 @@ pmap_bootstrap(vm_paddr_t *firstaddr) * Initialize the kernel pmap (which is statically allocated). * Count bootstrap data as being resident in case any of this data is * later unmapped (using pmap_remove()) and freed. + * + * DMAP_TO_PHYS()/PHYS_TO_DMAP() are functional only after + * kva_layout is fixed. */ PMAP_LOCK_INIT(kernel_pmap); if (la57) { + kva_layout = kva_layout_la57; vtoptem = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT + NPML5EPGSHIFT)) - 1) << 3; PTmap = (vm_offset_t)P5Tmap; @@ -2059,6 +2112,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) kernel_pmap->pm_cr3 = KPML5phys; pmap_pt_page_count_adj(kernel_pmap, 1); /* top-level page */ } else { + kernel_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_pmltop = kernel_pml4; kernel_pmap->pm_cr3 = KPML4phys; } @@ -2420,6 +2474,7 @@ pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t m, mpte; + pml4_entry_t *pml4e; int error, i, ret, skz63; /* L1TF, reserve page @0 unconditionally */ @@ -2559,18 +2614,19 @@ pmap_init(void) printf("pmap: large map %u PML4 slots (%lu GB)\n", lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024)); if (lm_ents != 0) { - large_vmem = vmem_create("large", LARGEMAP_MIN_ADDRESS, - (vmem_size_t)lm_ents * NBPML4, PAGE_SIZE, 0, M_WAITOK); + large_vmem = vmem_create("large", kva_layout.lm_low, + (vmem_size_t)kva_layout.lm_high - kva_layout.lm_low, + PAGE_SIZE, 0, M_WAITOK); if (large_vmem == NULL) { printf("pmap: cannot create large map\n"); lm_ents = 0; } for (i = 0; i < lm_ents; i++) { m = pmap_large_map_getptp_unlocked(); - /* XXXKIB la57 */ - kernel_pml4[LMSPML4I + i] = X86_PG_V | - X86_PG_RW | X86_PG_A | X86_PG_M | pg_nx | - VM_PAGE_TO_PHYS(m); + pml4e = pmap_pml4e(kernel_pmap, kva_layout.lm_low + + (u_long)i * NBPML4); + *pml4e = X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | + pg_nx | VM_PAGE_TO_PHYS(m); } } } @@ -3899,7 +3955,7 @@ pmap_kextract(vm_offset_t va) pd_entry_t pde; vm_paddr_t pa; - if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { + if (va >= kva_layout.dmap_low && va < kva_layout.dmap_high) { pa = DMAP_TO_PHYS(va); } else if (PMAP_ADDRESS_IN_LARGEMAP(va)) { pa = pmap_large_map_kextract(va); @@ -4040,7 +4096,7 @@ pmap_qremove(vm_offset_t sva, int count) * enough to one of those pmap_enter() calls for it to * be caught up in a promotion. */ - KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va)); + KASSERT(va >= kva_layout.km_low, ("usermode va %lx", va)); KASSERT((*vtopde(va) & X86_PG_PS) == 0, ("pmap_qremove on promoted va %#lx", va)); @@ -4328,21 +4384,13 @@ void pmap_pinit_pml5(vm_page_t pml5pg) { pml5_entry_t *pm_pml5; + int i; pm_pml5 = (pml5_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml5pg)); - - /* - * Add pml5 entry at top of KVA pointing to existing pml4 table, - * entering all existing kernel mappings into level 5 table. - */ - pm_pml5[pmap_pml5e_index(UPT_MAX_ADDRESS)] = KPML4phys | X86_PG_V | - X86_PG_RW | X86_PG_A | X86_PG_M; - - /* - * Install self-referential address mapping entry. - */ - pm_pml5[PML5PML5I] = VM_PAGE_TO_PHYS(pml5pg) | - X86_PG_RW | X86_PG_V | X86_PG_M | X86_PG_A; + for (i = 0; i < NPML5EPG / 2; i++) + pm_pml5[i] = 0; + for (; i < NPML5EPG; i++) + pm_pml5[i] = kernel_pmap->pm_pmltop[i]; } static void @@ -4899,8 +4947,8 @@ pmap_release(pmap_t pmap) m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pmltop)); if (pmap_is_la57(pmap)) { - pmap->pm_pmltop[pmap_pml5e_index(UPT_MAX_ADDRESS)] = 0; - pmap->pm_pmltop[PML5PML5I] = 0; + for (i = NPML5EPG / 2; i < NPML5EPG; i++) + pmap->pm_pmltop[i] = 0; } else { for (i = 0; i < NKPML4E; i++) /* KVA */ pmap->pm_pmltop[KPML4BASE + i] = 0; @@ -4942,7 +4990,7 @@ pmap_release(pmap_t pmap) static int kvm_size(SYSCTL_HANDLER_ARGS) { - unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; + unsigned long ksize = kva_layout.km_high - kva_layout.km_low; return sysctl_handle_long(oidp, &ksize, 0, req); } @@ -4953,7 +5001,7 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD | CTLFLAG_MPSAFE, static int kvm_free(SYSCTL_HANDLER_ARGS) { - unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; + unsigned long kfree = kva_layout.km_high - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } @@ -5031,7 +5079,7 @@ pmap_page_array_startup(long pages) vm_page_array_size = pages; - start = VM_MIN_KERNEL_ADDRESS; + start = kva_layout.km_low; end = start + pages * sizeof(struct vm_page); for (va = start; va < end; va += NBPDR) { pfn = first_page + (va - start) / sizeof(struct vm_page); @@ -6067,8 +6115,8 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * so the direct map region is the only part of the * kernel address space that must be handled here. */ - KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && - va < DMAP_MAX_ADDRESS), + KASSERT(!in_kernel || (va >= kva_layout.dmap_low && + va < kva_layout.dmap_high), ("pmap_demote_pde: No saved mpte for va %#lx", va)); /* @@ -6165,8 +6213,7 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_kernel_pde: Remove a kernel superpage mapping. */ static void -pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, - bool remove_pt) +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; @@ -6174,12 +6221,8 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if (remove_pt) - mpte = pmap_remove_pt_page(pmap, va); - else - mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)); - if (mpte == NULL) - panic("pmap_remove_kernel_pde: Missing pt page."); + mpte = pmap_remove_pt_page(pmap, va); + KASSERT(mpte != NULL, ("pmap_remove_kernel_pde: missing pt page")); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V; @@ -6209,7 +6252,7 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_pde: do the things to unmap a superpage in a process */ static int -pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool demote_kpde, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; @@ -6249,9 +6292,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, pmap_delayed_invl_page(m); } } - if (pmap == kernel_pmap) { - pmap_remove_kernel_pde(pmap, pdq, sva, remove_pt); - } else { + if (pmap != kernel_pmap) { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { KASSERT(vm_page_any_valid(mpte), @@ -6262,6 +6303,14 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, mpte->ref_count = 0; pmap_add_delayed_free_list(mpte, free, false); } + } else if (demote_kpde) { + pmap_remove_kernel_pde(pmap, pdq, sva); + } else { + mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(sva)); + if (vm_page_any_valid(mpte)) { + mpte->valid = 0; + pmap_zero_page(mpte); + } } return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); } @@ -7183,7 +7232,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, PG_RW = pmap_rw_bit(pmap); va = trunc_page(va); - KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); + KASSERT(va <= kva_layout.km_high, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); @@ -7573,8 +7622,8 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * the mapping is not from kernel_pmap, then * a reserved PT page could be freed. */ - (void)pmap_remove_pde(pmap, pde, va, - pmap != kernel_pmap, &free, lockp); + (void)pmap_remove_pde(pmap, pde, va, false, &free, + lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { @@ -7584,10 +7633,9 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * before any changes to mappings are * made. Abort on failure. */ - mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt, false, false)) { - if (pdpg != NULL) - pdpg->ref_count--; + mt = PHYS_TO_VM_PAGE(oldpde & PG_FRAME); + if (pmap_insert_pt_page(pmap, mt, false, + false)) { CTR1(KTR_PMAP, "pmap_enter_pde: cannot ins kern ptp va %#lx", va); @@ -9550,7 +9598,7 @@ pmap_unmapdev(void *p, vm_size_t size) va = (vm_offset_t)p; /* If we gave a direct map region in pmap_mapdev, do nothing */ - if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) + if (va >= kva_layout.dmap_low && va < kva_layout.dmap_high) return; offset = va & PAGE_MASK; size = round_page(offset + size); @@ -9649,6 +9697,8 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pat_mode == ma) + return; m->md.pat_mode = ma; @@ -9668,6 +9718,9 @@ pmap_page_set_memattr_noflush(vm_page_t m, vm_memattr_t ma) { int error; + if (m->md.pat_mode == ma) + return; + m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) @@ -9724,7 +9777,7 @@ pmap_change_prot(vm_offset_t va, vm_size_t size, vm_prot_t prot) int error; /* Only supported within the kernel map. */ - if (va < VM_MIN_KERNEL_ADDRESS) + if (va < kva_layout.km_low) return (EINVAL); PMAP_LOCK(kernel_pmap); @@ -9755,7 +9808,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, * Only supported on kernel virtual addresses, including the direct * map but excluding the recursive map. */ - if (base < DMAP_MIN_ADDRESS) + if (base < kva_layout.dmap_low) return (EINVAL); /* @@ -9778,7 +9831,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pte_bits |= X86_PG_RW; } if ((prot & VM_PROT_EXECUTE) == 0 || - va < VM_MIN_KERNEL_ADDRESS) { + va < kva_layout.km_low) { pde_bits |= pg_nx; pte_bits |= pg_nx; } @@ -9874,7 +9927,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pdpe, pde_bits, pde_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pdpe & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ @@ -9904,7 +9957,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pde, pde_bits, pde_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pde & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ @@ -9932,7 +9985,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, pmap_pte_props(pte, pte_bits, pte_mask); changed = true; } - if (tmpva >= VM_MIN_KERNEL_ADDRESS && + if (tmpva >= kva_layout.km_low && (*pte & PG_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ @@ -10904,8 +10957,8 @@ pmap_large_unmap(void *svaa, vm_size_t len) struct spglist spgf; sva = (vm_offset_t)svaa; - if (len == 0 || sva + len < sva || (sva >= DMAP_MIN_ADDRESS && - sva + len <= DMAP_MIN_ADDRESS + dmaplimit)) + if (len == 0 || sva + len < sva || (sva >= kva_layout.dmap_low && + sva + len < kva_layout.dmap_high)) return; SLIST_INIT(&spgf); @@ -11151,11 +11204,10 @@ pmap_large_map_wb(void *svap, vm_size_t len) sva = (vm_offset_t)svap; eva = sva + len; pmap_large_map_wb_fence(); - if (sva >= DMAP_MIN_ADDRESS && eva <= DMAP_MIN_ADDRESS + dmaplimit) { + if (sva >= kva_layout.dmap_low && eva < kva_layout.dmap_high) { pmap_large_map_flush_range(sva, len); } else { - KASSERT(sva >= LARGEMAP_MIN_ADDRESS && - eva <= LARGEMAP_MIN_ADDRESS + lm_ents * NBPML4, + KASSERT(sva >= kva_layout.lm_low && eva < kva_layout.lm_high, ("pmap_large_map_wb: not largemap %#lx %#lx", sva, len)); pmap_large_map_wb_large(sva, eva); } @@ -11196,8 +11248,8 @@ pmap_pti_init(void) VM_OBJECT_WLOCK(pti_obj); pml4_pg = pmap_pti_alloc_page(); pti_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4_pg)); - for (va = VM_MIN_KERNEL_ADDRESS; va <= VM_MAX_KERNEL_ADDRESS && - va >= VM_MIN_KERNEL_ADDRESS && va > NBPML4; va += NBPML4) { + for (va = kva_layout.km_low; va <= kva_layout.km_high && + va >= kva_layout.km_low && va > NBPML4; va += NBPML4) { pdpe = pmap_pti_pdpe(va); pmap_pti_wire_pte(pdpe); } diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 09ac0a67dbef..eefddad2f142 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -769,7 +769,7 @@ trap_pfault(struct trapframe *frame, bool usermode, int *signo, int *ucode) return (-1); } } - if (eva >= VM_MIN_KERNEL_ADDRESS) { + if (eva >= kva_layout.km_low) { /* * Don't allow user-mode faults in kernel address space. */ diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h index 8db314fa034d..1bbb302259d6 100644 --- a/sys/amd64/include/param.h +++ b/sys/amd64/include/param.h @@ -146,8 +146,9 @@ #define amd64_btop(x) ((unsigned long)(x) >> PAGE_SHIFT) #define amd64_ptob(x) ((unsigned long)(x) << PAGE_SHIFT) -#define INKERNEL(va) (((va) >= DMAP_MIN_ADDRESS && (va) < DMAP_MAX_ADDRESS) \ - || ((va) >= VM_MIN_KERNEL_ADDRESS && (va) < VM_MAX_KERNEL_ADDRESS)) +#define INKERNEL(va) \ + (((va) >= kva_layout.dmap_low && (va) < kva_layout.dmap_high) || \ + ((va) >= kva_layout.km_low && (va) < kva_layout.km_high)) #ifdef SMP #define SC_TABLESIZE 1024 /* Must be power of 2. */ diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 7d3e91bcd9b9..08e96027a5ed 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -169,11 +169,12 @@ * the recursive page table map. */ #define NDMPML4E 8 +#define NDMPML5E 32 /* - * These values control the layout of virtual memory. The starting address - * of the direct map, which is controlled by DMPML4I, must be a multiple of - * its size. (See the PHYS_TO_DMAP() and DMAP_TO_PHYS() macros.) + * These values control the layout of virtual memory. The starting + * address of the direct map is controlled by DMPML4I on LA48 and + * DMPML5I on LA57. * * Note: KPML4I is the index of the (single) level 4 page that maps * the KVA that holds KERNBASE, while KPML4BASE is the index of the @@ -191,6 +192,7 @@ #define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */ #define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */ +#define DMPML5I (NPML5EPG / 2 + 1) #define KPML4I (NPML4EPG-1) #define KPDPI (NPDPEPG-2) /* kernbase at -2GB */ @@ -548,6 +550,18 @@ pmap_pml5e_index(vm_offset_t va) return ((va >> PML5SHIFT) & ((1ul << NPML5EPGSHIFT) - 1)); } +struct kva_layout_s { + vm_offset_t kva_min; + vm_offset_t dmap_low; /* DMAP_MIN_ADDRESS */ + vm_offset_t dmap_high; /* DMAP_MAX_ADDRESS */ + vm_offset_t lm_low; /* LARGEMAP_MIN_ADDRESS */ + vm_offset_t lm_high; /* LARGEMAP_MAX_ADDRESS */ + vm_offset_t km_low; /* VM_MIN_KERNEL_ADDRESS */ + vm_offset_t km_high; /* VM_MAX_KERNEL_ADDRESS */ + vm_offset_t rec_pt; +}; +extern struct kva_layout_s kva_layout; + #endif /* !LOCORE */ #endif /* !_MACHINE_PMAP_H_ */ diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 0cd9bb4fa7a4..59053665dc40 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -163,6 +163,7 @@ * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. * + * LA48: * 0x0000000000000000 - 0x00007fffffffffff user map * 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole) * 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot) @@ -175,18 +176,29 @@ * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map * + * LA57: + * 0x0000000000000000 - 0x00ffffffffffffff user map + * 0x0100000000000000 - 0xf0ffffffffffffff does not exist (hole) + * 0xff00000000000000 - 0xff00ffffffffffff recursive page table (2048TB slot) + * 0xff01000000000000 - 0xff20ffffffffffff direct map (32 x 2048TB slots) + * 0xff21000000000000 - 0xffff807fffffffff unused + * 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up) + * 0xffff848000000000 - 0xfffff77fffffffff unused (large map extends there) + * 0xfffff60000000000 - 0xfffff7ffffffffff 2TB KMSAN origin map, optional + * 0xfffff78000000000 - 0xfffff7bfffffffff 512GB KASAN shadow map, optional + * 0xfffff80000000000 - 0xfffffbffffffffff 4TB unused + * 0xfffffc0000000000 - 0xfffffdffffffffff 2TB KMSAN shadow map, optional + * 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map + * * Within the kernel map: * * 0xfffffe0000000000 vm_page_array * 0xffffffff80000000 KERNBASE */ -#define VM_MIN_KERNEL_ADDRESS KV4ADDR(KPML4BASE, 0, 0, 0) -#define VM_MAX_KERNEL_ADDRESS KV4ADDR(KPML4BASE + NKPML4E - 1, \ - NPDPEPG-1, NPDEPG-1, NPTEPG-1) - -#define DMAP_MIN_ADDRESS KV4ADDR(DMPML4I, 0, 0, 0) -#define DMAP_MAX_ADDRESS KV4ADDR(DMPML4I + NDMPML4E, 0, 0, 0) +#define VM_MIN_KERNEL_ADDRESS_LA48 KV4ADDR(KPML4BASE, 0, 0, 0) +#define VM_MIN_KERNEL_ADDRESS kva_layout.km_low +#define VM_MAX_KERNEL_ADDRESS kva_layout.km_high #define KASAN_MIN_ADDRESS KV4ADDR(KASANPML4I, 0, 0, 0) #define KASAN_MAX_ADDRESS KV4ADDR(KASANPML4I + NKASANPML4E, 0, 0, 0) @@ -199,9 +211,6 @@ #define KMSAN_ORIG_MAX_ADDRESS KV4ADDR(KMSANORIGPML4I + NKMSANORIGPML4E, \ 0, 0, 0) -#define LARGEMAP_MIN_ADDRESS KV4ADDR(LMSPML4I, 0, 0, 0) -#define LARGEMAP_MAX_ADDRESS KV4ADDR(LMEPML4I + 1, 0, 0, 0) - /* * Formally kernel mapping starts at KERNBASE, but kernel linker * script leaves first PDE reserved. For legacy BIOS boot, kernel is @@ -239,21 +248,21 @@ * vt fb startup needs to be reworked. */ #define PHYS_IN_DMAP(pa) (dmaplimit == 0 || (pa) < dmaplimit) -#define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ - (va) < (DMAP_MIN_ADDRESS + dmaplimit)) +#define VIRT_IN_DMAP(va) \ + ((va) >= kva_layout.dmap_low && (va) < kva_layout.dmap_high) #define PMAP_HAS_DMAP 1 -#define PHYS_TO_DMAP(x) ({ \ +#define PHYS_TO_DMAP(x) __extension__ ({ \ KASSERT(PHYS_IN_DMAP(x), \ ("physical address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ - (x) | DMAP_MIN_ADDRESS; }) + (x) + kva_layout.dmap_low; }) -#define DMAP_TO_PHYS(x) ({ \ +#define DMAP_TO_PHYS(x) __extension__ ({ \ KASSERT(VIRT_IN_DMAP(x), \ ("virtual address %#jx not covered by the DMAP", \ (uintmax_t)x)); \ - (x) & ~DMAP_MIN_ADDRESS; }) + (x) - kva_layout.dmap_low; }) /* * amd64 maps the page array into KVA so that it can be more easily @@ -274,7 +283,7 @@ */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ - VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) + kva_layout.km_low + 1) * 3 / 5) #endif /* initial pagein size of beginning of executable file */ diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 92eb0589f80b..78883296c5b7 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -5767,7 +5767,7 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) CTR5(KTR_PMAP, "%s: page %p - 0x%08X oma: %d, ma: %d", __func__, m, VM_PAGE_TO_PHYS(m), oma, ma); - if ((m->flags & PG_FICTITIOUS) != 0) + if (ma == oma || (m->flags & PG_FICTITIOUS) != 0) return; #if 0 /* @@ -5784,22 +5784,20 @@ pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) * If page is not mapped by sf buffer, map the page * transient and do invalidation. */ - if (ma != oma) { - pa = VM_PAGE_TO_PHYS(m); - sched_pin(); - pc = get_pcpu(); - cmap2_pte2p = pc->pc_cmap2_pte2p; - mtx_lock(&pc->pc_cmap_lock); - if (pte2_load(cmap2_pte2p) != 0) - panic("%s: CMAP2 busy", __func__); - pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, - vm_memattr_to_pte2(ma))); - dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE); - pte2_clear(cmap2_pte2p); - tlb_flush((vm_offset_t)pc->pc_cmap2_addr); - sched_unpin(); - mtx_unlock(&pc->pc_cmap_lock); - } + pa = VM_PAGE_TO_PHYS(m); + sched_pin(); + pc = get_pcpu(); + cmap2_pte2p = pc->pc_cmap2_pte2p; + mtx_lock(&pc->pc_cmap_lock); + if (pte2_load(cmap2_pte2p) != 0) + panic("%s: CMAP2 busy", __func__); + pte2_store(cmap2_pte2p, PTE2_KERN_NG(pa, PTE2_AP_KRW, + vm_memattr_to_pte2(ma))); + dcache_wbinv_poc((vm_offset_t)pc->pc_cmap2_addr, pa, PAGE_SIZE); + pte2_clear(cmap2_pte2p); + tlb_flush((vm_offset_t)pc->pc_cmap2_addr); + sched_unpin(); + mtx_unlock(&pc->pc_cmap_lock); } /* diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index d2e56a270f54..a09da794e77d 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -497,7 +497,8 @@ static bool pmap_pv_insert_l3c(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static void pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, - pd_entry_t l1e, struct spglist *free, struct rwlock **lockp); + pd_entry_t l1e, bool demote_kl2e, struct spglist *free, + struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp); static bool pmap_remove_l3c(pmap_t pmap, pt_entry_t *l3p, vm_offset_t va, @@ -3847,8 +3848,7 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) PMAP_LOCK_ASSERT(pmap, MA_OWNED); ml3 = pmap_remove_pt_page(pmap, va); - if (ml3 == NULL) - panic("pmap_remove_kernel_l2: Missing pt page"); + KASSERT(ml3 != NULL, ("pmap_remove_kernel_l2: missing pt page")); ml3pa = VM_PAGE_TO_PHYS(ml3); newl2 = PHYS_TO_PTE(ml3pa) | L2_TABLE; @@ -3873,8 +3873,8 @@ pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) * pmap_remove_l2: Do the things to unmap a level 2 superpage. */ static int -pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, - pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) +pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pd_entry_t l1e, + bool demote_kl2e, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t old_l2; @@ -3910,9 +3910,7 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, vm_page_aflag_clear(mt, PGA_WRITEABLE); } } - if (pmap == kernel_pmap) { - pmap_remove_kernel_l2(pmap, l2, sva); - } else { + if (pmap != kernel_pmap) { ml3 = pmap_remove_pt_page(pmap, sva); if (ml3 != NULL) { KASSERT(vm_page_any_valid(ml3), @@ -3923,6 +3921,14 @@ pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, ml3->ref_count = 0; pmap_add_delayed_free_list(ml3, free, false); } + } else if (demote_kl2e) { + pmap_remove_kernel_l2(pmap, l2, sva); + } else { + ml3 = vm_radix_lookup(&pmap->pm_root, pmap_l2_pindex(sva)); + if (vm_page_any_valid(ml3)) { + ml3->valid = 0; + pmap_zero_page(ml3); + } } return (pmap_unuse_pt(pmap, sva, l1e, free)); } @@ -4232,7 +4238,7 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { if (sva + L2_SIZE == va_next && eva >= va_next) { pmap_remove_l2(pmap, l2, sva, pmap_load(l1), - &free, &lock); + true, &free, &lock); continue; } else if (pmap_demote_l2_locked(pmap, l2, sva, &lock) == NULL) @@ -5747,33 +5753,51 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, } } SLIST_INIT(&free); - if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) + if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) { (void)pmap_remove_l2(pmap, l2, va, - pmap_load(pmap_l1(pmap, va)), &free, lockp); - else + pmap_load(pmap_l1(pmap, va)), false, &free, lockp); + } else { + if (ADDR_IS_KERNEL(va)) { + /* + * Try to save the ptp in the trie + * before any changes to mappings are + * made. Abort on failure. + */ + mt = PTE_TO_VM_PAGE(old_l2); + if (pmap_insert_pt_page(pmap, mt, false, + false)) { + CTR1(KTR_PMAP, + "pmap_enter_l2: cannot ins kern ptp va %#lx", + va); + return (KERN_RESOURCE_SHORTAGE); + } + /* + * Both pmap_remove_l2() and + * pmap_remove_l3_range() will zero fill + * the L3 kernel page table page. + */ + } pmap_remove_l3_range(pmap, old_l2, va, va + L2_SIZE, &free, lockp); + if (ADDR_IS_KERNEL(va)) { + /* + * The TLB could have an intermediate + * entry for the L3 kernel page table + * page, so request an invalidation at + * all levels after clearing the + * L2_TABLE entry. + */ + pmap_clear(l2); + pmap_s1_invalidate_page(pmap, va, false); + } + } + KASSERT(pmap_load(l2) == 0, + ("pmap_enter_l2: non-zero L2 entry %p", l2)); if (!ADDR_IS_KERNEL(va)) { vm_page_free_pages_toq(&free, true); - KASSERT(pmap_load(l2) == 0, - ("pmap_enter_l2: non-zero L2 entry %p", l2)); } else { KASSERT(SLIST_EMPTY(&free), ("pmap_enter_l2: freed kernel page table page")); - - /* - * Both pmap_remove_l2() and pmap_remove_l3_range() - * will leave the kernel page table page zero filled. - * Nonetheless, the TLB could have an intermediate - * entry for the kernel page table page, so request - * an invalidation at all levels after clearing - * the L2_TABLE entry. - */ - mt = PTE_TO_VM_PAGE(pmap_load(l2)); - if (pmap_insert_pt_page(pmap, mt, false, false)) - panic("pmap_enter_l2: trie insert failed"); - pmap_clear(l2); - pmap_s1_invalidate_page(pmap, va, false); } } @@ -8045,6 +8069,8 @@ pmap_unmapbios(void *p, vm_size_t size) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pv_memattr == ma) + return; m->md.pv_memattr = ma; @@ -8424,8 +8450,8 @@ pmap_demote_l2_abort(pmap_t pmap, vm_offset_t va, pt_entry_t *l2, struct spglist free; SLIST_INIT(&free); - (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), &free, - lockp); + (void)pmap_remove_l2(pmap, l2, va, pmap_load(pmap_l1(pmap, va)), true, + &free, lockp); vm_page_free_pages_toq(&free, true); } diff --git a/sys/compat/linuxkpi/common/include/linux/slab.h b/sys/compat/linuxkpi/common/include/linux/slab.h index f3a840d9bf4b..efa5c8cb67b3 100644 --- a/sys/compat/linuxkpi/common/include/linux/slab.h +++ b/sys/compat/linuxkpi/common/include/linux/slab.h @@ -45,7 +45,7 @@ MALLOC_DECLARE(M_KMALLOC); -#define kvzalloc(size, flags) kmalloc(size, (flags) | __GFP_ZERO) +#define kvzalloc(size, flags) kvmalloc(size, (flags) | __GFP_ZERO) #define kvcalloc(n, size, flags) kvmalloc_array(n, size, (flags) | __GFP_ZERO) #define kzalloc(size, flags) kmalloc(size, (flags) | __GFP_ZERO) #define kzalloc_node(size, flags, node) kmalloc_node(size, (flags) | __GFP_ZERO, node) diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index ebb92eacbf9a..628af17df853 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -106,6 +106,7 @@ linux_alloc_pages(gfp_t flags, unsigned int order) if ((flags & M_ZERO) != 0) req |= VM_ALLOC_ZERO; + if (order == 0 && (flags & GFP_DMA32) == 0) { page = vm_page_alloc_noobj(req); if (page == NULL) @@ -113,6 +114,10 @@ linux_alloc_pages(gfp_t flags, unsigned int order) } else { vm_paddr_t pmax = (flags & GFP_DMA32) ? BUS_SPACE_MAXADDR_32BIT : BUS_SPACE_MAXADDR; + + if ((flags & __GFP_NORETRY) != 0) + req |= VM_ALLOC_NORECLAIM; + retry: page = vm_page_alloc_noobj_contig(req, npages, 0, pmax, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); diff --git a/sys/dev/usb/controller/xhci_pci.c b/sys/dev/usb/controller/xhci_pci.c index b50e33ea36ce..d5cfd228a429 100644 --- a/sys/dev/usb/controller/xhci_pci.c +++ b/sys/dev/usb/controller/xhci_pci.c @@ -99,6 +99,11 @@ xhci_pci_match(device_t self) return ("AMD Starship USB 3.0 controller"); case 0x149c1022: return ("AMD Matisse USB 3.0 controller"); + case 0x15b61022: + case 0x15b71022: + return ("AMD Raphael/Granite Ridge USB 3.1 controller"); + case 0x15b81022: + return ("AMD Raphael/Granite Ridge USB 2.0 controller"); case 0x15e01022: case 0x15e11022: return ("AMD Raven USB 3.1 controller"); @@ -109,6 +114,8 @@ xhci_pci_match(device_t self) return ("AMD 300 Series USB 3.1 controller"); case 0x43d51022: return ("AMD 400 Series USB 3.1 controller"); + case 0x43f71022: + return ("AMD 600 Series USB 3.2 controller"); case 0x78121022: case 0x78141022: case 0x79141022: diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 5db61c8951f6..33e0d94954d7 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1521,6 +1521,9 @@ msdosfs_readdir(struct vop_readdir_args *ap) ap->a_vp, uio, ap->a_cred, ap->a_eofflag); #endif + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 0; + /* * msdosfs_readdir() won't operate properly on regular files since * it does i/o only with the filesystem vnode, and hence can @@ -1614,8 +1617,11 @@ msdosfs_readdir(struct vop_readdir_args *ap) on = (offset - bias) & pmp->pm_crbomask; n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); - if (diff <= 0) - break; + if (diff <= 0) { + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; + goto out; + } n = min(n, diff); error = pcbmap(dep, lbn, &bn, &cn, &blsize); if (error) @@ -1646,6 +1652,8 @@ msdosfs_readdir(struct vop_readdir_args *ap) */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); + if (ap->a_eofflag != NULL) + *ap->a_eofflag = 1; goto out; } /* @@ -1743,15 +1751,6 @@ out: uio->uio_offset = off; - /* - * Set the eofflag (NFS uses it) - */ - if (ap->a_eofflag) { - if (dep->de_FileSize - (offset - bias) <= 0) - *ap->a_eofflag = 1; - else - *ap->a_eofflag = 0; - } return (error); } diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index e7d460af21d4..f577cd07ac7c 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -17,6 +17,8 @@ # in NOTES. # +#NO_UNIVERSE + cpu I486_CPU cpu I586_CPU cpu I686_CPU diff --git a/sys/i386/conf/GENERIC-NODEBUG b/sys/i386/conf/GENERIC-NODEBUG index ea07613a796f..a93304481b5f 100644 --- a/sys/i386/conf/GENERIC-NODEBUG +++ b/sys/i386/conf/GENERIC-NODEBUG @@ -25,6 +25,8 @@ # in NOTES. # +#NO_UNIVERSE + include GENERIC include "std.nodebug" diff --git a/sys/i386/conf/LINT b/sys/i386/conf/LINT index 41207eb63cb9..2e947202f723 100644 --- a/sys/i386/conf/LINT +++ b/sys/i386/conf/LINT @@ -1,3 +1,4 @@ +#NO_UNIVERSE include "../../conf/NOTES" include "../../x86/conf/NOTES" diff --git a/sys/i386/conf/MINIMAL b/sys/i386/conf/MINIMAL index 2a06eb84bff8..8019617ca4d4 100644 --- a/sys/i386/conf/MINIMAL +++ b/sys/i386/conf/MINIMAL @@ -31,6 +31,8 @@ # in NOTES. # +#NO_UNIVERSE + cpu I486_CPU cpu I586_CPU cpu I686_CPU diff --git a/sys/i386/conf/PAE b/sys/i386/conf/PAE index a39d32d77106..72af9e9a9eec 100644 --- a/sys/i386/conf/PAE +++ b/sys/i386/conf/PAE @@ -2,6 +2,8 @@ # PAE -- Generic kernel configuration file for FreeBSD/i386 PAE # +#NO_UNIVERSE + include GENERIC ident PAE-GENERIC diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 465b4d0f365b..5065b7e61ee8 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -5617,6 +5617,8 @@ __CONCAT(PMTYPE, unmapdev)(void *p, vm_size_t size) static void __CONCAT(PMTYPE, page_set_memattr)(vm_page_t m, vm_memattr_t ma) { + if (m->md.pat_mode == ma) + return; m->md.pat_mode = ma; if ((m->flags & PG_FICTITIOUS) != 0) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 93bdd41d1515..a27ab33b34da 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -557,8 +557,10 @@ open_to_fde_flags(int open_flags, bool sticky_orb) { .f = O_CLOFORK, .t = UF_FOCLOSE }, { .f = O_RESOLVE_BENEATH, .t = UF_RESOLVE_BENEATH }, }; +#if defined(__clang__) && __clang_major__ >= 19 _Static_assert(open_to_fde_flags_s[nitems(open_to_fde_flags_s) - 1].f == O_RESOLVE_BENEATH, "O_RESOLVE_BENEATH must be last, for sticky_orb"); +#endif return (flags_trans(open_to_fde_flags_s, nitems(open_to_fde_flags_s) - (sticky_orb ? 0 : 1), open_flags)); diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 18388ae5f232..bac7d0080c71 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -338,8 +338,9 @@ ast_handler(struct thread *td, struct trapframe *framep, bool dtor) td->td_ast = 0; } - CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, td->td_proc->p_pid, - td->td_proc->p_comm); + CTR3(KTR_SYSC, "ast: thread %p (pid %d, %s)", td, + td->td_proc == NULL ? -1 : td->td_proc->p_pid, + td->td_proc == NULL ? "" : td->td_proc->p_comm); KASSERT(framep == NULL || TRAPF_USERMODE(framep), ("ast in kernel mode")); diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c index 9867a718e148..5b52bfa80e3b 100644 --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -718,6 +718,7 @@ lagg_capabilities(struct lagg_softc *sc) sc->sc_ifp->if_capenable = ena; sc->sc_ifp->if_capenable2 = ena2; sc->sc_ifp->if_hwassist = hwa; + (void)if_hw_tsomax_update(sc->sc_ifp, &hw_tsomax); getmicrotime(&sc->sc_ifp->if_lastchange); if (sc->sc_ifflags & IFF_DEBUG) diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 8ee4d00daaff..1f2011634695 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -2500,7 +2500,7 @@ int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t, - pf_addr_filter_func_t); + pf_addr_filter_func_t, bool); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * pfr_attach_table(struct pf_kruleset *, char *); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 41658a29014e..a410fe570c39 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -5906,11 +5906,12 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, * it is applied only from the last pass rule. */ pd->act.rt = r->rt; - /* Don't use REASON_SET, pf_map_addr increases the reason counters */ - ctx.reason = pf_map_addr_sn(pd->af, r, pd->src, &pd->act.rt_addr, - &pd->act.rt_kif, NULL, &(r->route), PF_SN_ROUTE); - if (ctx.reason != 0) + if ((transerror = pf_map_addr_sn(pd->af, r, pd->src, + &pd->act.rt_addr, &pd->act.rt_kif, NULL, &(r->route), + PF_SN_ROUTE)) != PFRES_MATCH) { + REASON_SET(&ctx.reason, transerror); goto cleanup; + } } if (pd->virtual_proto != PF_VPROTO_FRAGMENT && @@ -6054,9 +6055,16 @@ pf_create_state(struct pf_krule *r, struct pf_test_ctx *ctx, /* src node for translation rule */ if (ctx->nr != NULL) { KASSERT(ctx->nat_pool != NULL, ("%s: nat_pool is NULL", __func__)); + /* + * The NAT addresses are chosen during ruleset parsing. + * The new afto code stores post-nat addresses in nsaddr. + * The old nat code (also used for new nat-to rules) creates + * state keys and stores addresses in them. + */ if ((ctx->nat_pool->opts & PF_POOL_STICKYADDR) && (sn_reason = pf_insert_src_node(sns, snhs, ctx->nr, - &ctx->sk->addr[pd->sidx], pd->af, &ctx->nk->addr[1], NULL, + ctx->sk ? &(ctx->sk->addr[pd->sidx]) : pd->src, pd->af, + ctx->nk ? &(ctx->nk->addr[1]) : &(pd->nsaddr), NULL, PF_SN_NAT)) != 0 ) { REASON_SET(&ctx->reason, sn_reason); goto csfailed; diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index d4728f61dce8..26f7ab41eef4 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -617,7 +617,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, rpool->tblidx = (int)arc4random_uniform(cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, - af, pf_islinklocal)) { + af, pf_islinklocal, false)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } @@ -684,7 +684,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, rpool->tblidx = (int)(hashidx % cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, - af, pf_islinklocal)) { + af, pf_islinklocal, false)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } @@ -701,11 +701,12 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) + &rpool->tblidx, &rpool->counter, af, NULL, true)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) + &rpool->tblidx, &rpool->counter, af, pf_islinklocal, + true)) goto get_addr; } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) goto get_addr; @@ -715,9 +716,10 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); + rpool->tblidx = -1; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (pfr_pool_get(rpool->cur->addr.p.tbl, - &rpool->tblidx, &rpool->counter, af, NULL)) { + &rpool->tblidx, &rpool->counter, af, NULL, true)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -725,9 +727,9 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, goto done_pool_mtx; } } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { - rpool->tblidx = -1; if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, - &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) { + &rpool->tblidx, &rpool->counter, af, pf_islinklocal, + true)) { /* table contains no address of type 'af' */ if (rpool->cur != acur) goto try_next; @@ -755,10 +757,6 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, done_pool_mtx: mtx_unlock(&rpool->mtx); - if (reason) { - counter_u64_add(V_pf_status.counters[reason], 1); - } - return (reason); } @@ -793,7 +791,7 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, if (nkif) *nkif = sn->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { - printf("pf_map_addr: src tracking maps "); + printf("%s: src tracking maps ", __func__); pf_print_host(saddr, 0, af); printf(" to "); pf_print_host(naddr, 0, af); @@ -808,14 +806,16 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, * Source node has not been found. Find a new address and store it * in variables given by the caller. */ - if (pf_map_addr(af, r, saddr, naddr, nkif, init_addr, rpool) != 0) { - /* pf_map_addr() sets reason counters on its own */ + if ((reason = pf_map_addr(af, r, saddr, naddr, nkif, init_addr, + rpool)) != 0) { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: pf_map_addr has failed\n", __func__); goto done; } if (V_pf_status.debug >= PF_DEBUG_NOISY && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { - printf("pf_map_addr: selected address "); + printf("%s: selected address ", __func__); pf_print_host(naddr, 0, af); if (nkif) printf("@%s", (*nkif)->pfik_name); @@ -826,10 +826,6 @@ done: if (sn != NULL) PF_SRC_NODE_UNLOCK(sn); - if (reason) { - counter_u64_add(V_pf_status.counters[reason], 1); - } - return (reason); } diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c index 2034f4422ef1..e3f3ab9025f7 100644 --- a/sys/netpfil/pf/pf_table.c +++ b/sys/netpfil/pf/pf_table.c @@ -2294,7 +2294,7 @@ pfr_detach_table(struct pfr_ktable *kt) int pfr_pool_get(struct pfr_ktable *kt, int *pidx, struct pf_addr *counter, - sa_family_t af, pf_addr_filter_func_t filter) + sa_family_t af, pf_addr_filter_func_t filter, bool loop_once) { struct pf_addr *addr, cur, mask, umask_addr; union sockaddr_union uaddr, umask; @@ -2339,7 +2339,7 @@ _next_block: ke = pfr_kentry_byidx(kt, idx, af); if (ke == NULL) { /* we don't have this idx, try looping */ - if (loop || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) { + if ((loop || loop_once) || (ke = pfr_kentry_byidx(kt, 0, af)) == NULL) { pfr_kstate_counter_add(&kt->pfrkt_nomatch, 1); return (1); } diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 7746b668265d..ae17b3289593 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1469,6 +1469,9 @@ moea_page_set_memattr(vm_page_t m, vm_memattr_t ma) pmap_t pmap; u_int lo; + if (m->md.mdpg_cache_attrs == ma) + return; + if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 79cea408bb5f..796b1719b8ba 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -2134,6 +2134,9 @@ moea64_page_set_memattr(vm_page_t m, vm_memattr_t ma) CTR3(KTR_PMAP, "%s: pa=%#jx, ma=%#x", __func__, (uintmax_t)VM_PAGE_TO_PHYS(m), ma); + if (m->md.mdpg_cache_attrs == ma) + return; + if ((m->oflags & VPO_UNMANAGED) != 0) { m->md.mdpg_cache_attrs = ma; return; diff --git a/sys/powerpc/aim/mmu_radix.c b/sys/powerpc/aim/mmu_radix.c index 45f7bef8bcc9..a12142fc2d7b 100644 --- a/sys/powerpc/aim/mmu_radix.c +++ b/sys/powerpc/aim/mmu_radix.c @@ -5937,6 +5937,10 @@ mmu_radix_page_set_memattr(vm_page_t m, vm_memattr_t ma) { CTR3(KTR_PMAP, "%s(%p, %#x)", __func__, m, ma); + + if (m->md.mdpg_cache_attrs == ma) + return; + m->md.mdpg_cache_attrs = ma; /* diff --git a/sys/powerpc/include/pcb.h b/sys/powerpc/include/pcb.h index 050ada6b0f64..0230cf78aba7 100644 --- a/sys/powerpc/include/pcb.h +++ b/sys/powerpc/include/pcb.h @@ -66,16 +66,8 @@ struct pcb { #define PCB_VECREGS 0x200 /* Process had Altivec registers initialized */ struct fpu { union { -#if _BYTE_ORDER == _BIG_ENDIAN - double fpr; - uint32_t vsr[4]; -#else uint32_t vsr[4]; - struct { - double padding; - double fpr; - }; -#endif + double fpr; } fpr[32]; double fpscr; /* FPSCR stored as double for easier access */ } pcb_fpu; /* Floating point processor */ diff --git a/sys/powerpc/include/ucontext.h b/sys/powerpc/include/ucontext.h index d35c6c773fe0..dc87edd578bc 100644 --- a/sys/powerpc/include/ucontext.h +++ b/sys/powerpc/include/ucontext.h @@ -41,6 +41,7 @@ typedef struct __mcontext { int mc_flags; #define _MC_FP_VALID 0x01 #define _MC_AV_VALID 0x02 +#define _MC_VS_VALID 0x04 int mc_onstack; /* saved onstack flag */ int mc_len; /* sizeof(__mcontext) */ __uint64_t mc_avec[32*2]; /* vector register file */ @@ -56,6 +57,7 @@ typedef struct __mcontext32 { int mc_flags; #define _MC_FP_VALID 0x01 #define _MC_AV_VALID 0x02 +#define _MC_VS_VALID 0x04 int mc_onstack; /* saved onstack flag */ int mc_len; /* sizeof(__mcontext) */ uint64_t mc_avec[32*2]; /* vector register file */ diff --git a/sys/powerpc/powerpc/exec_machdep.c b/sys/powerpc/powerpc/exec_machdep.c index 1893d79f29a8..8a33d0f589a7 100644 --- a/sys/powerpc/powerpc/exec_machdep.c +++ b/sys/powerpc/powerpc/exec_machdep.c @@ -214,10 +214,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) sfpsize = sizeof(sf); #ifdef __powerpc64__ /* - * 64-bit PPC defines a 288 byte scratch region - * below the stack. + * 64-bit PPC defines a 512 byte red zone below + * the existing stack (ELF ABI v2 §2.2.2.4) */ - rndfsize = 288 + roundup(sizeof(sf), 48); + rndfsize = 512 + roundup(sizeof(sf), 48); #else rndfsize = roundup(sizeof(sf), 16); #endif @@ -349,13 +349,6 @@ sys_sigreturn(struct thread *td, struct sigreturn_args *uap) if (error != 0) return (error); - /* - * Save FPU state if needed. User may have changed it on - * signal handler - */ - if (uc.uc_mcontext.mc_srr1 & PSL_FP) - save_fpu(td); - kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", @@ -432,6 +425,7 @@ grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) } if (pcb->pcb_flags & PCB_VSX) { + mcp->mc_flags |= _MC_VS_VALID; for (i = 0; i < 32; i++) memcpy(&mcp->mc_vsxfpreg[i], &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double)); @@ -481,6 +475,7 @@ set_mcontext(struct thread *td, mcontext_t *mcp) struct pcb *pcb; struct trapframe *tf; register_t tls; + register_t msr; int i; pcb = td->td_pcb; @@ -531,6 +526,22 @@ set_mcontext(struct thread *td, mcontext_t *mcp) tf->srr1 &= ~(PSL_FP | PSL_VSX | PSL_VEC); pcb->pcb_flags &= ~(PCB_FPU | PCB_VSX | PCB_VEC); + /* + * Ensure the FPU is also disabled in hardware. + * + * Without this, it's possible for the register reload to fail if we + * don't switch to a FPU disabled context before resuming the original + * thread. Specifically, if the FPU/VSX unavailable exception is never + * hit, then whatever data is still in the FP/VSX registers when + * sigresume is callled will used by the resumed thread, instead of the + * previously saved data from the mcontext. + */ + critical_enter(); + msr = mfmsr() & ~(PSL_FP | PSL_VSX | PSL_VEC); + isync(); + mtmsr(msr); + critical_exit(); + if (mcp->mc_flags & _MC_FP_VALID) { /* enable_fpu() will happen lazily on a fault */ pcb->pcb_flags |= PCB_FPREGS; @@ -539,8 +550,12 @@ set_mcontext(struct thread *td, mcontext_t *mcp) for (i = 0; i < 32; i++) { memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i], sizeof(double)); - memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], - &mcp->mc_vsxfpreg[i], sizeof(double)); + } + if (mcp->mc_flags & _MC_VS_VALID) { + for (i = 0; i < 32; i++) { + memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], + &mcp->mc_vsxfpreg[i], sizeof(double)); + } } } diff --git a/sys/powerpc/powerpc/fpu.c b/sys/powerpc/powerpc/fpu.c index 0eaff2ea4932..cc8f22f7dda3 100644 --- a/sys/powerpc/powerpc/fpu.c +++ b/sys/powerpc/powerpc/fpu.c @@ -64,8 +64,19 @@ save_fpu_int(struct thread *td) * Save the floating-point registers and FPSCR to the PCB */ if (pcb->pcb_flags & PCB_VSX) { - #define SFP(n) __asm ("stxvw4x " #n ", 0,%0" \ +#if _BYTE_ORDER == _BIG_ENDIAN + #define SFP(n) __asm("stxvw4x " #n ", 0,%0" \ :: "b"(&pcb->pcb_fpu.fpr[n])); +#else + /* + * stxvw2x will swap words within the FP double word on LE systems, + * leading to corruption if VSX is used to store state and FP is + * subsequently used to restore state. + * Use stxvd2x instead. + */ + #define SFP(n) __asm("stxvd2x " #n ", 0,%0" \ + :: "b"(&pcb->pcb_fpu.fpr[n])); +#endif SFP(0); SFP(1); SFP(2); SFP(3); SFP(4); SFP(5); SFP(6); SFP(7); SFP(8); SFP(9); SFP(10); SFP(11); @@ -76,7 +87,7 @@ save_fpu_int(struct thread *td) SFP(28); SFP(29); SFP(30); SFP(31); #undef SFP } else { - #define SFP(n) __asm ("stfd " #n ", 0(%0)" \ + #define SFP(n) __asm("stfd " #n ", 0(%0)" \ :: "b"(&pcb->pcb_fpu.fpr[n].fpr)); SFP(0); SFP(1); SFP(2); SFP(3); SFP(4); SFP(5); SFP(6); SFP(7); @@ -149,8 +160,19 @@ enable_fpu(struct thread *td) :: "b"(&pcb->pcb_fpu.fpscr)); if (pcb->pcb_flags & PCB_VSX) { - #define LFP(n) __asm ("lxvw4x " #n ", 0,%0" \ +#if _BYTE_ORDER == _BIG_ENDIAN + #define LFP(n) __asm("lxvw4x " #n ", 0,%0" \ + :: "b"(&pcb->pcb_fpu.fpr[n])); +#else + /* + * lxvw4x will swap words within the FP double word on LE systems, + * leading to corruption if FP is used to store state and VSX is + * subsequently used to restore state. + * Use lxvd2x instead. + */ + #define LFP(n) __asm("lxvd2x " #n ", 0,%0" \ :: "b"(&pcb->pcb_fpu.fpr[n])); +#endif LFP(0); LFP(1); LFP(2); LFP(3); LFP(4); LFP(5); LFP(6); LFP(7); LFP(8); LFP(9); LFP(10); LFP(11); @@ -161,7 +183,7 @@ enable_fpu(struct thread *td) LFP(28); LFP(29); LFP(30); LFP(31); #undef LFP } else { - #define LFP(n) __asm ("lfd " #n ", 0(%0)" \ + #define LFP(n) __asm("lfd " #n ", 0(%0)" \ :: "b"(&pcb->pcb_fpu.fpr[n].fpr)); LFP(0); LFP(1); LFP(2); LFP(3); LFP(4); LFP(5); LFP(6); LFP(7); diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c index 5d15bd671285..26efaecc64d1 100644 --- a/sys/riscv/riscv/pmap.c +++ b/sys/riscv/riscv/pmap.c @@ -4838,6 +4838,8 @@ pmap_unmapbios(void *p, vm_size_t size) void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { + if (m->md.pv_memattr == ma) + return; m->md.pv_memattr = ma; diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c index 7b8bf4c77663..b44bdb96b0d4 100644 --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -131,8 +131,7 @@ static void vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) { - KASSERT(di->di_n > 0, - ("vm_domainset_iter_first: Invalid n %d", di->di_n)); + KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n)); switch (di->di_policy) { case DOMAINSET_POLICY_FIRSTTOUCH: /* @@ -149,11 +148,10 @@ vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) vm_domainset_iter_prefer(di, domain); break; default: - panic("vm_domainset_iter_first: Unknown policy %d", - di->di_policy); + panic("%s: Unknown policy %d", __func__, di->di_policy); } KASSERT(*domain < vm_ndomains, - ("vm_domainset_iter_next: Invalid domain %d", *domain)); + ("%s: Invalid domain %d", __func__, *domain)); } static void @@ -189,13 +187,11 @@ vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) di->di_n = di->di_domain->ds_cnt; break; default: - panic("vm_domainset_iter_first: Unknown policy %d", - di->di_policy); + panic("%s: Unknown policy %d", __func__, di->di_policy); } - KASSERT(di->di_n > 0, - ("vm_domainset_iter_first: Invalid n %d", di->di_n)); + KASSERT(di->di_n > 0, ("%s: Invalid n %d", __func__, di->di_n)); KASSERT(*domain < vm_ndomains, - ("vm_domainset_iter_first: Invalid domain %d", *domain)); + ("%s: Invalid domain %d", __func__, *domain)); } void diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 875c22d27628..e7d7b6726d2c 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -110,11 +110,18 @@ u_int exec_map_entry_size; u_int exec_map_entries; SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, - SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address"); +#if defined(__amd64__) + &kva_layout.km_low, 0, +#else + SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, +#endif + "Min kernel address"); SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD, #if defined(__arm__) &vm_max_kernel_address, 0, +#elif defined(__amd64__) + &kva_layout.km_high, 0, #else SYSCTL_NULL_ULONG_PTR, VM_MAX_KERNEL_ADDRESS, #endif diff --git a/tests/sys/netpfil/pf/anchor.sh b/tests/sys/netpfil/pf/anchor.sh index 4692c06142df..64ca84b34c3d 100644 --- a/tests/sys/netpfil/pf/anchor.sh +++ b/tests/sys/netpfil/pf/anchor.sh @@ -350,9 +350,9 @@ nat_body() jexec alcatraz pfctl -sn -a "foo/bar" jexec alcatraz pfctl -sn -a "foo/baz" - atf_check -s exit:0 -o match:"nat log on epair0a inet from 192.0.2.0/24 to any port = domain -> 192.0.2.1" \ + atf_check -s exit:0 -o match:"nat log on ${epair}a inet from 192.0.2.0/24 to any port = domain -> 192.0.2.1" \ jexec alcatraz pfctl -sn -a "*" - atf_check -s exit:0 -o match:"rdr on epair0a inet proto tcp from any to any port = echo -> 127.0.0.1 port 7" \ + atf_check -s exit:0 -o match:"rdr on ${epair}a inet proto tcp from any to any port = echo -> 127.0.0.1 port 7" \ jexec alcatraz pfctl -sn -a "*" } diff --git a/tests/sys/netpfil/pf/nat.sh b/tests/sys/netpfil/pf/nat.sh index f1fdf6405d97..16c981f97399 100644 --- a/tests/sys/netpfil/pf/nat.sh +++ b/tests/sys/netpfil/pf/nat.sh @@ -777,6 +777,38 @@ binat_match_cleanup() kill $(cat ${PWD}/inetd_tester.pid) } +atf_test_case "empty_pool" "cleanup" +empty_pool_head() +{ + atf_set descr 'NAT with empty pool' + atf_set require.user root +} + +empty_pool_body() +{ + pft_init + setup_router_server_ipv6 + + + pft_set_rules router \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv }" \ + "pass in on ${epair_tester}b" \ + "pass out on ${epair_server}a inet6 from any to ${net_server_host_server} nat-to <nonexistent>" \ + + # pf_map_addr_sn() won't be able to pick a target address, because + # the table used in redireciton pool is empty. Packet will not be + # forwarded, error counter will be increased. + ping_server_check_reply exit:1 + # Ignore warnings about not-loaded ALTQ + atf_check -o "match:map-failed +1 +" -x "jexec router pfctl -qvvsi 2> /dev/null" +} + +empty_pool_cleanup() +{ + pft_cleanup +} + atf_init_test_cases() { atf_add_test_case "exhaust" @@ -794,4 +826,5 @@ atf_init_test_cases() atf_add_test_case "nat_match" atf_add_test_case "binat_compat" atf_add_test_case "binat_match" + atf_add_test_case "empty_pool" } diff --git a/tests/sys/netpfil/pf/rdr.sh b/tests/sys/netpfil/pf/rdr.sh index 4c08b4973891..f7c920bbfa8f 100644 --- a/tests/sys/netpfil/pf/rdr.sh +++ b/tests/sys/netpfil/pf/rdr.sh @@ -142,7 +142,7 @@ tcp_v6_pass_body() { tcp_v6_setup # Sets ${epair_…} variables tcp_v6_common \ - "rdr on ${epair_one}a proto tcp from any to any port 80 -> 2001:db8:b::2 port 8000" + "pass in on ${epair_one}a proto tcp from any to any port 80 rdr-to 2001:db8:b::2 port 8000" } tcp_v6_pass_cleanup() diff --git a/tests/sys/netpfil/pf/route_to.sh b/tests/sys/netpfil/pf/route_to.sh index 5c0d355b8ea1..fd1653cce311 100644 --- a/tests/sys/netpfil/pf/route_to.sh +++ b/tests/sys/netpfil/pf/route_to.sh @@ -859,6 +859,121 @@ ttl_cleanup() pft_cleanup } + +atf_test_case "empty_pool" "cleanup" +empty_pool_head() +{ + atf_set descr 'Route-to with empty pool' + atf_set require.user root +} + +empty_pool_body() +{ + pft_init + setup_router_server_ipv6 + + + pft_set_rules router \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv }" \ + "pass in on ${epair_tester}b route-to (${epair_server}a <nonexistent>) inet6 from any to ${net_server_host_server}" \ + "pass out on ${epair_server}a" + + # pf_map_addr_sn() won't be able to pick a target address, because + # the table used in redireciton pool is empty. Packet will not be + # forwarded, error counter will be increased. + ping_server_check_reply exit:1 + # Ignore warnings about not-loaded ALTQ + atf_check -o "match:map-failed +1 +" -x "jexec router pfctl -qvvsi 2> /dev/null" +} + +empty_pool_cleanup() +{ + pft_cleanup +} + + +atf_test_case "table_loop" "cleanup" + +table_loop_head() +{ + atf_set descr 'Check that iterating over tables poperly loops' + atf_set require.user root +} + +table_loop_body() +{ + setup_router_server_nat64 + + # Clients will connect from another network behind the router. + # This allows for using multiple source addresses. + jexec router route add -6 ${net_clients_6}::/${net_clients_6_mask} ${net_tester_6_host_tester} + jexec router route add ${net_clients_4}.0/${net_clients_4_mask} ${net_tester_4_host_tester} + + # The servers are reachable over additional IP addresses for + # testing of tables and subnets. The addresses are noncontinougnus + # for pf_map_addr() counter tests. + for i in 0 1 4 5; do + a1=$((24 + i)) + jexec server1 ifconfig ${epair_server1}b inet ${net_server1_4}.${a1}/32 alias + jexec server1 ifconfig ${epair_server1}b inet6 ${net_server1_6}::42:${i}/128 alias + a2=$((40 + i)) + jexec server2 ifconfig ${epair_server2}b inet ${net_server2_4}.${a2}/32 alias + jexec server2 ifconfig ${epair_server2}b inet6 ${net_server2_6}::42:${i}/128 alias + done + + jexec router pfctl -e + pft_set_rules router \ + "set debug loud" \ + "set reassemble yes" \ + "set state-policy if-bound" \ + "table <rt_targets_1> { ${net_server1_6}::42:4/127 ${net_server1_6}::42:0/127 }" \ + "table <rt_targets_2> { ${net_server2_6}::42:4/127 }" \ + "pass in on ${epair_tester}b \ + route-to { \ + (${epair_server1}a <rt_targets_1>) \ + (${epair_server2}a <rt_targets_2_empty>) \ + (${epair_server2}a <rt_targets_2>) \ + } \ + inet6 proto tcp \ + keep state" + + # Both hosts of the pool are tables. Each table gets iterated over once, + # then the pool iterates to the next host, which is also iterated, + # then the pool loops back to the 1st host. If an empty table is found, + # it is skipped. Unless that's the only table, that is tested by + # the "empty_pool" test. + for port in $(seq 1 7); do + port=$((4200 + port)) + atf_check -s exit:0 ${common_dir}/pft_ping.py \ + --sendif ${epair_tester}a --replyif ${epair_tester}a \ + --fromaddr ${net_clients_6}::1 --to ${host_server_6} \ + --ping-type=tcp3way --send-sport=${port} + done + + states=$(mktemp) || exit 1 + jexec router pfctl -qvvss | normalize_pfctl_s > $states + cat $states + + for state_regexp in \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4201\] .* route-to: ${net_server1_6}::42:0@${epair_server1}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4202\] .* route-to: ${net_server1_6}::42:1@${epair_server1}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4203\] .* route-to: ${net_server1_6}::42:4@${epair_server1}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4204\] .* route-to: ${net_server1_6}::42:5@${epair_server1}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4205\] .* route-to: ${net_server2_6}::42:4@${epair_server2}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4206\] .* route-to: ${net_server2_6}::42:5@${epair_server2}a" \ + "${epair_tester}b tcp ${host_server_6}\[9\] <- ${net_clients_6}::1\[4207\] .* route-to: ${net_server1_6}::42:0@${epair_server1}a" \ + ; do + grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'" + done +} + +table_loop_cleanup() +{ + pft_cleanup +} + + atf_init_test_cases() { atf_add_test_case "v4" @@ -877,4 +992,6 @@ atf_init_test_cases() atf_add_test_case "dummynet_double" atf_add_test_case "sticky" atf_add_test_case "ttl" + atf_add_test_case "empty_pool" + atf_add_test_case "table_loop" } diff --git a/tests/sys/netpfil/pf/utils.subr b/tests/sys/netpfil/pf/utils.subr index 6af10e80390d..3f8d437920f9 100644 --- a/tests/sys/netpfil/pf/utils.subr +++ b/tests/sys/netpfil/pf/utils.subr @@ -274,6 +274,107 @@ setup_router_server_ipv6() jexec server inetd -p ${PWD}/inetd.pid $inetd_conf } +# Create a router and 2 server jails for nat64 and rfc5549 test cases. +# The router is connected to servers, both are dual-stack, and to the +# tester jail. All links are dual stack. +setup_router_server_nat64() +{ + pft_init + + epair_tester=$(vnet_mkepair) + epair_server1=$(vnet_mkepair) + epair_server2=$(vnet_mkepair) + + # Funny how IPv4 address space is to small to even assign nice /24 + # prefixes on all needed networks. On IPv6 we have a separate /64 for + # each link, loopback server, and client/SNAT pool. On IPv4 we must + # use small /28 prefixes, so even though we define all networks + # as variables we can't easily use them in tests if additional addresses + # are needed. + + # IP addresses which can be used by the tester jail. + # Can be used as SNAT or as source with pft_ping.py. It is up to + # the test code to make them accessible from router. + net_clients_4=203.0.113 + net_clients_4_mask=24 + net_clients_6=2001:db8:44 + net_clients_6_mask=64 + + # IP addresses on loopback interfaces of both servers. They can be + # accessed using the route-to targtet. + host_server_4=192.0.2.100 + host_server_6=2001:db8:4203::100 + + net_tester_4=198.51.100 + net_tester_4_mask=28 + net_tester_4_host_router=198.51.100.1 + net_tester_4_host_tester=198.51.100.2 + + net_tester_6=2001:db8:4200 + net_tester_6_mask=64 + net_tester_6_host_router=2001:db8:4200::1 + net_tester_6_host_tester=2001:db8:4200::2 + + net_server1_4=198.51.100 + net_server1_4_mask=28 + net_server1_4_host_router=198.51.100.17 + net_server1_4_host_server=198.51.100.18 + + net_server1_6=2001:db8:4201 + net_server1_6_mask=64 + net_server1_6_host_router=2001:db8:4201::1 + net_server1_6_host_server=2001:db8:4201::2 + + net_server2_4=198.51.100 + net_server2_4_mask=28 + net_server2_4_host_router=198.51.100.33 + net_server2_4_host_server=198.51.100.34 + + net_server2_6=2001:db8:4202 + net_server2_6_mask=64 + net_server2_6_host_router=2001:db8:4202::1 + net_server2_6_host_server=2001:db8:4202::2 + + vnet_mkjail router ${epair_tester}b ${epair_server1}a ${epair_server2}a + jexec router ifconfig ${epair_tester}b inet ${net_tester_4_host_router}/${net_tester_4_mask} up + jexec router ifconfig ${epair_tester}b inet6 ${net_tester_6_host_router}/${net_tester_6_mask} up no_dad + jexec router ifconfig ${epair_server1}a inet ${net_server1_4_host_router}/${net_server1_4_mask} up + jexec router ifconfig ${epair_server1}a inet6 ${net_server1_6_host_router}/${net_server1_6_mask} up no_dad + jexec router ifconfig ${epair_server2}a inet ${net_server2_4_host_router}/${net_server2_4_mask} up + jexec router ifconfig ${epair_server2}a inet6 ${net_server2_6_host_router}/${net_server2_6_mask} up no_dad + jexec router sysctl net.inet.ip.forwarding=1 + jexec router sysctl net.inet6.ip6.forwarding=1 + jexec router pfctl -e + + ifconfig ${epair_tester}a inet ${net_tester_4_host_tester}/${net_tester_4_mask} up + ifconfig ${epair_tester}a inet6 ${net_tester_6_host_tester}/${net_tester_6_mask} up no_dad + route add 0.0.0.0/0 ${net_tester_4_host_router} + route add -6 ::/0 ${net_tester_6_host_router} + + inetd_conf=$(mktemp) + echo "discard stream tcp46 nowait root internal" >> $inetd_conf + + vnet_mkjail server1 ${epair_server1}b + jexec server1 /etc/rc.d/netif start lo0 + jexec server1 ifconfig ${epair_server1}b inet ${net_server1_4_host_server}/${net_server1_4_mask} up + jexec server1 ifconfig ${epair_server1}b inet6 ${net_server1_6_host_server}/${net_server1_6_mask} up no_dad + jexec server1 ifconfig lo0 ${host_server_4}/32 alias + jexec server1 ifconfig lo0 inet6 ${host_server_6}/128 alias + jexec server1 inetd -p ${PWD}/inetd_1.pid $inetd_conf + jexec server1 route add 0.0.0.0/0 ${net_server1_4_host_router} + + jexec server1 route add -6 ::/0 ${net_server1_6_host_router} + vnet_mkjail server2 ${epair_server2}b + jexec server2 /etc/rc.d/netif start lo0 + jexec server2 ifconfig ${epair_server2}b inet ${net_server2_4_host_server}/${net_server2_4_mask} up + jexec server2 ifconfig ${epair_server2}b inet6 ${net_server2_6_host_server}/${net_server2_6_mask} up no_dad + jexec server2 ifconfig lo0 ${host_server_4}/32 alias + jexec server2 ifconfig lo0 inet6 ${host_server_6}/128 alias + jexec server2 inetd -p ${PWD}/inetd_2.pid $inetd_conf + jexec server2 route add 0.0.0.0/0 ${net_server2_4_host_router} + jexec server2 route add -6 ::/0 ${net_server2_6_host_router} +} + # Ping the dummy static NDP target. # Check for pings being forwarded through the router towards the target. ping_dummy_check_request() diff --git a/usr.bin/find/find.1 b/usr.bin/find/find.1 index eb3fd4d0dbde..8c2d8624a82a 100644 --- a/usr.bin/find/find.1 +++ b/usr.bin/find/find.1 @@ -1156,7 +1156,7 @@ and was removed in .At v3 . It was rewritten for .At v5 -and later be enhanced for the Programmer's Workbench (PWB). +and was later enhanced for the Programmer's Workbench (PWB). These changes were later incorporated in .At v7 . .Sh BUGS diff --git a/usr.bin/fortune/datfiles/freebsd-tips b/usr.bin/fortune/datfiles/freebsd-tips index 1e9501e3a6fb..6a2b59ff5fa7 100644 --- a/usr.bin/fortune/datfiles/freebsd-tips +++ b/usr.bin/fortune/datfiles/freebsd-tips @@ -555,7 +555,7 @@ Use "sysrc name=value" to add an entry and "sysrc -x name" to delete an entry. You can upload the dmesg of your system to help developers get an overview of commonly used hardware and peripherals for FreeBSD. Use the curl package to upload it like this: curl -v -d "nickname=$USER" -d "description=FreeBSD/$(uname -m) on \ -$(kenv smbios.system.maker) $(kenv smbios.system.product)" -d "do=addd" \ +$(kenv smbios.system.maker) $(kenv smbios.system.product)" -d "do=add" \ --data-urlencode 'dmesg@/var/run/dmesg.boot' http://dmesgd.nycbug.org/index.cgi % Want to know how much memory (in bytes) your machine has installed? Let diff --git a/usr.bin/netstat/netstat.1 b/usr.bin/netstat/netstat.1 index 1a2c786e90aa..cf6a907c6aa4 100644 --- a/usr.bin/netstat/netstat.1 +++ b/usr.bin/netstat/netstat.1 @@ -416,7 +416,8 @@ When used with or .Fl 6 , limit the output to IPv4 or IPv6 routes respectively. -This option provides details about individual nexthop addresses used in routing decisions. +This option provides details about individual nexthop addresses +used in routing decisions. .It Xo .Bk -words .Nm netstat @@ -430,7 +431,8 @@ When used with or .Fl 6 , restrict the output to IPv4 or IPv6 nexthop groups respectively. -This option shows grouped nexthop entries for multipath or load-balanced routing setups. +This option shows grouped nexthop entries for multipath or +load-balanced routing setups. .It Xo .Bk -words .Nm @@ -926,25 +928,25 @@ binary is not available in the .Sh EXAMPLES Show packet traffic information (packets, bytes, errors, packet drops, etc) for interface re0 updated every 2 seconds and exit after 5 outputs: -.Bd -literal -offset indent -$ netstat -w 2 -q 5 -I re0 -.Ed +.Pp +.Dl netstat -w 2 -q 5 -I re0 .Pp Show statistics for ICMP on any interface: -.Bd -literal -offset indent -$ netstat -s -p icmp -.Ed +.Pp +.Dl netstat -s -p icmp .Pp Show routing tables: -.Bd -literal -offset indent -$ netstat -r -.Ed +.Pp +.Dl netstat -r .Pp Same as above, but without resolving numeric addresses and port numbers to names: -.Bd -literal -offset indent -$ netstat -rn -.Ed +.Pp +.Dl netstat -rn +.Pp +Show IPv4 listening sockets: +.Pp +.Dl netstat -4l .Sh SEE ALSO .Xr fstat 1 , .Xr nfsstat 1 , diff --git a/usr.sbin/bluetooth/bluetooth-config/bluetooth-config.sh b/usr.sbin/bluetooth/bluetooth-config/bluetooth-config.sh index 48a399a82fc7..148325fcecbc 100755 --- a/usr.sbin/bluetooth/bluetooth-config/bluetooth-config.sh +++ b/usr.sbin/bluetooth/bluetooth-config/bluetooth-config.sh @@ -17,7 +17,7 @@ main() { unset node device started bdaddresses retry # Only one command at the moment is scan (+ add) -[ "$#" -eq 1 -a "$1" = "scan" ] || print_syntax +[ "$1" = "scan" ] || print_syntax shift # Get command line options @@ -28,6 +28,12 @@ while getopts :d:n: arg; do ?) print_syntax;; esac done +shift "$((OPTIND-1))" + +# If there's leftover parameters, print usage +[ "$#" -eq 0 ] || print_syntax +shift + # No use running without super user rights if [ $( id -u ) -ne 0 ]; then |