diff options
Diffstat (limited to 'sys')
240 files changed, 9303 insertions, 5622 deletions
diff --git a/sys/cam/ctl/ctl_frontend_ioctl.c b/sys/cam/ctl/ctl_frontend_ioctl.c index 3449154afb38..4b82552ec21f 100644 --- a/sys/cam/ctl/ctl_frontend_ioctl.c +++ b/sys/cam/ctl/ctl_frontend_ioctl.c @@ -588,7 +588,7 @@ ctl_ioctl_io(struct cdev *dev, u_long cmd, caddr_t addr, int flag, struct thread *td) { struct cfi_port *cfi; - union ctl_io *io; + union ctl_io *io, *user_io; void *pool_tmp, *sc_tmp; int retval = 0; @@ -606,6 +606,11 @@ ctl_ioctl_io(struct cdev *dev, u_long cmd, caddr_t addr, int flag, if ((cfi->port.status & CTL_PORT_STATUS_ONLINE) == 0) return (EPERM); + /* Reject out-of-range initiator IDs. */ + user_io = (void *)addr; + if (user_io->io_hdr.nexus.initid >= CTL_MAX_INIT_PER_PORT) + return (EINVAL); + io = ctl_alloc_io(cfi->port.ctl_pool_ref); /* @@ -614,7 +619,7 @@ ctl_ioctl_io(struct cdev *dev, u_long cmd, caddr_t addr, int flag, */ pool_tmp = io->io_hdr.pool; sc_tmp = CTL_SOFTC(io); - memcpy(io, (void *)addr, sizeof(*io)); + memcpy(io, user_io, sizeof(*io)); io->io_hdr.pool = pool_tmp; CTL_SOFTC(io) = sc_tmp; TAILQ_INIT(&io->io_hdr.blocked_queue); @@ -636,7 +641,7 @@ ctl_ioctl_io(struct cdev *dev, u_long cmd, caddr_t addr, int flag, retval = cfi_submit_wait(io); if (retval == 0) - memcpy((void *)addr, io, sizeof(*io)); + memcpy(user_io, io, sizeof(*io)); ctl_free_io(io); return (retval); diff --git a/sys/compat/linuxkpi/common/include/linux/compiler_attributes.h b/sys/compat/linuxkpi/common/include/linux/compiler_attributes.h index 42908bb6c2b5..159c833802c5 100644 --- a/sys/compat/linuxkpi/common/include/linux/compiler_attributes.h +++ b/sys/compat/linuxkpi/common/include/linux/compiler_attributes.h @@ -39,7 +39,6 @@ #define noinline_for_stack __noinline -#define __maybe_unused __unused #define __always_unused __unused #define __must_check __result_use_check diff --git a/sys/compat/linuxkpi/common/include/linux/module.h b/sys/compat/linuxkpi/common/include/linux/module.h index fbe57cbbed82..de4be1e9ebe9 100644 --- 
a/sys/compat/linuxkpi/common/include/linux/module.h +++ b/sys/compat/linuxkpi/common/include/linux/module.h @@ -136,9 +136,14 @@ _module_run(void *arg) #define module_exit_order(fn, order) \ SYSUNINIT(fn, SI_SUB_OFED_MODINIT, (order), _module_run, (fn)) -#define module_get(module) -#define module_put(module) -#define try_module_get(module) 1 +static inline void module_get(struct module *module) {} +static inline void module_put(struct module *module) {} + +static inline bool +try_module_get(struct module *module) +{ + return (true); +} #define postcore_initcall(fn) module_init(fn) diff --git a/sys/conf/files b/sys/conf/files index 2b4a453ca556..6804c9c81c69 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1858,6 +1858,7 @@ dev/iicbus/rtc/hym8563.c optional hym8563 iicbus fdt dev/iicbus/rtc/isl12xx.c optional isl12xx dev/iicbus/rtc/nxprtc.c optional nxprtc | pcf8563 dev/iicbus/rtc/pcf85063.c optional pcf85063 iicbus fdt +dev/iicbus/rtc/rs5c372a.c optional rs5c372a iicbus fdt dev/iicbus/rtc/rtc8583.c optional rtc8583 dev/iicbus/rtc/rv3032.c optional rv3032 iicbus fdt dev/iicbus/rtc/rx8803.c optional rx8803 iicbus fdt diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 164a5d01d4b9..a754f78b35f2 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -89,6 +89,27 @@ dev/adb/adb_hb_if.m optional adb dev/adb/adb_if.m optional adb dev/adb/adb_buttons.c optional adb dev/agp/agp_apple.c optional agp powermac +dev/dpaa/bman_portals.c optional dpaa fdt +dev/dpaa/bman.c optional dpaa +dev/dpaa/bman_fdt.c optional dpaa fdt +dev/dpaa/dpaa_eth.c optional dpaa +dev/dpaa/fman.c optional dpaa fdt +dev/dpaa/fman_fdt.c optional dpaa fdt +dev/dpaa/fman_if.m optional dpaa +dev/dpaa/fman_mdio.c optional dpaa fdt +dev/dpaa/fman_port_if.m optional dpaa +dev/dpaa/fman_port.c optional dpaa +dev/dpaa/fman_xmdio.c optional dpaa fdt mdio miibus +dev/dpaa/dpaa_common.c optional dpaa +dev/dpaa/if_dtsec.c optional dpaa +dev/dpaa/if_dtsec_fdt.c optional dpaa fdt 
+dev/dpaa/if_memac.c optional dpaa +dev/dpaa/if_memac_fdt.c optional dpaa +dev/dpaa/portals_common.c optional dpaa +dev/dpaa/qman_portal_if.m optional dpaa +dev/dpaa/qman_portals.c optional dpaa fdt +dev/dpaa/qman.c optional dpaa +dev/dpaa/qman_fdt.c optional dpaa fdt dev/fb/fb.c optional sc dev/gpio/qoriq_gpio.c optional mpc85xx gpio dev/hwpmc/hwpmc_e500.c optional hwpmc diff --git a/sys/conf/kern.pre.mk b/sys/conf/kern.pre.mk index a5b21cdbe843..d1556660094a 100644 --- a/sys/conf/kern.pre.mk +++ b/sys/conf/kern.pre.mk @@ -157,11 +157,9 @@ NORMAL_FWO= ${CC:N${CCACHE_BIN}} -c ${ASM_CFLAGS} ${WERROR} -o ${.TARGET} \ NOSAN_C= ${NORMAL_C:N-fsanitize*:N-fno-sanitize*:N-fasan-shadow-offset*} # for ZSTD in the kernel (include zstd/lib/freebsd before other CFLAGS) -ZSTD_C= ${CC} -c -I$S/contrib/zstd/lib/freebsd ${CFLAGS} \ +ZSTD_C= ${CC} -c -DZSTD_HEAPMODE=1 -I$S/contrib/zstd/lib/freebsd ${CFLAGS} \ -I$S/contrib/zstd/lib -I$S/contrib/zstd/lib/common ${WERROR} \ - -Wno-missing-prototypes -U__BMI__ \ - -DZSTD_HEAPMODE=1 -DZSTD_NO_INTRINSICS -DZSTD_NO_TRACE \ - ${.IMPSRC} + -Wno-missing-prototypes -U__BMI__ -DZSTD_NO_INTRINSICS ${.IMPSRC} # https://github.com/facebook/zstd/commit/812e8f2a [zstd 1.4.1] # "Note that [GCC] autovectorization still does not do a good job on the # optimized version, so it's turned off via attribute and flag. 
I found diff --git a/sys/contrib/openzfs/.github/workflows/scripts/merge_summary.awk b/sys/contrib/openzfs/.github/workflows/scripts/merge_summary.awk index 2b00d00226c9..8a4ce9a2be06 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/merge_summary.awk +++ b/sys/contrib/openzfs/.github/workflows/scripts/merge_summary.awk @@ -17,6 +17,7 @@ BEGIN { pass=0 fail=0 skip=0 + killed=0 state="" cl=0 el=0 @@ -49,6 +50,37 @@ BEGIN { /PASS/{ if (state=="pass_count") {pass += $2}} /FAIL/{ if (state=="pass_count") {fail += $2}} /SKIP/{ if (state=="pass_count") {skip += $2}} + +# If the test was killed, you'll get a line like: +# +# [2026-04-22T03:34:17.694616] Test (Linux): /usr/share/zfs/zfs-tests/tests/functional/io/setup (run as root) [10:00] [KILLED] +# +# Parse out the test name minus the /usr/share/zfs/zfs-tests/tests/functional/' +# part, and include the optional "(Linux): " line, as you can have the killed +# tests in two categories, like: +# +# KILLED (Linux): io/setup +# KILLED io/setup +# +/KILLED/{ + extra="" + for(i=1; i<=NF; i++) { + # Look for optional "(Linux):" field + if ($i ~ "\\("){ + extra=$i" "} + + # Look for a field with a '/' in it. It is the test name. + if($i ~ "/") { + testname=$i + # Remove /usr/share/zfs/zfs-test/test/functional string + sub(/\/usr\/share\/zfs\/zfs-tests\/tests\/functional\//,"",testname) + testname=extra""testname + killed_tests[killed] = testname + killed++ + break + } + } +} /Running Time/{ state=""; running[i]=$3; @@ -106,4 +138,10 @@ END { asort(unexpected_lines, sorted) for (j in sorted) print sorted[j] + + # We don't want to sort killed tests, as the first test that was killed + # most likely caused the others to be killed. 
+ print "\n\nTests that were killed:" + for (j in killed_tests) + print " KILLED "killed_tests[j] } diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh index 9d6cc3c6d3e2..5c41a4d6a497 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-1-setup.sh @@ -63,7 +63,7 @@ sudo swapoff -a # configurations. On one config you get two 75GB block devices, and on the # other you get a single 150GB block device. Here's what both look like: # -# --- Two 75GB block devices --- +# --- One 150GB block device --- # NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS # sda 8:0 0 150G 0 disk # ├─sda1 8:1 0 149G 0 part / @@ -77,7 +77,7 @@ sudo swapoff -a # lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part15 -> ../../sda15 # lrwxrwxrwx 1 root root 11 Jan 29 18:07 azure_root-part16 -> ../../sda16 # -# --- One 150GB block device --- +# --- Two 75GB block devices --- # NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINTS # sda 8:0 0 75G 0 disk # ├─sda1 8:1 0 74G 0 part / @@ -139,18 +139,20 @@ fi sudo mkswap $SWAP sudo swapon $SWAP +echo "Block devices:" +lsblk + # adjust zfs module parameter and create pool -exec 1>/dev/null ARC_MIN=$((1024*1024*256)) ARC_MAX=$((1024*1024*512)) -echo $ARC_MIN | sudo tee /sys/module/zfs/parameters/zfs_arc_min -echo $ARC_MAX | sudo tee /sys/module/zfs/parameters/zfs_arc_max -echo 1 | sudo tee /sys/module/zfs/parameters/zvol_use_blk_mq +echo $ARC_MIN | sudo tee /sys/module/zfs/parameters/zfs_arc_min >/dev/null +echo $ARC_MAX | sudo tee /sys/module/zfs/parameters/zfs_arc_max >/dev/null +echo 1 | sudo tee /sys/module/zfs/parameters/zvol_use_blk_mq >/dev/null sudo zpool create -f -o ashift=12 zpool $DISKS -O relatime=off \ -O atime=off -O xattr=sa -O compression=lz4 -O sync=disabled \ -O redundant_metadata=none -O mountpoint=/mnt/tests +echo "Status:" +zpool status -# no need for some scheduler -for i in 
/sys/block/s*/queue/scheduler; do - echo "none" | sudo tee $i -done +echo "Last dmesg:" +sudo dmesg | tail -n 10 diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 3d78885a9ca3..e63aece389c0 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -88,6 +88,11 @@ case "$OS" in OSv="fedora-unknown" URL="https://download.fedoraproject.org/pub/fedora/linux/releases/43/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-43-1.6.x86_64.qcow2" ;; + fedora44) + OSNAME="Fedora 44" + OSv="fedora-unknown" + URL="https://download.fedoraproject.org/pub/fedora/linux/releases/44/Cloud/x86_64/images/Fedora-Cloud-Base-Generic-44-1.7.x86_64.qcow2" + ;; freebsd13-5r) FreeBSD="13.5-RELEASE" OSNAME="FreeBSD $FreeBSD" @@ -103,6 +108,13 @@ case "$OS" in URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI.raw.xz" KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz" ;; + freebsd15-0r) + FreeBSD="15.0-RELEASE" + OSNAME="FreeBSD $FreeBSD" + OSv="freebsd15.0" + URLxz="$FREEBSD_REL/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" + KSRC="$FREEBSD_REL/../amd64/$FreeBSD/src.txz" + ;; freebsd13-5s) FreeBSD="13.5-STABLE" OSNAME="FreeBSD $FreeBSD" @@ -118,8 +130,8 @@ case "$OS" in URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; - freebsd15-0s) - FreeBSD="15.0-STABLE" + freebsd15-1s) + FreeBSD="15.1-PRERELEASE" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" @@ -188,17 +200,49 @@ DISK="/dev/zvol/zpool/openzfs" sudo zfs create -ps -b 64k -V 80g zpool/openzfs while true; do test -b $DISK && break; sleep 1; done -# we are downloading via axel, curl and wget are mostly slower and -# require more return value checking +# We first 
try to download with 'axel', which is faster than curl, but fallback +# to curl if that doesn't work. It is hoped that the curl fallback will get +# around the occasional "ERROR 502: Bad Gateway" errors. IMG="/mnt/tests/cloud-image" -if [ ! -z "$URLxz" ]; then - echo "Loading $URLxz ..." - time axel -q -o "$IMG" "$URLxz" - echo "Loading $KSRC ..." - time axel -q -o ~/src.txz $KSRC -else - echo "Loading $URL ..." - time axel -q -o "$IMG" "$URL" +for cmd in 'axel -q -o' 'curl --fail -LSs -o' ; do + if [ ! -z "$URLxz" ]; then + echo "Loading $URLxz with $cmd..." + time eval "$cmd $IMG $URLxz" || true + + if [ ! -s ~/src.txz ] ; then + echo "Loading $KSRC with $cmd..." + time eval "$cmd ~/src.txz $KSRC" || true + fi + else + echo "Loading $URL with $cmd..." + time eval "$cmd $IMG $URL" || true + fi + + if [ -s "$IMG" ] ; then + # Successful download + break + fi +done + +# SPECIAL CASE +# FreeBSD sometimes has broken links in their "current/" URL. Go back up a +# level and look for other images that might work. For example: +# +# https://download.freebsd.org/snapshots/CI-IMAGES/16.0-CURRENT/amd64/: +# +# 20251110/ +# 20251209/ +# 20260420/ +# current/ +# +# In this case let's say the raw.xz link in current/ is bad, so look though the +# other snapshot links for the newest existing raw.xz file. +if [ ! -z "$URLxz" ] && [ ! -s "$IMG" ] ; then + URLxz=$(wget --accept "*.raw.xz" --spider -np --recursive --no-verbose \ + $(dirname $(dirname $URLxz)) 2>&1 | awk '/200 OK/{print $(NF-2)}' | \ + sort -n | tail -n 1) + echo "Couldn't download FreeBSD raw.xz. Trying fallback snapshot $URLxz" + curl --fail -LSs -o $IMG $URLxz fi echo "Importing VM image to zvol..." 
diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh index 8dad30fe4a5a..c261cbfca06d 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-6-tests.sh @@ -222,9 +222,9 @@ TAGS=$NUM/$DEN sudo dmesg -c > dmesg-prerun.txt mount > mount.txt df -h > df-prerun.txt -$TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS +RV=0 +$TDIR/zfs-tests.sh -vKO -s 3GB -T $TAGS || RV=$? -RV=$? df -h > df-postrun.txt echo $RV > tests-exitcode.txt sync diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh index 5e18f4bf49c1..51ae82567c2c 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-7-prepare.sh @@ -51,6 +51,11 @@ cd $RESPATH # prepare result files for summary for ((i=1; i<=VMs; i++)); do + + # no results, VM either didn't start or was unreachable, create + # the missing directory which is expected by subsequent steps + test -d vm$i || mkdir -p vm$i + file="vm$i/build-stderr.txt" test -s $file && mv -f $file build-stderr.txt @@ -61,12 +66,14 @@ for ((i=1; i<=VMs; i++)); do test -s $file && mv -f $file uname.txt file="vm$i/tests-exitcode.txt" - if [ ! -s $file ]; then - # XXX - add some tests for kernel panic's here - # tail -n 80 vm$i/console.txt | grep XYZ - echo 1 > $file + if [ ! 
-s "$file" ]; then + # Print in bold red + echo -e "\033[1;31mVM$i didn't finish ZTS and may have crashed!\033[0m" >> extra + + # ENOENT=2 + echo 2 > "$file" fi - rv=$(cat vm$i/tests-exitcode.txt) + rv=$(cat "$file") test $rv != 0 && touch /tmp/have_failed_tests file="vm$i/current/log" @@ -89,6 +96,14 @@ done if [ -s summary ]; then $MERGE summary | grep -v '^/' > summary.txt $MERGE summary | $BASE/scripts/zfs-tests-color.sh > /tmp/summary.txt + + # Add in additional 'extra' text at the end, if file is present. + if [ -s extra ] ; then + echo "" >> /tmp/summary.txt + cat extra >> /tmp/summary.txt + rm -f extra + fi + rm -f summary else touch summary.txt /tmp/summary.txt diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-8-summary.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-8-summary.sh index 00a4bf1ae325..39b3d124c794 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-8-summary.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-8-summary.sh @@ -37,9 +37,11 @@ function showfile_tail() { echo "##[endgroup]" } -# overview -cat /tmp/summary.txt -echo "" +# overview if available +if [ -f /tmp/summary.txt -a -s /tmp/summary.txt ]; then + cat /tmp/summary.txt + echo "" +fi if [ -f /tmp/have_failed_tests -a -s /tmp/failed.txt ]; then echo "Debuginfo of failed tests:" diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml index 88d85a06d975..c3a7397c6aef 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu-packages.yml @@ -58,7 +58,7 @@ jobs: strategy: fail-fast: false matrix: - os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora42', 'fedora43'] + os: ['almalinux8', 'almalinux9', 'almalinux10', 'fedora42', 'fedora43', 'fedora44'] runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v6 diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml 
b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index f83b319a331f..4b4fd27543fd 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -46,17 +46,17 @@ jobs: case "$ci_type" in quick) - os_selection='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd15-0s", "ubuntu24"]' + os_selection='["almalinux8", "almalinux9", "almalinux10", "debian12", "fedora42", "freebsd15-1s", "ubuntu24"]' ;; linux) - os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora42", "fedora43", "ubuntu22", "ubuntu24"]' + os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian11", "debian12", "debian13", "fedora42", "fedora43", "fedora44", "ubuntu22", "ubuntu24"]' ;; freebsd) - os_selection='["freebsd13-5r", "freebsd14-4r", "freebsd13-5s", "freebsd14-4s", "freebsd15-0s", "freebsd16-0c"]' + os_selection='["freebsd13-5r", "freebsd14-4r", "freebsd13-5s", "freebsd14-4s", "freebsd15-1s", "freebsd16-0c"]' ;; *) # default list - os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora42", "fedora43", "freebsd14-4r", "freebsd15-0s", "freebsd16-0c", "ubuntu22", "ubuntu24"]' + os_selection='["almalinux8", "almalinux9", "almalinux10", "centos-stream9", "centos-stream10", "debian12", "debian13", "fedora42", "fedora43", "fedora44", "freebsd14-4r", "freebsd15-1s", "freebsd16-0c", "ubuntu22", "ubuntu24"]' ;; esac @@ -99,7 +99,7 @@ jobs: # misc: archlinux, tumbleweed # FreeBSD variants of november 2025: # FreeBSD Release: freebsd13-5r, freebsd14-4r, freebsd15-0r - # FreeBSD Stable: freebsd13-5s, freebsd14-4s, freebsd15-0s + # FreeBSD Stable: freebsd13-5s, freebsd14-4s, freebsd15-1s # FreeBSD Current: freebsd16-0c os: ${{ fromJson(needs.test-config.outputs.test_os) }} runs-on: ubuntu-24.04 diff --git 
a/sys/contrib/openzfs/.mailmap b/sys/contrib/openzfs/.mailmap index 377a511bead6..87d9530f4241 100644 --- a/sys/contrib/openzfs/.mailmap +++ b/sys/contrib/openzfs/.mailmap @@ -32,7 +32,9 @@ Andrew Walker <awalker@ixsystems.com> Benedikt Neuffer <github@itfriend.de> Chengfei Zhu <chengfeix.zhu@intel.com> ChenHao Lu <18302010006@fudan.edu.cn> +Chris Jacobs <idefix2020dev@gmail.com> Chris Lindee <chris.lindee+github@gmail.com> +Colin K. Williams <colin@li-nk.org> Colm Buckley <colm@tuatha.org> Crag Wang <crag0715@gmail.com> Damian Szuberski <szuberskidamian@gmail.com> @@ -192,6 +194,7 @@ Kyle Evans <kevans@FreeBSD.org> <kevans91@users.noreply.github.com> LaurenÈ›iu Nicola <lnicola@dend.ro> <lnicola@users.noreply.github.com> loli10K <ezomori.nozomu@gmail.com> <loli10K@users.noreply.github.com> Lorenz Hüdepohl <dev@stellardeath.org> <lhuedepohl@users.noreply.github.com> +Louis Leseur <louis.leseur@gmail.com> <51127370+teapot9@users.noreply.github.com> LuÃs Henriques <henrix@camandro.org> <73643340+lumigch@users.noreply.github.com> Marcin Skarbek <git@skarbek.name> <mskarbek@users.noreply.github.com> Matt Fiddaman <github@m.fiddaman.uk> <81489167+matt-fidd@users.noreply.github.com> @@ -212,6 +215,7 @@ Peter Wirdemo <peter.wirdemo@gmail.com> <4224155+pewo@users.noreply.github.com> Petros Koutoupis <petros@petroskoutoupis.com> <pkoutoupis@users.noreply.github.com> Ping Huang <huangping@smartx.com> <101400146+hpingfs@users.noreply.github.com> Piotr P. 
Stefaniak <pstef@freebsd.org> <pstef@users.noreply.github.com> +Pranav P <pranavsdreams@gmail.com> <49746983+pranavkaruvally@users.noreply.github.com> Richard Allen <belperite@gmail.com> <33836503+belperite@users.noreply.github.com> Rich Ercolani <rincebrain@gmail.com> <214141+rincebrain@users.noreply.github.com> Rick Macklem <rmacklem@uoguelph.ca> <64620010+rmacklem@users.noreply.github.com> @@ -223,6 +227,7 @@ Samuel Wycliffe <samuelwycliffe@gmail.com> <50765275+npc203@users.noreply.github Savyasachee Jha <hi@savyasacheejha.com> <savyajha@users.noreply.github.com> Scott Colby <scott@scolby.com> <scolby33@users.noreply.github.com> Sean Eric Fagan <kithrup@mac.com> <kithrup@users.noreply.github.com> +Shelvacu <git@shelvacu.com> <1731537+shelvacu@users.noreply.github.com> Shreshth Srivastava <shreshthsrivastava2@gmail.com> <66148173+Shreshth3@users.noreply.github.com> Spencer Kinny <spencerkinny1995@gmail.com> <30333052+Spencer-Kinny@users.noreply.github.com> Srikanth N S <srikanth.nagasubbaraoseetharaman@hpe.com> <75025422+nssrikanth@users.noreply.github.com> @@ -245,3 +250,4 @@ XDTG <click1799@163.com> <35128600+XDTG@users.noreply.github.com> xtouqh <xtouqh@hotmail.com> <72357159+xtouqh@users.noreply.github.com> Yuri Pankov <yuripv@FreeBSD.org> <113725409+yuripv@users.noreply.github.com> Yuri Pankov <yuripv@FreeBSD.org> <82001006+yuripv@users.noreply.github.com> +ZhengYuan Huang <gality369@gmail.com> <68463495+Gality369@users.noreply.github.com> diff --git a/sys/contrib/openzfs/AUTHORS b/sys/contrib/openzfs/AUTHORS index 11aea0171651..fc83b1fe748c 100644 --- a/sys/contrib/openzfs/AUTHORS +++ b/sys/contrib/openzfs/AUTHORS @@ -144,17 +144,18 @@ CONTRIBUTORS: Chris Davidson <christopher.davidson@gmail.com> Chris Dunlap <cdunlap@llnl.gov> Chris Dunlop <chris@onthe.net.au> + Chris Jacobs <idefix2020dev@gmail.com> Chris Lindee <chris.lindee+github@gmail.com> Chris McDonough <chrism@plope.com> Chris Peredun <chris.peredun@ixsystems.com> Chris Siden 
<chris.siden@delphix.com> Chris Siebenmann <cks.github@cs.toronto.edu> Christer Ekholm <che@chrekh.se> - Christos Longros <chris.longros@gmail.com> Christian Kohlschütter <christian@kohlschutter.com> Christian Neukirchen <chneukirchen@gmail.com> Christian Schwarz <me@cschwarz.com> Christopher Voltz <cjunk@voltz.ws> + Christos Longros <chris.longros@gmail.com> Christ Schlacta <aarcane@aarcane.info> Chris Wedgwood <cw@f00f.org> Chris Williamson <chris.williamson@delphix.com> @@ -167,6 +168,7 @@ CONTRIBUTORS: Clint Armstrong <clint@clintarmstrong.net> Coleman Kane <ckane@colemankane.org> Colin Ian King <colin.king@canonical.com> + Colin K. Williams <colin@li-nk.org> Colin Percival <cperciva@tarsnap.com> Colm Buckley <colm@tuatha.org> Cong Zhang <congzhangzh@users.noreply.github.com> @@ -258,6 +260,8 @@ CONTRIBUTORS: Garrett D'Amore <garrett@nexenta.com> Garrett Fields <ghfields@gmail.com> Garrison Jensen <garrison.jensen@gmail.com> + Garth Snyder <garth@garthsnyder.com> + Gary Guo <gary@garyguo.net> Gary Mills <gary_mills@fastmail.fm> Gaurav Kumar <gauravk.18@gmail.com> GeLiXin <ge.lixin@zte.com.cn> @@ -328,6 +332,7 @@ CONTRIBUTORS: James Wah <james@laird-wah.net> Jan Engelhardt <jengelh@inai.de> Jan Kryl <jan.kryl@nexenta.com> + Jan Martin Mikkelsen <janm-github@transactionware.com> Jan Sanislo <oystr@cs.washington.edu> Jaron Kent-Dobias <jaron@kent-dobias.com> Jason Cohen <jwittlincohen@gmail.com> @@ -377,6 +382,7 @@ CONTRIBUTORS: Jo Zzsi <jozzsicsataban@gmail.com> João Carlos Mendes LuÃs <jonny@jonny.eng.br> JT Pennington <jt.pennington@klarasystems.com> + Juhyung Park <qkrwngud825@gmail.com> Julian Brunner <julian.brunner@gmail.com> Julian Heuking <JulianH@beckhoff.com> jumbi77 <jumbi77@users.noreply.github.com> @@ -433,6 +439,7 @@ CONTRIBUTORS: lorddoskias <lorddoskias@gmail.com> Lorenz Brun <lorenz@dolansoft.org> Lorenz Hüdepohl <dev@stellardeath.org> + Louis Leseur <louis.leseur@gmail.com> louwrentius <louwrentius@gmail.com> Lukas Wunner <lukas@wunner.de> 
luozhengzheng <luo.zhengzheng@zte.com.cn> @@ -448,6 +455,7 @@ CONTRIBUTORS: Marcel Telka <marcel.telka@nexenta.com> Marcel Wysocki <maci.stgn@gmail.com> Marcin Skarbek <git@skarbek.name> + Marc Sladek <marc@sladek.dev> Mariusz Zaborski <mariusz.zaborski@klarasystems.com> Mark Johnston <markj@FreeBSD.org> Mark Maybee <mark.maybee@delphix.com> @@ -495,6 +503,7 @@ CONTRIBUTORS: Mike Swanson <mikeonthecomputer@gmail.com> Milan Jurik <milan.jurik@xylab.cz> Minsoo Choo <minsoochoo0122@proton.me> + mischivus <mischivus@users.noreply.github.com> mnrx <mnrx@users.noreply.github.com> Mohamed Tawfik <m_tawfik@aucegypt.edu> Morgan Jones <mjones@rice.edu> @@ -557,6 +566,7 @@ CONTRIBUTORS: Piotr P. Stefaniak <pstef@freebsd.org> poscat <poscat@poscat.moe> Prakash Surya <prakash.surya@delphix.com> + Pranav P <pranavsdreams@gmail.com> Prasad Joshi <prasadjoshi124@gmail.com> privb0x23 <privb0x23@users.noreply.github.com> P.SCH <p88@yahoo.com> @@ -627,6 +637,7 @@ CONTRIBUTORS: Shampavman <sham.pavman@nexenta.com> Shaun Tancheff <shaun@aeonazure.com> Shawn Bayern <sbayern@law.fsu.edu> + Shelvacu <git@shelvacu.com> Shengqi Chen <harry-chen@outlook.com> SHENGYI HONG <aokblast@FreeBSD.org> Shen Yan <shenyanxxxy@qq.com> @@ -671,6 +682,7 @@ CONTRIBUTORS: Tim Connors <tconnors@rather.puzzling.org> Tim Crawford <tcrawford@datto.com> Tim Haley <Tim.Haley@Sun.COM> + Tim Hatch <tim@timhatch.com> timor <timor.dd@googlemail.com> Timothy Day <tday141@gmail.com> Tim Schumacher <timschumi@gmx.de> @@ -737,6 +749,7 @@ CONTRIBUTORS: Zach Dykstra <dykstra.zachary@gmail.com> zgock <zgock@nuc.base.zgock-lab.net> Zhao Yongming <zym@apache.org> + ZhengYuan Huang <gality369@gmail.com> Zhenlei Huang <zlei@FreeBSD.org> Zhu Chuang <chuang@melty.land> Érico Nogueira <erico.erc@gmail.com> diff --git a/sys/contrib/openzfs/META b/sys/contrib/openzfs/META index 260bd7e401ec..ab1b8955d245 100644 --- a/sys/contrib/openzfs/META +++ b/sys/contrib/openzfs/META @@ -6,5 +6,5 @@ Release: 1 Release-Tags: relext License: CDDL 
Author: OpenZFS -Linux-Maximum: 6.19 +Linux-Maximum: 7.0 Linux-Minimum: 4.18 diff --git a/sys/contrib/openzfs/cmd/zed/zed_event.c b/sys/contrib/openzfs/cmd/zed/zed_event.c index ba7cba304b1d..e252e0999c18 100644 --- a/sys/contrib/openzfs/cmd/zed/zed_event.c +++ b/sys/contrib/openzfs/cmd/zed/zed_event.c @@ -238,7 +238,7 @@ _zed_event_value_is_hex(const char *name) NULL }; const char **pp; - char *p; + const char *p; if (!name) return (0); diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 631ddda5c6e6..4c21c92bcd2f 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -6867,7 +6867,7 @@ holds_callback(zfs_handle_t *zhp, void *data) if (cbp->cb_recursive) { const char *snapname; - char *delim = strchr(zname, '@'); + const char *delim = strchr(zname, '@'); if (delim == NULL) return (0); @@ -9417,6 +9417,7 @@ main(int argc, char **argv) /* * Run the appropriate command. */ + libzfs_mnttab_cache(g_zfs, B_TRUE); if (find_command_idx(cmdname, &i) == 0) { current_command = &command_table[i]; ret = command_table[i].func(argc - 1, newargv + 1); diff --git a/sys/contrib/openzfs/cmd/zinject/zinject.c b/sys/contrib/openzfs/cmd/zinject/zinject.c index 37ff92a816f4..1b5fd595830d 100644 --- a/sys/contrib/openzfs/cmd/zinject/zinject.c +++ b/sys/contrib/openzfs/cmd/zinject/zinject.c @@ -229,6 +229,7 @@ static const struct errstr errstrtable[] = { { ECHILD, "dtl" }, { EILSEQ, "corrupt" }, { ENOSYS, "noop" }, + { EFAULT, "io-prefail" }, { 0, NULL }, }; @@ -308,7 +309,8 @@ usage(void) "\t\tlabel. 
Label injection can either be 'nvlist', 'uber',\n " "\t\t'pad1', or 'pad2'.\n" "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl',\n" - "\t\t'corrupt' (bit flip), or 'noop' (successfully do nothing).\n" + "\t\t'corrupt' (bit flip), 'io-prefail' (unsuccessfully do\n" + "\t\tnothing) or 'noop' (successfully do nothing).\n" "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n" "\t\tdevice error injection to a percentage of the IOs.\n" "\n" @@ -1026,7 +1028,8 @@ main(int argc, char **argv) if (error < 0) { (void) fprintf(stderr, "invalid error type " "'%s': must be one of: io decompress " - "decrypt nxio dtl corrupt noop\n", + "decrypt nxio dtl corrupt noop " + "io-prefail\n", optarg); usage(); libzfs_fini(g_zfs); diff --git a/sys/contrib/openzfs/config/kernel-dentry-alias.m4 b/sys/contrib/openzfs/config/kernel-dentry-alias.m4 new file mode 100644 index 000000000000..0baf1a06d3ce --- /dev/null +++ b/sys/contrib/openzfs/config/kernel-dentry-alias.m4 @@ -0,0 +1,32 @@ +dnl # SPDX-License-Identifier: CDDL-1.0 +dnl # +dnl # 7.1 API change +dnl # d_u union in struct dentry is now anonmymous, so d_alias must be +dnl # named directly +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U], [ + ZFS_LINUX_TEST_SRC([dentry_alias_d_u], [ + #include <linux/fs.h> + #include <linux/dcache.h> + #include <linux/list.h> + ], [ + struct inode *inode __attribute__ ((unused)) = NULL; + struct dentry *dentry __attribute__ ((unused)) = NULL; + hlist_for_each_entry(dentry, &inode->i_dentry, + d_u.d_alias) { + d_drop(dentry); + } + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_DENTRY_ALIAS_D_U], [ + AC_MSG_CHECKING([whether dentry aliases are in d_u member]) + ZFS_LINUX_TEST_RESULT([dentry_alias_d_u], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_DENTRY_D_U_ALIASES, 1, + [dentry aliases are in d_u member]) + ],[ + AC_MSG_RESULT(no) + ]) +]) + diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4 index e1dba34e5dbe..b40e34d373f6 100644 --- 
a/sys/contrib/openzfs/config/kernel.m4 +++ b/sys/contrib/openzfs/config/kernel.m4 @@ -74,6 +74,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_SETATTR_PREPARE ZFS_AC_KERNEL_SRC_INSERT_INODE_LOCKED ZFS_AC_KERNEL_SRC_DENTRY + ZFS_AC_KERNEL_SRC_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_SRC_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SRC_SECURITY_INODE ZFS_AC_KERNEL_SRC_FS_CONTEXT @@ -198,6 +199,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_SETATTR_PREPARE ZFS_AC_KERNEL_INSERT_INODE_LOCKED ZFS_AC_KERNEL_DENTRY + ZFS_AC_KERNEL_DENTRY_ALIAS_D_U ZFS_AC_KERNEL_TRUNCATE_SETSIZE ZFS_AC_KERNEL_SECURITY_INODE ZFS_AC_KERNEL_FS_CONTEXT diff --git a/sys/contrib/openzfs/configure.ac b/sys/contrib/openzfs/configure.ac index f4b52e1f7abc..3757b5e2cac8 100644 --- a/sys/contrib/openzfs/configure.ac +++ b/sys/contrib/openzfs/configure.ac @@ -41,7 +41,7 @@ AC_CONFIG_MACRO_DIR([config]) AC_CANONICAL_TARGET AM_MAINTAINER_MODE m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) -AM_INIT_AUTOMAKE([subdir-objects foreign]) +AM_INIT_AUTOMAKE([subdir-objects foreign tar-pax]) # Remove default macros from config.h: # PACKAGE, PACKAGE_{BUGREPORT,NAME,STRING,TARNAME,VERSION}, STDC_HEADERS, VERSION AC_CONFIG_HEADERS([zfs_config.h], [ diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs index 9d2c086ffdfe..1b3bd531064a 100644 --- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs +++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs @@ -514,9 +514,9 @@ destroy_fs() _destroy_fs="${1}" [ "${quiet}" != "y" ] && \ - zfs_log_begin_msg "Destroying '${destroy_fs}'" + zfs_log_begin_msg "Destroying '${_destroy_fs}'" - ZFS_CMD="${ZFS} destroy ${destroy_fs}" + ZFS_CMD="${ZFS} destroy ${_destroy_fs}" ZFS_STDERR="$(${ZFS_CMD} 2>&1)" ZFS_ERROR="${?}" if [ "${ZFS_ERROR}" != 0 ] @@ -674,11 +674,11 @@ setup_snapshot_booting() then # Snapshot does not exist (...@<null> ?) # ask the user for a snapshot to use. 
- snap="$(ask_user_snap "${_boot_snap%%@*}")" + _boot_snap="$(ask_user_snap "${_boot_snap%%@*}")" fi - # Separate the full snapshot ('${snap}') into it's filesystem and - # snapshot names. Would have been nice with a split() function.. + # Separate the full snapshot ('${_boot_snap}') into its filesystem and + # snapshot names. Would have been nice with a split() function. _rootfs="${_boot_snap%%@*}" _snapname="${_boot_snap##*@}" ZFS_BOOTFS="${_rootfs}_${_snapname}" @@ -693,7 +693,7 @@ setup_snapshot_booting() -r -Sname "${ZFS_BOOTFS}")" for fs in ${_filesystems} do - destroy_fs "${_boot_snap}" + destroy_fs "${fs}" done fi fi diff --git a/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c b/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c index 7ad9ff61d8ee..bbd90a7048f7 100644 --- a/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c +++ b/sys/contrib/openzfs/etc/systemd/system-generators/zfs-mount-generator.c @@ -202,6 +202,7 @@ line_worker(char *line, const char *cachefile) void **tofree = tofree_all; char *toktmp; + const char *toktmp2; /* BEGIN CSTYLED */ const char *dataset = strtok_r(line, "\t", &toktmp); char *p_mountpoint = strtok_r(NULL, "\t", &toktmp); @@ -226,8 +227,8 @@ line_worker(char *line, const char *cachefile) /* END CSTYLED */ size_t pool_len = strlen(dataset); - if ((toktmp = strchr(dataset, '/')) != NULL) - pool_len = toktmp - dataset; + if ((toktmp2 = strchr(dataset, '/')) != NULL) + pool_len = toktmp2 - dataset; const char *pool = *(tofree++) = strndup(dataset, pool_len); if (p_nbmand == NULL) { diff --git a/sys/contrib/openzfs/include/libzfs_core.h b/sys/contrib/openzfs/include/libzfs_core.h index deb810230b48..22f190913c95 100644 --- a/sys/contrib/openzfs/include/libzfs_core.h +++ b/sys/contrib/openzfs/include/libzfs_core.h @@ -93,8 +93,6 @@ _LIBZFS_CORE_H int lzc_send(const char *, const char *, int, enum lzc_send_flags); _LIBZFS_CORE_H int lzc_send_resume(const char *, const 
char *, int, enum lzc_send_flags, uint64_t, uint64_t); -_LIBZFS_CORE_H int lzc_send_space(const char *, const char *, - enum lzc_send_flags, uint64_t *); struct dmu_replay_record; diff --git a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h index f94dcda6175b..e27158926917 100644 --- a/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h +++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/dcache_compat.h @@ -32,7 +32,9 @@ #define dname(dentry) ((char *)((dentry)->d_name.name)) #define dlen(dentry) ((int)((dentry)->d_name.len)) +#ifdef HAVE_DENTRY_D_U_ALIASES #define d_alias d_u.d_alias +#endif #ifdef HAVE_MM_PAGE_FLAGS_STRUCT /* diff --git a/sys/contrib/openzfs/include/sys/vdev_rebuild.h b/sys/contrib/openzfs/include/sys/vdev_rebuild.h index b787b1d5d993..8ecb30659434 100644 --- a/sys/contrib/openzfs/include/sys/vdev_rebuild.h +++ b/sys/contrib/openzfs/include/sys/vdev_rebuild.h @@ -70,6 +70,7 @@ typedef struct vdev_rebuild { zfs_range_tree_t *vr_scan_tree; kmutex_t vr_io_lock; /* inflight IO lock */ kcondvar_t vr_io_cv; /* inflight IO cv */ + uint64_t vr_last_txg; /* last used txg */ /* In-core state and progress */ uint64_t vr_scan_offset[TXG_SIZE]; diff --git a/sys/contrib/openzfs/include/sys/zio.h b/sys/contrib/openzfs/include/sys/zio.h index acb0a03a36b2..c3a199ce813c 100644 --- a/sys/contrib/openzfs/include/sys/zio.h +++ b/sys/contrib/openzfs/include/sys/zio.h @@ -243,6 +243,7 @@ typedef uint64_t zio_flag_t; #define ZIO_FLAG_REEXECUTED (1ULL << 30) #define ZIO_FLAG_DELEGATED (1ULL << 31) #define ZIO_FLAG_PREALLOCATED (1ULL << 32) +#define ZIO_FLAG_POSTREAD (1ULL << 33) #define ZIO_ALLOCATOR_NONE (-1) #define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE) diff --git a/sys/contrib/openzfs/include/zfs_fletcher.h b/sys/contrib/openzfs/include/zfs_fletcher.h index d450f7f5e8a7..4cd8cabfab9e 100644 --- 
a/sys/contrib/openzfs/include/zfs_fletcher.h +++ b/sys/contrib/openzfs/include/zfs_fletcher.h @@ -60,6 +60,8 @@ _ZFS_FLETCHER_H int fletcher_2_incremental_native(void *, size_t, void *); _ZFS_FLETCHER_H int fletcher_2_incremental_byteswap(void *, size_t, void *); _ZFS_FLETCHER_H void fletcher_4_native_varsize(const void *, uint64_t, zio_cksum_t *); +_ZFS_FLETCHER_H void fletcher_4_byteswap_varsize(const void *, uint64_t, + zio_cksum_t *); _ZFS_FLETCHER_H void fletcher_4_byteswap(const void *, uint64_t, const void *, zio_cksum_t *); _ZFS_FLETCHER_H int fletcher_4_incremental_native(void *, size_t, void *); diff --git a/sys/contrib/openzfs/lib/libspl/os/linux/mnttab.c b/sys/contrib/openzfs/lib/libspl/os/linux/mnttab.c index 25fa132ac6fc..f51219e898e6 100644 --- a/sys/contrib/openzfs/lib/libspl/os/linux/mnttab.c +++ b/sys/contrib/openzfs/lib/libspl/os/linux/mnttab.c @@ -125,7 +125,14 @@ getextmntent(const char *path, struct mnttab *entry, struct stat64 *statbuf) } #ifdef HAVE_STATX_MNT_ID - if (statx(AT_FDCWD, path, AT_STATX_SYNC_AS_STAT | AT_SYMLINK_NOFOLLOW, + /* + * Use AT_STATX_SYNC_AS_STAT without AT_SYMLINK_NOFOLLOW so that + * symlinks are followed, matching the behavior of stat64() above. + * Without this, if path is a symlink crossing a mount boundary, + * statx() returns the mnt_id of the symlink's location rather + * than the symlink target's mount. 
+ */ + if (statx(AT_FDCWD, path, AT_STATX_SYNC_AS_STAT, STATX_MNT_ID, &stx) == 0 && (stx.stx_mask & STATX_MNT_ID)) { have_mnt_id = B_TRUE; target_mnt_id = stx.stx_mnt_id; diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs.abi b/sys/contrib/openzfs/lib/libzfs/libzfs.abi index 6349fca09bcb..ad28c8766303 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs.abi +++ b/sys/contrib/openzfs/lib/libzfs/libzfs.abi @@ -189,6 +189,7 @@ <elf-symbol name='fletcher_2_incremental_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='fletcher_2_native' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='fletcher_4_byteswap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> + <elf-symbol name='fletcher_4_byteswap_varsize' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='fletcher_4_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='fletcher_4_impl_set' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> <elf-symbol name='fletcher_4_incremental_byteswap' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/> @@ -1668,8 +1669,103 @@ </function-decl> </abi-instr> <abi-instr address-size='64' path='lib/libspl/os/linux/mnttab.c' language='LANG_C99'> + <array-type-def dimensions='1' type-id='d315442e' size-in-bits='16' id='811205dc'> + <subrange length='1' type-id='7359adad' id='52f813b4'/> + </array-type-def> + <array-type-def dimensions='1' type-id='d3130597' size-in-bits='768' id='f63f23b9'> + <subrange length='12' type-id='7359adad' id='84827bdc'/> + </array-type-def> + <typedef-decl name='__u16' type-id='8efea9e5' id='d315442e'/> + <typedef-decl name='__s32' type-id='95e97e5e' id='3158a266'/> + <typedef-decl 
name='__u32' type-id='f0981eeb' id='3f1a6b60'/> + <typedef-decl name='__s64' type-id='1eb56b1e' id='49659421'/> + <typedef-decl name='__u64' type-id='3a47d82b' id='d3130597'/> + <class-decl name='statx_timestamp' size-in-bits='128' is-struct='yes' visibility='default' id='94101016'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='tv_sec' type-id='49659421' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='64'> + <var-decl name='tv_nsec' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='96'> + <var-decl name='__reserved' type-id='3158a266' visibility='default'/> + </data-member> + </class-decl> + <class-decl name='statx' size-in-bits='2048' is-struct='yes' visibility='default' id='720b04c5'> + <data-member access='public' layout-offset-in-bits='0'> + <var-decl name='stx_mask' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='32'> + <var-decl name='stx_blksize' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='64'> + <var-decl name='stx_attributes' type-id='d3130597' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='128'> + <var-decl name='stx_nlink' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='160'> + <var-decl name='stx_uid' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='192'> + <var-decl name='stx_gid' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='224'> + <var-decl name='stx_mode' type-id='d315442e' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='240'> + <var-decl name='__spare0' type-id='811205dc' visibility='default'/> + 
</data-member> + <data-member access='public' layout-offset-in-bits='256'> + <var-decl name='stx_ino' type-id='d3130597' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='320'> + <var-decl name='stx_size' type-id='d3130597' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='384'> + <var-decl name='stx_blocks' type-id='d3130597' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='448'> + <var-decl name='stx_attributes_mask' type-id='d3130597' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='512'> + <var-decl name='stx_atime' type-id='94101016' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='640'> + <var-decl name='stx_btime' type-id='94101016' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='768'> + <var-decl name='stx_ctime' type-id='94101016' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='896'> + <var-decl name='stx_mtime' type-id='94101016' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='1024'> + <var-decl name='stx_rdev_major' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='1056'> + <var-decl name='stx_rdev_minor' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='1088'> + <var-decl name='stx_dev_major' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='1120'> + <var-decl name='stx_dev_minor' type-id='3f1a6b60' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='1152'> + <var-decl name='stx_mnt_id' type-id='d3130597' visibility='default'/> + </data-member> + <data-member 
access='public' layout-offset-in-bits='1216'> + <var-decl name='__spare2' type-id='d3130597' visibility='default'/> + </data-member> + <data-member access='public' layout-offset-in-bits='1280'> + <var-decl name='__spare3' type-id='f63f23b9' visibility='default'/> + </data-member> + </class-decl> <pointer-type-def type-id='56fe4a37' size-in-bits='64' id='b6b61d2f'/> <qualified-type-def type-id='b6b61d2f' restrict='yes' id='3cad23cd'/> + <pointer-type-def type-id='720b04c5' size-in-bits='64' id='936b8e35'/> + <qualified-type-def type-id='936b8e35' restrict='yes' id='31d265b7'/> <function-decl name='getmntent_r' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='e75a27e9'/> <parameter type-id='3cad23cd'/> @@ -1681,6 +1777,14 @@ <parameter type-id='822cd80b'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='statx' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='95e97e5e'/> + <parameter type-id='9d26089a'/> + <parameter type-id='95e97e5e'/> + <parameter type-id='f0981eeb'/> + <parameter type-id='31d265b7'/> + <return type-id='95e97e5e'/> + </function-decl> </abi-instr> <abi-instr address-size='64' path='lib/libspl/procfs_list.c' language='LANG_C99'> <class-decl name='procfs_list' size-in-bits='768' is-struct='yes' visibility='default' id='0f4d3b87'> @@ -3830,6 +3934,10 @@ <parameter type-id='80f4b756'/> <return type-id='58603c44'/> </function-decl> + <function-decl name='zfs_prop_user' mangled-name='zfs_prop_user' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_user'> + <parameter type-id='80f4b756'/> + <return type-id='c19b74c3'/> + </function-decl> <function-decl name='nvlist_add_uint64' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='5ce45b60'/> <parameter type-id='80f4b756'/> @@ -3854,9 +3962,6 @@ <parameter type-id='7d3cd834'/> <return type-id='95e97e5e'/> </function-decl> - <function-decl name='fnvlist_alloc' 
visibility='default' binding='global' size-in-bits='64'> - <return type-id='5ce45b60'/> - </function-decl> <function-decl name='__ctype_b_loc' visibility='default' binding='global' size-in-bits='64'> <return type-id='c59e1ef0'/> </function-decl> @@ -4529,10 +4634,6 @@ <parameter type-id='c19b74c3'/> <return type-id='c19b74c3'/> </function-decl> - <function-decl name='zfs_prop_user' mangled-name='zfs_prop_user' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_user'> - <parameter type-id='80f4b756'/> - <return type-id='c19b74c3'/> - </function-decl> <function-decl name='zfs_prop_userquota' mangled-name='zfs_prop_userquota' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_prop_userquota'> <parameter type-id='80f4b756'/> <return type-id='c19b74c3'/> @@ -4642,6 +4743,9 @@ <parameter type-id='7d3cd834'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='fnvlist_alloc' visibility='default' binding='global' size-in-bits='64'> + <return type-id='5ce45b60'/> + </function-decl> <function-decl name='fnvlist_free' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='5ce45b60'/> <return type-id='48b5725f'/> @@ -4909,6 +5013,12 @@ <parameter type-id='80f4b756' name='path'/> <return type-id='95e97e5e'/> </function-decl> + <function-decl name='zfs_create_ancestors_props' mangled-name='zfs_create_ancestors_props' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_create_ancestors_props'> + <parameter type-id='b0382bb3' name='hdl'/> + <parameter type-id='80f4b756' name='path'/> + <parameter type-id='5ce45b60' name='props'/> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='zfs_create' mangled-name='zfs_create' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_create'> <parameter type-id='b0382bb3' name='hdl'/> <parameter type-id='80f4b756' name='path'/> @@ -5155,6 +5265,19 @@ <parameter type-id='0d41d328'/> <return 
type-id='95e97e5e'/> </function-decl> + <function-decl name='zfs_is_namespace_prop' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='58603c44'/> + <return type-id='c19b74c3'/> + </function-decl> + <function-decl name='zfs_namespace_prop_flag' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='58603c44'/> + <return type-id='8f92235e'/> + </function-decl> + <function-decl name='zfs_mount_setattr' visibility='default' binding='global' size-in-bits='64'> + <parameter type-id='9200a744'/> + <parameter type-id='8f92235e'/> + <return type-id='95e97e5e'/> + </function-decl> <function-decl name='zpool_name_valid' visibility='default' binding='global' size-in-bits='64'> <parameter type-id='b0382bb3'/> <parameter type-id='c19b74c3'/> @@ -8337,6 +8460,7 @@ <parameter type-id='b0382bb3'/> <parameter type-id='26a90f95'/> <parameter type-id='95e97e5e'/> + <parameter type-id='5ce45b60'/> <return type-id='95e97e5e'/> </function-decl> <function-decl name='zfs_send_progress' mangled-name='zfs_send_progress' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_send_progress'> @@ -10331,6 +10455,12 @@ <parameter type-id='c24fc2ee' name='zcp'/> <return type-id='48b5725f'/> </function-decl> + <function-decl name='fletcher_4_byteswap_varsize' mangled-name='fletcher_4_byteswap_varsize' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_byteswap_varsize'> + <parameter type-id='eaa32e2f' name='buf'/> + <parameter type-id='9c313c2d' name='size'/> + <parameter type-id='c24fc2ee' name='zcp'/> + <return type-id='48b5725f'/> + </function-decl> <function-decl name='fletcher_4_byteswap' mangled-name='fletcher_4_byteswap' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='fletcher_4_byteswap'> <parameter type-id='eaa32e2f' name='buf'/> <parameter type-id='9c313c2d' name='size'/> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c 
b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c index e5a7ca9ba3f5..ea316010629b 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_dataset.c @@ -708,7 +708,7 @@ zfs_open(libzfs_handle_t *hdl, const char *path, int types) { zfs_handle_t *zhp; char errbuf[ERRBUFLEN]; - char *bookp; + const char *bookp; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, "cannot open '%s'"), path); @@ -2958,7 +2958,7 @@ userquota_propname_decode(const char *propname, boolean_t zoned, zfs_userquota_prop_t *typep, char *domain, int domainlen, uint64_t *ridp) { zfs_userquota_prop_t type; - char *cp; + const char *cp; boolean_t isuser; boolean_t isgroup; boolean_t isproject; @@ -4314,12 +4314,13 @@ zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags) { int ret = 0; zfs_cmd_t zc = {"\0"}; - char *delim; prop_changelist_t *cl = NULL; char parent[ZFS_MAX_DATASET_NAME_LEN]; char property[ZFS_MAXPROPLEN]; libzfs_handle_t *hdl = zhp->zfs_hdl; char errbuf[ERRBUFLEN]; + const char *delim; + char *delim2; /* if we have the same exact name, just return success */ if (strcmp(zhp->zfs_name, target) == 0) @@ -4344,11 +4345,11 @@ zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags) */ (void) strlcpy(parent, zhp->zfs_name, sizeof (parent)); - delim = strchr(parent, '@'); + delim2 = strchr(parent, '@'); if (strchr(target, '@') == NULL) - *(++delim) = '\0'; + *(++delim2) = '\0'; else - *delim = '\0'; + *delim2 = '\0'; (void) strlcat(parent, target, sizeof (parent)); target = parent; } else { @@ -4369,6 +4370,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags) if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE)) return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf)); } else { + if (flags.recursive) { zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "recursive rename must be a snapshot")); @@ -4424,8 +4426,8 @@ zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags) } if 
(flags.recursive) { char *parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name); - delim = strchr(parentname, '@'); - *delim = '\0'; + delim2 = strchr(parentname, '@'); + *delim2 = '\0'; zfs_handle_t *zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET); free(parentname); @@ -5138,7 +5140,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl) err = zfs_error(hdl, EZFS_BADVERSION, errbuf); break; case EINVAL: - err = zfs_error(hdl, EZFS_BADTYPE, errbuf); + err = zfs_error(hdl, EZFS_BADPERM, errbuf); break; case ENOENT: err = zfs_error(hdl, EZFS_NOENT, errbuf); diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c index 8e71dfa450b1..7f2130682118 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_diff.c @@ -550,8 +550,8 @@ get_snapshot_names(differ_info_t *di, const char *fromsnap, const char *tosnap) { libzfs_handle_t *hdl = di->zhp->zfs_hdl; - char *atptrf = NULL; - char *atptrt = NULL; + const char *atptrf = NULL; + const char *atptrt = NULL; int fdslen, fsnlen; int tdslen, tsnlen; diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_impl.h b/sys/contrib/openzfs/lib/libzfs/libzfs_impl.h index 2ce8d49f08bc..6d7eca981d26 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_impl.h +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_impl.h @@ -66,6 +66,7 @@ struct libzfs_handle { char *libfetch_load_error; kmutex_t zh_mnttab_lock; avl_tree_t zh_mnttab; + boolean_t zh_mnttab_cache_enabled; }; struct zfs_handle { diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_mnttab.c b/sys/contrib/openzfs/lib/libzfs/libzfs_mnttab.c index 473e3a7b45a1..75277066063c 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_mnttab.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_mnttab.c @@ -81,34 +81,13 @@ mnttab_compare(const void *arg1, const void *arg2) return (TREE_ISIGN(rv)); } -void -libzfs_mnttab_init(libzfs_handle_t *hdl) -{ - mutex_init(&hdl->zh_mnttab_lock, NULL, MUTEX_DEFAULT, 
NULL); - assert(avl_numnodes(&hdl->zh_mnttab) == 0); - avl_create(&hdl->zh_mnttab, mnttab_compare, - sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); -} - -void -libzfs_mnttab_fini(libzfs_handle_t *hdl) +static void +mnttab_drop(libzfs_handle_t *hdl) { - void *cookie = NULL; mnttab_node_t *mtn; - - while ((mtn = avl_destroy_nodes(&hdl->zh_mnttab, &cookie)) - != NULL) + void *cookie = NULL; + while ((mtn = avl_destroy_nodes(&hdl->zh_mnttab, &cookie)) != NULL) mnttab_node_free(hdl, mtn); - - avl_destroy(&hdl->zh_mnttab); - (void) mutex_destroy(&hdl->zh_mnttab_lock); -} - -void -libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable) -{ - /* This is a no-op to preserve ABI backward compatibility. */ - (void) hdl, (void) enable; } static int @@ -145,6 +124,33 @@ mnttab_update(libzfs_handle_t *hdl) return (0); } + +void +libzfs_mnttab_init(libzfs_handle_t *hdl) +{ + mutex_init(&hdl->zh_mnttab_lock, NULL, MUTEX_DEFAULT, NULL); + assert(avl_numnodes(&hdl->zh_mnttab) == 0); + avl_create(&hdl->zh_mnttab, mnttab_compare, + sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); + hdl->zh_mnttab_cache_enabled = B_FALSE; +} + +void +libzfs_mnttab_fini(libzfs_handle_t *hdl) +{ + mnttab_drop(hdl); + avl_destroy(&hdl->zh_mnttab); + (void) mutex_destroy(&hdl->zh_mnttab_lock); +} + +void +libzfs_mnttab_cache(libzfs_handle_t *hdl, boolean_t enable) +{ + mutex_enter(&hdl->zh_mnttab_lock); + hdl->zh_mnttab_cache_enabled = enable; + mutex_exit(&hdl->zh_mnttab_lock); +} + int libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, struct mnttab *entry) @@ -154,6 +160,9 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, int ret = ENOENT; mutex_enter(&hdl->zh_mnttab_lock); + if (!hdl->zh_mnttab_cache_enabled) + mnttab_drop(hdl); + if (avl_numnodes(&hdl->zh_mnttab) == 0) { int error; @@ -180,6 +189,11 @@ libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, mnttab_node_t *mtn; mutex_enter(&hdl->zh_mnttab_lock); + if 
(!hdl->zh_mnttab_cache_enabled) { + /* Don't bother; we're going to discard it anyway. */ + mutex_exit(&hdl->zh_mnttab_lock); + return; + } mtn = mnttab_node_alloc(hdl, special, mountp, mntopts); @@ -202,6 +216,12 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) mnttab_node_t *ret; mutex_enter(&hdl->zh_mnttab_lock); + if (!hdl->zh_mnttab_cache_enabled) { + /* Don't bother; we're going to discard it anyway. */ + mutex_exit(&hdl->zh_mnttab_lock); + return; + } + find.mtn_mt.mnt_special = (char *)fsname; if ((ret = avl_find(&hdl->zh_mnttab, (void *)&find, NULL)) != NULL) { avl_remove(&hdl->zh_mnttab, ret); diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c index 7cd6a768084b..0b015d8bce64 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_pool.c @@ -640,7 +640,7 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname, prop = zpool_name_to_prop(propname); if (prop == ZPOOL_PROP_INVAL && zpool_prop_feature(propname)) { int err; - char *fname = strchr(propname, '@') + 1; + const char *fname = strchr(propname, '@') + 1; err = zfeature_lookup_name(fname, NULL); if (err != 0) { diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c index 18130a34fea8..f55d0c646bdf 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c @@ -2697,7 +2697,7 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd, char bookname[ZFS_MAX_DATASET_NAME_LEN]; nvlist_t *redact_snaps; zfs_handle_t *book_zhp; - char *at, *pound; + const char *at, *pound; int dsnamelen; pound = strchr(redactbook, '#'); diff --git a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_mount_os.c b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_mount_os.c index 7d8768d12dda..991ef3dfbb94 100644 --- a/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_mount_os.c +++ 
b/sys/contrib/openzfs/lib/libzfs/os/linux/libzfs_mount_os.c @@ -406,7 +406,11 @@ do_unmount(zfs_handle_t *zhp, const char *mntpt, int flags) argv[count] = (char *)mntpt; rc = libzfs_run_process(argv[0], argv, STDOUT_VERBOSE|STDERR_VERBOSE); - return (rc ? EINVAL : 0); + if (rc == 0) + return (0); + if (rc > 0 && geteuid() != 0) + return (EPERM); + return (EINVAL); } #ifdef HAVE_MOUNT_SETATTR diff --git a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_device_path_os.c b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_device_path_os.c index 8ed062bf9b37..64e05f52d95e 100644 --- a/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_device_path_os.c +++ b/sys/contrib/openzfs/lib/libzutil/os/linux/zutil_device_path_os.c @@ -218,7 +218,8 @@ zfs_get_pci_slots_sys_path(const char *dev_name) char *address2 = NULL; char *path = NULL; char buf[MAXPATHLEN]; - char *tmp; + const char *tmp; + char *tmp2; /* If they preface 'dev' with a path (like "/dev") then strip it off */ tmp = strrchr(dev_name, '/'); @@ -240,9 +241,9 @@ zfs_get_pci_slots_sys_path(const char *dev_name) * be "0000:01:00.0" while /sys/bus/pci/slots/0/address will be * "0000:01:00". Just NULL terminate at the '.' so they match. 
*/ - tmp = strrchr(address1, '.'); - if (tmp != NULL) - *tmp = '\0'; + tmp2 = strrchr(address1, '.'); + if (tmp2 != NULL) + *tmp2 = '\0'; dp = opendir("/sys/bus/pci/slots/"); if (dp == NULL) { @@ -311,6 +312,7 @@ zfs_get_enclosure_sysfs_path(const char *dev_name) DIR *dp = NULL; struct dirent *ep; char buf[MAXPATHLEN]; + const char *tmp0; char *tmp1 = NULL; char *tmp2 = NULL; char *tmp3 = NULL; @@ -322,9 +324,9 @@ zfs_get_enclosure_sysfs_path(const char *dev_name) return (NULL); /* If they preface 'dev' with a path (like "/dev") then strip it off */ - tmp1 = strrchr(dev_name, '/'); - if (tmp1 != NULL) - dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */ + tmp0 = strrchr(dev_name, '/'); + if (tmp0 != NULL) + dev_name = tmp0 + 1; /* +1 since we want the chr after '/' */ tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name); if (tmpsize == -1 || tmp1 == NULL) { diff --git a/sys/contrib/openzfs/man/man4/spl.4 b/sys/contrib/openzfs/man/man4/spl.4 index 61dfe42e463d..2ca69e929778 100644 --- a/sys/contrib/openzfs/man/man4/spl.4 +++ b/sys/contrib/openzfs/man/man4/spl.4 @@ -130,6 +130,19 @@ When not enabled, the thread is halted to facilitate further debugging. .Pp Set to a non-zero value to enable. . +.It Sy spl_schedule_hrtimeout_slack_us Ns = Ns Sy 0 Pq uint +Slack value in microseconds passed to +.Fn schedule_hrtimeout_range +when a condition variable times out. +A non-zero value enforces the kernel coalesce the wakeup with other timers +to reduce wakeup count, at the cost of some additional sleep duration. +The maximum is +.Sy 1000 , +as defined by +.Sy MAX_HRTIMEOUT_SLACK_US . +.Pp +Linux-only. +. .It Sy spl_taskq_kick Ns = Ns Sy 0 Pq uint Kick stuck taskq to spawn threads. When writing a non-zero value to it, it will scan all the taskqs. 
diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index 82b0a890e0b4..c1fe65d2ad90 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -745,6 +745,13 @@ depends on kernel configuration. This is the minimum allocation size that will use scatter (page-based) ABDs. Smaller allocations will use linear ABDs. . +.It Sy zfs_active_allocator Ns = Ns Sy dynamic Pq charp +Select the SPA metaslab allocator. +Valid values are +.Sy dynamic +and +.Sy cursor . +. .It Sy zfs_arc_dnode_limit Ns = Ns Sy 0 Ns B Pq u64 When the number of bytes consumed by dnodes in the ARC exceeds this number of bytes, try to unpin some of it in response to demand for non-metadata. @@ -937,6 +944,13 @@ If equivalent to the greater of the number of online CPUs and .Sy 4 . . +.It Sy zfs_arc_no_grow_shift Ns = Ns Sy 5 Pq uint +If less than +.Sy arc_c No >> Sy zfs_arc_no_grow_shift +free memory is available, the ARC is not allowed to grow. +This parameter is +.Fx Ns -specific . +. .It Sy zfs_arc_overflow_shift Ns = Ns Sy 8 Pq int The ARC size is considered to be overflowing if it exceeds the current ARC target size @@ -1034,6 +1048,11 @@ The timeout is scaled based on a percentage of the last lwb latency to avoid significantly impacting the latency of each individual transaction record (itx). . +.It Sy zfs_compressed_arc_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int +Enables storing ARC buffers in their on-disk compressed form, reducing +memory pressure. +When disabled, buffers are decompressed before being cached. +. .It Sy zfs_condense_indirect_commit_entry_delay_ms Ns = Ns Sy 0 Ns ms Pq int Vdev indirection layer (used for device removal) sleeps for this many milliseconds during mapping generation. @@ -1690,6 +1709,11 @@ which have the .Em no_root_squash option set. . 
+.It Sy zfs_snapshot_history_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int +Whether snapshot creation and destruction events are recorded in the pool +history log, viewable with +.Nm zpool Cm history . +. .It Sy zfs_snapshot_no_setuid Ns = Ns Sy 0 Ns | Ns 1 Pq int Whether to disable .Em setuid/setgid @@ -2181,6 +2205,10 @@ working on a scrub between TXG flushes. .It Sy zfs_scrub_error_blocks_per_txg Ns = Ns Sy 4096 Pq uint Error blocks to be scrubbed in one txg. . +.It Sy zfs_scan_blkstats Ns = Ns Sy 0 Ns | Ns 1 Pq int +When enabled, ZFS counts blocks by type and indirection level during a scrub. +The counts are kept in memory for debugging and are not exposed to userspace. +. .It Sy zfs_scan_checkpoint_intval Ns = Ns Sy 7200 Ns s Po 2 hour Pc Pq uint To preserve progress across reboots, the sequential scan algorithm periodically needs to stop metadata scanning and issue all the verification I/O to disk. @@ -2388,6 +2416,13 @@ and may need to load new metaslabs to satisfy these allocations. .It Sy zfs_sync_pass_rewrite Ns = Ns Sy 2 Pq uint Rewrite new block pointers starting in this pass. . +.It Sy zfs_scrub_partial_writes Ns = Ns Sy 1 Ns | Ns 0 Pq int +If a write to a multi-disk vdev fails, but the data is recoverable, the data is +persisted on disk but may not be as redundant as the vdev usually ensures. +If this tunable is set, we issue a read after such a write error to detect the +full extent of the problem and attempt to recover from it. +Note: This currently only works with RAID-Z and dRAID. +. .It Sy zfs_trim_extent_bytes_max Ns = Ns Sy 134217728 Ns B Po 128 MiB Pc Pq uint Maximum size of TRIM command. 
Larger ranges will be split into chunks no larger than this value before diff --git a/sys/contrib/openzfs/man/man7/vdevprops.7 b/sys/contrib/openzfs/man/man7/vdevprops.7 index bb5fec65eff6..b45128dd9242 100644 --- a/sys/contrib/openzfs/man/man7/vdevprops.7 +++ b/sys/contrib/openzfs/man/man7/vdevprops.7 @@ -134,6 +134,12 @@ The number of I/O operations of each type performed by this vdev The cumulative size of all operations of each type performed by this vdev .It Sy removing If this device is currently being removed from the pool +.It Sy raidz_expanding +Set while a +.Nm zpool Cm attach +expansion is in progress on this RAID-Z vdev; cleared on completion. +See +.Xr zpool-attach 8 . .It Sy trim_support Indicates if a leaf device supports trim operations. .El diff --git a/sys/contrib/openzfs/man/man7/zfsprops.7 b/sys/contrib/openzfs/man/man7/zfsprops.7 index 183e6ea95745..302dffa0e6d1 100644 --- a/sys/contrib/openzfs/man/man7/zfsprops.7 +++ b/sys/contrib/openzfs/man/man7/zfsprops.7 @@ -1360,6 +1360,21 @@ This was only supported by Linux prior to 5.15, and was buggy there, and is not supported by .Fx . On Solaris it's used for SMB clients. +.It Sy longname Ns = Ns Sy on Ns | Ns Sy off +Controls support for filenames longer than 255 bytes, up to 1023 bytes. +The default is +.Sy off . +Setting this property to +.Sy on +activates the +.Sy longname +pool feature, which must be enabled +.Po see +.Xr zpool-features 7 +.Pc . +Once a file with a long name is created, the feature becomes active and +the pool can no longer be imported by an OpenZFS implementation that does +not support it. .It Sy overlay Ns = Ns Sy on Ns | Ns Sy off Allow mounting on a busy directory or a directory which already contains files or directories. 
diff --git a/sys/contrib/openzfs/man/man8/zinject.8 b/sys/contrib/openzfs/man/man8/zinject.8 index 092af93211c8..cda6d337864f 100644 --- a/sys/contrib/openzfs/man/man8/zinject.8 +++ b/sys/contrib/openzfs/man/man8/zinject.8 @@ -237,8 +237,10 @@ for an ECHILD error, for an EIO error where reopening the device will succeed, .It Sy nxio for an ENXIO error where reopening the device will fail, or +.It Sy io-prefail +to drop the IO without executing it and return failure, or .It Sy noop -to drop the IO without executing it, and return success. +to drop the IO without executing it and return success. .El .Pp For EIO and ENXIO, the "failed" reads or writes still occur. diff --git a/sys/contrib/openzfs/man/man8/zpool-labelclear.8 b/sys/contrib/openzfs/man/man8/zpool-labelclear.8 index b807acaaede3..65b2f6c4fd3e 100644 --- a/sys/contrib/openzfs/man/man8/zpool-labelclear.8 +++ b/sys/contrib/openzfs/man/man8/zpool-labelclear.8 @@ -50,6 +50,15 @@ is a cache device, it also removes the L2ARC header The .Ar device must not be part of an active pool configuration. +.Pp +This overwrites pool metadata, making all data on the +.Ar device +inaccessible without specialized recovery tools. +Unlike +.Nm zpool Cm destroy , +this cannot be undone by ZFS tooling. +Only use this when you don't care about the data on the +.Ar device . .Bl -tag -width Ds .It Fl f Treat exported or foreign devices as inactive. diff --git a/sys/contrib/openzfs/man/man8/zpool.8 b/sys/contrib/openzfs/man/man8/zpool.8 index 3bfef780b298..4b07f96bbcbc 100644 --- a/sys/contrib/openzfs/man/man8/zpool.8 +++ b/sys/contrib/openzfs/man/man8/zpool.8 @@ -108,7 +108,9 @@ specified. Destroys the given pool, freeing up any devices for other use. .It Xr zpool-labelclear 8 Removes ZFS label information from the specified -.Ar device . +.Ar device , +making all data inaccessible without specialized recovery tools. +Cannot be undone with ZFS tooling. .El . 
.Ss Virtual Devices diff --git a/sys/contrib/openzfs/module/nvpair/nvpair.c b/sys/contrib/openzfs/module/nvpair/nvpair.c index 14fbddb60f6b..07ac102145e2 100644 --- a/sys/contrib/openzfs/module/nvpair/nvpair.c +++ b/sys/contrib/openzfs/module/nvpair/nvpair.c @@ -1923,8 +1923,9 @@ nvlist_lookup_nvpair_ei_sep(nvlist_t *nvl, const char *name, const char sep, { nvpair_t *nvp; const char *np; - char *sepp = NULL; - char *idxp, *idxep; + const char *sepp = NULL; + const char *idxp; + char *idxep; nvlist_t **nva; long idx = 0; int n; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 6e52d90e0940..4dd9ed88d433 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -3524,7 +3524,7 @@ zfs_do_rename_impl(vnode_t *sdvp, vnode_t **svpp, struct componentname *scnp, ZRENAMING, NULL)); } } - if (error == 0) { + if (error == 0 && zfsvfs->z_use_namecache) { cache_vop_rename(sdvp, *svpp, tdvp, *tvpp, scnp, tcnp); } } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c index c73ef86df4dc..767272177fb7 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c @@ -1138,15 +1138,28 @@ zfsctl_snapshot_unmount(const char *snapname, int flags) cv_wait(&se->se_cv, &se->se_mtx); mutex_exit(&se->se_mtx); - exportfs_flush(); - if (flags & MNT_FORCE) argv[4] = "-fn"; argv[5] = se->se_path; dprintf("unmount; path=%s\n", se->se_path); error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); - zfsctl_snapshot_rele(se); + /* + * The kernel's NFS export cache can hold references to the + * snapshot mountpoint and cause umount to fail. ZFS cannot + * invalidate individual entries because the relevant kernel + * APIs are exported GPL-only, so we issue a global flush + * instead. 
To avoid impacting unrelated snapshots, the flush + * runs only on umount failure. Not perfect, but better than + * flushing unconditionally. + */ + if (error) { + exportfs_flush(); + error = call_usermodehelper(argv[0], argv, envp, + UMH_WAIT_PROC); + } + + zfsctl_snapshot_rele(se); /* * The umount system utility will return 256 on error. We must diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c index 8f9b161995f4..bfce9e6b5202 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c @@ -234,6 +234,8 @@ zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, boolean_t revert) { size_t cnt = MIN(n, uio->uio_resid); + size_t oldcnt = cnt; + int error = 0; if (rw == UIO_READ) cnt = copy_to_iter(p, cnt, uio->uio_iter); @@ -249,16 +251,21 @@ zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, return (EFAULT); /* - * Revert advancing the uio_iter. This is set by zfs_uiocopy() - * to avoid consuming the uio and its iov_iter structure. + * When revert is set this is a zfs_uiocopy() which should not + * consume the uio and its iov_iter structure. Otherwise, it's + * a zfs_uiomove() which is expected to update the uio. Partial + * copies are allowed for both copy and move but EFAULT should + * be returned for zfs_uiomove(). 
*/ if (revert) iov_iter_revert(uio->uio_iter, cnt); + else if (cnt != oldcnt) + error = EFAULT; uio->uio_resid -= cnt; uio->uio_loffset += cnt; - return (0); + return (error); } int diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c index 9c0d92551843..d7b502429923 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -953,11 +953,12 @@ objs: int zfs_statvfs(struct inode *ip, struct kstatfs *statp) { + znode_t *zp = ITOZ(ip); zfsvfs_t *zfsvfs = ITOZSB(ip); uint64_t refdbytes, availbytes, usedobjs, availobjs; int err = 0; - if ((err = zfs_enter(zfsvfs, FTAG)) != 0) + if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0) return (err); dmu_objset_space(zfsvfs->z_os, @@ -1013,8 +1014,6 @@ zfs_statvfs(struct inode *ip, struct kstatfs *statp) if (dmu_objset_projectquota_enabled(zfsvfs->z_os) && dmu_objset_projectquota_present(zfsvfs->z_os)) { - znode_t *zp = ITOZ(ip); - if (zp->z_pflags & ZFS_PROJINHERIT && zp->z_projid && zpl_is_valid_projid(zp->z_projid)) err = zfs_statfs_project(zfsvfs, zp, statp, bshift); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c index ffe227796f0a..6d57bff56540 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c @@ -673,6 +673,8 @@ static long zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) { cred_t *cr = CRED(); + znode_t *zp = ITOZ(ip); + zfsvfs_t *zfsvfs = ITOZSB(ip); loff_t olen; fstrans_cookie_t cookie; int error = 0; @@ -706,7 +708,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) bf.l_len = len; bf.l_pid = 0; - error = -zfs_space(ITOZ(ip), F_FREESP, &bf, O_RDWR, offset, cr); + error = -zfs_space(zp, F_FREESP, &bf, O_RDWR, offset, cr); } else if ((mode & ~FALLOC_FL_KEEP_SIZE) == 0) { unsigned int 
percent = zfs_fallocate_reserve_percent; struct kstatfs statfs; @@ -721,7 +723,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) * Use zfs_statvfs() instead of dmu_objset_space() since it * also checks project quota limits, which are relevant here. */ - error = zfs_statvfs(ip, &statfs); + error = -zfs_statvfs(ip, &statfs); if (error) goto out_unmark; @@ -734,8 +736,14 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len) error = -ENOSPC; goto out_unmark; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > olen) - error = zfs_freesp(ITOZ(ip), offset + len, 0, 0, FALSE); + if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > olen) { + error = zpl_enter_verify_zp(zfsvfs, zp, FTAG); + if (error) + goto out_unmark; + + error = -zfs_freesp(zp, offset + len, 0, 0, FALSE); + zfs_exit(zfsvfs, FTAG); + } } out_unmark: spl_fstrans_unmark(cookie); diff --git a/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c b/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c index 1a7dde1dd8f6..5f6f37157ff7 100644 --- a/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c +++ b/sys/contrib/openzfs/module/zcommon/zfs_fletcher.c @@ -499,6 +499,13 @@ fletcher_4_native_varsize(const void *buf, uint64_t size, zio_cksum_t *zcp) fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp, buf, size); } +void +fletcher_4_byteswap_varsize(const void *buf, uint64_t size, zio_cksum_t *zcp) +{ + ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); + fletcher_4_scalar_byteswap((fletcher_4_ctx_t *)zcp, buf, size); +} + static inline void fletcher_4_byteswap_impl(const void *buf, uint64_t size, zio_cksum_t *zcp) { diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index be31cff50059..c28cb9114dd8 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -1874,7 +1874,7 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj) if (ret == 0) arc_hdr_clear_flags(hdr, ARC_FLAG_NOAUTH); - else if 
(ret == ENOENT) + else if (ret == EACCES) ret = 0; if (free_abd) diff --git a/sys/contrib/openzfs/module/zfs/dmu.c b/sys/contrib/openzfs/module/zfs/dmu.c index 0f40164ecc95..d709f9a34aff 100644 --- a/sys/contrib/openzfs/module/zfs/dmu.c +++ b/sys/contrib/openzfs/module/zfs/dmu.c @@ -966,8 +966,14 @@ dmu_evict_range(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) * access patterns are rare. */ rw_enter(&dn->dn_struct_rwlock, RW_READER); - uint64_t start = dbuf_whichblock(dn, 0, offset); - uint64_t end = dbuf_whichblock(dn, 0, offset + len); + uint64_t start, end; + if (dn->dn_datablkshift != 0) { + start = dbuf_whichblock(dn, 0, offset); + end = dbuf_whichblock(dn, 0, offset + len); + } else { + start = (offset >= dn->dn_datablksz); + end = (offset + len >= dn->dn_datablksz); + } if (end > start) dbuf_evict_range(dn, start, end - 1); rw_exit(&dn->dn_struct_rwlock); diff --git a/sys/contrib/openzfs/module/zfs/dmu_direct.c b/sys/contrib/openzfs/module/zfs/dmu_direct.c index d44c686088fc..5b00698da728 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_direct.c +++ b/sys/contrib/openzfs/module/zfs/dmu_direct.c @@ -91,6 +91,7 @@ dmu_write_direct_done(zio_t *zio) dmu_sync_arg_t *dsa = zio->io_private; dbuf_dirty_record_t *dr = dsa->dsa_dr; dmu_buf_impl_t *db = dr->dr_dbuf; + dmu_tx_t *tx = dsa->dsa_tx; abd_free(zio->io_abd); @@ -101,6 +102,11 @@ dmu_write_direct_done(zio_t *zio) db->db_state = DB_UNCACHED; mutex_exit(&db->db_mtx); + /* + * dmu_sync_done() owns dsa and frees it after publishing the final + * override state. The direct-I/O error path still needs the original + * open-context tx to roll the dirty record back with dbuf_undirty(). + */ dmu_sync_done(zio, NULL, zio->io_private); if (zio->io_error != 0) { @@ -120,7 +126,7 @@ dmu_write_direct_done(zio_t *zio) * calling dbuf_undirty(). 
*/ mutex_enter(&db->db_mtx); - VERIFY3B(dbuf_undirty(db, dsa->dsa_tx), ==, B_FALSE); + VERIFY3B(dbuf_undirty(db, tx), ==, B_FALSE); mutex_exit(&db->db_mtx); } diff --git a/sys/contrib/openzfs/module/zfs/dmu_objset.c b/sys/contrib/openzfs/module/zfs/dmu_objset.c index ed81647bcfa3..4919ead3cea6 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_objset.c +++ b/sys/contrib/openzfs/module/zfs/dmu_objset.c @@ -3011,7 +3011,7 @@ dmu_objset_get_user(objset_t *os) int dmu_fsname(const char *snapname, char *buf) { - char *atp = strchr(snapname, '@'); + const char *atp = strchr(snapname, '@'); if (atp == NULL) return (SET_ERROR(EINVAL)); if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN) diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c index 8ecb99d5f57c..4c354722e4f8 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_send.c +++ b/sys/contrib/openzfs/module/zfs/dmu_send.c @@ -1844,7 +1844,7 @@ send_reader_thread(void *arg) continue; } uint64_t file_max = - MIN(dn->dn_maxblkid, range->end_blkid); + MIN(dn->dn_maxblkid + 1, range->end_blkid); /* * The object exists, so we need to try to find the * blkptr for each block in the range we're processing. 
diff --git a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c index 8533657ad03b..4ffd75ceace9 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_bookmark.c +++ b/sys/contrib/openzfs/module/zfs/dsl_bookmark.c @@ -39,10 +39,10 @@ static int dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname, - dsl_dataset_t **dsp, const void *tag, char **shortnamep) + dsl_dataset_t **dsp, const void *tag, const char **shortnamep) { char buf[ZFS_MAX_DATASET_NAME_LEN]; - char *hashp; + const char *hashp; if (strlen(fullname) >= ZFS_MAX_DATASET_NAME_LEN) return (SET_ERROR(ENAMETOOLONG)); @@ -105,7 +105,7 @@ int dsl_bookmark_lookup(dsl_pool_t *dp, const char *fullname, dsl_dataset_t *later_ds, zfs_bookmark_phys_t *bmp) { - char *shortname; + const char *shortname; dsl_dataset_t *ds; int error; @@ -219,7 +219,7 @@ dsl_bookmark_create_check_impl(dsl_pool_t *dp, int error; dsl_dataset_t *newbm_ds; - char *newbm_short; + const char *newbm_short; zfs_bookmark_phys_t bmark_phys; error = dsl_bookmark_hold_ds(dp, newbm, &newbm_ds, FTAG, &newbm_short); @@ -329,7 +329,7 @@ dsl_bookmark_create_check(void *arg, dmu_tx_t *tx) } static dsl_bookmark_node_t * -dsl_bookmark_node_alloc(char *shortname) +dsl_bookmark_node_alloc(const char *shortname) { dsl_bookmark_node_t *dbn = kmem_alloc(sizeof (*dbn), KM_SLEEP); dbn->dbn_name = spa_strdup(shortname); @@ -445,7 +445,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot, dsl_pool_t *dp = dmu_tx_pool(tx); objset_t *mos = dp->dp_meta_objset; dsl_dataset_t *snapds, *bmark_fs; - char *shortname; + const char *shortname; boolean_t bookmark_redacted; uint64_t *dsredactsnaps; uint64_t dsnumsnaps; @@ -535,7 +535,7 @@ dsl_bookmark_create_sync_impl_book( { dsl_pool_t *dp = dmu_tx_pool(tx); dsl_dataset_t *bmark_fs_source, *bmark_fs_new; - char *source_shortname, *new_shortname; + const char *source_shortname, *new_shortname; zfs_bookmark_phys_t source_phys; 
VERIFY0(dsl_bookmark_hold_ds(dp, source_name, &bmark_fs_source, FTAG, @@ -1105,7 +1105,7 @@ dsl_bookmark_destroy_check(void *arg, dmu_tx_t *tx) dsl_dataset_t *ds; zfs_bookmark_phys_t bm; int error; - char *shortname; + const char *shortname; error = dsl_bookmark_hold_ds(dp, fullname, &ds, FTAG, &shortname); @@ -1161,7 +1161,7 @@ dsl_bookmark_destroy_sync(void *arg, dmu_tx_t *tx) for (nvpair_t *pair = nvlist_next_nvpair(dbda->dbda_success, NULL); pair != NULL; pair = nvlist_next_nvpair(dbda->dbda_success, pair)) { dsl_dataset_t *ds; - char *shortname; + const char *shortname; uint64_t zap_cnt; VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair), diff --git a/sys/contrib/openzfs/module/zfs/dsl_crypt.c b/sys/contrib/openzfs/module/zfs/dsl_crypt.c index 9207737f908b..56004f7fc902 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_crypt.c +++ b/sys/contrib/openzfs/module/zfs/dsl_crypt.c @@ -2677,23 +2677,16 @@ int spa_crypt_get_salt(spa_t *spa, uint64_t dsobj, uint8_t *salt) { int ret; - dsl_crypto_key_t *dck = NULL; + dsl_crypto_key_t *dck; /* look up the key from the spa's keystore */ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); if (ret != 0) - goto error; + return (SET_ERROR(EACCES)); ret = zio_crypt_key_get_salt(&dck->dck_key, salt); - if (ret != 0) - goto error; - spa_keystore_dsl_key_rele(spa, dck, FTAG); - return (0); -error: - if (dck != NULL) - spa_keystore_dsl_key_rele(spa, dck, FTAG); return (ret); } @@ -2708,9 +2701,7 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, uint_t datalen, boolean_t byteswap) { int ret; - dsl_crypto_key_t *dck = NULL; - void *buf = abd_borrow_buf_copy(abd, datalen); - objset_phys_t *osp = buf; + dsl_crypto_key_t *dck; uint8_t portable_mac[ZIO_OBJSET_MAC_LEN]; uint8_t local_mac[ZIO_OBJSET_MAC_LEN]; const uint8_t zeroed_mac[ZIO_OBJSET_MAC_LEN] = {0}; @@ -2718,15 +2709,19 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, /* look up the key from the spa's 
keystore */ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); if (ret != 0) - goto error; + return (SET_ERROR(EACCES)); + + void *buf = abd_borrow_buf_copy(abd, datalen); + objset_phys_t *osp = buf; /* calculate both HMACs */ ret = zio_crypt_do_objset_hmacs(&dck->dck_key, buf, datalen, byteswap, portable_mac, local_mac); - if (ret != 0) - goto error; - spa_keystore_dsl_key_rele(spa, dck, FTAG); + if (ret != 0) { + abd_return_buf(abd, buf, datalen); + return (ret); + } /* if we are generating encode the HMACs in the objset_phys_t */ if (generate) { @@ -2760,14 +2755,7 @@ spa_do_crypt_objset_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, } abd_return_buf(abd, buf, datalen); - return (0); - -error: - if (dck != NULL) - spa_keystore_dsl_key_rele(spa, dck, FTAG); - abd_return_buf(abd, buf, datalen); - return (ret); } int @@ -2775,23 +2763,22 @@ spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, uint_t datalen, uint8_t *mac) { int ret; - dsl_crypto_key_t *dck = NULL; - uint8_t *buf = abd_borrow_buf_copy(abd, datalen); + dsl_crypto_key_t *dck; uint8_t digestbuf[ZIO_DATA_MAC_LEN]; /* look up the key from the spa's keystore */ ret = spa_keystore_lookup_key(spa, dsobj, FTAG, &dck); if (ret != 0) - goto error; + return (SET_ERROR(EACCES)); + uint8_t *buf = abd_borrow_buf_copy(abd, datalen); /* perform the hmac */ ret = zio_crypt_do_hmac(&dck->dck_key, buf, datalen, digestbuf, ZIO_DATA_MAC_LEN); - if (ret != 0) - goto error; - - abd_return_buf(abd, buf, datalen); spa_keystore_dsl_key_rele(spa, dck, FTAG); + abd_return_buf(abd, buf, datalen); + if (ret != 0) + return (ret); /* * Truncate and fill in mac buffer if we were asked to generate a MAC. 
@@ -2806,12 +2793,6 @@ spa_do_crypt_mac_abd(boolean_t generate, spa_t *spa, uint64_t dsobj, abd_t *abd, return (SET_ERROR(ECKSUM)); return (0); - -error: - if (dck != NULL) - spa_keystore_dsl_key_rele(spa, dck, FTAG); - abd_return_buf(abd, buf, datalen); - return (ret); } /* diff --git a/sys/contrib/openzfs/module/zfs/dsl_dir.c b/sys/contrib/openzfs/module/zfs/dsl_dir.c index 6ce1890cfea1..2253b868b53c 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dir.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dir.c @@ -417,7 +417,7 @@ dsl_dir_namelen(dsl_dir_t *dd) static int getcomponent(const char *path, char *component, const char **nextp) { - char *p; + const char *p; if ((path == NULL) || (path[0] == '\0')) return (SET_ERROR(ENOENT)); diff --git a/sys/contrib/openzfs/module/zfs/vdev_draid.c b/sys/contrib/openzfs/module/zfs/vdev_draid.c index c76557e80c9b..8f556b868784 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_draid.c +++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c @@ -505,7 +505,11 @@ verify_perms(uint8_t *perms, uint64_t children, uint64_t nperms, int permssz = sizeof (uint8_t) * children * nperms; zio_cksum_t cksum; +#if defined(_ZFS_BIG_ENDIAN) + fletcher_4_byteswap_varsize(perms, permssz, &cksum); +#else fletcher_4_native_varsize(perms, permssz, &cksum); +#endif if (checksum != cksum.zc_word[0]) { kmem_free(counts, countssz); diff --git a/sys/contrib/openzfs/module/zfs/vdev_raidz.c b/sys/contrib/openzfs/module/zfs/vdev_raidz.c index aa44acbf39cb..2db7422e772e 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c +++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c @@ -406,6 +406,16 @@ static unsigned long raidz_io_aggregate_rows = 4; */ static int zfs_scrub_after_expand = 1; +/* + * If there are errors when writing, but few enough that the data is + * recoverable, then ZFS used to silently move on, leaving the data not 100% + * redundant. If this tunable is set, we issue a read after that case occurs, + * allowing the normal error recovery process to handle it. 
+ * + * NOTE: Currently applies only to raidz and draid. + */ +static int zfs_scrub_partial_writes = 1; + static void vdev_raidz_row_free(raidz_row_t *rr) { @@ -3641,6 +3651,7 @@ vdev_raidz_io_done_write_impl(zio_t *zio, raidz_row_t *rr) { int normal_errors = 0; int shadow_errors = 0; + int retryable_errors = 0; ASSERT3U(rr->rr_missingparity, <=, rr->rr_firstdatacol); ASSERT3U(rr->rr_missingdata, <=, rr->rr_cols - rr->rr_firstdatacol); @@ -3657,6 +3668,11 @@ vdev_raidz_io_done_write_impl(zio_t *zio, raidz_row_t *rr) ASSERT(rc->rc_shadow_error != ECKSUM); shadow_errors++; } + if (rc->rc_error || rc->rc_shadow_error) { + vdev_t *cvd = zio->io_vd->vdev_child[rc->rc_devidx]; + if (!(vdev_is_dead(cvd) || cvd->vdev_cant_write)) + retryable_errors++; + } } /* @@ -3676,6 +3692,8 @@ vdev_raidz_io_done_write_impl(zio_t *zio, raidz_row_t *rr) shadow_errors > rr->rr_firstdatacol) { zio->io_error = zio_worst_error(zio->io_error, vdev_raidz_worst_error(rr)); + } else if (retryable_errors && zfs_scrub_partial_writes) { + zio->io_flags |= ZIO_FLAG_POSTREAD; } } @@ -5528,6 +5546,9 @@ ZFS_MODULE_PARAM(zfs_vdev, raidz_, io_aggregate_rows, ULONG, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scrub_after_expand, INT, ZMOD_RW, "For expanded RAIDZ, automatically start a pool scrub when expansion " "completes"); +ZFS_MODULE_PARAM(zfs, zfs_, scrub_partial_writes, INT, ZMOD_RW, + "Issue reads after writes with recoverable failures to ensure " + "integrity"); ZFS_MODULE_PARAM(zfs_vdev, vdev_, read_sit_out_secs, ULONG, ZMOD_RW, "Raidz/draid slow disk sit out time period in seconds"); ZFS_MODULE_PARAM(zfs_vdev, vdev_, raidz_outlier_check_interval_ms, U64, diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c index 4fe6dc9d6fbb..de691f73e939 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c +++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c @@ -593,6 +593,7 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size) dmu_tx_t *tx 
= dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); VERIFY0(dmu_tx_assign(tx, DMU_TX_WAIT | DMU_TX_SUSPEND)); uint64_t txg = dmu_tx_get_txg(tx); + vr->vr_last_txg = txg; spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER); mutex_enter(&vd->vdev_rebuild_lock); @@ -824,6 +825,7 @@ vdev_rebuild_thread(void *arg) uint64_t limit = (arc_c_max / 2) / MAX(rvd->vdev_children, 1); vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20, zfs_rebuild_vdev_limit * vd->vdev_children)); + vr->vr_last_txg = 0; /* * Removal of vdevs from the vdev tree may eliminate the need @@ -910,8 +912,16 @@ vdev_rebuild_thread(void *arg) error = vdev_rebuild_ranges(vr); zfs_range_tree_vacate(vr->vr_scan_tree, NULL, NULL); - spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); + /* + * Allow rebuilt ranges to be sync-ed before enabling metaslab + * to avoid any interfering allocations. Otherwise, we might + * see checksum errors after scrub. + */ + if (vr->vr_last_txg != 0) + txg_wait_synced(dp, vr->vr_last_txg); + metaslab_enable(msp, B_FALSE, B_FALSE); + spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER); if (error != 0) break; diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 5c2c984c34b6..94b44561bd98 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -1676,10 +1676,10 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset, * have already processed the original allocating I/O. 
*/ if (flags & ZIO_FLAG_ALLOC_THROTTLED && - (vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) { + (vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY)) && + type == ZIO_TYPE_WRITE) { ASSERT(pio->io_metaslab_class != NULL); ASSERT(pio->io_metaslab_class->mc_alloc_throttle_enabled); - ASSERT(type == ZIO_TYPE_WRITE); ASSERT(priority == ZIO_PRIORITY_ASYNC_WRITE); ASSERT(!(flags & ZIO_FLAG_IO_REPAIR)); ASSERT(!(pio->io_flags & ZIO_FLAG_IO_REWRITE) || @@ -4779,11 +4779,17 @@ zio_vdev_io_start(zio_t *zio) } zio->io_delay = gethrtime(); - if (zio_handle_device_injection(vd, zio, ENOSYS) != 0) { + int error = zio_handle_device_injections(vd, zio, ENOSYS, + EFAULT); + if (error == ENOSYS || (error == EFAULT && + !(zio->io_flags & ZIO_FLAG_IO_REPAIR))) { /* * "no-op" injections return success, but do no actual - * work. Just return it. + * work. Just return it. "io-prefail" injections are + * similar, but don't return success. */ + if (error == EFAULT) + zio->io_error = EIO; zio_delay_interrupt(zio); return (NULL); } @@ -5513,6 +5519,12 @@ zio_dva_throttle_done(zio_t *zio) } } +static void +zio_done_postread_done(zio_t *zio) +{ + abd_free(zio->io_abd); +} + static zio_t * zio_done(zio_t *zio) { @@ -5843,6 +5855,24 @@ zio_done(zio_t *zio) zfs_ereport_free_checksum(zcr); } + if (zio->io_flags & ZIO_FLAG_POSTREAD) { + ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE); + zl = NULL; + zio_t *pio = zio_walk_parents(zio, &zl); + blkptr_t *bp = zio->io_bp; + abd_t *abd = abd_alloc_for_io(BP_GET_PSIZE(bp), B_FALSE); + zio_priority_t prio = zio->io_priority == + ZIO_PRIORITY_SYNC_WRITE ? 
ZIO_PRIORITY_SYNC_READ : + ZIO_PRIORITY_SCRUB; + zio_t *cio = zio_vdev_child_io(pio, zio->io_bp, zio->io_vd, + zio->io_offset, abd, zio->io_size, ZIO_TYPE_READ, prio, + ZIO_FLAG_SCRUB | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL | + ZIO_FLAG_RESILVER | ZIO_FLAG_DONT_PROPAGATE, + zio_done_postread_done, NULL); + cio->io_flags &= ~ZIO_FLAG_ALLOC_THROTTLED; + zio_nowait(cio); + } + /* * It is the responsibility of the done callback to ensure that this * particular zio is no longer discoverable for adoption, and as diff --git a/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h b/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h index b7ff1e9b0923..614d996ab619 100644 --- a/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h +++ b/sys/contrib/openzfs/module/zstd/include/zstd_compat_wrapper.h @@ -33,6 +33,8 @@ * Copyright (c) 2020, Sebastian Gottschall */ +#define _ZSTD_COMPAT_WRAPPER_H + /* * This wrapper fixes a problem, in case the ZFS filesystem driver, is compiled * statically into the kernel. diff --git a/sys/contrib/openzfs/module/zstd/lib/common/zstd_common.c b/sys/contrib/openzfs/module/zstd/lib/common/zstd_common.c index dfbe6f635627..90b9fe00fece 100644 --- a/sys/contrib/openzfs/module/zstd/lib/common/zstd_common.c +++ b/sys/contrib/openzfs/module/zstd/lib/common/zstd_common.c @@ -9,7 +9,9 @@ * You may select, at your option, one of the above-listed licenses. */ - +#ifdef _ZSTD_COMPAT_WRAPPER_H +#undef ZSTD_isError /* defined within zstd_internal.h */ +#endif /*-************************************* * Dependencies @@ -34,8 +36,8 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } /*! 
ZSTD_isError() : * tells if a return value is an error code * symbol is required for external callers */ -#if !defined(_STANDALONE) -unsigned ZSTD_isError(size_t code) __asm__("zfs_ZSTD_isError"); +#ifdef _ZSTD_COMPAT_WRAPPER_H +#define ZSTD_isError zfs_ZSTD_isError #endif unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index 243d28e8bc49..4c7e4e85ec00 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -254,6 +254,10 @@ tests = ['zfs_inherit_001_neg', 'zfs_inherit_002_neg', 'zfs_inherit_003_pos', 'zfs_inherit_mountpoint'] tags = ['functional', 'cli_root', 'zfs_inherit'] +[tests/functional/cli_root/zfs_list] +tests = ['zfs_list_009_pos'] +tags = ['functional', 'cli_root', 'zfs_list'] + [tests/functional/cli_root/zfs_load-key] tests = ['zfs_load-key', 'zfs_load-key_all', 'zfs_load-key_file', 'zfs_load-key_https', 'zfs_load-key_location', 'zfs_load-key_noop', @@ -902,7 +906,8 @@ tags = ['functional', 'quota'] [tests/functional/redacted_send] tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted', 'redacted_disabled_feature', 'redacted_embedded', 'redacted_holes', - 'redacted_incrementals', 'redacted_largeblocks', 'redacted_many_clones', + 'redacted_incrementals', 'redacted_largeblocks', 'redacted_max_blkid', + 'redacted_many_clones', 'redacted_mixed_recsize', 'redacted_mounts', 'redacted_negative', 'redacted_origin', 'redacted_panic', 'redacted_props', 'redacted_resume', 'redacted_size', 'redacted_volume'] @@ -912,7 +917,7 @@ tags = ['functional', 'redacted_send'] tests = ['raidz_001_neg', 'raidz_002_pos', 'raidz_expand_001_pos', 'raidz_expand_002_pos', 'raidz_expand_003_neg', 'raidz_expand_003_pos', 'raidz_expand_004_pos', 'raidz_expand_005_pos', 'raidz_expand_006_neg', - 'raidz_expand_007_neg'] + 'raidz_expand_007_neg', 'raidz_zinject'] tags = ['functional', 
'raidz'] timeout = 1200 @@ -1146,7 +1151,7 @@ tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos'] tags = ['functional', 'zvol', 'zvol_swap'] [tests/functional/libzfs] -tests = ['many_fds', 'libzfs_input'] +tests = ['many_fds', 'libzfs_input', 'libzfs_mnttab_cache'] tags = ['functional', 'libzfs'] [tests/functional/log_spacemap] diff --git a/sys/contrib/openzfs/tests/runfiles/sanity.run b/sys/contrib/openzfs/tests/runfiles/sanity.run index ca16bee67dda..936f2bcc32be 100644 --- a/sys/contrib/openzfs/tests/runfiles/sanity.run +++ b/sys/contrib/openzfs/tests/runfiles/sanity.run @@ -466,7 +466,7 @@ tests = ['large_files_001_pos', 'large_files_002_pos'] tags = ['functional', 'large_files'] [tests/functional/libzfs] -tests = ['many_fds', 'libzfs_input'] +tests = ['many_fds', 'libzfs_input', 'libzfs_mnttab_cache'] tags = ['functional', 'libzfs'] [tests/functional/limits] diff --git a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in index 874a23a87574..29d2760ccb8f 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in @@ -173,9 +173,6 @@ if sys.platform.startswith('freebsd'): 'cli_root/zpool_resilver/zpool_resilver_concurrent': ['SKIP', na_reason], 'zoned_uid/setup': ['SKIP', na_reason], - 'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason], - 'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason], - 'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason], 'cli_root/zfs_unshare/zfs_unshare_008_pos': ['SKIP', na_reason], 'cp_files/cp_files_002_pos': ['SKIP', na_reason], 'link_count/link_count_001': ['SKIP', na_reason], @@ -204,7 +201,6 @@ elif sys.platform.startswith('linux'): # reasons listed above can be used. 
# maybe = { - 'append/threadsappend_001_pos': ['FAIL', 6136], 'chattr/setup': ['SKIP', exec_reason], 'crtime/crtime_001_pos': ['SKIP', statx_reason], 'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason], diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore index 4bdca0acf52b..c0785d9a28e8 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/.gitignore @@ -18,6 +18,7 @@ /getversion /largest_file /libzfs_input_check +/libzfs_mnttab_cache_check /manipulate_user_buffer /mkbusy /mkfile diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am index c4155ca3cacd..9f92310985e7 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/Makefile.am @@ -66,6 +66,10 @@ scripts_zfs_tests_bin_PROGRAMS += %D%/libzfs_input_check libzfs_core.la \ libnvpair.la +scripts_zfs_tests_bin_PROGRAMS += %D%/libzfs_mnttab_cache_check +%C%_libzfs_mnttab_cache_check_LDADD = \ + libzfs.la + scripts_zfs_tests_bin_PROGRAMS += %D%/manipulate_user_buffer %C%_manipulate_user_buffer_LDADD = -lpthread diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_mnttab_cache_check.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_mnttab_cache_check.c new file mode 100644 index 000000000000..661c5ce43f66 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/libzfs_mnttab_cache_check.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: CDDL-1.0 +/* + * CDDL HEADER START + * + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2026 by Delphix. All rights reserved. + */ + +/* + * libzfs_mnttab_cache_check.c + * + * Tests that libzfs_mnttab_cache(hdl, B_FALSE) does indeed disable the + * per-handle mnttab cache. It does this by adding a fake entry to it, then + * trying to read the status of a known-mounted dataset from it. + * + * As currently implemented, when enabled, libzfs_mnttab_find() assumes the + * cache is correct and up to date if it has any entries in it at all. So by + * putting something in it before searching, the initial load from /etc/mtab + * never happens, and the real mounted datasets are never seen. + * + * When disabled, the entire cache is discarded and reloaded on every lookup, + * so the fake entry will disappear and the real state will be found correctly. + * to date if it has any entries in it at all. + * + * Run (as a user that can read /etc/mtab): + * ./libzfs_mnttab_cache_check <name-of-any-currently-mounted-zfs-dataset> + */ + +#include <errno.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/mnttab.h> +#include <libzfs.h> + +int +main(int argc, char *argv[]) +{ + if (argc != 2) { + fprintf(stderr, + "usage: %s <currently-mounted-zfs-dataset>\n", argv[0]); + return (2); + } + const char *real_ds = argv[1]; + + libzfs_handle_t *hdl = libzfs_init(); + if (hdl == NULL) { + fprintf(stderr, "libzfs_init failed\n"); + return (1); + } + + /* Ask libzfs to disable the per-handle mnttab cache. */ + libzfs_mnttab_cache(hdl, B_FALSE); + + /* + * Stand-in for what zfs_mount() does internally on every successful + * mount: zfs_mount_at() calls libzfs_mnttab_add(hdl, ...) after + * do_mount(). In a real consumer, this happens implicitly; we call it + * directly here so the reproducer doesn't need root or a mountable + * dataset. 
+ */ + libzfs_mnttab_add(hdl, "fake/dataset", "/fake/mountpoint", "rw"); + + /* + * Now query ZFS_PROP_MOUNTED on a real, currently-mounted dataset. + * This is the standard libzfs API a consumer uses to check mount + * state. Internally it calls libzfs_mnttab_find(). + */ + zfs_handle_t *zhp = zfs_open(hdl, real_ds, ZFS_TYPE_FILESYSTEM); + if (zhp == NULL) { + fprintf(stderr, "zfs_open(%s) failed\n", real_ds); + libzfs_fini(hdl); + return (1); + } + + uint64_t mounted = zfs_prop_get_int(zhp, ZFS_PROP_MOUNTED); + zfs_close(zhp); + + int rc; + if (mounted) { + printf("OK: ZFS_PROP_MOUNTED reports %s as mounted\n", real_ds); + rc = 0; + } else { + printf("BUG: ZFS_PROP_MOUNTED reports %s as NOT mounted\n", + real_ds); + printf(" but %s IS mounted (see /etc/mtab and " + "`zfs get mounted`).\n", real_ds); + printf(" libzfs_mnttab_cache(hdl, B_FALSE) did not " + "actually disable the cache.\n"); + rc = 1; + } + + libzfs_fini(hdl); + return (rc); +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/threadsappend.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/threadsappend.c index bdbb2881f529..ed0d74c3c279 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/threadsappend.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/threadsappend.c @@ -87,7 +87,7 @@ usage(void) int main(int argc, char **argv) { - pthread_t tid; + pthread_t tid[2]; int ret = 0; long ncpus = 0; int i; @@ -120,7 +120,7 @@ main(int argc, char **argv) } for (i = 0; i < 2; i++) { - ret = pthread_create(&tid, NULL, go, (void *)&i); + ret = pthread_create(&tid[i], NULL, go, (void *)&i); if (ret != 0) { (void) fprintf(stderr, "zfs_threadsappend: thr_create(#%d) " @@ -129,8 +129,8 @@ main(int argc, char **argv) } } - while (pthread_join(tid, NULL) == 0) - continue; + for (i = 0; i < 2; i++) + (void) pthread_join(tid[i], NULL); return (0); } diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg index a52cacec224a..19dbb5202c3c 100644 --- 
a/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/commands.cfg @@ -202,6 +202,7 @@ export ZFSTEST_FILES_COMMON='badsend get_diff getversion largest_file + libzfs_mnttab_cache_check libzfs_input_check manipulate_user_buffer mkbusy diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib index 974e19c04269..39e63bed7bf0 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib +++ b/sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib @@ -2021,6 +2021,26 @@ function wait_sit_out #pool vdev timeout # # Check the output of 'zpool status -v <pool>', +# and to see if the counts of <device> contain the <regex> specified. +# +# Return 0 is contain, 1 otherwise +# +function check_pool_device # pool device regex <verbose> +{ + typeset pool=$1 + typeset device=$2 + typeset regex=$3 + typeset verbose=${4:-false} + + scan=$(zpool status -v "$pool" 2>/dev/null | grep $device) + if [[ $verbose == true ]]; then + log_note $scan + fi + echo $scan | grep -qi "$regex" +} + +# +# Check the output of 'zpool status -v <pool>', # and to see if the content of <token> contain the <keyword> specified. 
# # Return 0 is contain, 1 otherwise diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index cf04950a9612..c4bcfea55955 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -772,6 +772,9 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zfs_jail/cleanup.ksh \ functional/cli_root/zfs_jail/setup.ksh \ functional/cli_root/zfs_jail/zfs_jail_001_pos.ksh \ + functional/cli_root/zfs_list/cleanup.ksh \ + functional/cli_root/zfs_list/setup.ksh \ + functional/cli_root/zfs_list/zfs_list_009_pos.ksh \ functional/cli_root/zfs_load-key/cleanup.ksh \ functional/cli_root/zfs_load-key/setup.ksh \ functional/cli_root/zfs_load-key/zfs_load-key_all.ksh \ @@ -1697,6 +1700,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/largest_pool/largest_pool_001_pos.ksh \ functional/libzfs/cleanup.ksh \ functional/libzfs/libzfs_input.ksh \ + functional/libzfs/libzfs_mnttab_cache.ksh \ functional/libzfs/setup.ksh \ functional/limits/cleanup.ksh \ functional/limits/filesystem_count.ksh \ @@ -1884,6 +1888,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/raidz/raidz_expand_005_pos.ksh \ functional/raidz/raidz_expand_006_neg.ksh \ functional/raidz/raidz_expand_007_neg.ksh \ + functional/raidz/raidz_zinject.ksh \ functional/raidz/setup.ksh \ functional/redacted_send/cleanup.ksh \ functional/redacted_send/redacted_compressed.ksh \ @@ -1894,6 +1899,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/redacted_send/redacted_holes.ksh \ functional/redacted_send/redacted_incrementals.ksh \ functional/redacted_send/redacted_largeblocks.ksh \ + functional/redacted_send/redacted_max_blkid.ksh \ functional/redacted_send/redacted_many_clones.ksh \ functional/redacted_send/redacted_mixed_recsize.ksh \ functional/redacted_send/redacted_mounts.ksh \ diff --git 
a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/cleanup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/cleanup.ksh new file mode 100755 index 000000000000..138dfe0473cc --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/cleanup.ksh @@ -0,0 +1,30 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +default_cleanup diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/setup.ksh new file mode 100755 index 000000000000..912fcfc40f69 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/setup.ksh @@ -0,0 +1,32 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
+# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +default_setup $DISKS diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/zfs_list_009_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/zfs_list_009_pos.ksh new file mode 100755 index 000000000000..758aa7608d3e --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zfs_list/zfs_list_009_pos.ksh @@ -0,0 +1,69 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zfs list -Ho name <path>' follows symlinks when resolving the path to +# a dataset name. A symlink that crosses a mount boundary must resolve to +# the dataset owning the symlink's target, not the dataset containing the +# symlink itself. +# +# STRATEGY: +# 1. Create two child datasets: ds1 and ds2. +# 2. Place a symlink inside ds1 that points into ds2. +# 3. Verify that 'zfs list -Ho name <symlink>' returns ds2. +# + +verify_runnable "global" + +DS1="$TESTPOOL/$TESTFS/ds1" +DS2="$TESTPOOL/$TESTFS/ds2" +LINK="$TESTDIR/ds1/link_to_ds2" + +function cleanup +{ + rm -f "$LINK" + datasetexists "$DS1" && log_must zfs destroy "$DS1" + datasetexists "$DS2" && log_must zfs destroy "$DS2" +} + +log_onexit cleanup + +log_assert "'zfs list -Ho name' follows symlinks when resolving a path." + +log_must zfs create "$DS1" +log_must zfs create "$DS2" +log_must ln -s "$TESTDIR/ds2" "$LINK" + +result=$(zfs list -Ho name "$LINK") +if [[ "$result" != "$DS2" ]]; then + log_fail "'zfs list -Ho name $LINK' returned '$result', expected '$DS2'" +fi + +log_pass "'zfs list -Ho name' correctly follows a symlink crossing a mount boundary." 
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_args.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_args.ksh index 19351dc8f2df..93c320da6fde 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_args.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zinject/zinject_args.ksh @@ -48,7 +48,7 @@ function cleanup function test_device_fault { - typeset -a errno=("io" "decompress" "decrypt" "nxio" "dtl" "corrupt" "noop") + typeset -a errno=("io" "decompress" "decrypt" "nxio" "dtl" "corrupt" "noop" "io-prefail") for e in ${errno[@]}; do log_must eval \ "zinject -d $DISK1 -e $e -T read -f 0.001 $TESTPOOL" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh index 0bddfd08032b..2f866140dd8f 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_export/zpool_export_parallel_admin.ksh @@ -64,7 +64,7 @@ log_must zpool create -f $TESTPOOL1 mirror ${DEVICE_DIR}/disk0 ${DEVICE_DIR}/dis log_must zinject -P export -s 10 $TESTPOOL1 -log_must zpool export $TESTPOOL1 & +log_must_busy zpool export $TESTPOOL1 & zpool set comment=hello $TESTPOOL1 zpool reguid $TESTPOOL1 & diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/setup.ksh index 7be2a316a873..483cb3738c72 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/setup.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/setup.ksh @@ -25,7 +25,6 @@ verify_runnable "global" if is_freebsd; then - log_unsupported 
"FreeBSD has no hole punching mechanism for the time being." diskinfo -v $DISKS | grep -qE 'No.*# TRIM/UNMAP support' && log_unsupported "DISKS do not support discard (TRIM/UNMAP)" else diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh index 9e130d87e6e3..3d42a58102a8 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_online_offline.ksh @@ -40,7 +40,7 @@ DISK1=${DISKS%% *} DISK2="$(echo $DISKS | cut -d' ' -f2)" -log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2 -O recordsize=4k +log_must zpool create -f -O recordsize=4k $TESTPOOL mirror $DISK1 $DISK2 sync_and_rewrite_some_data_a_few_times $TESTPOOL log_must zpool trim -r 1 $TESTPOOL $DISK1 diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh index 6cf22fee18b3..e37f8e44c1af 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_partial.ksh @@ -35,6 +35,12 @@ # 5. Run 'zpool trim' to perform a full TRIM. # 6. Verify the disk is less than 10% of its original size. +# On FreeBSD, manual 'zpool trim' does not reclaim space on file +# vdevs stored on a ZFS filesystem within the test framework. 
+if is_freebsd; then + log_unsupported "Manual trim on file vdevs not supported on FreeBSD" +fi + function cleanup { if poolexists $TESTPOOL; then @@ -69,19 +75,19 @@ log_must zpool create -O compression=off $TESTPOOL "$LARGEFILE" log_must mkfile $(( floor(LARGESIZE * 0.80) )) /$TESTPOOL/file sync_all_pools -new_size=$(du -B1 "$LARGEFILE" | cut -f1) +new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}') log_must test $new_size -le $LARGESIZE log_must test $new_size -gt $(( floor(LARGESIZE * 0.70) )) # Expand the pool to create new unallocated metaslabs. log_must zpool export $TESTPOOL -log_must dd if=/dev/urandom of=$LARGEFILE conv=notrunc,nocreat \ +log_must dd if=/dev/urandom of=$LARGEFILE conv=notrunc \ seek=$((LARGESIZE / (1024 * 1024))) bs=$((1024 * 1024)) \ count=$((3 * LARGESIZE / (1024 * 1024))) log_must zpool import -d $TESTDIR $TESTPOOL log_must zpool online -e $TESTPOOL "$LARGEFILE" -new_size=$(du -B1 "$LARGEFILE" | cut -f1) +new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}') log_must test $new_size -gt $((4 * floor(LARGESIZE * 0.70) )) # Perform a partial trim, we expect it to skip most of the new metaslabs @@ -90,12 +96,11 @@ log_must set_tunable64 TRIM_METASLAB_SKIP 1 log_must zpool trim $TESTPOOL log_must set_tunable64 TRIM_METASLAB_SKIP 0 -sync_all_pools while [[ "$(trim_progress $TESTPOOL $LARGEFILE)" -lt "100" ]]; do sleep 0.5 done -new_size=$(du -B1 "$LARGEFILE" | cut -f1) +new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}') log_must test $new_size -gt $LARGESIZE # Perform a full trim, all metaslabs will be trimmed the pool vdev @@ -103,12 +108,11 @@ log_must test $new_size -gt $LARGESIZE # space usage of the new metaslabs. 
log_must zpool trim $TESTPOOL -sync_all_pools while [[ "$(trim_progress $TESTPOOL $LARGEFILE)" -lt "100" ]]; do sleep 0.5 done -new_size=$(du -B1 "$LARGEFILE" | cut -f1) +new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}') log_must test $new_size -le $(( 2 * LARGESIZE)) log_must test $new_size -gt $(( floor(LARGESIZE * 0.70) )) diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh index fafe57355e22..e42ee020d505 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_neg.ksh @@ -40,7 +40,7 @@ DISK2="$(echo $DISKS | cut -d' ' -f2)" DISK3="$(echo $DISKS | cut -d' ' -f3)" log_must zpool list -v -log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3 -O recordsize=4k +log_must zpool create -f -O recordsize=4k $TESTPOOL $DISK1 $DISK2 $DISK3 sync_and_rewrite_some_data_a_few_times $TESTPOOL log_must zpool trim -r 1 $TESTPOOL $DISK1 diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh index 9c1fcf42ff49..c5dac5b140c6 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_verify_trimmed.ksh @@ -34,6 +34,12 @@ # 3. Trim the pool and verify the file vdev is again sparse. # +# On FreeBSD, manual 'zpool trim' does not reclaim space on file +# vdevs stored on a ZFS filesystem within the test framework. 
+if is_freebsd; then + log_unsupported "Manual trim on file vdevs not supported on FreeBSD" +fi + function cleanup { if poolexists $TESTPOOL; then @@ -59,7 +65,7 @@ log_must mkdir "$TESTDIR" log_must truncate -s $LARGESIZE "$LARGEFILE" log_must zpool create $TESTPOOL "$LARGEFILE" -original_size=$(du -B1 "$LARGEFILE" | cut -f1) +original_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}') log_must zpool initialize $TESTPOOL @@ -67,8 +73,8 @@ while [[ "$(initialize_progress $TESTPOOL $LARGEFILE)" -lt "100" ]]; do sleep 0.5 done -new_size=$(du -B1 "$LARGEFILE" | cut -f1) -log_must within_tolerance $new_size $LARGESIZE $((128 * 1024 * 1024)) +new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}') +log_must within_tolerance $new_size $LARGESIZE $((200 * 1024 * 1024)) log_must zpool trim $TESTPOOL @@ -76,7 +82,7 @@ while [[ "$(trim_progress $TESTPOOL $LARGEFILE)" -lt "100" ]]; do sleep 0.5 done -new_size=$(du -B1 "$LARGEFILE" | cut -f1) +new_size=$(du -k "$LARGEFILE" | awk '{print $1 * 1024}') log_must within_tolerance $new_size $original_size $((128 * 1024 * 1024)) log_pass "Trimmed appropriate amount of disk space" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_basic.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_basic.ksh index 504eec11d70e..785556229908 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_basic.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_basic.ksh @@ -46,10 +46,6 @@ function trim_in_progress zpool status -t "$pool" | grep "trimmed, started" } -if is_freebsd; then - log_unsupported "FreeBSD has no hole punching mechanism for the time being." 
-fi - typeset -r FILE_VDEV="$TESTDIR/file_vdev" typeset pid diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_cancel.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_cancel.ksh index b94e7a50ce91..44372d657388 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_cancel.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_cancel.ksh @@ -59,10 +59,6 @@ function do_test bkgrnd_proc_succeeded $pid } -if is_freebsd; then - log_unsupported "FreeBSD has no hole punching mechanism for the time being." -fi - typeset pid typeset -r FILE_VDEV="$TESTDIR/file_vdev1" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_flag.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_flag.ksh index a6b7cabb1c4a..dcfda3ad817a 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_flag.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_wait/zpool_wait_trim_flag.ksh @@ -41,10 +41,6 @@ function cleanup [[ -d "$TESTDIR" ]] && log_must rm -r "$TESTDIR" } -if is_freebsd; then - log_unsupported "FreeBSD has no hole punching mechanism for the time being." 
-fi - typeset trim12_pid trim3_pid typeset -r VDEV1="$TESTDIR/file_vdev1" typeset -r VDEV2="$TESTDIR/file_vdev2" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh index eb032373185f..a2b3464b2bf0 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh @@ -38,7 +38,7 @@ # # STRATEGY: # 1. Set the interval to 1 and count to 4. -# 2. Sleep for 4 seconds. +# 2. Sleep for 5 seconds. # 3. Verify that the output has 4 records. # 4. Set interval to 0.5 and count to 1 to test floating point intervals. @@ -61,11 +61,12 @@ if ! is_global_zone ; then TESTPOOL=${TESTPOOL%%/*} fi -zpool iostat $TESTPOOL 1 4 > $tmpfile 2>&1 & -sleep 4 +log_must eval "zpool iostat $TESTPOOL 1 4 > $tmpfile 2>&1 &" +log_must sleep 5 stat_count=$(grep -c $TESTPOOL $tmpfile) if [[ $stat_count -ne 4 ]]; then + cat $tmpfile log_fail "zpool iostat [pool_name] [interval] [count] failed" fi diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fadvise/fadvise_dontneed.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fadvise/fadvise_dontneed.ksh index b19f576adcf4..53f3ad999d3b 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fadvise/fadvise_dontneed.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/fadvise/fadvise_dontneed.ksh @@ -32,11 +32,13 @@ # 2. Record cache_count from dbufstats # 3. Call file_fadvise with POSIX_FADV_DONTNEED on the file # 4. Verify that cache_count decreased +# 5. Sanity-check eviction for single-block files. 
# verify_runnable "global" -FILE=$TESTDIR/$TESTFILE0 +FILE0=$TESTDIR/$TESTFILE0 +FILE1=$TESTDIR/$TESTFILE1 BLKSZ=$(get_prop recordsize $TESTPOOL) function cleanup @@ -48,16 +50,21 @@ log_assert "Ensure POSIX_FADV_DONTNEED evicts data from the dbuf cache" log_onexit cleanup -log_must file_write -o create -f $FILE -b $BLKSZ -c 100 +log_must file_write -o create -f $FILE0 -b $BLKSZ -c 100 sync_pool $TESTPOOL evicts1=$(kstat dbufstats.cache_count) -log_must file_fadvise -f $FILE -a POSIX_FADV_DONTNEED +log_must file_fadvise -f $FILE0 -a POSIX_FADV_DONTNEED evicts2=$(kstat dbufstats.cache_count) log_note "cache_count before=$evicts1 after=$evicts2" log_must [ $evicts1 -gt $evicts2 ] +log_must file_write -o create -f $FILE1 -b 12000 -c 1 +sync_pool $TESTPOOL + +log_must file_fadvise -f $FILE1 -a POSIX_FADV_DONTNEED + log_pass "POSIX_FADV_DONTNEED evicts data from the dbuf cache" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/libzfs/libzfs_mnttab_cache.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/libzfs/libzfs_mnttab_cache.ksh new file mode 100755 index 000000000000..2459e364c1d7 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/libzfs/libzfs_mnttab_cache.ksh @@ -0,0 +1,26 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2026, TrueNAS. +# + +. 
$STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +log_assert "libzfs mnttab cache works as expected" + +log_must libzfs_mnttab_cache_check $TESTPOOL/$TESTFS + +log_pass "libzfs mnttab cache works as expected" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh index 54fe2fa35ac7..3e76cd1ff626 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/mmp/mmp_on_uberblocks.ksh @@ -16,20 +16,17 @@ # # -# Copyright (c) 2017 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2026 by Lawrence Livermore National Security, LLC. # # DESCRIPTION: # Ensure that MMP updates uberblocks with MMP info at expected intervals. # # STRATEGY: -# 1. Set TXG_TIMEOUT to large value -# 2. Create a zpool -# 3. Clear multihost history -# 4. Sleep, then collect count of uberblocks written -# 5. If number of changes seen is less than min threshold, then fail -# 6. If number of changes seen is more than max threshold, then fail -# 7. Sequence number increments when no TXGs are syncing +# 1. Create a zpool +# 2. Clear multihost history +# 3. Sleep for 10s, then collect count of uberblocks written +# 4. Verify the mmp updates are within 20% of the expected target # . 
$STF_SUITE/include/libtest.shlib @@ -38,51 +35,29 @@ verify_runnable "both" -UBER_CHANGES=0 DURATION=10 -EXPECTED=$((($(echo $DISKS | wc -w) * $DURATION * 1000) / $MMP_INTERVAL_DEFAULT)) -FUDGE=$((EXPECTED * 20 / 100)) -MIN_UB_WRITES=$((EXPECTED - FUDGE)) -MAX_UB_WRITES=$((EXPECTED + FUDGE)) -MIN_SEQ_VALUES=8 +NDISKS=$(echo $DISKS | wc -w) +MMP_INTERVAL=$(get_tunable MULTIHOST_INTERVAL) +TARGET=$((($NDISKS * $DURATION * 1000) / $MMP_INTERVAL)) function cleanup { datasetexists $TESTPOOL && destroy_pool $TESTPOOL - log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT - set_tunable64 TXG_TIMEOUT $TXG_TIMEOUT_DEFAULT log_must mmp_clear_hostid } log_assert "Ensure MMP uberblocks update at the correct interval" log_onexit cleanup -log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_DEFAULT -log_must set_tunable64 TXG_TIMEOUT $TXG_TIMEOUT_LONG log_must mmp_set_hostid $HOSTID1 log_must zpool create -f $TESTPOOL $DISKS log_must zpool set multihost=on $TESTPOOL -clear_mmp_history -UBER_CHANGES=$(count_mmp_writes $TESTPOOL $DURATION) - -log_note "Uberblock changed $UBER_CHANGES times" -if [ $UBER_CHANGES -lt $MIN_UB_WRITES ]; then - log_fail "Fewer uberblock writes occurred than expected ($EXPECTED)" -fi - -if [ $UBER_CHANGES -gt $MAX_UB_WRITES ]; then - log_fail "More uberblock writes occurred than expected ($EXPECTED)" -fi +clear_mmp_history +MMP_WRITES=$(count_mmp_writes $TESTPOOL $DURATION) -log_must set_tunable64 MULTIHOST_INTERVAL $MMP_INTERVAL_MIN -SEQ_BEFORE=$(zdb -luuuu ${DISK[0]} | awk '/mmp_seq/ {if ($NF>max) max=$NF}; END {print max}') -sleep 5 -SEQ_AFTER=$(zdb -luuuu ${DISK[0]} | awk '/mmp_seq/ {if ($NF>max) max=$NF}; END {print max}') -if [ $((SEQ_AFTER - SEQ_BEFORE)) -lt $MIN_SEQ_VALUES ]; then - zdb -luuuu ${DISK[0]} - log_fail "ERROR: mmp_seq did not increase by $MIN_SEQ_VALUES; before $SEQ_BEFORE after $SEQ_AFTER" -fi +log_note "Uberblock changed $MMP_WRITES times" +log_must within_percent $MMP_WRITES $TARGET 80 log_pass "Ensure MMP uberblocks 
update at the correct interval passed" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/raidz/raidz_zinject.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/raidz/raidz_zinject.ksh new file mode 100755 index 000000000000..e0417afc7755 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/raidz/raidz_zinject.ksh @@ -0,0 +1,94 @@ +#!/bin/ksh -p +# SPDX-License-Identifier: CDDL-1.0 +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2026, Klara Inc. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Inject an io-prefail error on a child of a raidz device, then write +# some data and verify that the pool encountered errors. 
+# + +function cleanup +{ + log_pos zpool status $TESTPOOL + + log_must zinject -c all + + poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL" + + for i in {1..$devs}; do + log_must rm -f "$TEST_BASE_DIR/dev-$i" + done + +} + +log_onexit cleanup + +typeset -r devs=6 +typeset -r dev_size_mb=128 + +typeset -a disks + +# Disk files which will be used by pool +for i in {1..$devs}; do + device=$TEST_BASE_DIR/dev-$i + log_must truncate -s ${dev_size_mb}M $device + disks[${#disks[*]}+1]=$device +done + +function run_test +{ + log_must zpool create -f -o cachefile=none -O recordsize=16k $TESTPOOL raidz1 ${disks[@]} + + log_must zinject -d $TEST_BASE_DIR/dev-1 -e io-prefail -T write -f 25 $TESTPOOL + + log_must file_write -o create -f /$TESTPOOL/file -b 128k -c 1000 -d R + log_must zpool sync $TESTPOOL + log_pos check_pool_status $TESTPOOL "errors" "No known data errors" || return 1 + log_pos check_pool_status $TESTPOOL "status" "One or more" || return 1 + + log_must zinject -c all + log_must zpool export -f $TESTPOOL + log_must rm $TEST_BASE_DIR/dev-2 + log_must zpool import -d $TEST_BASE_DIR $TESTPOOL + log_must zpool scrub $TESTPOOL + log_must zpool wait -t scrub $TESTPOOL + log_pos check_pool_status $TESTPOOL "errors" "No known data" || return 1 + log_pos check_pool_device $TESTPOOL "dev-1" "ONLINE.* 0$" || return 1 +} + +i=0 +while [[ $i -lt 3 ]]; do + run_test && log_pass "raidz handles partial write failure." + log_must zinject -c all + log_must zpool destroy $TESTPOOL + log_must truncate -s ${dev_size_mb}M $TEST_BASE_DIR/dev-2 + i=$((i + 1)) +done + +log_fail "raidz does not handle partial write failure." 
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_max_blkid.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_max_blkid.ksh new file mode 100755 index 000000000000..13820b470222 --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_max_blkid.ksh @@ -0,0 +1,118 @@ +#!/bin/ksh +# SPDX-License-Identifier: CDDL-1.0 + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright (c) 2026 by Delphix. All rights reserved. +# + +. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib + +# +# Description: +# Verify that an incremental send from a redaction bookmark correctly sends +# the last block (dn_maxblkid) of a file through the PREVIOUSLY_REDACTED +# path. +# +# Regression test for an off-by-one bug in the PREVIOUSLY_REDACTED handler +# in send_reader_thread(). file_max was computed as: +# +# MIN(dn->dn_maxblkid, range->end_blkid) +# +# dn_maxblkid is an inclusive maximum block ID while range->end_blkid is +# exclusive (one past the last block). Mixing these in MIN() caused the +# loop condition "blkid < file_max" to skip block dn_maxblkid, silently +# dropping the last block of any file whose last block was in the redaction +# list. The block remained as zeros on the receiver even though ZFS +# reported the send and receive as successful. +# +# Strategy: +# 1. Create a dataset with a 16-block file of random data and snapshot it. +# 2. Create a clone (redact_clone) that overwrites only the last block +# (block 15, i.e., dn_maxblkid). +# 3. 
Redact the base snapshot using redact_clone; block 15 enters the +# redaction list. +# 4. Create a second clone (send_clone) of the base snapshot that does NOT +# modify block 15. Because block 15 in send_clone has birth <= +# snap.creation_txg the TO traversal thread skips it; it must be sent +# via the PREVIOUSLY_REDACTED path. +# 5. Redacted-send the base snapshot to the receiver (block 15 = zeros). +# 6. Incrementally send send_clone from the redaction bookmark; block 15 +# must be filled in by the PREVIOUSLY_REDACTED handler. +# 7. Verify that block 15 on the receiver matches the original. +# + +typeset ds_name="max_blkid" +typeset sendfs="$POOL/$ds_name" +typeset redact_clone="$POOL/${ds_name}_redact" +typeset send_clone="$POOL/${ds_name}_send" +typeset recvfs="$POOL2/$ds_name" +typeset recv_clone="$POOL2/${ds_name}_send" +typeset tmpdir="$(get_prop mountpoint $POOL)/tmp" +typeset stream=$(mktemp $tmpdir/stream.XXXX) + +log_onexit redacted_cleanup $sendfs $recvfs $recv_clone + +# Create a dataset with a 16-block file. +log_must zfs create $sendfs +typeset mntpnt=$(get_prop mountpoint $sendfs) +typeset bs=$(get_prop recsize $sendfs) +log_must dd if=/dev/urandom of=$mntpnt/f1 bs=$bs count=16 + +# Take the base snapshot. +log_must zfs snapshot $sendfs@snap + +# Create redact_clone and overwrite ONLY the last block (block 15). +# This is the block at index dn_maxblkid for a 16-block file. +log_must zfs clone $sendfs@snap $redact_clone +typeset redact_mnt=$(get_prop mountpoint $redact_clone) +log_must dd if=/dev/urandom of=$redact_mnt/f1 bs=$bs count=1 seek=15 conv=notrunc +log_must zfs snapshot $redact_clone@snap + +# Create the redaction bookmark; block 15 is now in the redaction list. +log_must zfs redact $sendfs@snap book1 $redact_clone@snap + +# Create send_clone as an unmodified clone of the base snapshot. +# Block 15 in send_clone is inherited (birth <= snap.creation_txg), so the +# TO traversal thread does not include it. 
The PREVIOUSLY_REDACTED path +# must send it. +log_must zfs clone $sendfs@snap $send_clone +log_must zfs snapshot $send_clone@snap + +# Redacted send of the base snapshot; block 15 of f1 is omitted. +log_must eval "zfs send --redact book1 $sendfs@snap >$stream" +log_must eval "zfs recv $recvfs <$stream" + +# Incremental send of send_clone from the redaction bookmark. +# Block 15 must be sent via the PREVIOUSLY_REDACTED path. +log_must eval "zfs send -i $sendfs#book1 $send_clone@snap >$stream" +log_must eval "zfs recv $recv_clone <$stream" + +# Verify that the received clone is identical to the source. +# If the bug is present, block 15 is zeros on the receiver and this fails. +typeset send_mnt=$(get_prop mountpoint $send_clone) +typeset recv_mnt=$(get_prop mountpoint $recv_clone) +log_must directory_diff $send_mnt $recv_mnt + +# Explicitly verify block 15 is not all zeros and matches the source. +typeset src_block=$(mktemp $tmpdir/src_block.XXXX) +typeset recv_block=$(mktemp $tmpdir/recv_block.XXXX) +typeset zero_block=$(mktemp $tmpdir/zero_block.XXXX) +log_must dd if=$mntpnt/f1 bs=$bs skip=15 count=1 of=$src_block 2>/dev/null +log_must dd if=$recv_mnt/f1 bs=$bs skip=15 count=1 of=$recv_block 2>/dev/null +log_must dd if=/dev/zero bs=$bs count=1 of=$zero_block 2>/dev/null + +cmp -s $recv_block $zero_block && log_fail "Block 15 is all zeros on receiver (off-by-one bug)" +log_must cmp $src_block $recv_block + +log_pass "Incremental send from bookmark correctly sends the last block (dn_maxblkid)." 
diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib index 53e2efffac2d..2b5a28b06206 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy.kshlib @@ -52,7 +52,7 @@ function cleanup # function cksum_pool { - typeset -i cksum=$(zpool status $1 | awk ' + typeset -i cksum=$(zpool status -p $1 | awk ' !NF { isvdev = 0 } isvdev { errors += $NF } /CKSUM$/ { isvdev = 1 } @@ -408,3 +408,72 @@ function recover_bad_missing_devs return 0 } + +# +# Given a dRAID pool issue a scrub and verify the current pool status +# aligns with the expected status based on the 'replace_mode' passed. +# Valid modes are: +# +# 1. healing - The pool is perfectly intact. No checksum errors have +# been reported and the scrub didn't make any repairs. This is the +# expected state after a healing resilver of a healthy pool. +# +# 2. sequential - The pool is fully intact. There should never be a +# checksum error. +# +# 3. damaged - The pool was intentionally silently damaged. Checksum +# errors are expected to be reported as the damaged blocks are +# detected and repaired. +# +# In all of these cases a scrub must be able to successfully repair the +# pool and result in no data loss. +# +function verify_draid_pool +{ + typeset pool=${1:-$TESTPOOL} + typeset replace_mode=${2:-healing} + + log_note "verify_draid_pool $pool $replace_mode" + log_must zpool scrub -w $pool + + typeset -i cksum=$(cksum_pool $pool) + + if [[ "$replace_mode" = "healing" ]]; then + if [[ $cksum -gt 0 ]]; then + log_must zpool status -v $pool + log_fail "Unexpected CKSUM errors found for $pool ($cksum)" + fi + + if ! 
check_pool_status $pool "scan" "repaired 0B"; then + log_must zpool status -v $pool + log_fail "Unexpected repair IO found for $pool ($cksum)" + fi + elif [[ "$replace_mode" = "sequential" ]]; then + if [[ $cksum -gt 0 ]]; then + log_must zpool status -v $pool + log_fail "Unexpected CKSUM errors found for $pool ($cksum)" + fi + elif [[ "$replace_mode" = "damaged" ]]; then + if [[ $cksum -lt 1 ]]; then + log_must zpool status -v $pool + log_fail "Expected CKSUM errors missing for $pool ($cksum)" + fi + + if check_pool_status $pool "scan" "repaired 0B"; then + log_must zpool status -v $pool + log_fail "Expected repair IO missing for $pool ($cksum)" + fi + else + log_fail "Invalid replace_mode=$replace_mode" + fi + + if ! check_pool_status $pool "scan" "with 0 errors"; then + log_must zpool status -v $pool + log_fail "Unexpected repair errors found for $pool" + fi + + if ! check_pool_status $pool "errors" "No known data errors"; then + log_must zpool status -v $pool + log_fail "Unexpected data errors found for $pool" + fi +} diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh index a1356f619009..81a01c07a3fd 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid.ksh @@ -86,8 +86,7 @@ function test_selfheal # <pool> <parity> <dir> # from the files which were read. Before overwriting additional # devices we need to repair all of the blocks in the pool. 
# - log_must zpool scrub -w $pool - log_must check_pool_status $pool "errors" "No known data errors" + log_must verify_draid_pool $pool "damaged" log_must zpool clear $pool @@ -104,8 +103,7 @@ function test_selfheal # <pool> <parity> <dir> log_must eval "find $mntpnt -type f -exec cksum {} + >> /dev/null 2>&1" log_must check_pool_status $pool "errors" "No known data errors" - log_must zpool scrub -w $pool - log_must check_pool_status $pool "errors" "No known data errors" + log_must verify_draid_pool $pool "damaged" log_must zpool clear $pool } @@ -182,8 +180,7 @@ function test_scrub # <pool> <parity> <dir> log_must zpool import -o cachefile=none -d $dir $pool - log_must zpool scrub -w $pool - log_must check_pool_status $pool "errors" "No known data errors" + log_must verify_draid_pool $pool "damaged" log_must zpool clear $pool @@ -196,8 +193,7 @@ function test_scrub # <pool> <parity> <dir> log_must zpool import -o cachefile=none -d $dir $pool - log_must zpool scrub -w $pool - log_must check_pool_status $pool "errors" "No known data errors" + log_must verify_draid_pool $pool "damaged" log_must zpool clear $pool } diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh index cafd63166013..56b12373af52 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged1.ksh @@ -89,12 +89,7 @@ function test_sequential_resilver # <pool> <parity> <dir> log_must zpool replace -fsw $pool $dir/dev-$i $spare done - log_must zpool scrub -w $pool - log_must zpool status $pool - - log_mustnot check_pool_status $pool "scan" "repaired 0B" - log_must check_pool_status $pool "errors" "No known data errors" - log_must check_pool_status $pool "scan" "with 0 errors" + log_must verify_draid_pool $pool 
"damaged" } log_onexit cleanup diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh index 46bf9f950864..3dfb760be07d 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_damaged2.ksh @@ -121,12 +121,7 @@ for nparity in 1 2 3; do # Scrub the pool after the sequential resilver and verify # that the silent damage was repaired by the scrub. - log_must zpool scrub -w $TESTPOOL - log_must zpool status $TESTPOOL - log_must check_pool_status $TESTPOOL "errors" \ - "No known data errors" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" - log_mustnot check_pool_status $TESTPOOL "scan" "repaired 0B" + log_must verify_draid_pool $TESTPOOL "damaged" done for nspare in 0 1 2; do @@ -145,12 +140,7 @@ for nparity in 1 2 3; do done log_must zpool clear $TESTPOOL - log_must zpool scrub -w $TESTPOOL - log_must zpool status $TESTPOOL - - log_must check_pool_status $TESTPOOL "errors" "No known data errors" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" + log_must verify_draid_pool $TESTPOOL "healing" log_must zpool destroy "$TESTPOOL" done diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh index ae65d3a21290..31444850f76c 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded1.ksh @@ -89,12 +89,7 @@ function test_sequential_resilver # <pool> <parity> <dir> spare=draid${nparity}-0-0 log_must zpool replace -fsw $pool 
$dir/dev-$nparity $spare - log_must zpool scrub -w $pool - log_must zpool status $pool - - log_must check_pool_status $pool "scan" "repaired 0B" - log_must check_pool_status $pool "errors" "No known data errors" - log_must check_pool_status $pool "scan" "with 0 errors" + log_must verify_draid_pool $pool "sequential" } log_onexit cleanup diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded2.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded2.ksh index 8d102627fdb0..22e1f2dfb310 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded2.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_degraded2.ksh @@ -105,12 +105,7 @@ function test_sequential_resilver # <pool> <parity> <dir> log_must zpool wait -t resilver $pool - log_must zpool scrub -w $pool - log_must zpool status $pool - - log_must check_pool_status $pool "scan" "repaired 0B" - log_must check_pool_status $pool "errors" "No known data errors" - log_must check_pool_status $pool "scan" "with 0 errors" + log_must verify_draid_pool $pool "sequential" } log_onexit cleanup diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare1.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare1.ksh index 0604f7f48c7e..e5d16910ddcd 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare1.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare1.ksh @@ -85,9 +85,7 @@ for replace_mode in "healing" "sequential"; do log_must check_hotspare_state $TESTPOOL $spare_vdev "INUSE" # Preserve the 1st faulted vdev for the next test. 
[[ $i -eq 0 ]] || log_must zpool detach $TESTPOOL $fault_vdev - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode (( i += 1 )) done @@ -98,9 +96,7 @@ for replace_mode in "healing" "sequential"; do # Verify that after clearing the 1st faulted vdev, all is healed. log_must zpool clear $TESTPOOL "$BASEDIR/vdev0" log_must wait_resilvered $TESTPOOL - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode cleanup done diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare2.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare2.ksh index 288f02392b43..6f94161b47fe 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare2.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare2.ksh @@ -60,9 +60,9 @@ log_must zpool offline -f $TESTPOOL $BASEDIR/vdev9 log_must zpool replace -w $TESTPOOL $BASEDIR/vdev9 draid1-0-2 # Verify, refill and verify the pool contents. -verify_pool $TESTPOOL +log_must verify_draid_pool $TESTPOOL "healing" refill_test_env $TESTPOOL -verify_pool $TESTPOOL +log_must verify_draid_pool $TESTPOOL "healing" # Bring everything back online and check for errors. 
log_must zpool clear $TESTPOOL @@ -72,9 +72,7 @@ log_must wait_hotspare_state $TESTPOOL draid1-0-0 "AVAIL" log_must wait_hotspare_state $TESTPOOL draid1-0-1 "AVAIL" log_must wait_hotspare_state $TESTPOOL draid1-0-2 "AVAIL" -log_must zpool scrub -w $TESTPOOL -log_must check_pool_status $TESTPOOL "scan" "repaired 0B" -log_must check_pool_status $TESTPOOL "scan" "with 0 errors" +log_must verify_draid_pool $TESTPOOL "healing" log_must is_data_valid $TESTPOOL diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare3.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare3.ksh index 425c30a4984c..f1485375fdf1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare3.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare3.ksh @@ -111,9 +111,7 @@ for replace_mode in "healing" "sequential"; do log_must zpool detach $TESTPOOL $BASEDIR/vdev7 log_must check_vdev_state $TESTPOOL draid1-0-0 "ONLINE" log_must check_hotspare_state $TESTPOOL draid1-0-0 "INUSE" - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode # Distributed spare in mirror with original device faulted log_must zpool offline -f $TESTPOOL $BASEDIR/vdev8 @@ -122,9 +120,7 @@ for replace_mode in "healing" "sequential"; do log_must check_vdev_state $TESTPOOL spare-8 "DEGRADED" log_must check_vdev_state $TESTPOOL draid1-0-1 "ONLINE" log_must check_hotspare_state $TESTPOOL draid1-0-1 "INUSE" - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode # Distributed spare in mirror with original device still online log_must check_vdev_state $TESTPOOL 
$BASEDIR/vdev9 "ONLINE" @@ -132,9 +128,7 @@ for replace_mode in "healing" "sequential"; do log_must check_vdev_state $TESTPOOL spare-9 "ONLINE" log_must check_vdev_state $TESTPOOL draid1-0-2 "ONLINE" log_must check_hotspare_state $TESTPOOL draid1-0-2 "INUSE" - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode # Normal faulted device replacement new_vdev0="$BASEDIR/new_vdev0" @@ -143,9 +137,7 @@ for replace_mode in "healing" "sequential"; do log_must check_vdev_state $TESTPOOL $BASEDIR/vdev0 "FAULTED" log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev0 $new_vdev0 log_must check_vdev_state $TESTPOOL $new_vdev0 "ONLINE" - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode # Distributed spare faulted device replacement log_must zpool offline -f $TESTPOOL $BASEDIR/vdev2 @@ -154,9 +146,7 @@ for replace_mode in "healing" "sequential"; do log_must check_vdev_state $TESTPOOL spare-2 "DEGRADED" log_must check_vdev_state $TESTPOOL draid1-0-3 "ONLINE" log_must check_hotspare_state $TESTPOOL draid1-0-3 "INUSE" - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode # Normal online device replacement new_vdev1="$BASEDIR/new_vdev1" @@ -164,9 +154,7 @@ for replace_mode in "healing" "sequential"; do log_must check_vdev_state $TESTPOOL $BASEDIR/vdev1 "ONLINE" log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev1 $new_vdev1 log_must check_vdev_state $TESTPOOL $new_vdev1 "ONLINE" - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" 
"with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode # Distributed spare online device replacement (then fault) log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev3 draid1-0-4 @@ -176,9 +164,7 @@ for replace_mode in "healing" "sequential"; do log_must zpool offline -f $TESTPOOL $BASEDIR/vdev3 log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "FAULTED" log_must check_vdev_state $TESTPOOL spare-3 "DEGRADED" - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode # Verify the original data is valid log_must is_data_valid $TESTPOOL diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare4.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare4.ksh index ffbf664046ec..e4f01bbcf672 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare4.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redundancy/redundancy_draid_spare4.ksh @@ -102,9 +102,7 @@ for replace_mode in "healing" "sequential"; do log_must zpool detach $TESTPOOL $fault_vdev done - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode done # Fail remaining drives as long as parity permits. 
@@ -120,9 +118,7 @@ for replace_mode in "healing" "sequential"; do log_must zpool offline -f $TESTPOOL $fault_vdev log_must check_vdev_state $TESTPOOL $fault_vdev "FAULTED" - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode (( faults_left > 0 && faults_left-- )) done done @@ -138,9 +134,7 @@ for replace_mode in "healing" "sequential"; do break fi - log_must verify_pool $TESTPOOL - log_must check_pool_status $TESTPOOL "scan" "repaired 0B" - log_must check_pool_status $TESTPOOL "scan" "with 0 errors" + log_must verify_draid_pool $TESTPOOL $replace_mode done log_must is_data_valid $TESTPOOL diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh index f1e42ff7d849..8827b5398151 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/replacement/resilver_restart_001.ksh @@ -46,6 +46,7 @@ function cleanup { + log_must zpool events log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME log_must set_tunable32 SCAN_SUSPEND_PROGRESS \ $ORIG_SCAN_SUSPEND_PROGRESS @@ -53,11 +54,12 @@ function cleanup $ORIG_RESILVER_DEFER_PERCENT log_must set_tunable32 ZEVENT_LEN_MAX $ORIG_ZFS_ZEVENT_LEN_MAX log_must zinject -c all + log_must zpool events -c destroy_pool $TESTPOOL1 rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE } -# count resilver events in zpool and number of deferred rsilvers on vdevs +# count resilver events in zpool and number of deferred resilvers on vdevs function verify_restarts # <msg> <cnt> <defer> { msg=$1 @@ -113,7 +115,7 @@ log_must set_tunable32 ZEVENT_LEN_MAX 512 log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE log_must zpool create -f -o 
feature@resilver_defer=disabled $TESTPOOL1 \ - raidz ${VDEV_FILES[@]} + raidz2 ${VDEV_FILES[@]} # create 4 filesystems for fs in fs{0..3} @@ -157,8 +159,8 @@ do log_must set_tunable32 RESILVER_MIN_TIME_MS 20 # initiate a resilver and suspend the scan as soon as possible - log_must zpool replace $TESTPOOL1 $VDEV_REPLACE log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1 + log_must zpool replace $TESTPOOL1 $VDEV_REPLACE # there should only be 1 resilver start verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}" @@ -166,9 +168,12 @@ do # offline then online a vdev to introduce a new DTL range after current # scan, which should restart (or defer) the resilver log_must zpool offline $TESTPOOL1 ${VDEV_FILES[2]} - sync_pool $TESTPOOL1 + log_must wait_vdev_state $TESTPOOL1 ${VDEV_FILES[2]} "OFFLINE" + sync_pool $TESTPOOL1 true + log_must zpool online $TESTPOOL1 ${VDEV_FILES[2]} - sync_pool $TESTPOOL1 + log_must wait_vdev_state $TESTPOOL1 ${VDEV_FILES[2]} "ONLINE" + sync_pool $TESTPOOL1 true # there should now be 2 resilver starts w/o defer, 1 with defer verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}" @@ -190,8 +195,8 @@ do log_must is_pool_resilvered $TESTPOOL1 # wait for a few txg's to see if a resilver happens - sync_pool $TESTPOOL1 - sync_pool $TESTPOOL1 + sync_pool $TESTPOOL1 true + sync_pool $TESTPOOL1 true # there should now be 2 resilver starts verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_018_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_018_pos.ksh index db9e268c3c40..cb78cd39d09b 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_018_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/snapshot/snapshot_018_pos.ksh @@ -91,6 +91,7 @@ log_must eval "[[ $(stat_mtime $tfs_snapdir) == 0 ]]" # Create snapshots for filesystems and check snapshots_changed reports correct 
time curr_time=$(date '+%s') +log_must sleep 1 log_must zfs snapshot $snap_testpool snap_changed_testpool=$(zfs get -H -o value -p snapshots_changed $TESTPOOL) snap_changed_nsecs_testpool=$(zfs get -H -o value -p snapshots_changed_nsecs $TESTPOOL) @@ -103,6 +104,7 @@ log_must eval "[[ $list_changed_nsecs_testpool == $snap_changed_nsecs_testpool ] log_must eval "[[ $(stat_mtime $tpool_snapdir) == $snap_changed_testpool ]]" curr_time=$(date '+%s') +log_must sleep 1 log_must zfs snapshot $snap_testfsv1 snap_changed_testfs=$(zfs get -H -o value -p snapshots_changed $TESTPOOL/$TESTFS) snap_changed_nsecs_testfs=$(zfs get -H -o value -p snapshots_changed_nsecs $TESTPOOL/$TESTFS) @@ -121,6 +123,7 @@ log_must eval "[[ $(zfs get -H -o value -p snapshots_changed_nsecs $TESTPOOL/$TE # Create snapshot while unmounted curr_time=$(date '+%s') +log_must sleep 1 log_must zfs snapshot $snap_testfsv2 snap_changed_testfs=$(zfs get -H -o value -p snapshots_changed $TESTPOOL/$TESTFS) snap_changed_nsecs_testfs=$(zfs get -H -o value -p snapshots_changed_nsecs $TESTPOOL/$TESTFS) @@ -144,6 +147,7 @@ log_must eval "[[ $(stat_mtime $tfs_snapdir) == $snap_changed_testfs ]]" # Destroy the snapshots and check snapshots_changed shows correct time curr_time=$(date '+%s') +log_must sleep 1 log_must zfs destroy $snap_testfsv1 snap_changed_testfs=$(zfs get -H -o value -p snapshots_changed $TESTPOOL/$TESTFS) snap_changed_nsecs_testfs=$(zfs get -H -o value -p snapshots_changed_nsecs $TESTPOOL/$TESTFS) @@ -152,6 +156,7 @@ log_must eval "[[ $((snap_changed_nsecs_testfs / 1000000000)) == $snap_changed_t log_must eval "[[ $(stat_mtime $tfs_snapdir) == $snap_changed_testfs ]]" curr_time=$(date '+%s') +log_must sleep 1 log_must zfs destroy $snap_testpool snap_changed_testpool=$(zfs get -H -o value -p snapshots_changed $TESTPOOL) snap_changed_nsecs_testpool=$(zfs get -H -o value -p snapshots_changed_nsecs $TESTPOOL) diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh 
b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh index a8deedfb8c3c..d67ff8b0ce2a 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/autotrim_config.ksh @@ -40,6 +40,12 @@ verify_runnable "global" +# On FreeBSD, autotrim does not reclaim space on file vdevs stored +# on a ZFS filesystem within the test framework. +if is_freebsd; then + log_unsupported "Autotrim on file vdevs not supported on FreeBSD" +fi + log_assert "Set 'autotrim=on' verify pool disks were trimmed" function cleanup diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/setup.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/setup.ksh index 7be2a316a873..483cb3738c72 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/setup.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/setup.ksh @@ -25,7 +25,6 @@ verify_runnable "global" if is_freebsd; then - log_unsupported "FreeBSD has no hole punching mechanism for the time being." 
diskinfo -v $DISKS | grep -qE 'No.*# TRIM/UNMAP support' && log_unsupported "DISKS do not support discard (TRIM/UNMAP)" else diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim.kshlib b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim.kshlib index 75d2ee570776..b58e010970f8 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim.kshlib +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim.kshlib @@ -23,7 +23,7 @@ # function get_size_mb { - du --block-size 1048576 -s "$1" | cut -f1 + du -m -s "$1" | cut -f1 } # diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh index ff569177357b..73496a3d68d6 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/trim/trim_config.ksh @@ -40,6 +40,12 @@ verify_runnable "global" +# On FreeBSD, manual trim does not reclaim space on file vdevs stored +# on a ZFS filesystem within the test framework. +if is_freebsd; then + log_unsupported "Manual trim on file vdevs not supported on FreeBSD" +fi + log_assert "Run 'zpool trim' verify pool disks were trimmed" function cleanup @@ -68,8 +74,8 @@ log_must set_tunable64 TRIM_TXG_BATCH 8 typeset vdev_min_ms_count=$(get_tunable VDEV_MIN_MS_COUNT) log_must set_tunable64 VDEV_MIN_MS_COUNT 32 -typeset VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.75 / 1024 / 1024) )) -typeset VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.30 / 1024 / 1024) )) +typeset VDEV_MAX_MB=$(( floor(4 * MINVDEVSIZE * 0.65 / 1024 / 1024) )) +typeset VDEV_MIN_MB=$(( floor(4 * MINVDEVSIZE * 0.40 / 1024 / 1024) )) for type in "" "mirror" "raidz2" "draid"; do @@ -100,7 +106,9 @@ for type in "" "mirror" "raidz2" "draid"; do # Remove the file, issue trim, verify the vdevs are now sparse. 
log_must rm /$TESTPOOL/file + sync_pool $TESTPOOL log_must timeout 120 zpool trim -w $TESTPOOL + sync_all_pools true verify_vdevs "-le" "$VDEV_MIN_MB" $VDEVS log_must zpool destroy $TESTPOOL diff --git a/sys/dev/acpi_support/acpi_panasonic.c b/sys/dev/acpi_support/acpi_panasonic.c index 8fea47ee45e8..5f54ca07c5a6 100644 --- a/sys/dev/acpi_support/acpi_panasonic.c +++ b/sys/dev/acpi_support/acpi_panasonic.c @@ -233,7 +233,9 @@ acpi_panasonic_shutdown(device_t dev) /* Mute the main audio during reboot to prevent static burst to speaker. */ sc = device_get_softc(dev); mute = 1; + ACPI_SERIAL_BEGIN(panasonic); hkey_sound_mute(sc->handle, HKEY_SET, &mute); + ACPI_SERIAL_END(panasonic); return (0); } diff --git a/sys/dev/asmc/asmc.c b/sys/dev/asmc/asmc.c index 5fe89d85be6d..8cd7842d03fd 100644 --- a/sys/dev/asmc/asmc.c +++ b/sys/dev/asmc/asmc.c @@ -123,7 +123,7 @@ static int asmc_mbp_sysctl_light_left(SYSCTL_HANDLER_ARGS); static int asmc_mbp_sysctl_light_right(SYSCTL_HANDLER_ARGS); static int asmc_mbp_sysctl_light_control(SYSCTL_HANDLER_ARGS); static int asmc_mbp_sysctl_light_left_10byte(SYSCTL_HANDLER_ARGS); -static int asmc_wol_sysctl(SYSCTL_HANDLER_ARGS); +static int asmc_aupo_sysctl(SYSCTL_HANDLER_ARGS); static int asmc_key_getinfo(device_t, const char *, uint8_t *, char *); @@ -793,14 +793,14 @@ asmc_init(device_t dev) device_printf(dev, "SMC revision: %x.%x%x%x\n", buf[0], buf[1], buf[2], ntohs(*(uint16_t *)buf + 4)); - /* Wake-on-LAN convenience sysctl */ + /* Auto power-on after AC power loss (AUPO). 
*/ if (asmc_key_read(dev, ASMC_KEY_AUPO, buf, 1) == 0) { SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), - OID_AUTO, "wol", + OID_AUTO, "auto_poweron", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, - dev, 0, asmc_wol_sysctl, "I", - "Wake-on-LAN enable (0=off, 1=on)"); + dev, 0, asmc_aupo_sysctl, "I", + "Auto power-on after AC power loss (0=off, 1=on)"); } sc->sc_nfan = asmc_fan_count(dev); @@ -1222,7 +1222,7 @@ out: /* * Raw SMC key access sysctls - enables reading/writing any SMC key by name * Usage: - * sysctl dev.asmc.0.raw.key=AUPO # Set key, auto-detects length + * sysctl dev.asmc.0.raw.key=TC0P # Set key, auto-detects length * sysctl dev.asmc.0.raw.value # Read current value (hex bytes) * sysctl dev.asmc.0.raw.value=01 # Write new value */ @@ -2338,18 +2338,17 @@ asmc_mbp_sysctl_light_left_10byte(SYSCTL_HANDLER_ARGS) } /* - * Wake-on-LAN convenience sysctl. - * Reading returns 1 if WoL is enabled, 0 if disabled. - * Writing 1 enables WoL, 0 disables it. + * Auto power-on after AC power loss (AUPO key). + * When non-zero the machine boots automatically when AC is restored + * after an unclean power loss. Useful for always-on servers / home labs. */ static int -asmc_wol_sysctl(SYSCTL_HANDLER_ARGS) +asmc_aupo_sysctl(SYSCTL_HANDLER_ARGS) { device_t dev = (device_t)arg1; uint8_t aupo; int val, error; - /* Read current AUPO value */ if (asmc_key_read(dev, ASMC_KEY_AUPO, &aupo, 1) != 0) return (EIO); @@ -2358,10 +2357,7 @@ asmc_wol_sysctl(SYSCTL_HANDLER_ARGS) if (error != 0 || req->newptr == NULL) return (error); - /* Clamp to 0 or 1 */ aupo = (val != 0) ? 1 : 0; - - /* Write AUPO */ if (asmc_key_write(dev, ASMC_KEY_AUPO, &aupo, 1) != 0) return (EIO); diff --git a/sys/dev/asmc/asmcvar.h b/sys/dev/asmc/asmcvar.h index ae027ba33ae9..6388fc78fb69 100644 --- a/sys/dev/asmc/asmcvar.h +++ b/sys/dev/asmc/asmcvar.h @@ -175,7 +175,9 @@ struct asmc_softc { #define ASMC_KEY_CLAMSHELL "MSLD" /* RO; 1 byte */ /* - * Auto power on / Wake-on-LAN. 
+ * Auto power-on after AC power loss (AUPO). + * When set, the machine boots automatically when AC power is restored + * after an unclean power loss. This is NOT Wake-on-LAN. */ #define ASMC_KEY_AUPO "AUPO" /* RW; 1 byte */ diff --git a/sys/dev/dpaa/bman.c b/sys/dev/dpaa/bman.c index c275d2335eb8..3d1052f9ed9f 100644 --- a/sys/dev/dpaa/bman.c +++ b/sys/dev/dpaa/bman.c @@ -1,27 +1,7 @@ -/*- - * Copyright (c) 2011-2012 Semihalf. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. +/* + * Copyright (c) 2026 Justin Hibbits * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
+ * SPDX-License-Identifier: BSD-2-Clause
 */
 
 #include <sys/param.h>
@@ -29,6 +9,7 @@
 #include <sys/kernel.h>
 #include <sys/bus.h>
 #include <sys/lock.h>
+#include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
@@ -36,63 +17,161 @@
 #include <sys/rman.h>
 #include <sys/sched.h>
 
+#include <machine/bus.h>
 #include <machine/tlb.h>
 
 #include "bman.h"
+#include "dpaa_common.h"
+#include "bman_var.h"
+
+#define BMAN_POOL_SWDET(n) (0x000 + 4 * (n))
+#define BMAN_POOL_HWDET(n) (0x100 + 4 * (n))
+#define BMAN_POOL_SWDXT(n) (0x200 + 4 * (n))
+#define BMAN_POOL_HWDXT(n) (0x300 + 4 * (n))
+#define FBPR_FP_LWIT 0x804
+#define BMAN_IP_REV_1 0x0bf8
+#define IP_MAJ_S 8
+#define IP_MAJ_M 0x0000ff00
+#define IP_MIN_M 0x000000ff
+#define BMAN_IP_REV_2 0x0bfc
+#define BMAN_FBPR_BARE 0x0c00
+#define BMAN_FBPR_BAR 0x0c04
+#define BMAN_FBPR_AR 0x0c10
+#define BMAN_LIODNR 0x0d08
+
+#define BMAN_POOL_CONTENT(n) (0x0600 + 4 * (n))
+#define BMAN_ECSR 0x0a00
+#define BMAN_ECIR 0x0a04
+#define ECIR_PORTAL(r) (((r) >> 24) & 0x0f)
+#define ECIR_VERB(r) (((r) >> 16) & 0x07)
+#define ECIR_R 0x00080000
+#define ECIR_POOL(r) ((r) & 0x3f)
+#define BMAN_CECR 0x0a34 /* Corruption Error Capture Register */
+#define BMAN_CEAR 0x0a38 /* Corruption Error Address Register */
+#define BMAN_AECR 0x0a34 /* Access Error Capture Register */
+#define BMAN_AEAR 0x0a38 /* Access Error Address Register */
+#define BMAN_ERR_ISR 0x0e00
+#define BMAN_ERR_IER 0x0e04
+#define BMAN_ERR_ISDR 0x0e08
+#define ERR_EMAI 0x00000040
+#define ERR_EMCI 0x00000020
+#define ERR_IVCI 0x00000010
+#define ERR_FLWI 0x00000008
+#define ERR_MBEI 0x00000004
+#define ERR_SBEI 0x00000002
+#define ERR_BSCN 0x00000001
+
+static MALLOC_DEFINE(M_BMAN, "bman", "DPAA Buffer Manager structures");
 
 static struct bman_softc *bman_sc;
 
-extern t_Handle bman_portal_setup(struct bman_softc *bsc);
+static void
+bman_isr(void *arg)
+{
+ struct bman_softc *sc = arg;
+ uint32_t ier, isr, isr_bit;
+ uint32_t reg;
+
+ ier = 
bus_read_4(sc->sc_rres, BMAN_ERR_IER);
+ isr = bus_read_4(sc->sc_rres, BMAN_ERR_ISR);
+
+ isr_bit = (isr & ier);
+ if (isr_bit == 0)
+ goto end;
+
+ if (isr_bit & ERR_EMAI) {
+ device_printf(sc->sc_dev, "External memory access error\n");
+ reg = bus_read_4(sc->sc_rres, BMAN_AECR);
+ if (reg <= 63)
+ device_printf(sc->sc_dev, " pool %d\n", reg);
+ else
+ device_printf(sc->sc_dev, " FBPR free list\n");
+ reg = bus_read_4(sc->sc_rres, BMAN_AEAR);
+ device_printf(sc->sc_dev, " offset: %#x\n", reg);
+ }
+
+ if (isr_bit & ERR_EMCI) {
+ device_printf(sc->sc_dev, "External memory corruption error\n");
+ reg = bus_read_4(sc->sc_rres, BMAN_CECR);
+ if (reg <= 63)
+ device_printf(sc->sc_dev, " pool %d\n", reg);
+ else
+ device_printf(sc->sc_dev, " FBPR free list\n");
+ reg = bus_read_4(sc->sc_rres, BMAN_CEAR);
+ device_printf(sc->sc_dev, " offset: %#x\n", reg);
+ }
+ if (isr_bit & ERR_IVCI) {
+ reg = bus_read_4(sc->sc_rres, BMAN_ECIR);
+ device_printf(sc->sc_dev, "Invalid verb command\n");
+ device_printf(sc->sc_dev, "Portal: %d, ring: %s\n",
+ ECIR_PORTAL(reg), (reg & ECIR_R) ? "RCR" : "Command");
+ device_printf(sc->sc_dev, "verb: 0x%02x, pool: %d\n",
+ ECIR_VERB(reg), ECIR_POOL(reg));
+ }
+ if (isr_bit & (ERR_MBEI | ERR_SBEI)) {
+ if (isr_bit & ERR_MBEI)
+ device_printf(sc->sc_dev, "Multi-bit ECC error\n");
+ if (isr_bit & ERR_SBEI)
+ device_printf(sc->sc_dev, "Single-bit ECC error\n");
+ /* TODO: Add more error details for ECC errors. */
+ }
+
+end:
+ bus_write_4(sc->sc_rres, BMAN_ERR_ISR, isr);
+}
 
 static void
-bman_exception(t_Handle h_App, e_BmExceptions exception)
+bman_get_version(struct bman_softc *sc)
 {
- struct bman_softc *sc;
- const char *message;
+ uint32_t reg = bus_read_4(sc->sc_rres, BMAN_IP_REV_1);
 
- sc = h_App;
+ sc->sc_major = (reg & IP_MAJ_M) >> IP_MAJ_S;
+ sc->sc_minor = (reg & IP_MIN_M);
+}
 
- switch (exception) {
- case e_BM_EX_INVALID_COMMAND:
- message = "Invalid Command Verb";
- break;
- case e_BM_EX_FBPR_THRESHOLD:
- message = "FBPR pool exhaused. 
Consider increasing "
- "BMAN_MAX_BUFFERS";
- break;
- case e_BM_EX_SINGLE_ECC:
- message = "Single bit ECC error";
- break;
- case e_BM_EX_MULTI_ECC:
- message = "Multi bit ECC error";
- break;
- default:
- message = "Unknown error";
+static int
+bman_set_memory(struct bman_softc *sc, vm_paddr_t pa, vm_size_t size)
+{
+ vm_paddr_t bar_pa;
+ if ((pa & (size - 1)) != 0 || (size & (size - 1)) != 0) {
+ device_printf(sc->sc_dev,
+ "invalid memory configuration: pa: %#jx, size: %#jx\n",
+ (uintmax_t)pa, (uintmax_t)size);
+ return (ENXIO);
 }
+ bar_pa = bus_read_4(sc->sc_rres, BMAN_FBPR_BARE);
+ bar_pa <<= 32;
+ bar_pa |= bus_read_4(sc->sc_rres, BMAN_FBPR_BAR);
+ if (bar_pa != 0 && bar_pa != pa) {
+ device_printf(sc->sc_dev,
+ "attempted to reinitialize BMan with different BAR\n");
+ return (ENOMEM);
+ } else if (bar_pa == pa)
+ return (0);
+
+ bus_write_4(sc->sc_rres, BMAN_FBPR_BARE, pa >> 32);
+ bus_write_4(sc->sc_rres, BMAN_FBPR_BAR, pa & 0xffffffff);
+ bus_write_4(sc->sc_rres, BMAN_FBPR_AR, ilog2(size) - 1);
 
- device_printf(sc->sc_dev, "BMAN Exception: %s.\n", message);
+ return (0);
 }
 
 int
 bman_attach(device_t dev)
 {
 struct bman_softc *sc;
- t_BmRevisionInfo rev;
- t_Error error;
- t_BmParam bp;
+ vm_paddr_t bp_pa;
+ size_t bp_size;
+ int bp_count;
 
 sc = device_get_softc(dev);
 sc->sc_dev = dev;
 bman_sc = sc;
 
- /* Check if MallocSmart allocator is ready */
- if (XX_MallocSmartInit() != E_OK)
- return (ENXIO);
-
 /* Allocate resources */
 sc->sc_rrid = 0;
- sc->sc_rres = bus_alloc_resource_anywhere(dev, SYS_RES_MEMORY,
- &sc->sc_rrid, BMAN_CCSR_SIZE, RF_ACTIVE);
+ sc->sc_rres = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+ &sc->sc_rrid, RF_ACTIVE);
 if (sc->sc_rres == NULL)
 return (ENXIO);
 
@@ -102,36 +181,33 @@ bman_attach(device_t dev)
 if (sc->sc_ires == NULL)
 goto err;
 
- /* Initialize BMAN */
- memset(&bp, 0, sizeof(bp));
- bp.guestId = NCSW_MASTER_ID;
- bp.baseAddress = rman_get_bushandle(sc->sc_rres);
- bp.totalNumOfBuffers = BMAN_MAX_BUFFERS;
- bp.f_Exception = 
bman_exception; - bp.h_App = sc; - bp.errIrq = (uintptr_t)sc->sc_ires; - bp.partBpidBase = 0; - bp.partNumOfPools = BM_MAX_NUM_OF_POOLS; + bman_get_version(sc); + if (sc->sc_major == 2 && sc->sc_minor == 0) + bp_count = BMAN_MAX_POOLS_1023; + else + bp_count = BMAN_MAX_POOLS; - sc->sc_bh = BM_Config(&bp); - if (sc->sc_bh == NULL) - goto err; + /* TODO: LIODN */ + bus_write_4(sc->sc_rres, BMAN_LIODNR, 0); - /* Warn if there is less than 5% free FPBR's in pool */ - error = BM_ConfigFbprThreshold(sc->sc_bh, (BMAN_MAX_BUFFERS / 8) / 20); - if (error != E_OK) - goto err; + sc->sc_vmem = vmem_create("BMan Pools", 0, bp_count, 1, 0, M_WAITOK); - error = BM_Init(sc->sc_bh); - if (error != E_OK) - goto err; + /* Pool is reserved memory, so no need to track it ourselves. */ + dpaa_map_private_memory(dev, 0, "fsl,bman-fbpr", &bp_pa, &bp_size); + bman_set_memory(sc, bp_pa, bp_size); - error = BM_GetRevision(sc->sc_bh, &rev); - if (error != E_OK) - goto err; + /* Warn if FBPR drops below 5% total. */ + bus_write_4(sc->sc_rres, FBPR_FP_LWIT, (bp_size / 8) / 20); + + /* Clear interrupt status, and enable all interrupts. */ + bus_write_4(sc->sc_rres, BMAN_ERR_ISR, 0xffffffff); + bus_write_4(sc->sc_rres, BMAN_ERR_IER, 0xffffffff); + bus_write_4(sc->sc_rres, BMAN_ERR_ISDR, 0); - device_printf(dev, "Hardware version: %d.%d.\n", - rev.majorRev, rev.minorRev); + /* Enable the IRQ line now. 
*/ + if (bus_setup_intr(dev, sc->sc_ires, INTR_TYPE_NET, NULL, bman_isr, + sc, &sc->sc_icookie) != 0) + goto err; return (0); @@ -147,9 +223,10 @@ bman_detach(device_t dev) sc = device_get_softc(dev); - if (sc->sc_bh != NULL) - BM_Free(sc->sc_bh); - + if (sc->sc_vmem != NULL) + vmem_destroy(sc->sc_vmem); + if (sc->sc_icookie != NULL) + bus_teardown_intr(dev, sc->sc_ires, sc->sc_icookie); if (sc->sc_ires != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irid, sc->sc_ires); @@ -186,179 +263,105 @@ bman_shutdown(device_t dev) * BMAN API */ -t_Handle -bman_pool_create(uint8_t *bpid, uint16_t bufferSize, uint16_t maxBuffers, - uint16_t minBuffers, uint16_t allocBuffers, t_GetBufFunction *f_GetBuf, - t_PutBufFunction *f_PutBuf, uint32_t dep_sw_entry, uint32_t dep_sw_exit, - uint32_t dep_hw_entry, uint32_t dep_hw_exit, - t_BmDepletionCallback *f_Depletion, t_Handle h_BufferPool, - t_PhysToVirt *f_PhysToVirt, t_VirtToPhys *f_VirtToPhys) +struct bman_pool * +bman_new_pool(void) { - uint32_t thresholds[MAX_DEPLETION_THRESHOLDS]; struct bman_softc *sc; - t_Handle pool, portal; - t_BmPoolParam bpp; - int error; + vmem_addr_t bpid; + struct bman_pool *pool; sc = bman_sc; pool = NULL; - sched_pin(); - - portal = bman_portal_setup(sc); - if (portal == NULL) - goto err; - - memset(&bpp, 0, sizeof(bpp)); - bpp.h_Bm = sc->sc_bh; - bpp.h_BmPortal = portal; - bpp.h_App = h_BufferPool; - bpp.numOfBuffers = allocBuffers; - - bpp.bufferPoolInfo.h_BufferPool = h_BufferPool; - bpp.bufferPoolInfo.f_GetBuf = f_GetBuf; - bpp.bufferPoolInfo.f_PutBuf = f_PutBuf; - bpp.bufferPoolInfo.f_PhysToVirt = f_PhysToVirt; - bpp.bufferPoolInfo.f_VirtToPhys = f_VirtToPhys; - bpp.bufferPoolInfo.bufferSize = bufferSize; - - pool = BM_POOL_Config(&bpp); - if (pool == NULL) - goto err; - - /* - * Buffer context must be disabled on FreeBSD - * as it could cause memory corruption. 
- */ - BM_POOL_ConfigBuffContextMode(pool, 0); - - if (minBuffers != 0 || maxBuffers != 0) { - error = BM_POOL_ConfigStockpile(pool, maxBuffers, minBuffers); - if (error != E_OK) - goto err; - } - - if (f_Depletion != NULL) { - thresholds[BM_POOL_DEP_THRESH_SW_ENTRY] = dep_sw_entry; - thresholds[BM_POOL_DEP_THRESH_SW_EXIT] = dep_sw_exit; - thresholds[BM_POOL_DEP_THRESH_HW_ENTRY] = dep_hw_entry; - thresholds[BM_POOL_DEP_THRESH_HW_EXIT] = dep_hw_exit; - error = BM_POOL_ConfigDepletion(pool, f_Depletion, thresholds); - if (error != E_OK) - goto err; - } - - error = BM_POOL_Init(pool); - if (error != E_OK) - goto err; + if (vmem_alloc(sc->sc_vmem, 1, M_FIRSTFIT | M_NOWAIT, &bpid) != 0) + return (NULL); - *bpid = BM_POOL_GetId(pool); - sc->sc_bpool_cpu[*bpid] = PCPU_GET(cpuid); + pool = malloc(sizeof(*pool), M_BMAN, M_WAITOK | M_ZERO); - sched_unpin(); + pool->bpid = bpid; return (pool); - -err: - if (pool != NULL) - BM_POOL_Free(pool); - - sched_unpin(); - - return (NULL); } -int -bman_pool_destroy(t_Handle pool) +struct bman_pool * +bman_pool_create(uint8_t *bpid, uint16_t buffer_size, uint16_t max_buffers, + uint32_t dep_sw_entry, uint32_t dep_sw_exit, + uint32_t dep_hw_entry, uint32_t dep_hw_exit, + bm_depletion_handler dep_cb, void *arg) { struct bman_softc *sc; + struct bman_pool *bp; sc = bman_sc; - thread_lock(curthread); - sched_bind(curthread, sc->sc_bpool_cpu[BM_POOL_GetId(pool)]); - thread_unlock(curthread); + bp = bman_new_pool(); + if (bpid != NULL) + *bpid = bp->bpid; + + if (dep_cb) { + bp->dep_cb = dep_cb; + bus_write_4(sc->sc_rres, BMAN_POOL_SWDET(bp->bpid), + dep_sw_entry); + bus_write_4(sc->sc_rres, BMAN_POOL_SWDXT(bp->bpid), + dep_sw_exit); + bus_write_4(sc->sc_rres, BMAN_POOL_HWDET(bp->bpid), + dep_hw_entry); + bus_write_4(sc->sc_rres, BMAN_POOL_HWDXT(bp->bpid), + dep_hw_exit); + bp->arg = arg; + bman_portal_enable_scn(DPCPU_GET(bman_affine_portal), bp); + } - BM_POOL_Free(pool); + return (bp); +} - thread_lock(curthread); - 
sched_unbind(curthread); - thread_unlock(curthread); +int +bman_pool_destroy(struct bman_pool *pool) +{ + /* Need to error, or print a warning, if the pool isn't empty */ + if (bman_count(pool) != 0) + return (EBUSY); + vmem_free(bman_sc->sc_vmem, pool->bpid, 1); + free(pool, M_BMAN); return (0); } int -bman_pool_fill(t_Handle pool, uint16_t nbufs) +bman_put_buffers(struct bman_pool *pool, struct bman_buffer *buffers, int count) { - struct bman_softc *sc; - t_Handle portal; + struct bman_portal_softc *portal; int error; - sc = bman_sc; - sched_pin(); + critical_enter(); - portal = bman_portal_setup(sc); + portal = DPCPU_GET(bman_affine_portal); if (portal == NULL) { - sched_unpin(); + critical_exit(); return (EIO); } - error = BM_POOL_FillBufs(pool, portal, nbufs); - - sched_unpin(); - - return ((error == E_OK) ? 0 : EIO); -} - -void * -bman_get_buffer(t_Handle pool) -{ - struct bman_softc *sc; - t_Handle portal; - void *buffer; - - sc = bman_sc; - sched_pin(); - - portal = bman_portal_setup(sc); - if (portal == NULL) { - sched_unpin(); - return (NULL); + while (count > 0) { + int c = min(count, 8); + error = bman_release(pool, buffers, c); + buffers += c; + count -= c; } - buffer = BM_POOL_GetBuf(pool, portal); + critical_exit(); - sched_unpin(); - - return (buffer); + return (error); } -int -bman_put_buffer(t_Handle pool, void *buffer) +uint32_t +bman_get_bpid(struct bman_pool *pool) { - struct bman_softc *sc; - t_Handle portal; - int error; - - sc = bman_sc; - sched_pin(); - - portal = bman_portal_setup(sc); - if (portal == NULL) { - sched_unpin(); - return (EIO); - } - - error = BM_POOL_PutBuf(pool, portal, buffer); - - sched_unpin(); - - return ((error == E_OK) ? 
0 : EIO); + return (pool->bpid); } uint32_t -bman_count(t_Handle pool) +bman_count(struct bman_pool *pool) { - return (BM_POOL_GetCounter(pool, e_BM_POOL_COUNTERS_CONTENT)); + return (bus_read_4(bman_sc->sc_rres, BMAN_POOL_CONTENT(pool->bpid))); } + diff --git a/sys/dev/dpaa/bman.h b/sys/dev/dpaa/bman.h index 01c09489890c..118c6f5938bc 100644 --- a/sys/dev/dpaa/bman.h +++ b/sys/dev/dpaa/bman.h @@ -1,3 +1,9 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + /*- * Copyright (c) 2011-2012 Semihalf. * All rights reserved. @@ -27,29 +33,16 @@ #ifndef _BMAN_H #define _BMAN_H +#include <sys/vmem.h> #include <machine/vmparam.h> -#include <contrib/ncsw/inc/Peripherals/bm_ext.h> - /* * BMAN Configuration */ -/* Maximum number of buffers in all BMAN pools */ -#define BMAN_MAX_BUFFERS 4096 - /* * Portal definitions */ -#define BMAN_CE_PA(base) (base) -#define BMAN_CI_PA(base) ((base) + 0x100000) - -#define BMAN_PORTAL_CE_PA(base, n) \ - (BMAN_CE_PA(base) + ((n) * BMAN_PORTAL_CE_SIZE)) -#define BMAN_PORTAL_CI_PA(base, n) \ - (BMAN_CI_PA(base) + ((n) * BMAN_PORTAL_CI_SIZE)) - -#define BMAN_CCSR_SIZE 0x1000 struct bman_softc { device_t sc_dev; /* device handle */ @@ -57,129 +50,42 @@ struct bman_softc { struct resource *sc_rres; /* register resource */ int sc_irid; /* interrupt rid */ struct resource *sc_ires; /* interrupt resource */ + void *sc_icookie; + vmem_t *sc_vmem; /* resource pool */ + int sc_major; + int sc_minor; +}; - bool sc_regs_mapped[MAXCPU]; /* register mapping status */ +struct bman_buffer { + uint16_t bpid; + uint16_t buf_hi; + uint32_t buf_lo; +} __aligned(8); - t_Handle sc_bh; /* BMAN handle */ - t_Handle sc_bph[MAXCPU]; /* BMAN portal handles */ - vm_paddr_t sc_bp_pa; /* BMAN portals PA */ - unsigned int sc_bpool_cpu[BM_MAX_NUM_OF_POOLS]; -}; +struct bman_pool; +struct bman_buffer; -/* - * External API - */ +typedef void (*bm_depletion_handler)(void *, bool); /* - * @brief Function to create BMAN pool. 
- * - * @param bpid The pointer to variable where Buffer Pool ID will be - * stored. - * - * @param bufferSize The size of buffers in newly created pool. - * - * @param maxBuffers The maximum number of buffers in software stockpile. - * Set to 0 if software stockpile should not be created. - * - * @param minBuffers The minimum number of buffers in software stockpile. - * Set to 0 if software stockpile should not be created. - * - * @param allocBuffers The number of buffers to preallocate during pool - * creation. - * - * @param f_GetBuf The buffer allocating function. Called only by - * bman_pool_create() and bman_pool_fill(). - * - * @param f_PutBuf The buffer freeing function. Called only by - * bman_pool_destroy(). - * - * @param dep_sw_entry The software portal depletion entry threshold. - * Set to 0 if depletion should not be signaled on - * software portal. - * - * @param dep_sw_exit The software portal depletion exit threshold. - * Set to 0 if depletion should not be signaled on - * software portal. - * - * @param dep_hw_entry The hardware portal depletion entry threshold. - * Set to 0 if depletion should not be signaled on - * hardware portal. - * - * @param dep_hw_exit The hardware portal depletion exit threshold. - * Set to 0 if depletion should not be signaled on - * hardware portal. - * - * @param f_Depletion The software portal depletion notification function. - * Set to NULL if depletion notification is not used. - * - * @param h_BufferPool The user provided buffer pool context passed to - * f_GetBuf, f_PutBuf and f_Depletion functions. - * - * @param f_PhysToVirt The PA to VA translation function. Set to NULL if - * default one should be used. - * - * @param f_VirtToPhys The VA to PA translation function. Set to NULL if - * default one should be used. - * - * @returns Handle to newly created BMAN pool or NULL on error. - * - * @cautions If pool uses software stockpile, all accesses to given - * pool must be protected by lock. 
Even if only hardware - * portal depletion notification is used, the caller must - * provide valid @p f_Depletion function. + * External API */ -t_Handle bman_pool_create(uint8_t *bpid, uint16_t bufferSize, - uint16_t maxBuffers, uint16_t minBuffers, uint16_t allocBuffers, - t_GetBufFunction *f_GetBuf, t_PutBufFunction *f_PutBuf, - uint32_t dep_sw_entry, uint32_t dep_sw_exit, uint32_t dep_hw_entry, - uint32_t dep_hw_exit, t_BmDepletionCallback *f_Depletion, - t_Handle h_BufferPool, t_PhysToVirt *f_PhysToVirt, - t_VirtToPhys *f_VirtToPhys); -/* - * @brief Fill pool with buffers. - * - * The bman_pool_fill() function fills the BMAN pool with buffers. The buffers - * are allocated through f_GetBuf function (see bman_pool_create() description). - * - * @param pool The BMAN pool handle. - * @param nbufs The number of buffers to allocate. To maximize - * performance this value should be multiple of 8. - * - * @returns Zero on success or error code on failure. - */ -int bman_pool_fill(t_Handle pool, uint16_t nbufs); +struct bman_pool *bman_new_pool(void); +struct bman_pool *bman_pool_create(uint8_t *bpid, uint16_t buffer_size, + uint16_t max_buffers, uint32_t dep_sw_entry, uint32_t dep_sw_exit, uint32_t + dep_hw_entry, uint32_t dep_hw_exit, bm_depletion_handler dep_cb, void *arg); /* * @brief Destroy pool. * - * The bman_pool_destroy() function destroys the BMAN pool. Buffers for pool - * are free through f_PutBuf function (see bman_pool_create() description). - * - * @param pool The BMAN pool handle. - * - * @returns Zero on success or error code on failure. - */ -int bman_pool_destroy(t_Handle pool); - -/* - * @brief Get a buffer from BMAN pool. + * The bman_pool_destroy() function destroys the BMAN pool. + * The buffer pool must be empty. * * @param pool The BMAN pool handle. - * - * @returns Pointer to the buffer or NULL if pool is empty. + * @return 0 on success, EBUSY if the pool is not empty. 
*/ -void *bman_get_buffer(t_Handle pool); - -/* - * @brief Put a buffer to BMAN pool. - * - * @param pool The BMAN pool handle. - * @param buffer The pointer to buffer. - * - * @returns Zero on success or error code on failure. - */ -int bman_put_buffer(t_Handle pool, void *buffer); +int bman_pool_destroy(struct bman_pool *pool); /* * @brief Count free buffers in given pool. @@ -188,7 +94,25 @@ int bman_put_buffer(t_Handle pool, void *buffer); * * @returns Number of free buffers in pool. */ -uint32_t bman_count(t_Handle pool); +uint32_t bman_count(struct bman_pool *pool); + +int bman_put_buffers(struct bman_pool *, struct bman_buffer *, int); +static inline int +bman_put_buffer(struct bman_pool *p, vm_paddr_t buf, int bpid) +{ + struct bman_buffer b = { + .bpid = bpid, + .buf_hi = ((uintptr_t)buf) >> 32, + .buf_lo = ((uintptr_t)buf) & 0xffffffff + }; + return (bman_put_buffers(p, &b, 1)); +} + +int bman_acquire(struct bman_pool *, struct bman_buffer *, uint8_t); + +int bman_create_affine_portal(device_t, vm_offset_t, vm_offset_t, int); +void bman_destroy_affine_portal(int); +uint32_t bman_get_bpid(struct bman_pool *); /* * Bus i/f diff --git a/sys/dev/dpaa/bman_fdt.c b/sys/dev/dpaa/bman_fdt.c index 330db7b89715..c77d58cf118d 100644 --- a/sys/dev/dpaa/bman_fdt.c +++ b/sys/dev/dpaa/bman_fdt.c @@ -40,6 +40,7 @@ #include <dev/ofw/ofw_subr.h> #include "bman.h" +#include "bman_var.h" #include "portals.h" #define FBMAN_DEVSTR "Freescale Buffer Manager" @@ -59,12 +60,7 @@ static device_method_t bman_methods[] = { DEVMETHOD_END }; -static driver_t bman_driver = { - "bman", - bman_methods, - sizeof(struct bman_softc), -}; - +DEFINE_CLASS_0(bman, bman_driver, bman_methods, sizeof(struct bman_softc)); EARLY_DRIVER_MODULE(bman, simplebus, bman_driver, 0, 0, BUS_PASS_SUPPORTDEV); static int @@ -82,144 +78,46 @@ bman_fdt_probe(device_t dev) /* * BMAN Portals */ -#define BMAN_PORT_DEVSTR "Freescale Buffer Manager - Portals" +#define BMAN_PORT_DEVSTR "Freescale Buffer Manager - 
Portal" -static device_probe_t bman_portals_fdt_probe; -static device_attach_t bman_portals_fdt_attach; +static int portal_ncpus; +static device_probe_t bman_portal_fdt_probe; +static device_attach_t bman_portal_fdt_attach; -static device_method_t bm_portals_methods[] = { +static device_method_t bman_portal_methods[] = { /* Device interface */ - DEVMETHOD(device_probe, bman_portals_fdt_probe), - DEVMETHOD(device_attach, bman_portals_fdt_attach), - DEVMETHOD(device_detach, bman_portals_detach), + DEVMETHOD(device_probe, bman_portal_fdt_probe), + DEVMETHOD(device_attach, bman_portal_fdt_attach), + DEVMETHOD(device_detach, bman_portal_detach), DEVMETHOD_END }; -static driver_t bm_portals_driver = { - "bman-portals", - bm_portals_methods, - sizeof(struct dpaa_portals_softc), -}; - -EARLY_DRIVER_MODULE(bman_portals, ofwbus, bm_portals_driver, 0, 0, - BUS_PASS_BUS); - -static void -get_addr_props(phandle_t node, uint32_t *addrp, uint32_t *sizep) -{ - - *addrp = 2; - *sizep = 1; - OF_getencprop(node, "#address-cells", addrp, sizeof(*addrp)); - OF_getencprop(node, "#size-cells", sizep, sizeof(*sizep)); -} +DEFINE_CLASS_0(bman_portal, bman_portal_driver, bman_portal_methods, + sizeof(struct bman_portal_softc)); +EARLY_DRIVER_MODULE(bman_portal, simplebus, bman_portal_driver, 0, 0, + BUS_PASS_SUPPORTDEV + BUS_PASS_ORDER_MIDDLE); static int -bman_portals_fdt_probe(device_t dev) +bman_portal_fdt_probe(device_t dev) { - phandle_t node; - - if (ofw_bus_is_compatible(dev, "simple-bus")) { - node = ofw_bus_get_node(dev); - for (node = OF_child(node); node > 0; node = OF_peer(node)) { - if (ofw_bus_node_is_compatible(node, "fsl,bman-portal")) - break; - } - if (node <= 0) - return (ENXIO); - } else if (!ofw_bus_is_compatible(dev, "fsl,bman-portals")) + if (!ofw_bus_is_compatible(dev, "fsl,bman-portal")) return (ENXIO); device_set_desc(dev, BMAN_PORT_DEVSTR); - return (BUS_PROBE_DEFAULT); } static int -bman_portals_fdt_attach(device_t dev) +bman_portal_fdt_attach(device_t dev) { - 
struct dpaa_portals_softc *sc; - struct resource_list_entry *rle; - phandle_t node, child, cpu_node; - vm_paddr_t portal_pa; - vm_size_t portal_size; - uint32_t addr, size; - ihandle_t cpu; - int cpu_num, cpus, intr_rid; - struct dpaa_portals_devinfo di; - struct ofw_bus_devinfo ofw_di = {}; + int portal_cpu = portal_ncpus; - cpus = 0; - sc = device_get_softc(dev); - sc->sc_dev = dev; - - node = ofw_bus_get_node(dev); - get_addr_props(node, &addr, &size); - - /* Find portals tied to CPUs */ - for (child = OF_child(node); child != 0; child = OF_peer(child)) { - if (cpus >= mp_ncpus) - break; - if (!ofw_bus_node_is_compatible(child, "fsl,bman-portal")) { - continue; - } - /* Checkout related cpu */ - if (OF_getprop(child, "cpu-handle", (void *)&cpu, - sizeof(cpu)) > 0) { - cpu_node = OF_instance_to_package(cpu); - /* Acquire cpu number */ - if (OF_getencprop(cpu_node, "reg", &cpu_num, sizeof(cpu_num)) <= 0) { - device_printf(dev, "Could not retrieve CPU number.\n"); - return (ENXIO); - } - } else - cpu_num = cpus; - - cpus++; - - if (ofw_bus_gen_setup_devinfo(&ofw_di, child) != 0) { - device_printf(dev, "could not set up devinfo\n"); - continue; - } - - resource_list_init(&di.di_res); - if (ofw_bus_reg_to_rl(dev, child, addr, size, &di.di_res)) { - device_printf(dev, "%s: could not process 'reg' " - "property\n", ofw_di.obd_name); - ofw_bus_gen_destroy_devinfo(&ofw_di); - continue; - } - if (ofw_bus_intr_to_rl(dev, child, &di.di_res, &intr_rid)) { - device_printf(dev, "%s: could not process " - "'interrupts' property\n", ofw_di.obd_name); - resource_list_free(&di.di_res); - ofw_bus_gen_destroy_devinfo(&ofw_di); - continue; - } - di.di_intr_rid = intr_rid; - - ofw_reg_to_paddr(child, 0, &portal_pa, &portal_size, NULL); - rle = resource_list_find(&di.di_res, SYS_RES_MEMORY, 0); - - if (sc->sc_dp_pa == 0) - sc->sc_dp_pa = portal_pa - rle->start; - - portal_size = rle->end + 1; - rle = resource_list_find(&di.di_res, SYS_RES_MEMORY, 1); - portal_size = ulmax(rle->end + 1, 
portal_size); - sc->sc_dp_size = ulmax(sc->sc_dp_size, portal_size); - - if (dpaa_portal_alloc_res(dev, &di, cpu_num)) - goto err; - } + /* Don't attach to more portals than we have CPUs */ + if (mp_ncpus == portal_ncpus) + return (ENXIO); - ofw_bus_gen_destroy_devinfo(&ofw_di); + portal_ncpus++; - return (bman_portals_attach(dev)); -err: - resource_list_free(&di.di_res); - ofw_bus_gen_destroy_devinfo(&ofw_di); - bman_portals_detach(dev); - return (ENXIO); + return (bman_portal_attach(dev, portal_cpu)); } diff --git a/sys/dev/dpaa/bman_portals.c b/sys/dev/dpaa/bman_portals.c index 10c788410e1c..b0c8200c1880 100644 --- a/sys/dev/dpaa/bman_portals.c +++ b/sys/dev/dpaa/bman_portals.c @@ -1,27 +1,7 @@ -/*- - * Copyright (c) 2012 Semihalf. - * All rights reserved. +/* + * Copyright (c) 2026 Justin Hibbits * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * SPDX-License-Identifier: BSD-2-Clause */ #include "opt_platform.h" @@ -30,6 +10,8 @@ #include <sys/systm.h> #include <sys/kernel.h> #include <sys/bus.h> +#include <sys/cpuset.h> +#include <sys/interrupt.h> #include <sys/lock.h> #include <sys/module.h> #include <sys/mutex.h> @@ -46,132 +28,300 @@ #include <powerpc/mpc85xx/mpc85xx.h> #include "bman.h" +#include "bman_var.h" #include "portals.h" -t_Handle bman_portal_setup(struct bman_softc *); +#define BCSP_CFG 0x0100 +#define CFG_RPM_M 0x00000003 +#define CFG_RPM_PI 0x00000000 +#define CFG_RPM_PE 0x00000001 +#define CFG_RPM_VBM 0x00000002 +#define BCSP_SCN0 0x0200 +#define BCSP_SCN1 0x0204 +#define BCSP_ISR 0x0e00 +#define BCSP_IER 0x0e04 +#define BCSP_ISDR 0x0e08 +#define INTR_RCDI 0x00000004 +#define INTR_RCRI 0x00000002 +#define INTR_BSCN 0x00000001 + +#define BMAN_CE_CR 0x0000 +#define BMAN_CE_RR0 0x0100 +#define BMAN_CE_RR1 0x0140 +#define BMAN_CE_RR(n) (BMAN_CE_RR0 + 0x40 * (n)) +#define BMAN_CE_RCR 0x1000 +#define BCSP_RCR_PI_CENA 0x3000 +#define BCSP_RCR_CI_CENA 0x3100 +#define BCSP_RCR_PI_CINH 0x000 +#define BCSP_RCR_CI_CINH 0x004 + +#define BMAN_MC_VERB_VBIT 0x80 +#define BMAN_MC_VERB_ACQUIRE 0x10 +#define BMAN_MC_VERB_QUERY 0x40 +#define BMAN_RCR_VERB_BPID0 0x20 +#define BMAN_RCR_VERB_BPID_BUF 0x30 + +struct bman_mc_command { + uint8_t verb; + uint8_t cd; + uint8_t rsvd[62]; +}; + +union bman_mc_result { + struct { + uint8_t verb; + uint8_t cd; + uint8_t rsvd[62]; + }; + struct 
{ + uint64_t rsvd_q1[5]; + uint64_t bp_as; + uint64_t rsvd_q2; + uint64_t bp_ds; + }; + struct bman_buffer bufs[8]; +}; + +struct bman_rcr_entry { + union { + struct { + uint8_t verb; + uint8_t bpid; + uint8_t rsvd[62]; + }; + struct bman_buffer bufs[8]; + }; +}; + +static void bman_portal_isr(void *arg); + +static union bman_mc_result *bman_mc_send(struct bman_portal_softc *p, + uint8_t verb, uint8_t cd); -struct dpaa_portals_softc *bp_sc; +DPCPU_DEFINE(struct bman_portal_softc *, bman_affine_portal); + +DPAA_RING(bman_rcr, 8, BCSP_RCR_PI_CENA, BCSP_RCR_CI_CENA, + BCSP_RCR_PI_CINH, BCSP_RCR_CI_CINH); + +static uint32_t +bm_ci_read(struct bman_portal_softc *sc, bus_size_t off) +{ + return (bus_read_4(sc->sc_base.sc_mres[1], off)); +} + +static void +bm_ci_write(struct bman_portal_softc *sc, bus_size_t off, uint32_t val) +{ + bus_write_4(sc->sc_base.sc_mres[1], off, val); +} int -bman_portals_attach(device_t dev) +bman_portal_attach(device_t dev, int cpu) { - struct dpaa_portals_softc *sc; + struct bman_portal_softc *sc = device_get_softc(dev); - sc = bp_sc = device_get_softc(dev); - - /* Map bman portal to physical address space */ - if (law_enable(OCP85XX_TGTIF_BMAN, sc->sc_dp_pa, sc->sc_dp_size)) { - bman_portals_detach(dev); - return (ENXIO); - } - /* Set portal properties for XX_VirtToPhys() */ - XX_PortalSetInfo(dev); + sc->sc_base.sc_cpu = cpu; + dpaa_portal_alloc_res(dev, cpu); + + bm_ci_write(sc, BCSP_ISDR, 0); + bm_ci_write(sc, BCSP_IER, INTR_RCRI | INTR_BSCN); + bus_setup_intr(dev, sc->sc_base.sc_ires, INTR_TYPE_NET | INTR_MPSAFE, + NULL, bman_portal_isr, sc, &sc->sc_base.sc_intr_cookie); + bus_bind_intr(dev, sc->sc_base.sc_ires, cpu); + + /* Select valid-bit mode for rings */ + bus_write_4(sc->sc_base.sc_mres[1], BCSP_CFG, CFG_RPM_VBM); + /* Disable pool depletion notifications. 
*/ + bm_ci_write(sc, BCSP_SCN0, 0); + bm_ci_write(sc, BCSP_SCN1, 0); + + DPCPU_ID_SET(cpu, bman_affine_portal, sc); + + sc->sc_rcr.ring = + (struct bman_rcr_entry *)(sc->sc_base.sc_ce_va + BMAN_CE_RCR); + bman_rcr_ring_init(&sc->sc_rcr, &sc->sc_base); + /* Starting MC polarity is always 1 */ + sc->mc.polarity = BMAN_MC_VERB_VBIT; - bus_attach_children(dev); return (0); } int -bman_portals_detach(device_t dev) +bman_portal_detach(device_t dev) { - struct dpaa_portals_softc *sc; + struct bman_portal_softc *sc; int i; - bp_sc = NULL; sc = device_get_softc(dev); - for (i = 0; i < ARRAY_SIZE(sc->sc_dp); i++) { - if (sc->sc_dp[i].dp_ph != NULL) { - thread_lock(curthread); - sched_bind(curthread, i); - thread_unlock(curthread); - BM_PORTAL_Free(sc->sc_dp[i].dp_ph); + /* TODO: Unmap TLB regions */ + thread_lock(curthread); + sched_bind(curthread, sc->sc_base.sc_cpu); + thread_unlock(curthread); - thread_lock(curthread); - sched_unbind(curthread); - thread_unlock(curthread); - } + if (sc->sc_base.sc_ires != NULL) + bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_base.sc_ires); - if (sc->sc_dp[i].dp_ires != NULL) { - XX_DeallocIntr((uintptr_t)sc->sc_dp[i].dp_ires); - bus_release_resource(dev, SYS_RES_IRQ, - sc->sc_dp[i].dp_irid, sc->sc_dp[i].dp_ires); - } - } - for (i = 0; i < ARRAY_SIZE(sc->sc_rres); i++) { - if (sc->sc_rres[i] != NULL) + for (i = 0; i < nitems(sc->sc_base.sc_mres); i++) { + if (sc->sc_base.sc_mres[i] != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - sc->sc_rrid[i], - sc->sc_rres[i]); + i, sc->sc_base.sc_mres[i]); } + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); + return (0); } -t_Handle -bman_portal_setup(struct bman_softc *bsc) +static uint64_t +bman_query(struct bman_portal_softc *sc, bool depletion) { - struct dpaa_portals_softc *sc; - t_BmPortalParam bpp; - t_Handle portal; - unsigned int cpu; - uintptr_t p; + union bman_mc_result *mc_res; + uint64_t res; - /* Return NULL if we're not ready or while detach */ - if 
(bp_sc == NULL) - return (NULL); + critical_enter(); + mc_res = bman_mc_send(sc, BMAN_MC_VERB_QUERY, 0); + if (mc_res == NULL) + goto err; - sc = bp_sc; + if (depletion) + res = mc_res->bp_ds; + else + res = mc_res->bp_as; + critical_exit(); - sched_pin(); - portal = NULL; - cpu = PCPU_GET(cpuid); + return (res); - /* Check if portal is ready */ - while (atomic_cmpset_acq_ptr((uintptr_t *)&sc->sc_dp[cpu].dp_ph, - 0, -1) == 0) { - p = atomic_load_acq_ptr((uintptr_t *)&sc->sc_dp[cpu].dp_ph); +err: + critical_exit(); + device_printf(sc->sc_base.sc_dev, "Timeout querying depltetion\n"); + return (0); +} - /* Return if portal is already initialized */ - if (p != 0 && p != -1) { - sched_unpin(); - return ((t_Handle)p); - } +static void +bman_portal_isr(void *arg) +{ + struct bman_portal_softc *sc = arg; + uint32_t intrs; + + intrs = bm_ci_read(sc, BCSP_ISR); - /* Not inititialized and "owned" by another thread */ - sched_relinquish(curthread); + /* Release Command Ring interrupt. */ + if (intrs & INTR_RCRI) { + bman_rcr_update(&sc->sc_rcr, &sc->sc_base); + } + /* Buffer Pool State Change Notification. 
*/ + if (intrs & INTR_BSCN) { + struct bman_pool *pool; + uint64_t res = bman_query(sc, true); + if (__predict_true(res != 0)) { + int idx = flsll(res); + pool = sc->sc_pools[64 - idx]; + KASSERT(pool != NULL, + ("state change on unassociated bpid %d\n", idx)); + pool->dep_cb(pool->arg, true); + } } - /* Map portal registers */ - dpaa_portal_map_registers(sc); + bm_ci_write(sc, BCSP_ISR, intrs); +} - /* Configure and initialize portal */ - bpp.ceBaseAddress = rman_get_bushandle(sc->sc_rres[0]); - bpp.ciBaseAddress = rman_get_bushandle(sc->sc_rres[1]); - bpp.h_Bm = bsc->sc_bh; - bpp.swPortalId = cpu; - bpp.irq = (uintptr_t)sc->sc_dp[cpu].dp_ires; +/* RCR */ - portal = BM_PORTAL_Config(&bpp); - if (portal == NULL) - goto err; +int +bman_release(struct bman_pool *pool, const struct bman_buffer *bufs, + uint8_t count) +{ + struct bman_portal_softc *portal; + struct bman_rcr_entry *rcr; - if (BM_PORTAL_Init(portal) != E_OK) - goto err; + if (count > 8) + return (EINVAL); - atomic_store_rel_ptr((uintptr_t *)&sc->sc_dp[cpu].dp_ph, (uintptr_t)portal); - - sched_unpin(); + critical_enter(); + portal = DPCPU_GET(bman_affine_portal); + rcr = bman_rcr_start(&portal->sc_rcr, &portal->sc_base); + bzero(rcr, sizeof(*rcr)); - return (portal); + /* This should be safe, because bpid must be less than 256. 
*/ + for (int i = 0; i < count; i++) + rcr->bufs[i] = bufs[i]; + rcr->bufs[0].bpid = pool->bpid; + bman_rcr_commit(&portal->sc_rcr, BMAN_RCR_VERB_BPID0 | count); + critical_exit(); -err: - if (portal != NULL) - BM_PORTAL_Free(portal); + return (0); +} + +/* MC commands */ +/* Assumes pinned */ +static union bman_mc_result * +bman_mc_send(struct bman_portal_softc *p, uint8_t verb, uint8_t cd) +{ + int res_idx; + struct bman_mc_command *command; + union bman_mc_result *rr; + uintptr_t ce_va = p->sc_base.sc_ce_va; + + command = (struct bman_mc_command *)(ce_va + BMAN_CE_CR); + dpaa_zero_line(command); + command->cd = cd; + dpaa_lw_barrier(); + command->verb = verb | p->mc.polarity; + res_idx = (p->mc.polarity ? 1 : 0); + p->mc.polarity ^= BMAN_MC_VERB_VBIT; + dpaa_flush_line(command); - atomic_store_rel_ptr((uintptr_t *)&sc->sc_dp[cpu].dp_ph, 0); - sched_unpin(); + rr = (union bman_mc_result *)(ce_va + BMAN_CE_RR(res_idx)); + for (;;) { + if (rr->verb != 0) + break; + dpaa_flush_line(rr); + } + return (rr); +} + +int +bman_acquire(struct bman_pool *pool, struct bman_buffer *bufs, uint8_t count) +{ + union bman_mc_result *rr; + + if (count > 8 || count == 0) + return (EINVAL); + critical_enter(); + rr = bman_mc_send(DPCPU_GET(bman_affine_portal), + BMAN_MC_VERB_ACQUIRE | count, + pool->bpid); + critical_exit(); + + if (rr == NULL) + return (ETIMEDOUT); + if ((rr->verb & ~BMAN_MC_VERB_VBIT) == 0) + return (ENOMEM); + + memcpy(bufs, rr, count * sizeof(*bufs)); + + return (0); +} + +/* + * Enable pool state change notifications on this portal. This requires the + * pool to already be configured with the callback to handle state changes. 
+ */ +void +bman_portal_enable_scn(struct bman_portal_softc *sc, struct bman_pool *pool) +{ + uint32_t reg, reg_ptr; - return (NULL); + if (pool->bpid >= 32) + reg_ptr = BCSP_SCN1; + else + reg_ptr = BCSP_SCN0; + reg = bm_ci_read(sc, reg_ptr); + reg |= (1 << (31 - pool->bpid)); + bm_ci_write(sc, reg_ptr, reg); + sc->sc_pools[pool->bpid] = pool; } diff --git a/sys/dev/dpaa/bman_var.h b/sys/dev/dpaa/bman_var.h new file mode 100644 index 000000000000..91484da61ac3 --- /dev/null +++ b/sys/dev/dpaa/bman_var.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef BMAN_VAR_H +#define BMAN_VAR_H + +#include "dpaa_common.h" +#include "portals.h" + +#define BMAN_MAX_POOLS 64 +#define BMAN_MAX_POOLS_1023 8 + +DPAA_RING_DECLARE(bman_rcr); + +struct bman_mc { + uint8_t polarity; + bool busy; +}; + +struct bman_portal_softc { + struct dpaa_portal_softc sc_base; + + struct bman_mc mc; + struct bman_rcr_ring sc_rcr; + struct bman_pool *sc_pools[BMAN_MAX_POOLS]; +}; + +struct bman_pool { + uint32_t bpid; + bm_depletion_handler dep_cb; + void *arg; +}; + +DPCPU_DECLARE(struct bman_portal_softc *, bman_affine_portal); + +int bman_release(struct bman_pool *pool, const struct bman_buffer *bufs, + uint8_t count); + +void bman_portal_enable_scn(struct bman_portal_softc *, struct bman_pool *); + +#endif diff --git a/sys/dev/dpaa/dpaa_common.c b/sys/dev/dpaa/dpaa_common.c new file mode 100644 index 000000000000..c5055231298b --- /dev/null +++ b/sys/dev/dpaa/dpaa_common.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <dev/fdt/fdt_common.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include "dpaa_common.h" + +#define FDT_REG_CELLS 4 +int +dpaa_map_private_memory(device_t dev, int idx, const char *compat, + vm_paddr_t *addrp, size_t 
*sizep) +{ + phandle_t node; + pcell_t cells[idx + 1]; + pcell_t *cell_alloc; + int addr_cells, size_cells; + uint64_t tmp; + u_long align, base, size; + vm_paddr_t alloc_base; + vm_size_t alloc_range_size; + ssize_t alloc_size; + void *reserved; + int rv; + + node = ofw_bus_get_node(dev); + if (OF_getencprop(node, "memory-region", cells, sizeof(cells)) <= 0) + return (ENXIO); + + node = OF_node_from_xref(cells[idx]); + /* If the memory is already reserved, we just need to return it. */ + if (fdt_regsize(node, &base, &size) == 0) + goto success; + + rv = fdt_addrsize_cells(OF_parent(node), &addr_cells, &size_cells); + if (rv != 0) + return (rv); + + if (OF_getprop(node, "alignment", &tmp, sizeof(tmp)) <= 0) + return (ENXIO); + + align = fdt_data_get(&tmp, addr_cells); + if (OF_getprop(node, "size", &tmp, sizeof(tmp)) <= 0) + return (ENXIO); + size = fdt_data_get(&tmp, size_cells); + + alloc_size = + OF_getencprop_alloc(node, "alloc-ranges", (void **)&cell_alloc); + if (alloc_size < 0) + return (ENXIO); + + alloc_size /= sizeof(pcell_t); + for (int i = 0; i < alloc_size; i += (addr_cells + size_cells)) { + alloc_base = fdt_data_get(&cell_alloc[i], addr_cells); + alloc_range_size = + fdt_data_get(&cell_alloc[i + addr_cells], size_cells); + reserved = contigmalloc(size, M_DEVBUF, M_NOWAIT | M_ZERO, + alloc_base, alloc_base + alloc_range_size, align, 0); + if (reserved != NULL) + break; + } + if (reserved == NULL) + return (ENOMEM); + /* Flush the cache (zeroed memory) because it won't be touched later. 
*/ + cpu_flush_dcache(reserved, size); + base = pmap_kextract((vm_offset_t)reserved); + +success: + *addrp = base; + *sizep = size; + + return (0); +} diff --git a/sys/dev/dpaa/dpaa_common.h b/sys/dev/dpaa/dpaa_common.h new file mode 100644 index 000000000000..8fa43a034ef5 --- /dev/null +++ b/sys/dev/dpaa/dpaa_common.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef DPAA_COMMON_H +#define DPAA_COMMON_H + +#include <machine/atomic.h> + +int dpaa_map_private_memory(device_t dev, int idx, const char *compat, + vm_paddr_t *addrp, size_t *sizep); + +struct dpaa_fd { + uint64_t liodn:8; + uint64_t bpid:8; + uint64_t eliodn:4; + uint64_t _rsvd1:4; + uint64_t addr:40; + uint32_t format:3; + uint32_t offset:9; + uint32_t length:20; + uint32_t cmd_stat; +} __packed; + +#define DPAA_FD_FORMAT_SHORT_MBSF 4 + +#define DPAA_FD_RX_STATUS_DCL4C 0x10000000 +#define DPAA_FD_RX_STATUS_DME 0x01000000 +#define DPAA_FD_RX_STATUS_IPRE_M 0x00300000 +#define DPAA_FD_RX_STATUS_FPE 0x00080000 +#define DPAA_FD_RX_STATUS_FSE 0x00040000 +#define DPAA_FD_RX_STATUS_DIS 0x00020000 +#define DPAA_FD_RX_STATUS_EOF 0x00008000 +#define DPAA_FD_RX_STATUS_NSS 0x00004000 +#define DPAA_FD_RX_STATUS_KSO 0x00002000 +#define DPAA_FD_RX_STATUS_FCL_M 0x00000c00 +#define DPAA_FD_RX_STATUS_IPP 0x00000200 +#define DPAA_FD_RX_STATUS_FLM 0x00000100 +#define DPAA_FD_RX_STATUS_PTE 0x00000080 +#define DPAA_FD_RX_STATUS_ISP 0x00000040 +#define DPAA_FD_RX_STATUS_PHE 0x00000020 +#define DPAA_FD_RX_STATUS_FRDR 0x00000010 +#define DPAA_FD_RX_STATUS_BLE 0x00000008 +#define DPAA_FD_RX_STATUS_L4CV 0x00000004 +#define DPAA_FD_RX_STATUS_IPR 0x00000001 + +#define DPAA_FD_TX_CMD_RPD 0x40000000 +#define DPAA_FD_TX_CMD_DTC 0x10000000 +#define DPAA_FD_TX_STATUS_UFD 0x04000000 +#define DPAA_FD_TX_STATUS_LGE 0x02000000 +#define DPAA_FD_TX_STATUS_DME 0x01000000 + +/* Most of the above are error flags, but some aren't */ +#define DPAA_FD_CMD_STAT_ERR_M 0x010ce3e8 
+#define DPAA_FD_TX_STAT_ERR_M 0x03000000 + +#define DPAA_FD_GET_ADDR(fd) ((void *)PHYS_TO_DMAP(fd->addr)) + +struct dpaa_sgte { + uint64_t addr; + uint32_t extension:1; + uint32_t final:1; + uint32_t length:30; + uint16_t bpid; + uint16_t offset; +} __packed; +struct qman_fqr; + + +#define DPAA_NUM_OF_SG_TABLE_ENTRY 16 + +/* + * Ring API infrastructure + * + * BMan and QMan both use cache-enabled rings. Abstract this away to a more + * generalized interface to reduce code copying. + * + * Requirements: + * - Before calling <ring>_init() the ring base (ring->ring) must be initialized + * to the base of the ring. + */ +#define DPAA_RING_DECLARE(pfx) \ + struct pfx##_ring { \ + struct pfx##_entry *ring; \ + struct pfx##_entry *cursor; \ + uint8_t vbit; \ + uint8_t avail; \ + uint8_t ci; \ + uint8_t ithresh; \ + } + +/* + * Ring functions: + * + * ring_cyc_diff() -- get the (circular) difference of `l - f` + * ring_ring_init() -- Set up the ring structures. Portal must be + * initialized beforehand, and ring->ring must be nonzero. + * ring_CARRYCLEAR() -- stealth math to do circular roll-over + * ring_INC() -- Increment the cursor within the ring + * ring_update() -- Update ring entry availability count + * ring_start() -- Reserve the next entry in the ring if available. 
+ * ring_commit() -- Commit the reserved ring entry by setting the verb and + * AVB bit + */ +#define DPAA_RING(pfx,sz,pi_e,ci_e,pi_i,ci_i) \ +static inline int \ +pfx##_cyc_diff(uint8_t size, uint8_t f, uint8_t l) \ +{ \ + if (f <= l) \ + return (uint8_t)(l - f); \ + return (uint8_t)(l + size - f); \ +} \ +static inline void \ +pfx##_ring_init(struct pfx##_ring *ring, struct dpaa_portal_softc *portal)\ +{ \ + uint32_t pi = *(uint32_t*)(portal->sc_ci_va + pi_i) & (sz - 1); \ + uint32_t ci = *(uint32_t*)(portal->sc_ci_va + ci_i); \ + ring->ci = ci & (sz - 1); \ + ring->vbit = !!(ci & sz) << 7; \ + ring->cursor = ring->ring + pi; \ + ring->avail = sz - 1 - pfx##_cyc_diff(sz, ring->ci, pi); \ +} \ +static inline void * \ +pfx##_CARRYCLEAR(struct pfx##_entry *p) \ +{ \ + return ((void *)((uintptr_t)p & (~(uintptr_t)(sz << 6)))); \ +} \ +static inline void \ +pfx##_INC(struct pfx##_ring *ring) \ +{ \ + struct pfx##_entry *partial = ring->cursor + 1; \ + ring->cursor = pfx##_CARRYCLEAR(partial); \ + if (partial != ring->cursor) \ + ring->vbit ^= 0x80; \ +} \ +static inline uint8_t \ +pfx##_update(struct pfx##_ring *ring, struct dpaa_portal_softc *portal) \ +{ \ + uint8_t diff, old_ci = ring->ci; \ + ring->ci = *(uint32_t*)(portal->sc_ci_va + ci_i) & (sz - 1); \ + diff = pfx##_cyc_diff(sz, old_ci, ring->ci); \ + ring->avail += diff; \ + return (diff); \ +} \ +static inline struct pfx##_entry * __unused \ +pfx##_start(struct pfx##_ring *ring, struct dpaa_portal_softc *portal) \ +{ \ + if (ring->avail <= 1) { \ + pfx##_update(ring, portal); \ + if (ring->avail == 0) \ + return (NULL); \ + } \ + dpaa_zero_line(ring->cursor); \ + return (ring->cursor); \ +} \ +static inline void __unused \ +pfx##_commit(struct pfx##_ring *ring, uint8_t verb) \ +{ \ + struct pfx##_entry *entry = ring->cursor; \ + dpaa_lw_barrier(); \ + entry->verb = verb | ring->vbit; \ + dpaa_flush_line(entry); \ + pfx##_INC(ring); \ + ring->avail--; \ +} struct hack + +#ifdef __powerpc__ +static inline void 
+dpaa_flush_line(void *line) +{ + __asm __volatile ("dcbf 0, %0" :: "r"(line) : "memory"); +} + +static inline void +dpaa_zero_line(void *line) +{ + __asm __volatile ("dcbz 0, %0" :: "r"(line) : "memory"); +} + +static inline void +dpaa_touch_line(void *line) +{ + __asm __volatile ("dcbt 0, %0" :: "r"(line) : "memory"); +} + +static inline void +dpaa_lw_barrier(void) +{ + powerpc_lwsync(); +} +#endif + +#endif diff --git a/sys/dev/dpaa/dpaa_eth.c b/sys/dev/dpaa/dpaa_eth.c new file mode 100644 index 000000000000..6424a6e0b0c3 --- /dev/null +++ b/sys/dev/dpaa/dpaa_eth.c @@ -0,0 +1,719 @@ +/*- + * Copyright (c) 2026 Justin Hibbits + * Copyright (c) 2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/rman.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/smp.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/sysctl.h> + +#include <net/ethernet.h> +#include <net/if.h> +#include <net/if_dl.h> +#include <net/if_media.h> +#include <net/if_types.h> +#include <net/if_arp.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> + +#include <dev/mii/mii.h> +#include <dev/mii/miivar.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include "miibus_if.h" + +#include "bman.h" +#include "dpaa_common.h" +#include "dpaa_eth.h" +#include "fman.h" +#include "fman_parser.h" +#include "fman_port.h" +#include "fman_if.h" +#include "fman_port_if.h" +#include "if_dtsec.h" +#include "qman.h" +#include "qman_var.h" +#include "qman_portal_if.h" + + +#define DPAA_ETH_LOCK(sc) mtx_lock(&(sc)->sc_lock) +#define DPAA_ETH_UNLOCK(sc) mtx_unlock(&(sc)->sc_lock) +#define DPAA_ETH_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED) + +/** + * @group dTSEC RM private defines. + * @{ + */ +#define DTSEC_BPOOLS_USED (1) +#define DTSEC_MAX_TX_QUEUE_LEN 256 + +struct dpaa_eth_frame_info { + struct mbuf *fi_mbuf; + struct fman_internal_context fi_ic; + struct dpaa_sgte fi_sgt[DPAA_NUM_OF_SG_TABLE_ENTRY]; +}; + +enum dpaa_eth_pool_params { + DTSEC_RM_POOL_RX_LOW_MARK = 16, + DTSEC_RM_POOL_RX_HIGH_MARK = 64, + DTSEC_RM_POOL_RX_MAX_SIZE = 256, + + DTSEC_RM_POOL_FI_LOW_MARK = 16, + DTSEC_RM_POOL_FI_HIGH_MARK = 64, + DTSEC_RM_POOL_FI_MAX_SIZE = 256, +}; + +#define DTSEC_RM_FQR_RX_CHANNEL 0x401 +#define DTSEC_RM_FQR_TX_CONF_CHANNEL 0 +enum dpaa_eth_fq_params { + DTSEC_RM_FQR_RX_WQ = 1, + DTSEC_RM_FQR_TX_WQ = 1, + DTSEC_RM_FQR_TX_CONF_WQ = 1 +}; +/** @} */ + + +/** + * @group dTSEC Frame Info routines. 
+ * @{ + */ +void +dpaa_eth_fi_pool_free(struct dpaa_eth_softc *sc) +{ + + if (sc->sc_fi_zone != NULL) + uma_zdestroy(sc->sc_fi_zone); +} + +int +dpaa_eth_fi_pool_init(struct dpaa_eth_softc *sc) +{ + + snprintf(sc->sc_fi_zname, sizeof(sc->sc_fi_zname), "%s: Frame Info", + device_get_nameunit(sc->sc_dev)); + + sc->sc_fi_zone = uma_zcreate(sc->sc_fi_zname, + sizeof(struct dpaa_eth_frame_info), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + + return (0); +} + +static struct dpaa_eth_frame_info * +dpaa_eth_fi_alloc(struct dpaa_eth_softc *sc) +{ + struct dpaa_eth_frame_info *fi; + + fi = uma_zalloc(sc->sc_fi_zone, M_NOWAIT | M_ZERO); + + return (fi); +} + +static void +dpaa_eth_fi_free(struct dpaa_eth_softc *sc, struct dpaa_eth_frame_info *fi) +{ + + uma_zfree(sc->sc_fi_zone, fi); +} +/** @} */ + + +/** + * @group dTSEC FMan PORT routines. + * @{ + */ +int +dpaa_eth_fm_port_rx_init(struct dpaa_eth_softc *sc) +{ + struct fman_port_params params; + int error; + + params.dflt_fqid = sc->sc_rx_fqid; + params.err_fqid = sc->sc_rx_fqid; + params.rx_params.num_pools = 1; + params.rx_params.bpools[0].bpid = bman_get_bpid(sc->sc_rx_pool); + params.rx_params.bpools[0].size = MCLBYTES; + error = FMAN_PORT_CONFIG(sc->sc_rx_port, ¶ms); + error = FMAN_PORT_INIT(sc->sc_rx_port); + if (error != 0) { + device_printf(sc->sc_dev, "couldn't initialize FM Port RX.\n"); + return (ENXIO); + } + + return (0); +} + +int +dpaa_eth_fm_port_tx_init(struct dpaa_eth_softc *sc) +{ + struct fman_port_params params; + int error; + + params.dflt_fqid = sc->sc_tx_conf_fqid; + params.err_fqid = sc->sc_tx_conf_fqid; + + error = FMAN_PORT_CONFIG(sc->sc_tx_port, ¶ms); + error = FMAN_PORT_INIT(sc->sc_tx_port); + if (error != 0) { + device_printf(sc->sc_dev, "couldn't initialize FM Port TX.\n"); + return (ENXIO); + } + + return (0); +} +/** @} */ + + +/** + * @group dTSEC buffer pools routines. 
+ * @{ + */ +static int +dpaa_eth_pool_rx_put_buffer(struct dpaa_eth_softc *sc, uint8_t *buffer, + void *context) +{ + + uma_zfree(sc->sc_rx_zone, buffer); + + return (0); +} + +static int +dtsec_add_buffers(struct dpaa_eth_softc *sc, int count) +{ + struct bman_buffer bufs[8] = {}; + int err; + int c; + + while (count > 0) { + c = min(8, count); + for (int i = 0; i < c; i++) { + void *b; + vm_paddr_t pa; + + b = uma_zalloc(sc->sc_rx_zone, M_NOWAIT); + if (b == NULL) + return (ENOMEM); + pa = pmap_kextract((vm_offset_t)b); + bufs[i].buf_hi = (pa >> 32); + bufs[i].buf_lo = (pa & 0xffffffff); + } + + err = bman_put_buffers(sc->sc_rx_pool, bufs, c); + if (err != 0) + return (err); + count -= c; + } + + return (0); +} + +static void +dpaa_eth_pool_rx_depleted(void *h_App, bool in) +{ + struct dpaa_eth_softc *sc; + unsigned int count; + + sc = h_App; + + if (!in) + return; + + while (1) { + count = bman_count(sc->sc_rx_pool); + if (count > DTSEC_RM_POOL_RX_HIGH_MARK) + return; + + /* Can only release 8 buffers at a time */ + count = min(DTSEC_RM_POOL_RX_HIGH_MARK - count + 8, 8); + if (dtsec_add_buffers(sc, count) != 0) + return; + } +} + +void +dpaa_eth_pool_rx_free(struct dpaa_eth_softc *sc) +{ + + if (sc->sc_rx_pool != NULL) + bman_pool_destroy(sc->sc_rx_pool); + + if (sc->sc_rx_zone != NULL) + uma_zdestroy(sc->sc_rx_zone); +} + +int +dpaa_eth_pool_rx_init(struct dpaa_eth_softc *sc) +{ + + /* MCLBYTES must be less than PAGE_SIZE */ + CTASSERT(MCLBYTES < PAGE_SIZE); + + snprintf(sc->sc_rx_zname, sizeof(sc->sc_rx_zname), "%s: RX Buffers", + device_get_nameunit(sc->sc_dev)); + + sc->sc_rx_zone = uma_zcreate(sc->sc_rx_zname, MCLBYTES, NULL, + NULL, NULL, NULL, MCLBYTES - 1, 0); + + sc->sc_rx_pool = bman_pool_create(&sc->sc_rx_bpid, MCLBYTES, + DTSEC_RM_POOL_RX_MAX_SIZE, DTSEC_RM_POOL_RX_LOW_MARK, + DTSEC_RM_POOL_RX_HIGH_MARK, 0, 0, dpaa_eth_pool_rx_depleted, sc); + if (sc->sc_rx_pool == NULL) { + device_printf(sc->sc_dev, "NULL rx pool somehow\n"); + 
dpaa_eth_pool_rx_free(sc); + return (EIO); + } + + dtsec_add_buffers(sc, DTSEC_RM_POOL_RX_HIGH_MARK); + + return (0); +} +/** @} */ + + +/** + * @group dTSEC Frame Queue Range routines. + * @{ + */ +static void +dpaa_eth_fq_mext_free(struct mbuf *m) +{ + struct dpaa_eth_softc *sc; + void *buffer; + + buffer = m->m_ext.ext_arg1; + sc = m->m_ext.ext_arg2; + if (bman_count(sc->sc_rx_pool) <= DTSEC_RM_POOL_RX_MAX_SIZE) + bman_put_buffer(sc->sc_rx_pool, + pmap_kextract((vm_offset_t)buffer), sc->sc_rx_bpid); + else + dpaa_eth_pool_rx_put_buffer(sc, buffer, NULL); +} + +static int +dpaa_eth_update_csum_flags(struct qman_fd *frame, + struct fman_parse_result *prs, struct mbuf *m) +{ + uint16_t l3r = be16toh(prs->l3r); + + /* TODO: nested protocols? */ + if ((l3r & L3R_FIRST_IP_M) != 0) { + m->m_pkthdr.csum_flags |= CSUM_L3_CALC; + if ((l3r & L3R_FIRST_ERROR) == 0) + m->m_pkthdr.csum_flags |= CSUM_L3_VALID; + } + if (frame->cmd_stat & DPAA_FD_RX_STATUS_L4CV) { + m->m_pkthdr.csum_flags |= CSUM_L4_CALC; + m->m_pkthdr.csum_data = 0xffff; + if ((prs->l4r & L4R_TYPE_M) != 0 && + (prs->l4r & L4R_ERR) == 0) + m->m_pkthdr.csum_flags |= CSUM_L4_VALID; + } + + return (0); +} + +static int +dpaa_eth_fq_rx_callback(device_t portal, struct qman_fq *fq, + struct qman_fd *frame, void *app) +{ + struct dpaa_eth_softc *sc; + struct mbuf *m; + struct fman_internal_context *frame_ic; + void *frame_va; + + m = NULL; + sc = app; + + frame_va = DPAA_FD_GET_ADDR(frame); + frame_ic = frame_va; /* internal context at head of the frame */ + /* Only simple (single- or multi-) frames are supported. 
*/ + KASSERT(frame->format == 0 || frame->format == 4, + ("%s(): Got unsupported frame format 0x%02X!", __func__, + frame->format)); + + if ((frame->cmd_stat & DPAA_FD_CMD_STAT_ERR_M) != 0) { + device_printf(sc->sc_dev, "RX error: 0x%08X\n", + frame->cmd_stat); + goto err; + } + + m = m_gethdr(M_NOWAIT, MT_HEADER); + if (m == NULL) + goto err; + + if (frame->format == 0) { + /* Single-frame format */ + m_extadd(m, (char *)frame_va + frame->offset, frame->length, + dpaa_eth_fq_mext_free, frame_va, sc, 0, EXT_NET_DRV); + } else { + struct dpaa_sgte *sgt = + (struct dpaa_sgte *)(char *)frame_va + frame->offset; + /* Simple multi-frame format */ + for (int i = 0; i < DPAA_NUM_OF_SG_TABLE_ENTRY; i++) { + if (sgt[i].length > 0) + m_extadd(m, PHYS_TO_DMAP(sgt[i].addr), + sgt[i].length, dpaa_eth_fq_mext_free, + PHYS_TO_DMAP(sgt[i].addr), sc, 0, + EXT_NET_DRV); + if (sgt[i].final) + break; + } + /* Free the SGT buffer, it's no longer needed. */ + bman_put_buffer(sc->sc_rx_pool, frame->addr, sc->sc_rx_bpid); + } + + if (if_getcapenable(sc->sc_ifnet) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) + dpaa_eth_update_csum_flags(frame, &frame_ic->prs, m); + + m->m_pkthdr.rcvif = sc->sc_ifnet; + m->m_len = frame->length; + m_fixhdr(m); + + if_input(sc->sc_ifnet, m); + + return (1); + +err: + bman_put_buffer(sc->sc_rx_pool, frame->addr, sc->sc_rx_bpid); + if (m != NULL) + m_freem(m); + + return (1); +} + +static int +dpaa_eth_fq_tx_confirm_callback(device_t portal, struct qman_fq *fq, + struct qman_fd *frame, void *app) +{ + struct dpaa_eth_frame_info *fi; + struct dpaa_eth_softc *sc; + unsigned int qlen; + struct dpaa_sgte *sgt0; + + sc = app; + + if ((frame->cmd_stat & DPAA_FD_TX_STAT_ERR_M) != 0) + device_printf(sc->sc_dev, "TX error: 0x%08X\n", + frame->cmd_stat); + + /* + * We are storing struct dpaa_eth_frame_info in first entry + * of scatter-gather table. 
+ */ + sgt0 = (struct dpaa_sgte *)PHYS_TO_DMAP(frame->addr + frame->offset); + fi = (struct dpaa_eth_frame_info *)PHYS_TO_DMAP(sgt0->addr); + + /* Free transmitted frame */ + m_freem(fi->fi_mbuf); + dpaa_eth_fi_free(sc, fi); + + qlen = qman_fq_get_counter(sc->sc_tx_conf_fq, QMAN_COUNTER_FRAME); + + if (qlen == 0) { + DPAA_ETH_LOCK(sc); + + if (sc->sc_tx_fq_full) { + sc->sc_tx_fq_full = 0; + dpaa_eth_if_start_locked(sc); + } + + DPAA_ETH_UNLOCK(sc); + } + + return (1); +} + +void +dpaa_eth_fq_rx_free(struct dpaa_eth_softc *sc) +{ + int cpu; + + if (sc->sc_rx_fq) + qman_fq_free(sc->sc_rx_fq); + if (sc->sc_rx_channel != 0) { + CPU_FOREACH(cpu) { + device_t portal = DPCPU_ID_GET(cpu, qman_affine_portal); + QMAN_PORTAL_STATIC_DEQUEUE_RM_CHANNEL(portal, + sc->sc_rx_channel); + } + qman_free_channel(sc->sc_rx_channel); + } +} + +int +dpaa_eth_fq_rx_init(struct dpaa_eth_softc *sc) +{ + void *fq; + int error; + int cpu; + + /* Default Frame Queue */ + if (sc->sc_rx_channel == 0) + sc->sc_rx_channel = qman_alloc_channel(); + fq = qman_fq_create(1, sc->sc_rx_channel, DTSEC_RM_FQR_RX_WQ, + false, 0, false, false, true, false, 0, 0, 0); + if (fq == NULL) { + device_printf(sc->sc_dev, + "could not create default RX queue\n"); + return (EIO); + } + + CPU_FOREACH(cpu) { + device_t portal = DPCPU_ID_GET(cpu, qman_affine_portal); + QMAN_PORTAL_STATIC_DEQUEUE_CHANNEL(portal, sc->sc_rx_channel); + } + + sc->sc_rx_fq = fq; + sc->sc_rx_fqid = qman_fq_get_fqid(fq); + + error = qman_fq_register_cb(fq, dpaa_eth_fq_rx_callback, sc); + if (error != 0) { + device_printf(sc->sc_dev, "could not register RX callback\n"); + dpaa_eth_fq_rx_free(sc); + return (EIO); + } + + return (0); +} + +void +dpaa_eth_fq_tx_free(struct dpaa_eth_softc *sc) +{ + + if (sc->sc_tx_fq) + qman_fq_free(sc->sc_tx_fq); + + if (sc->sc_tx_conf_fq) + qman_fq_free(sc->sc_tx_conf_fq); +} + +int +dpaa_eth_fq_tx_init(struct dpaa_eth_softc *sc) +{ + int error; + void *fq; + + /* TX Frame Queue */ + fq = qman_fq_create(1, 
sc->sc_port_tx_qman_chan, + DTSEC_RM_FQR_TX_WQ, false, 0, false, false, true, false, 0, 0, 0); + if (fq == NULL) { + device_printf(sc->sc_dev, "could not create default TX queue" + "\n"); + return (EIO); + } + + sc->sc_tx_fq = fq; + + if (sc->sc_rx_channel == 0) + sc->sc_rx_channel = qman_alloc_channel(); + /* TX Confirmation Frame Queue */ + fq = qman_fq_create(1, sc->sc_rx_channel, + DTSEC_RM_FQR_TX_CONF_WQ, false, 0, false, false, true, false, 0, 0, + 0); + if (fq == NULL) { + device_printf(sc->sc_dev, "could not create TX confirmation " + "queue\n"); + dpaa_eth_fq_tx_free(sc); + return (EIO); + } + + sc->sc_tx_conf_fq = fq; + sc->sc_tx_conf_fqid = qman_fq_get_fqid(fq); + + error = qman_fq_register_cb(fq, dpaa_eth_fq_tx_confirm_callback, sc); + if (error != 0) { + device_printf(sc->sc_dev, "could not register TX confirmation " + "callback\n"); + dpaa_eth_fq_tx_free(sc); + return (EIO); + } + + return (0); +} +/** @} */ + +/* Returns the cmd_stat field for the frame descriptor */ +static uint32_t +dpaa_eth_tx_add_csum(struct dpaa_eth_frame_info *fi) +{ + struct mbuf *m = fi->fi_mbuf; + struct fman_parse_result *prs = &fi->fi_ic.prs; + uint32_t csum_flags = m->m_pkthdr.csum_flags; + uint8_t ether_size = ETHER_HDR_LEN; + + if ((csum_flags & CSUM_FLAGS_TX) == 0) + return (0); + + if (m->m_flags & M_VLANTAG) + ether_size += ETHER_VLAN_ENCAP_LEN; + if (csum_flags & CSUM_IP) + prs->l3r = L3R_FIRST_IPV4; + if (csum_flags & CSUM_IP_UDP) { + prs->l4r = L4R_TYPE_UDP; + prs->l4_off = ether_size + sizeof(struct ip); + } else if (csum_flags & CSUM_IP_TCP) { + prs->l4r = L4R_TYPE_TCP; + prs->l4_off = ether_size + sizeof(struct ip); + } else if (csum_flags & CSUM_IP6_UDP) { + prs->l3r = L3R_FIRST_IPV6; + prs->l4r = L4R_TYPE_UDP; + prs->l4_off = ether_size + sizeof(struct ip6_hdr); + } else if (csum_flags & CSUM_IP6_TCP) { + prs->l3r = L3R_FIRST_IPV6; + prs->l4r = L4R_TYPE_TCP; + prs->l4_off = ether_size + sizeof(struct ip6_hdr); + } + + prs->ip_off[0] = ether_size; + + return 
(DPAA_FD_TX_CMD_RPD | DPAA_FD_TX_CMD_DTC); +} + +/** + * @group dTSEC IFnet routines. + * @{ + */ +void +dpaa_eth_if_start_locked(struct dpaa_eth_softc *sc) +{ + vm_size_t dsize, psize, ssize; + struct dpaa_eth_frame_info *fi; + unsigned int qlen, i; + struct mbuf *m0, *m; + vm_offset_t vaddr; + struct dpaa_fd fd; + + DPAA_ETH_LOCK_ASSERT(sc); + /* TODO: IFF_DRV_OACTIVE */ + + if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) == 0) + return; + + if ((if_getdrvflags(sc->sc_ifnet) & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) + return; + + while (!if_sendq_empty(sc->sc_ifnet)) { + /* Check length of the TX queue */ + qlen = qman_fq_get_counter(sc->sc_tx_fq, QMAN_COUNTER_FRAME); + + if (qlen >= DTSEC_MAX_TX_QUEUE_LEN) { + sc->sc_tx_fq_full = 1; + return; + } + + fi = dpaa_eth_fi_alloc(sc); + if (fi == NULL) + return; + + m0 = if_dequeue(sc->sc_ifnet); + if (m0 == NULL) { + dpaa_eth_fi_free(sc, fi); + return; + } + + i = 0; + m = m0; + psize = 0; + dsize = 0; + fi->fi_mbuf = m0; + while (m && i < DPAA_NUM_OF_SG_TABLE_ENTRY) { + if (m->m_len == 0) + continue; + + /* + * First entry in scatter-gather table is used to keep + * pointer to frame info structure. 
+ */ + fi->fi_sgt[i].addr = pmap_kextract((vm_offset_t)fi); + i++; + + dsize = m->m_len; + vaddr = (vm_offset_t)m->m_data; + while (dsize > 0 && i < DPAA_NUM_OF_SG_TABLE_ENTRY) { + ssize = PAGE_SIZE - (vaddr & PAGE_MASK); + if (m->m_len < ssize) + ssize = m->m_len; + + fi->fi_sgt[i].addr = pmap_kextract(vaddr); + fi->fi_sgt[i].length = ssize; + + fi->fi_sgt[i].extension = 0; + fi->fi_sgt[i].final = 0; + fi->fi_sgt[i].bpid = 0; + fi->fi_sgt[i].offset = 0; + + dsize -= ssize; + vaddr += ssize; + psize += ssize; + i++; + } + + if (dsize > 0) + break; + + m = m->m_next; + } + + /* Check if SG table was constructed properly */ + if (m != NULL || dsize != 0) { + dpaa_eth_fi_free(sc, fi); + m_freem(m0); + continue; + } + + fi->fi_sgt[i - 1].final = 1; + + fd.addr = pmap_kextract((vm_offset_t)&fi->fi_ic); + fd.length = psize; + fd.format = DPAA_FD_FORMAT_SHORT_MBSF; + + fd.liodn = 0; + fd.bpid = 0; + fd.eliodn = 0; + fd.offset = offsetof(struct dpaa_eth_frame_info, fi_sgt) - + offsetof(struct dpaa_eth_frame_info, fi_ic); + fd.cmd_stat = dpaa_eth_tx_add_csum(fi); + + DPAA_ETH_UNLOCK(sc); + if (qman_fq_enqueue(sc->sc_tx_fq, &fd) != 0) { + dpaa_eth_fi_free(sc, fi); + m_freem(m0); + } + DPAA_ETH_LOCK(sc); + } +} +/** @} */ diff --git a/sys/dev/dpaa/dpaa_eth.h b/sys/dev/dpaa/dpaa_eth.h new file mode 100644 index 000000000000..7832b4dd7bff --- /dev/null +++ b/sys/dev/dpaa/dpaa_eth.h @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 2012 Semihalf. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef DPAA_ETH_H_ +#define DPAA_ETH_H_ + +struct dpaa_eth_softc { + /* XXX MII bus requires that struct ifnet is first!!! */ + if_t sc_ifnet; + + device_t sc_dev; + struct resource *sc_mem; + struct mtx sc_lock; + + int sc_mac_enet_mode; + + /* RX Pool */ + struct bman_pool *sc_rx_pool; + uint8_t sc_rx_bpid; + uma_zone_t sc_rx_zone; + char sc_rx_zname[64]; + + /* RX Frame Queue */ + struct qman_fq *sc_rx_fq; + uint32_t sc_rx_fqid; + + /* TX Frame Queue */ + struct qman_fq *sc_tx_fq; + bool sc_tx_fq_full; + struct qman_fq *sc_tx_conf_fq; + uint32_t sc_tx_conf_fqid; + + /* Methods */ + int (*sc_port_rx_init) + (struct dpaa_eth_softc *sc, int unit); + int (*sc_port_tx_init) + (struct dpaa_eth_softc *sc, int unit); + void (*sc_start_locked) + (struct dpaa_eth_softc *sc); + + /* dTSEC data */ + uint8_t sc_eth_id; /* Ethernet ID within its frame manager */ + uintptr_t sc_mac_mem_offset; + int sc_mac_mdio_irq; + uint8_t sc_mac_addr[6]; + int sc_port_rx_hw_id; + int sc_port_tx_hw_id; + uint32_t sc_port_tx_qman_chan; + int sc_phy_addr; + bool sc_hidden; + device_t sc_mdio; + int sc_rev_major; + int sc_rev_minor; + + device_t sc_rx_port; + device_t sc_tx_port; + + int sc_rx_channel; + + /* MII 
data */ + struct mii_data *sc_mii; + device_t sc_mii_dev; + struct mtx sc_mii_lock; + + struct callout sc_tick_callout; + + /* Frame Info Zone */ + uma_zone_t sc_fi_zone; + char sc_fi_zname[64]; +}; + +/** + * @group dTSEC Regular Mode API. + * @{ + */ +int dpaa_eth_fm_port_rx_init(struct dpaa_eth_softc *sc); +int dpaa_eth_fm_port_tx_init(struct dpaa_eth_softc *sc); + +void dpaa_eth_if_start_locked(struct dpaa_eth_softc *sc); + +int dpaa_eth_pool_rx_init(struct dpaa_eth_softc *sc); +void dpaa_eth_pool_rx_free(struct dpaa_eth_softc *sc); + +int dpaa_eth_fi_pool_init(struct dpaa_eth_softc *sc); +void dpaa_eth_fi_pool_free(struct dpaa_eth_softc *sc); + +int dpaa_eth_fq_rx_init(struct dpaa_eth_softc *sc); +int dpaa_eth_fq_tx_init(struct dpaa_eth_softc *sc); +void dpaa_eth_fq_rx_free(struct dpaa_eth_softc *sc); +void dpaa_eth_fq_tx_free(struct dpaa_eth_softc *sc); +/** @} */ + +#endif /* DPAA_ETH_H_ */ diff --git a/sys/dev/dpaa/fman.c b/sys/dev/dpaa/fman.c index 393c28487ba9..b94c05fd7dbf 100644 --- a/sys/dev/dpaa/fman.c +++ b/sys/dev/dpaa/fman.c @@ -1,27 +1,7 @@ -/*- - * Copyright (c) 2011-2012 Semihalf. - * All rights reserved. +/* + * Copyright (c) 2026 Justin Hibbits * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. + * SPDX-License-Identifier: BSD-2-Clause */ #include <sys/param.h> @@ -33,6 +13,7 @@ #include <sys/malloc.h> #include <dev/fdt/simplebus.h> +#include <dev/fdt/fdt_common.h> #include <dev/ofw/ofw_bus.h> #include <dev/ofw/ofw_bus_subr.h> @@ -40,50 +21,140 @@ #include "opt_platform.h" -#include <contrib/ncsw/inc/Peripherals/fm_ext.h> -#include <contrib/ncsw/inc/Peripherals/fm_muram_ext.h> -#include <contrib/ncsw/inc/ncsw_ext.h> -#include <contrib/ncsw/integrations/fman_ucode.h> +#include <powerpc/mpc85xx/mpc85xx.h> #include "fman.h" +#define FMAN_BMI_OFFSET 0x80000 +#define FMAN_QMI_OFFSET 0x80400 +#define FMAN_KG_OFFSET 0xc1000 +#define FMAN_DMA_OFFSET 0xc2000 +#define FMAN_FPM_OFFSET 0xc3000 +#define FMAN_IMEM_OFFSET 0xc4000 +#define FMAN_HWP_OFFSET 0xc7000 +#define FMAN_CGP_OFFSET 0xdb000 + +#define FM_IP_REV_1 (FMAN_FPM_OFFSET + 0x0c4) +#define IP_REV_1_MAJ_M 0x0000ff00 +#define IP_REV_1_MAJ_S 8 +#define IP_REV_1_MIN_M 0x000000ff +#define FM_RSTC (FMAN_FPM_OFFSET + 0x0cc) +#define FM_RSTC_FM_RESET 0x80000000 + +#define FMBM_INIT (FMAN_BMI_OFFSET + 0x000) +#define INIT_STR 0x80000000 +#define FMBM_CFG1 (FMAN_BMI_OFFSET + 0x0004) +#define FBPS_M 0x07ff0000 +#define FBPS_S 16 +#define FBPO_M 0x000007ff +#define FMBM_CFG2 (FMAN_BMI_OFFSET + 0x0008) +#define TNTSKS_M 0x007f0000 +#define TNTSKS_S 16 +#define FMBM_IEVR (FMAN_BMI_OFFSET + 0x0020) +#define IEVR_SPEC 0x80000000 +#define IEVR_LEC 0x40000000 +#define IEVR_STEC 0x20000000 +#define 
IEVR_DEC 0x10000000 +#define FMBM_IER (FMAN_BMI_OFFSET + 0x0024) +#define IER_SPECE 0x80000000 +#define IER_LECE 0x40000000 +#define IER_STECE 0x20000000 +#define IER_DECE 0x10000000 +#define FMBM_PP(n) (FMAN_BMI_OFFSET + 0x104 + ((n - 1) * 4)) +#define PP_MXT_M 0x3f000000 +#define PP_MXT_S 24 +#define PP_EXT_M 0x000f0000 +#define PP_EXT_S 16 +#define PP_MXD_M 0x00000f00 +#define PP_MXD_S 8 +#define PP_EXD_M 0x0000000f +#define FMBM_PFS(n) (FMAN_BMI_OFFSET + 0x204 + ((n - 1) * 4)) +#define PFS_EXBS_M 0x03ff0000 +#define PFS_EXBS_S 16 +#define PFS_IFSZ_M 0x000003ff +#define FMQM_GC (FMAN_QMI_OFFSET + 0x000) +#define GC_STEN 0x10000000 +#define GC_ENQ_THR_S 8 +#define GC_ENQ_THR_M 0x00003f00 +#define GC_DEQ_THR_M 0x0000003f +#define FMQM_EIE (FMAN_QMI_OFFSET + 0x008) +#define EIE_DEE 0x80000000 +#define EIE_DFUPE 0x40000000 +#define FMQM_EIEN (FMAN_QMI_OFFSET + 0x00c) +#define EIEN_DEE 0x80000000 +#define EIEN_DFUPE 0x40000000 +#define FMQM_IE +#define IRAM_ADDR (FMAN_IMEM_OFFSET + 0x000) +#define IADD_AIE 0x80000000 +#define IRAM_DATA (FMAN_IMEM_OFFSET + 0x004) +#define IRAM_READY (FMAN_IMEM_OFFSET + 0x0c) +#define IREADY_READY 0x80000000 + +#define FMPR_RPIMAC (FMAN_HWP_OFFSET + 0x844) +#define HWP_RPIMAC_PEN 0x00000001 + +#define FMDM_SR (FMAN_DMA_OFFSET + 0x000) +#define SR_CMDQNE 0x10000000 +#define SR_BER 0x08000000 +#define SR_RDB_ECC 0x04000000 +#define SR_WRB_SECC 0x02000000 +#define FMDM_MR (FMAN_DMA_OFFSET + 0x004) +#define MR_CEN_M 0x0000e000 +#define MR_CEN_S 13 +#define FMDM_SETR (FMAN_DMA_OFFSET + 0x010) +#define FMDM_EBCR (FMAN_DMA_OFFSET + 0x2c) +#define FMDM_PLRn(n) (FMAN_DMA_OFFSET + 0x060 + (4 * (n / 2))) +#define PLRN_LIODN_M(n) (0xfff << PLRN_LIODN_S(n)) +#define PLRN_LIODN_S(n) ((n & 1) ? 
0 : 16) + +#define FMFP_TSC1 (FMAN_FPM_OFFSET + 0x060) +#define TSC1_TEN 0x80000000 +#define FMFP_TSC2 (FMAN_FPM_OFFSET + 0x064) +#define TSC2_TSIV_INT_S 16 +#define FM_RCR (FMAN_FPM_OFFSET + 0x070) +#define RCR_FEE 0x80000000 +#define RCR_IEE 0x40000000 +#define RCR_MET 0x20000000 +#define RCR_IET 0x10000000 +#define RCR_SFE 0x08000000 +#define FMFP_EE (FMAN_FPM_OFFSET + 0x0dc) +#define EE_DECC 0x80000000 +#define EE_STL 0x40000000 +#define EE_SECC 0x20000000 +#define EE_RFM 0x00010000 +#define EE_DECC_EN 0x00008000 +#define EE_STL_EN 0x00004000 +#define EE_SECC_EN 0x00002000 +#define EE_EHM 0x00000008 +#define EE_CER 0x00000002 +#define EE_DER 0x00000001 +#define FMFP_CEV0 (FMAN_FPM_OFFSET + 0x0e0) +#define FMFP_CEV1 (FMAN_FPM_OFFSET + 0x0e4) +#define FMFP_CEV2 (FMAN_FPM_OFFSET + 0x0e8) +#define FMFP_CEV3 (FMAN_FPM_OFFSET + 0x0ec) + +/* DMA constants */ +#define DMA_CAM_UNITS 8 +#define DMA_CAM_SIZE 64 +#define DMA_CAM_ALIGN 64 + + +/* Timestamp counter */ +#define FM_TIMESTAMP_1US_BIT 8 static MALLOC_DEFINE(M_FMAN, "fman", "fman devices information"); +static void fman_intr(void *arg); + /** * @group FMan private defines. * @{ */ -enum fman_irq_enum { - FMAN_IRQ_NUM = 0, - FMAN_ERR_IRQ_NUM = 1 -}; - -enum fman_mu_ram_map { - FMAN_MURAM_OFF = 0x0, - FMAN_MURAM_SIZE = 0x28000 -}; - -struct fman_config { - device_t fman_device; - uintptr_t mem_base_addr; - uintptr_t irq_num; - uintptr_t err_irq_num; - uint8_t fm_id; - t_FmExceptionsCallback *exception_callback; - t_FmBusErrorCallback *bus_error_callback; -}; /** * @group FMan private methods/members. * @{ */ -/** - * Frame Manager firmware. - * We use the same firmware for both P3041 and P2041 devices. 
- */ -const uint32_t fman_firmware[] = FMAN_UC_IMG; -const uint32_t fman_firmware_size = sizeof(fman_firmware); int fman_activate_resource(device_t bus, device_t child, struct resource *res) @@ -125,7 +196,7 @@ fman_release_resource(device_t bus, device_t child, struct resource *res) passthrough = (device_get_parent(child) != bus); rl = BUS_GET_RESOURCE_LIST(bus, child); if (rman_get_type(res) != SYS_RES_IRQ) { - if ((rman_get_flags(res) & RF_ACTIVE) != 0 ){ + if ((rman_get_flags(res) & RF_ACTIVE) != 0) { rv = bus_deactivate_resource(child, res); if (rv != 0) return (rv); @@ -209,202 +280,400 @@ fman_alloc_resource(device_t bus, device_t child, int type, int rid, return (NULL); } + static int -fman_fill_ranges(phandle_t node, struct simplebus_softc *sc) +fman_get_revision_major(struct fman_softc *sc) { - int host_address_cells; - cell_t *base_ranges; - ssize_t nbase_ranges; - int err; - int i, j, k; + uint32_t reg; - err = OF_searchencprop(OF_parent(node), "#address-cells", - &host_address_cells, sizeof(host_address_cells)); - if (err <= 0) - return (-1); + reg = bus_read_4(sc->mem_res, FM_IP_REV_1); - nbase_ranges = OF_getproplen(node, "ranges"); - if (nbase_ranges < 0) - return (-1); - sc->nranges = nbase_ranges / sizeof(cell_t) / - (sc->acells + host_address_cells + sc->scells); - if (sc->nranges == 0) - return (0); + return ((reg & IP_REV_1_MAJ_M) >> IP_REV_1_MAJ_S); +} - sc->ranges = malloc(sc->nranges * sizeof(sc->ranges[0]), - M_DEVBUF, M_WAITOK); - base_ranges = malloc(nbase_ranges, M_DEVBUF, M_WAITOK); - OF_getencprop(node, "ranges", base_ranges, nbase_ranges); +static int +fman_get_revision_minor(struct fman_softc *sc) +{ + uint32_t reg; - for (i = 0, j = 0; i < sc->nranges; i++) { - sc->ranges[i].bus = 0; - for (k = 0; k < sc->acells; k++) { - sc->ranges[i].bus <<= 32; - sc->ranges[i].bus |= base_ranges[j++]; - } - sc->ranges[i].host = 0; - for (k = 0; k < host_address_cells; k++) { - sc->ranges[i].host <<= 32; - sc->ranges[i].host |= base_ranges[j++]; - 
} - sc->ranges[i].size = 0; - for (k = 0; k < sc->scells; k++) { - sc->ranges[i].size <<= 32; - sc->ranges[i].size |= base_ranges[j++]; + reg = bus_read_4(sc->mem_res, FM_IP_REV_1); + + return ((reg & IP_REV_1_MIN_M)); +} + +static void +fman_fill_soc_params(struct fman_softc *sc) +{ + + switch (sc->sc_revision_major) { + case 2: + sc->bmi_max_fifo_size = 160 * 1024; + sc->iram_size = 64 * 1024; + sc->dma_thresh_max_commq = 31; + sc->dma_thresh_max_buf = 127; + sc->qmi_max_tnums = 64; + sc->qmi_def_tnums_thresh = 48; + sc->bmi_max_tasks = 128; + sc->max_open_dmas = 32; + sc->dma_cam_num_entries = 32; + sc->port_cgs = 256; + sc->rx_ports = 5; + sc->total_fifo_size = 100 * 1024; + break; + case 3: + sc->bmi_max_fifo_size = 160 * 1024; + sc->iram_size = 64 * 1024; + sc->dma_thresh_max_commq = 31; + sc->dma_thresh_max_buf = 127; + sc->qmi_max_tnums = 64; + sc->qmi_def_tnums_thresh = 48; + sc->bmi_max_tasks = 128; + sc->max_open_dmas = 32; + sc->dma_cam_num_entries = 32; + sc->port_cgs = 256; + sc->rx_ports = 6; + sc->total_fifo_size = 136 * 1024; + break; + case 6: + sc->dma_thresh_max_commq = 31; + sc->dma_thresh_max_buf = 127; + sc->qmi_max_tnums = 64; + sc->qmi_def_tnums_thresh = 48; + sc->dma_cam_num_entries = 64; + sc->port_cgs = 256; + switch (sc->sc_revision_minor) { + case 1: + case 4: + sc->bmi_max_fifo_size = 192 * 1024; + sc->bmi_max_tasks = 64; + sc->max_open_dmas = 32; + sc->rx_ports = 5; + sc->total_fifo_size = 156 * 1024; + if (sc->sc_revision_minor == 1) + sc->iram_size = 32 * 1024; + else + sc->iram_size = 64 * 1024; + break; + case 0: + case 2: + case 3: + sc->bmi_max_fifo_size = 384 * 1024; + sc->bmi_max_tasks = 128; + sc->max_open_dmas = 84; + sc->rx_ports = 8; + sc->iram_size = 64 * 1024; + sc->total_fifo_size = 295 * 1024; + break; + default: + device_printf(sc->sc_base.dev, + "Unsupported FManv3 revision: %d\n", + sc->sc_revision_minor); + break; } + break; + default: + device_printf(sc->sc_base.dev, + "Unsupported FMan version: %d\n", 
sc->sc_revision_major); + break; } - - free(base_ranges, M_DEVBUF); - return (sc->nranges); } -static t_Handle -fman_init(struct fman_softc *sc, struct fman_config *cfg) +static int +fman_reset(struct fman_softc *sc) { - phandle_t node; - t_FmParams fm_params; - t_Handle muram_handle, fm_handle; - t_Error error; - t_FmRevisionInfo revision_info; - uint16_t clock; - uint32_t tmp, mod; + unsigned int count; + + if (sc->sc_revision_major < 6) { + bus_write_4(sc->mem_res, FM_RSTC, FM_RSTC_FM_RESET); + count = 100; + do { + DELAY(1); + } while ((bus_read_4(sc->mem_res, FM_RSTC) & FM_RSTC_FM_RESET) && + --count); + if (count == 0) + return (EBUSY); + return (0); + } else { +#ifdef __powerpc__ + phandle_t node; + u_long base, size; + uint32_t devdisr2; +#define GUTS_DEVDISR2 0x0074 +#define DEVDISR2_FMAN1 0xfcc00000 +#define DEVDISR2_FMAN2 0x000fcc00 + + node = ofw_bus_get_node(device_get_parent(sc->sc_base.dev)); + node = fdt_find_compatible(node, "fsl,qoriq-device-config-2.0", + false); - /* MURAM configuration */ - muram_handle = FM_MURAM_ConfigAndInit(cfg->mem_base_addr + - FMAN_MURAM_OFF, FMAN_MURAM_SIZE); - if (muram_handle == NULL) { - device_printf(cfg->fman_device, "couldn't init FM MURAM module" - "\n"); - return (NULL); + if (node == 0) { + device_printf(sc->sc_base.dev, + "missing device-config node in FDT. 
Cannot reset FMAN"); + return (0); + } + fdt_regsize(node, &base, &size); + + devdisr2 = ccsr_read4(ccsrbar_va + base + GUTS_DEVDISR2); + if (sc->fm_id == 0) + ccsr_write4(ccsrbar_va + base + GUTS_DEVDISR2, + devdisr2 & ~DEVDISR2_FMAN1); + else + ccsr_write4(ccsrbar_va + base + GUTS_DEVDISR2, + devdisr2 & ~DEVDISR2_FMAN2); +#endif + bus_write_4(sc->mem_res, FM_RSTC, FM_RSTC_FM_RESET); + count = 100; + do { + DELAY(1); + } while ((bus_read_4(sc->mem_res, FM_RSTC) & FM_RSTC_FM_RESET) && + --count); +#ifdef __powerpc__ + ccsr_write4(ccsrbar_va + base + GUTS_DEVDISR2, devdisr2); +#endif + if (count == 0) + return (EBUSY); + return (0); } - sc->muram_handle = muram_handle; +} - /* Fill in FM configuration */ - fm_params.fmId = cfg->fm_id; - /* XXX we support only one partition thus each fman has master id */ - fm_params.guestId = NCSW_MASTER_ID; +static int +fman_clear_iram(struct fman_softc *sc) +{ +#ifdef notyet + int i; - fm_params.baseAddr = cfg->mem_base_addr; - fm_params.h_FmMuram = muram_handle; + /* + * TODO: Allow clearing the IRAM and loading new firmware. Currently + * this is not supported, so assume that there's already firmware + * loaded, and don't clear it just yet. 
+ */ + bus_write_4(sc->mem_res, IRAM_ADDR, IADD_AIE); + for (i = 0; i < 100 && bus_read_4(sc->mem_res, IRAM_ADDR) != IADD_AIE; i++) + DELAY(1); - /* Get FMan clock in Hz */ - if ((tmp = fman_get_clock(sc)) == 0) - return (NULL); + if (i == 100) + return (EBUSY); - /* Convert FMan clock to MHz */ - clock = (uint16_t)(tmp / 1000000); - mod = tmp % 1000000; + for (i = 0; i < sc->iram_size / 4; i++) + bus_write_4(sc->mem_res, IRAM_DATA, 0xffffffff); - if (mod >= 500000) - ++clock; + bus_write_4(sc->mem_res, IRAM_ADDR, sc->iram_size - 4); + for (i = 0; i < 100 && + bus_read_4(sc->mem_res, IRAM_DATA) != 0xffffffff; i++) + DELAY(1); - fm_params.fmClkFreq = clock; - fm_params.f_Exception = cfg->exception_callback; - fm_params.f_BusError = cfg->bus_error_callback; - fm_params.h_App = cfg->fman_device; - fm_params.irq = cfg->irq_num; - fm_params.errIrq = cfg->err_irq_num; + if (i == 100) + return (EBUSY); +#endif - fm_params.firmware.size = fman_firmware_size; - fm_params.firmware.p_Code = (uint32_t*)fman_firmware; + return (0); +} - fm_handle = FM_Config(&fm_params); - if (fm_handle == NULL) { - device_printf(cfg->fman_device, "couldn't configure FM " - "module\n"); - goto err; - } +static int +fman_dma_init(struct fman_softc *sc) +{ + vmem_addr_t addr; + uint32_t reg; + int err; - FM_ConfigResetOnInit(fm_handle, TRUE); + reg = bus_read_4(sc->mem_res, FMDM_SR); + bus_write_4(sc->mem_res, FMDM_SR, reg | SR_BER); + reg = bus_read_4(sc->mem_res, FMDM_MR) & ~MR_CEN_M; + reg |= ((sc->dma_cam_num_entries / DMA_CAM_UNITS) - 1) << MR_CEN_S; + bus_write_4(sc->mem_res, FMDM_MR, reg); - error = FM_Init(fm_handle); - if (error != E_OK) { - device_printf(cfg->fman_device, "couldn't init FM module\n"); - goto err2; - } + err = vmem_xalloc(sc->muram_vmem, + sc->dma_cam_num_entries * DMA_CAM_SIZE, DMA_CAM_ALIGN, 0, 0, + VMEM_ADDR_MIN, VMEM_ADDR_MAX, M_BESTFIT | M_WAITOK, &addr); + if (err != 0) + device_printf(sc->sc_base.dev, + "failed to allocate DMA buffer\n"); + reg = addr; + 
bus_write_4(sc->mem_res, FMDM_EBCR, reg); + return (0); +} - error = FM_GetRevision(fm_handle, &revision_info); - if (error != E_OK) { - device_printf(cfg->fman_device, "couldn't get FM revision\n"); - goto err2; - } +static int +fman_bmi_init(struct fman_softc *sc) +{ + uint32_t reg; - device_printf(cfg->fman_device, "Hardware version: %d.%d.\n", - revision_info.majorRev, revision_info.minorRev); + reg = sc->bmi_fifo_base / FMAN_BMI_FIFO_ALIGN; + reg |= (sc->total_fifo_size / FMAN_BMI_FIFO_UNITS - 1) << FBPS_S; + bus_write_4(sc->mem_res, FMBM_CFG1, reg); - /* Initialize the simplebus part of things */ - simplebus_init(sc->sc_base.dev, 0); + reg = ((sc->bmi_max_tasks - 1) << TNTSKS_S) & TNTSKS_M; + //bus_write_4(sc->mem_res, FMBM_CFG2, reg); - node = ofw_bus_get_node(sc->sc_base.dev); - fman_fill_ranges(node, &sc->sc_base); - sc->rman.rm_type = RMAN_ARRAY; - sc->rman.rm_descr = "FMan range"; - rman_init_from_resource(&sc->rman, sc->mem_res); - for (node = OF_child(node); node > 0; node = OF_peer(node)) { - simplebus_add_device(sc->sc_base.dev, node, 0, NULL, -1, NULL); - } + bus_write_4(sc->mem_res, FMBM_IEVR, + IEVR_SPEC | IEVR_LEC | IEVR_STEC | IEVR_DEC); - return (fm_handle); + bus_write_4(sc->mem_res, FMBM_IER, + IER_SPECE | IER_LECE | IER_STECE | IER_DECE); -err2: - FM_Free(fm_handle); -err: - FM_MURAM_Free(muram_handle); - return (NULL); + return (0); } -static void -fman_exception_callback(t_Handle app_handle, e_FmExceptions exception) +static int +fman_qmi_init(struct fman_softc *sc) { - struct fman_softc *sc; - - sc = app_handle; - device_printf(sc->sc_base.dev, "FMan exception occurred.\n"); + bus_write_4(sc->mem_res, FMQM_EIE, EIE_DEE | EIE_DFUPE); + bus_write_4(sc->mem_res, FMQM_EIEN, EIEN_DEE | EIEN_DFUPE); + return (0); } static void -fman_error_callback(t_Handle app_handle, e_FmPortType port_type, - uint8_t port_id, uint64_t addr, uint8_t tnum, uint16_t liodn) +fman_hwp_init(struct fman_softc *sc) { - struct fman_softc *sc; - - sc = app_handle; - 
device_printf(sc->sc_base.dev, "FMan error occurred.\n"); + /* Start up the parser */ + bus_write_4(sc->mem_res, FMPR_RPIMAC, HWP_RPIMAC_PEN); } -/** @} */ +static int +fman_enable(struct fman_softc *sc) +{ + bus_write_4(sc->mem_res, FMBM_INIT, INIT_STR); + bus_write_4(sc->mem_res, FMQM_GC, 0xc0000000 | + GC_STEN | (sc->qmi_def_tnums_thresh << GC_ENQ_THR_S) | + sc->qmi_def_tnums_thresh); -/** - * @group FMan driver interface. - * @{ + return (0); +} + +/* + * Enable timestamp counting. Matching Freescale's reference code, generate the + * timestamp incrementer to be roughly 256MHz, such that bit 23 would update + * every microsecond. */ +static int +fman_enable_timestamp(struct fman_softc *sc) +{ + uint64_t frac; + uint32_t clock = fman_get_clock(sc) / 1000000; + uint32_t intgr, tmp; + uint32_t ts_freq = 1 << FM_TIMESTAMP_1US_BIT; -int -fman_get_handle(device_t dev, t_Handle *fmh) + intgr = ts_freq / clock; + + frac = ((uint64_t)ts_freq << 16) - ((uint64_t)intgr << 16) * clock; + frac = (frac % clock ? 1 : 0) + (frac / clock); + + tmp = (intgr << TSC2_TSIV_INT_S) | (uint32_t)frac; + + bus_write_4(sc->mem_res, FMFP_TSC2, tmp); + bus_write_4(sc->mem_res, FMFP_TSC1, TSC1_TEN); + + return (0); +} + +static int +fman_keygen_init(struct fman_softc *sc) { - struct fman_softc *sc = device_get_softc(dev); + /* TODO: keygen */ + return (0); +} + +static int +fman_fpm_init(struct fman_softc *sc) +{ + /* Clear all events, and enable interrupts. 
*/ + bus_write_4(sc->mem_res, FMFP_EE, + EE_DECC | EE_STL | EE_SECC | EE_EHM | + EE_DECC_EN | EE_STL_EN | EE_SECC_EN); + + bus_write_4(sc->mem_res, FMFP_CEV0, 0xffffffff); + bus_write_4(sc->mem_res, FMFP_CEV1, 0xffffffff); + bus_write_4(sc->mem_res, FMFP_CEV2, 0xffffffff); + bus_write_4(sc->mem_res, FMFP_CEV3, 0xffffffff); - *fmh = sc->fm_handle; + bus_write_4(sc->mem_res, FM_RCR, RCR_FEE | RCR_IEE); return (0); } -int -fman_get_muram_handle(device_t dev, t_Handle *muramh) +static int +fman_init(struct fman_softc *sc) { - struct fman_softc *sc = device_get_softc(dev); + vmem_addr_t base_addr; + sc->sc_revision_major = fman_get_revision_major(sc); + sc->sc_revision_minor = fman_get_revision_minor(sc); + + if (bootverbose) + device_printf(sc->sc_base.dev, "Hardware version: %d.%d.\n", + sc->sc_revision_major, sc->sc_revision_minor); + + fman_fill_soc_params(sc); + bus_set_region_4(sc->mem_res, FMAN_CGP_OFFSET, 0, sc->port_cgs / 4); + + if (fman_reset(sc) != 0) + goto err; + + if (fman_clear_iram(sc) != 0) + goto err; + + if (fman_dma_init(sc) != 0) + goto err; - *muramh = sc->muram_handle; + fman_fpm_init(sc); + + vmem_alloc(sc->muram_vmem, sc->total_fifo_size, M_BESTFIT | M_WAITOK, + &base_addr); + sc->bmi_fifo_base = base_addr; + + fman_bmi_init(sc); + fman_qmi_init(sc); + fman_hwp_init(sc); + if (fman_keygen_init(sc) != 0) + goto err; + + if (fman_enable(sc) != 0) + goto err; + + fman_enable_timestamp(sc); return (0); +err: + return (ENXIO); } -int -fman_get_bushandle(device_t dev, vm_offset_t *fm_base) +void +fman_get_revision(device_t dev, int *major, int *minor) { struct fman_softc *sc = device_get_softc(dev); - *fm_base = rman_get_bushandle(sc->mem_res); + if (major) + *major = sc->sc_revision_major; + if (minor) + *minor = sc->sc_revision_minor; +} + +/** @} */ + +static int +fman_init_muram(struct fman_softc *sc) +{ + u_long base, size; + phandle_t node; + + node = ofw_bus_get_node(sc->sc_base.dev); + for (node = OF_child(node); node != 0; node = 
OF_peer(node)) { + char compat[255]; + + if (OF_getprop(node, "compatible", compat, sizeof(compat)) < 0) + continue; + if (strcmp(compat, "fsl,fman-muram") == 0) + break; + } + if (node == 0) { + device_printf(sc->sc_base.dev, "no muram node\n"); + return (ENXIO); + } + if (fdt_regsize(node, &base, &size) != 0) { + device_printf(sc->sc_base.dev, "failed to get muram reg\n"); + return (ENXIO); + } + sc->muram_vmem = vmem_create("MURAM", + base, size, 1, 0, M_WAITOK); return (0); } @@ -413,20 +682,18 @@ int fman_attach(device_t dev) { struct fman_softc *sc; - struct fman_config cfg; pcell_t qchan_range[2]; + pcell_t cell; phandle_t node; sc = device_get_softc(dev); sc->sc_base.dev = dev; - /* Check if MallocSmart allocator is ready */ - if (XX_MallocSmartInit() != E_OK) { - device_printf(dev, "could not initialize smart allocator.\n"); - return (ENXIO); - } - + cell = 0; node = ofw_bus_get_node(dev); + OF_getencprop(node, "cell-index", &cell, sizeof(cell)); + sc->fm_id = cell; + if (OF_getencprop(node, "fsl,qman-channel-range", qchan_range, sizeof(qchan_range)) <= 0) { device_printf(dev, "Missing QMan channel range property!\n"); @@ -450,6 +717,12 @@ fman_attach(device_t dev) goto err; } + if (bus_setup_intr(dev, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE, + NULL, fman_intr, sc, &sc->irq_cookie) != 0) { + device_printf(dev, "error setting up interrupt handler.\n"); + goto err; + } + sc->err_irq_rid = 1; sc->err_irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->err_irq_rid, RF_ACTIVE | RF_SHAREABLE); @@ -458,20 +731,19 @@ fman_attach(device_t dev) goto err; } - /* Set FMan configuration */ - cfg.fman_device = dev; - cfg.fm_id = device_get_unit(dev); - cfg.mem_base_addr = rman_get_bushandle(sc->mem_res); - cfg.irq_num = (uintptr_t)sc->irq_res; - cfg.err_irq_num = (uintptr_t)sc->err_irq_res; - cfg.exception_callback = fman_exception_callback; - cfg.bus_error_callback = fman_error_callback; + /* Initialize the simplebus part of things */ + sc->rman.rm_type = 
RMAN_ARRAY; + sc->rman.rm_descr = "FMan range"; + rman_init_from_resource(&sc->rman, sc->mem_res); + simplebus_attach_impl(sc->sc_base.dev); + + if (fman_init_muram(sc) != 0) + goto err; + + /* TODO: Interrupts... */ - sc->fm_handle = fman_init(sc, &cfg); - if (sc->fm_handle == NULL) { - device_printf(dev, "could not be configured\n"); + if (fman_init(sc) != 0) goto err; - } bus_attach_children(dev); return (0); @@ -485,16 +757,14 @@ int fman_detach(device_t dev) { struct fman_softc *sc; + int rv; - sc = device_get_softc(dev); + rv = simplebus_detach(dev); - if (sc->muram_handle) { - FM_MURAM_Free(sc->muram_handle); - } + if (rv != 0) + return (rv); - if (sc->fm_handle) { - FM_Free(sc->fm_handle); - } + sc = device_get_softc(dev); if (sc->mem_res) { bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, @@ -511,6 +781,9 @@ fman_detach(device_t dev) sc->err_irq_res); } + if (sc->muram_vmem != NULL) + vmem_destroy(sc->muram_vmem); + return (0); } @@ -535,21 +808,152 @@ fman_shutdown(device_t dev) return (0); } +static void +fman_intr(void *arg) +{ + /* TODO: All FMAN interrupts */ +} + int fman_qman_channel_id(device_t dev, int port) { struct fman_softc *sc; - int qman_port_id[] = {0x31, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, - 0x2f, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; int i; sc = device_get_softc(dev); - for (i = 0; i < sc->qman_chan_count; i++) { - if (qman_port_id[i] == port) - return (sc->qman_chan_base + i); + if (sc->sc_revision_major >= 6) { + static const int qman_port_id[] = { + 0x30, 0x31, 0x28, 0x29, 0x2a, 0x2b, + 0x2c, 0x2d, 0x02, 0x03, 0x04, 0x05, 0x07, 0x07 + }; + for (i = 0; i < sc->qman_chan_count; i++) { + if (qman_port_id[i] == port) + return (sc->qman_chan_base + i); + } + } else { + static const int qman_port_id[] = { + 0x31, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x01, + 0x02, 0x03, 0x04, 0x05, 0x07, 0x07 + }; + for (i = 0; i < sc->qman_chan_count; i++) { + if (qman_port_id[i] == port) + return (sc->qman_chan_base + i); + } } return (0); } 
+size_t +fman_get_bmi_max_fifo_size(device_t dev) +{ + struct fman_softc *sc = device_get_softc(dev); + + return (sc->bmi_max_fifo_size); +} + +int +fman_reset_mac(device_t dev, int mac_id) +{ + struct fman_softc *sc = device_get_softc(dev); + int timeout = 100; + uint32_t mask; + + if (mac_id < 0 || mac_id > 9) + return (EINVAL); + + /* MAC bits start at bit 1 for MAC0, and go down */ + mask = (1 << (30 - mac_id)); + bus_write_4(sc->mem_res, FM_RSTC, mask); + while ((bus_read_4(sc->mem_res, FM_RSTC) & mask) && --timeout) + DELAY(10); + + if (timeout == 0) + return (EIO); + + return (0); +} + +static int +fman_set_port_tasks(struct fman_softc *sc, int port_id, + uint8_t tasks, uint8_t extra) +{ + uint32_t reg; + + reg = bus_read_4(sc->mem_res, FMBM_PP(port_id)); + + reg &= ~(PP_MXT_M | PP_EXT_M); + reg |= ((uint32_t)(tasks - 1) << PP_MXT_S) | + ((uint32_t)extra << PP_EXT_S); + bus_write_4(sc->mem_res, FMBM_PP(port_id), reg); + + return (0); +} + +static int +fman_set_port_fifo_size(struct fman_softc *sc, int port_id, + uint32_t fifo_size, uint32_t extra) +{ + uint32_t reg; + + reg = (fifo_size / FMAN_BMI_FIFO_UNITS - 1) | + ((extra / FMAN_BMI_FIFO_UNITS) << PFS_EXBS_S); + + /* TODO: Make sure fifo size doesn't overrun */ + /* See Linux driver, fman set_size_of_fifo */ + + bus_write_4(sc->mem_res, FMBM_PFS(port_id), reg); + return (0); +} + +static int +fman_set_port_dmas(struct fman_softc *sc, int port_id, + int open_dmas, int extra_dmas) +{ + /* TODO: set port DMAs */ + return (0); +} + +static void +fman_set_port_liodn(struct fman_softc *sc, int port_id, uint32_t liodn) +{ + uint32_t reg; + + reg = bus_read_4(sc->mem_res, FMDM_PLRn(port_id)); + reg &= ~PLRN_LIODN_M(port_id); + reg |= liodn << PLRN_LIODN_S(port_id); + bus_write_4(sc->mem_res, FMDM_PLRn(port_id), reg); +} + +int +fman_set_port_params(device_t dev, struct fman_port_init_params *params) +{ + struct fman_softc *sc = device_get_softc(dev); + int error; + + error = fman_set_port_tasks(sc, 
params->port_id, + params->num_tasks, params->extra_tasks); + + if (error != 0) + return (error); + + if (!params->is_rx_port) { + } + error = fman_set_port_fifo_size(sc, params->port_id, params->fifo_size, + params->extra_fifo_size); + + if (error != 0) + return (error); + + error = fman_set_port_dmas(sc, params->port_id, + params->open_dmas, params->extra_dmas); + + if (error != 0) + return (error); + + fman_set_port_liodn(sc, params->port_id, params->liodn); + + return (0); +} + /** @} */ diff --git a/sys/dev/dpaa/fman.h b/sys/dev/dpaa/fman.h index a2ada5e16ffb..a0af1b36b5a3 100644 --- a/sys/dev/dpaa/fman.h +++ b/sys/dev/dpaa/fman.h @@ -28,6 +28,23 @@ #define FMAN_H_ #include <dev/fdt/simplebus.h> +#include <sys/vmem.h> + +#define FMAN_BMI_FIFO_UNITS 0x100 +#define FMAN_BMI_FIFO_ALIGN 0x100 + +#define FM_FD_ERR_DMA 0x01000000 +#define FM_FD_ERR_FPE 0x00080000 +#define FM_FD_ERR_FSE 0x00040000 +#define FM_FD_ERR_DIS 0x00020000 +#define FM_FD_ERR_EOF 0x00008000 +#define FM_FD_ERR_NSS 0x00004000 +#define FM_FD_ERR_KSO 0x00002000 +#define FM_FD_ERR_IPP 0x00000200 +#define FM_FD_ERR_PTE 0x00000080 +#define FM_FD_ERR_ISP 0x00000040 +#define FM_FD_ERR_PHE 0x00000020 +#define FM_FD_ERR_BLE 0x00000008 /** * FMan driver instance data. 
@@ -38,22 +55,91 @@ struct fman_softc { struct resource *irq_res; struct resource *err_irq_res; struct rman rman; + vmem_t *muram_vmem; int mem_rid; int irq_rid; int err_irq_rid; + void *irq_cookie; int qman_chan_base; int qman_chan_count; + int fm_id; + + int sc_revision_major; + int sc_revision_minor; + + uint16_t clock; + bool timestamps; + + uint32_t iram_size; + uint32_t dma_thresh_max_commq; + uint32_t dma_thresh_max_buf; + uint32_t dma_cam_num_entries; + uint32_t max_open_dmas; + + uint32_t qmi_max_tnums; + uint32_t qmi_def_tnums_thresh; + + uint32_t bmi_max_tasks; + uint32_t bmi_max_fifo_size; + uint32_t bmi_fifo_base; - t_Handle fm_handle; - t_Handle muram_handle; + uint32_t port_cgs; + uint32_t rx_ports; + uint32_t total_fifo_size; + + uint32_t qman_channel_base; + uint32_t qman_channels; +}; + +struct fman_port_init_params { + int port_id; + bool is_rx_port; + uint8_t num_tasks; + uint8_t extra_tasks; + uint8_t open_dmas; + uint8_t extra_dmas; + uint32_t fifo_size; + uint32_t extra_fifo_size; + uint8_t deq_pipeline_size; + uint16_t max_frame_length; + uint16_t liodn; }; +struct fman_parse_result { + uint8_t lpid; + uint8_t shimr; + uint16_t l2r; + uint16_t l3r; + uint8_t l4r; + uint8_t cpid; + uint16_t nxthdr; + uint16_t cksum; + uint32_t lcv; + uint8_t shim_off[2]; + uint8_t ip_pid_off; + uint8_t eth_off; + uint8_t llc_snap_off; + uint8_t vlan_tic_off[2]; + uint8_t last_e_type_off; + uint8_t pppoe_off; + uint8_t mpls_off[2]; + uint8_t ip_off[2]; + uint8_t gre_off; + uint8_t l4_off; + uint8_t nxthdr_off; +}; + +struct fman_internal_context { + struct fman_parse_result prs; + uint64_t timestamp; + uint64_t hash; +}; /** - * @group QMan bus interface. + * @group FMan bus interface. 
* @{ */ -struct resource * fman_alloc_resource(device_t bus, device_t child, int type, +struct resource *fman_alloc_resource(device_t bus, device_t child, int type, int rid, rman_res_t start, rman_res_t end, rman_res_t count, u_int flags); int fman_activate_resource(device_t bus, device_t child, struct resource *res); @@ -66,11 +152,16 @@ int fman_shutdown(device_t dev); int fman_read_ivar(device_t dev, device_t child, int index, uintptr_t *result); int fman_qman_channel_id(device_t, int); +void fman_get_revision(device_t, int *, int *); /** @} */ uint32_t fman_get_clock(struct fman_softc *sc); -int fman_get_handle(device_t dev, t_Handle *fmh); -int fman_get_muram_handle(device_t dev, t_Handle *muramh); int fman_get_bushandle(device_t dev, vm_offset_t *fm_base); +size_t fman_get_bmi_max_fifo_size(device_t); +int fman_reset_mac(device_t, int); +int fman_set_port_params(device_t dev, struct fman_port_init_params *params); +int fman_qman_channel_id(device_t, int); +int fman_set_mac_intr_handler(device_t, int, driver_intr_t, void *); +int fman_set_mac_err_handler(device_t, int, driver_intr_t, void *); #endif /* FMAN_H_ */ diff --git a/sys/dev/dpaa/fman_fdt.c b/sys/dev/dpaa/fman_fdt.c index 4d3723b6de4d..88f08269f086 100644 --- a/sys/dev/dpaa/fman_fdt.c +++ b/sys/dev/dpaa/fman_fdt.c @@ -34,10 +34,8 @@ #include <dev/ofw/ofw_bus.h> #include <dev/ofw/ofw_bus_subr.h> -#include <contrib/ncsw/inc/ncsw_ext.h> -#include <contrib/ncsw/inc/enet_ext.h> - #include "fman.h" +#include "fman_if.h" #define FFMAN_DEVSTR "Freescale Frame Manager" @@ -56,6 +54,12 @@ static device_method_t fman_methods[] = { DEVMETHOD(bus_alloc_resource, fman_alloc_resource), DEVMETHOD(bus_activate_resource, fman_activate_resource), DEVMETHOD(bus_release_resource, fman_release_resource), + + DEVMETHOD(fman_get_revision, fman_get_revision), + DEVMETHOD(fman_reset_mac, fman_reset_mac), + DEVMETHOD(fman_set_port_params, fman_set_port_params), + DEVMETHOD(fman_get_qman_channel_id, fman_qman_channel_id), + 
DEVMETHOD_END }; @@ -92,8 +96,8 @@ fman_get_clock(struct fman_softc *sc) if ((OF_getprop(node, "clock-frequency", &fman_clock, sizeof(fman_clock)) <= 0) || (fman_clock == 0)) { - device_printf(dev, "could not acquire correct frequency " - "from DTS\n"); + device_printf(dev, + "could not acquire correct frequency from DTS\n"); return (0); } diff --git a/sys/dev/dpaa/fman_if.m b/sys/dev/dpaa/fman_if.m new file mode 100644 index 000000000000..e7412ca64195 --- /dev/null +++ b/sys/dev/dpaa/fman_if.m @@ -0,0 +1,38 @@ +# +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2026 Justin Hibbits + +#include <machine/bus.h> +#include <dev/dpaa/fman.h> + +/** + * @brief DPAA FMan interface + * + */ +INTERFACE fman; + +METHOD void get_revision { + device_t dev; + int *major; + int *minor; +}; + +METHOD size_t get_bmi_max_fifo_size { + device_t dev; +}; + +METHOD int get_qman_channel_id { + device_t dev; + int port_id; +}; + +METHOD int reset_mac { + device_t dev; + int mac_id; +}; + +METHOD int set_port_params { + device_t dev; + struct fman_port_init_params *params; +}; diff --git a/sys/dev/dpaa/fman_mdio.c b/sys/dev/dpaa/fman_mdio.c index 8df716fc5571..d32b13ad0a79 100644 --- a/sys/dev/dpaa/fman_mdio.c +++ b/sys/dev/dpaa/fman_mdio.c @@ -46,17 +46,15 @@ #include <dev/ofw/ofw_bus.h> #include <dev/ofw/ofw_bus_subr.h> -#include <contrib/ncsw/inc/Peripherals/fm_ext.h> - #include "fman.h" #include "miibus_if.h" #define MDIO_LOCK() mtx_lock(&sc->sc_lock) #define MDIO_UNLOCK() mtx_unlock(&sc->sc_lock) -#define MDIO_WRITE4(sc,r,v) \ - bus_space_write_4(&bs_be_tag, sc->sc_handle, sc->sc_offset + r, v) +#define MDIO_WRITE4(sc, r, v) \ + bus_write_4(sc->sc_res, r, v) #define MDIO_READ4(sc, r) \ - bus_space_read_4(&bs_be_tag, sc->sc_handle, sc->sc_offset + r) + bus_read_4(sc->sc_res, r) #define MDIO_MIIMCFG 0x0 #define MDIO_MIIMCOM 0x4 @@ -76,8 +74,7 @@ static int pqmdio_miibus_writereg(device_t dev, int phy, int reg, int value); struct pqmdio_softc { struct mtx sc_lock; - 
bus_space_handle_t sc_handle; - int sc_offset; + struct resource *sc_res; }; static device_method_t pqmdio_methods[] = { @@ -123,13 +120,10 @@ static int pqmdio_fdt_attach(device_t dev) { struct pqmdio_softc *sc; - rman_res_t start, count; sc = device_get_softc(dev); - fman_get_bushandle(device_get_parent(dev), &sc->sc_handle); - bus_get_resource(dev, SYS_RES_MEMORY, 0, &start, &count); - sc->sc_offset = start; + sc->sc_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 0, RF_ACTIVE); OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); diff --git a/sys/dev/dpaa/fman_muram.c b/sys/dev/dpaa/fman_muram.c new file mode 100644 index 000000000000..a3df993dbb98 --- /dev/null +++ b/sys/dev/dpaa/fman_muram.c @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/bus.h> +#include <sys/rman.h> +#include <sys/malloc.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include <machine/bus.h> + +#include "opt_platform.h" + +#include <powerpc/mpc85xx/mpc85xx.h> + +#include "fman.h" + +struct fman_muram_softc { + struct resource *sc_mem; + vmem_t sc_vmem; +}; + +static int +fman_muram_probe(device_t dev) +{ + if (!ofw_bus_is_compatible(dev, "fsl,fman-muram")) + return (ENXIO); + + device_set_desc(dev, "FMan MURAM"); + + return (BUS_PROBE_DEFAULT); +} + +static int +fman_muram_attach(device_t dev) +{ + struct fman_muram_softc *sc = device_get_softc(dev); + + sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 0, + RF_ACTIVE | RF_SHAREABLE); + + if (sc->sc_mem == NULL) { + device_printf(dev, "cannot allocate memory\n"); + return (ENXIO); + } + sc->sc_vmem = vmem_create("MURAM", rman_get_bushandle(sc->sc_mem), + rman_get_size(sc->sc_mem), +} + +static device_method_t muram_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, fman_muram_probe), + 
DEVMETHOD(device_attach, fman_muram_attach), + DEVMETHOD(device_detach, fman_muram_detach), + + DEVMETHOD_END +}; + +DEFINE_CLASS_0(fman_muram, fman_muram_driver, muram_methods, + sizeof(struct fman_muram_softc)); +EARLY_DRIVER_MODULE(fman_muram, fman, fman_muram_driver, 0, 0, + BUS_PASS_SUPPORTDEV); diff --git a/sys/dev/dpaa/fman_parser.h b/sys/dev/dpaa/fman_parser.h new file mode 100644 index 000000000000..aff95b1ae9d9 --- /dev/null +++ b/sys/dev/dpaa/fman_parser.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef DPAA_FMAN_PARSER_H +#define DPAA_FMAN_PARSER_H + +#define FMAN_PARSE_RESULT_OFF 32 +#define L3R_FIRST_IPV4 0x8000 +#define L3R_FIRST_IPV6 0x4000 +#define L3R_FIRST_IP_M (L3R_FIRST_IPV4 | L3R_FIRST_IPV6) +#define L3R_LAST_IPV4 0x8000 +#define L3R_LAST_IPV6 0x4000 +#define L3R_LAST_IP_M (L3R_LAST_IPV4 | L3R_LAST_IPV6) +#define L3R_FIRST_ERROR 0x2000 +#define L3R_LAST_ERROR 0x0080 +#define L4R_TYPE_M 0xe0 +#define L4R_TYPE_TCP 0x20 +#define L4R_TYPE_UDP 0x40 +#define L4R_TYPE_IPSEC 0x60 +#define L4R_TYPE_SCTP 0x80 +#define L4R_DCCP 0xa0 +#define L4R_ERR 0x10 + +#endif diff --git a/sys/dev/dpaa/fman_port.c b/sys/dev/dpaa/fman_port.c new file mode 100644 index 000000000000..384d13a7b3a7 --- /dev/null +++ b/sys/dev/dpaa/fman_port.c @@ -0,0 +1,697 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <machine/bus.h> +#include "fman.h" +#include "fman_parser.h" +#include "fman_port.h" +#include "fman_if.h" +#include "fman_port_if.h" + +struct fman_port_rsrc { + uint32_t num; + uint32_t extra; +}; + +#define MAX_BM_POOLS 64 +struct fman_port_softc { + device_t sc_dev; + struct resource *sc_mem; + int sc_port_id; + int sc_port_speed; + int sc_port_type; + + int sc_revision_major; + int sc_revision_minor; + + int 
sc_max_frame_length; + int sc_bm_max_pools; + int sc_max_port_fifo_size; + int sc_qman_channel_id; + + int sc_deq_byte_count; + int sc_deq_high_priority; + int sc_tx_deq_pipeline_depth; + + int sc_default_fqid; + int sc_err_fqid; + int pcd_base_fqid; + int pcd_fqs_count; + + int sc_max_ext_portals; + int sc_max_sub_portals; + + struct fman_port_rsrc sc_open_dmas; + struct fman_port_rsrc sc_tasks; + struct fman_port_rsrc sc_fifo_bufs; + + struct fman_port_buffer_pool sc_bpools[FMAN_PORT_MAX_POOLS]; +}; + +#define TX_10G_PORT_BASE 0x30 +#define RX_10G_PORT_BASE 0x10 + +#define FMAN_PORT_TYPE_TX 0 +#define FMAN_PORT_TYPE_RX 1 + +#define PORT_RX 0x01 +#define PORT_TX 0x02 +#define PORT_V3 0x04 + +#define FMBM_RCFG 0x000 +#define BMI_PORT_CFG_EN 0x80000000 +#define FMBM_RST 0x004 +#define FMBM_RDA 0x008 +#define RDA_WOPT 0x00100000 +#define FMBM_RFP 0x00c +#define FMBM_RFED 0x010 +#define BMI_RX_FRAME_END_CUT_SHIFT 16 +#define FMBM_RICP 0x014 /* Counts are units of 16 bytes */ +#define RICP_ICEOF_M 0x001f0000 +#define RICP_ICEOF_S 16 +#define RICP_ICIOF_M 0x00000f00 +#define RICP_ICIOF_S 8 +#define RICP_ICSZ_S 0x0000001f +#define FMBM_RIM 0x018 +#define FMBM_REBM 0x01c +#define REBM_BSM_M 0x01ff0000 +#define REBM_BSM_S 16 +#define REBM_BEM_M 0x000001ff +#define FMBM_RFNE 0x020 +#define FMBM_RFCA 0x024 +#define RFCA_OR 0x80000000 +#define RFCA_COLOR 0x0c000000 +#define RFCA_SYNC 0x03000000 +#define RFCA_SYNC_REQ 0x02000000 +#define RFCA_MR 0x003f0000 +#define RFCA_MR_DEF 0x003c0000 +#define FMBM_RFPNE 0x028 +#define FMBM_RETH 0x038 +#define RETH_ETHE 0x80000000 /* Excessive Threshold Enable */ +#define FMBM_RFQID 0x060 +#define FMBM_REFQID 0x064 +#define FMBM_RFSDM 0x068 +#define FMBM_RFSEM 0x06c +#define FMBM_RFENE 0x070 +#define FMBM_REBMPI(i) (0x100 + (4 * (i))) +#define REBMPI_VAL 0x80000000 +#define REBMPI_ACE 0x40000000 +#define REBMPI_BPID_S 16 +#define FMBM_RSTC 0x0200 +#define RSTC_EN 0x80000000 + +#define FMBM_TCFG 0x000 +#define FMBM_TST 0x004 +#define 
FMBM_TDA 0x008 +#define FMBM_TFP 0x00c +#define BMI_FIFO_PIPELINE_DEPTH_SHIFT 12 +#define FMBM_TFED 0x010 +#define FMBM_TICP 0x014 +#define TICP_ICEOF_M 0x001f0000 +#define TICP_ICEOF_S 16 +#define TICP_ICIOF_M 0x00000f00 +#define TICP_ICIOF_S 8 +#define TICP_ICSZ_S 0x0000001f +#define FMBM_TFDNE 0x018 +#define FMBM_TFCA 0x01c +#define TFCA_MR_DEF 0 +#define TFCA_ATTR_ORDER 0x80000000 +#define FMBM_TCFQID 0x020 +#define FMBM_TEFQID 0x024 +#define FMBM_TFENE 0x028 +#define FMBM_TFNE 0x070 +#define TFNE_EBD 0x80000000 + +#define FMQM_PNC 0x400 +#define PNC_EN 0x80000000 +#define PNC_STEN 0x80000000 +#define FMQM_PNS 0x404 +#define PNS_DEQ_FD_BSY 0x20000000 +#define FMQM_PNEN 0x41c +#define FMQM_PNDN 0x42c +#define FMQM_PNDC 0x430 +#define QMI_DEQ_CFG_PRI 0x80000000 +#define QMI_DEQ_CFG_TYPE1 0x10000000 +#define QMI_DEQ_CFG_TYPE2 0x20000000 +#define QMI_DEQ_CFG_TYPE3 0x30000000 +#define QMI_DEQ_CFG_PREFETCH_PARTIAL 0x01000000 +#define QMI_DEQ_CFG_PREFETCH_FULL 0x03000000 +#define QMI_DEQ_CFG_SP_MASK 0xf +#define QMI_DEQ_CFG_SP_SHIFT 20 + +#define HWP_PCAC 0xbf8 +#define HWP_PCAC_PSTOP 0x00000001 +#define HWP_HXS_PCAC_PSTAT 0x00000100 +#define HWP_HXS_SSA(x) (0x800 + x * 2 * sizeof(uint32_t)) +#define HWP_HXS_LCV(x) (0x800 + (x * 2 + 1) * sizeof(uint32_t)) +#define HWP_HXS_TCP 0xA +#define HWP_HXS_UDP 0xB +#define HXS_SH_PAD_REM 0x80000000 +#define HWP_HXS_COUNT 16 + +#define PORT_MAX_FRAME_LENGTH 9600 + +#define NIA_ORDER_RESTORE 0x00800000 +#define NIA_ENG_BMI 0x00500000 +#define NIA_ENG_QMI_DEQ 0x00580000 +#define NIA_ENG_QMI_ENQ 0x00540000 +#define NIA_ENG_HWP 0x00440000 +#define NIA_ENG_HWK 0x00480000 +#define NIA_BMI_AC_TX_RELEASE 0x000002c0 +#define NIA_BMI_AC_TX 0x00000274 +#define NIA_BMI_AC_RELEASE 0x000000c0 +#define NIA_BMI_AC_ENQ_FRAME 0x00000002 +#define NIA_BMI_AC_FETCH_ALLFRAME 0x0000020c + +#define BMI_RX_ERR (FM_FD_ERR_DMA | FM_FD_ERR_FPE | \ + FM_FD_ERR_FSE | FM_FD_ERR_DIS | \ + FM_FD_ERR_EOF | FM_FD_ERR_NSS | \ + FM_FD_ERR_KSO | FM_FD_ERR_IPP | \ + 
FM_FD_ERR_PTE | FM_FD_ERR_PHE | \
+	FM_FD_ERR_BLE)
+
+/* Default configurations */
+#define DEFAULT_RX_CUT_END_BYTES 4
+
+static struct ofw_compat_data compats[] = {
+	{ "fsl,fman-v2-port-rx", PORT_RX },
+	{ "fsl,fman-v2-port-tx", PORT_TX },
+	{ "fsl,fman-v3-port-rx", PORT_V3 | PORT_RX },
+	{ "fsl,fman-v3-port-tx", PORT_V3 | PORT_TX },
+	{ NULL, 0 }
+};
+
+/* Match against the FMan v2/v3 rx/tx port compatible strings. */
+static int
+fman_port_probe(device_t dev)
+{
+	if (ofw_bus_search_compatible(dev, compats)->ocd_str == NULL)
+		return (ENXIO);
+
+	device_set_desc(dev, "FMan port");
+
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Attach a single BMI/QMI port: map its registers, determine its type
+ * (rx/tx) and speed from the FDT, and record per-speed dequeue defaults.
+ */
+static int
+fman_port_attach(device_t dev)
+{
+	struct fman_port_softc *sc;
+	phandle_t node;
+	pcell_t cell;
+	uintptr_t compat_data =
+	    ofw_bus_search_compatible(dev, compats)->ocd_data;
+	int port_speed = 1000;
+	int port_type;
+
+	sc = device_get_softc(dev);
+	sc->sc_dev = dev;
+
+	node = ofw_bus_get_node(dev);
+	if (OF_getencprop(node, "cell-index", &cell, sizeof(cell)) < 0) {
+		device_printf(dev, "No cell-index property\n");
+		return (ENXIO);
+	}
+
+	sc->sc_port_id = cell;
+
+	sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 0,
+	    RF_ACTIVE | RF_SHAREABLE);
+
+	if (sc->sc_mem == NULL) {
+		device_printf(dev, "failed to allocate MMIO\n");
+		return (ENXIO);
+	}
+
+	FMAN_GET_REVISION(device_get_parent(dev),
+	    &sc->sc_revision_major, &sc->sc_revision_minor);
+
+	if ((compat_data & PORT_TX) == PORT_TX)
+		port_type = FMAN_PORT_TYPE_TX;
+	else
+		port_type = FMAN_PORT_TYPE_RX;
+
+	if ((compat_data & PORT_V3) == PORT_V3) {
+		if (OF_hasprop(node, "fsl,fman-10g-port"))
+			port_speed = 10000;
+	} else {
+		/*
+		 * NOTE(review): should these comparisons be >= ?  The
+		 * *_10G_PORT_BASE names suggest the base id itself is a
+		 * 10G port -- confirm against the FMan port numbering.
+		 */
+		if ((compat_data & PORT_TX) &&
+		    sc->sc_port_id > TX_10G_PORT_BASE)
+			port_speed = 10000;
+		else if ((compat_data & PORT_RX) &&
+		    sc->sc_port_id > RX_10G_PORT_BASE)
+			port_speed = 10000;
+	}
+
+	/*
+	 * Test the locally computed port_speed: sc->sc_port_speed is not
+	 * assigned until below, so testing the softc field here would
+	 * always read 0 and select the 1G dequeue defaults even for
+	 * 10G ports.
+	 */
+	if (port_speed == 10000) {
+		sc->sc_deq_high_priority = true;
+		sc->sc_deq_byte_count = 0x1400;
+	} else {
+		sc->sc_deq_high_priority = false;
+		sc->sc_deq_byte_count = 0x0400;
+	}
+
+	sc->sc_port_type = port_type;
+	
sc->sc_port_speed = port_speed; + + sc->sc_bm_max_pools = MAX_BM_POOLS; + sc->sc_max_frame_length = PORT_MAX_FRAME_LENGTH; + + if (port_type == FMAN_PORT_TYPE_TX) + sc->sc_qman_channel_id = + FMAN_GET_QMAN_CHANNEL_ID(device_get_parent(dev), + sc->sc_port_id); + + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + + return (0); +} + +static int +fman_port_detach(device_t dev) +{ + struct fman_port_softc *sc = device_get_softc(dev); + + if (sc->sc_mem != NULL) + bus_release_resource(dev, sc->sc_mem); + + return (0); +} + +static int +fman_port_config(device_t dev, struct fman_port_params *params) +{ + struct fman_port_softc *sc = device_get_softc(dev); + + sc->sc_default_fqid = params->dflt_fqid; + sc->sc_err_fqid = params->err_fqid; + + sc->sc_max_port_fifo_size = + FMAN_GET_BMI_MAX_FIFO_SIZE(device_get_parent(dev)); + switch (sc->sc_revision_major) { + case 2: + case 3: + sc->sc_max_ext_portals = 4; + sc->sc_max_sub_portals = 12; + break; + case 6: + sc->sc_max_ext_portals = 8; + sc->sc_max_sub_portals = 16; + break; + } + if (sc->sc_revision_major >= 6 && + sc->sc_port_type == FMAN_PORT_TYPE_TX && sc->sc_port_speed == 1000) + /* Errata A005127 workaround */ + bus_write_4(sc->sc_mem, FMBM_TFP, 0x00001013); + + sc->sc_tasks.extra = 0; + + switch (sc->sc_port_speed) { + case 10000: + if (sc->sc_revision_major < 6) { + sc->sc_tasks.num = 16; + if (sc->sc_port_type == FMAN_PORT_TYPE_RX) + sc->sc_tasks.extra = 8; + } else + sc->sc_tasks.num = 14; + break; + case 1000: + if (sc->sc_revision_major >= 6) + sc->sc_tasks.num = 4; + else { + sc->sc_tasks.num = 3; + if (sc->sc_port_type == FMAN_PORT_TYPE_RX) + sc->sc_tasks.extra = 2; + } + break; + default: + sc->sc_tasks.num = 0; + break; + } + + /* Open DMAs */ + if (sc->sc_revision_major >= 6) { + sc->sc_open_dmas.extra = 0; + if (sc->sc_port_speed == 10000) { + if (sc->sc_port_type == FMAN_PORT_TYPE_TX) + sc->sc_open_dmas.num = 12; + else + sc->sc_open_dmas.num = 8; + } else { + if (sc->sc_port_type 
== FMAN_PORT_TYPE_TX) + sc->sc_open_dmas.num = 3; + else + sc->sc_open_dmas.num = 2; + } + } else { + if (sc->sc_port_speed == 10000) { + sc->sc_open_dmas.num = 8; + sc->sc_open_dmas.num = 8; + } else { + sc->sc_open_dmas.num = 1; + sc->sc_open_dmas.extra = 1; + } + } + + /* FIFO bufs */ + if (sc->sc_revision_major >= 6) { + if (sc->sc_port_type == FMAN_PORT_TYPE_TX) + if (sc->sc_port_speed == 10000) + sc->sc_fifo_bufs.num = 64; + else + sc->sc_fifo_bufs.num = 50; + else + if (sc->sc_port_speed == 10000) + sc->sc_fifo_bufs.num = 96; + else + sc->sc_fifo_bufs.num = 50; + } else { + if (sc->sc_port_type == FMAN_PORT_TYPE_TX) + if (sc->sc_port_speed == 10000) + sc->sc_fifo_bufs.num = 48; + else + sc->sc_fifo_bufs.num = 44; + else + if (sc->sc_port_speed == 10000) + sc->sc_fifo_bufs.num = 48; + else + sc->sc_fifo_bufs.num = 45; + } + + sc->sc_fifo_bufs.extra = 0; + sc->sc_fifo_bufs.num *= FMAN_BMI_FIFO_UNITS; + + if (sc->sc_port_type == FMAN_PORT_TYPE_RX) + for (int i = 0; i < params->rx_params.num_pools; i++) + sc->sc_bpools[i] = params->rx_params.bpools[i]; + + /* TODO: buf_margins? See fman_sp_build_buffer_struct */ + + return (0); +} + +static int +fman_port_init_bmi_rx(struct fman_port_softc *sc) +{ + uint32_t reg; + + /* TODO: Sort the buffer pool list. 
*/ + /* TODO: Backup pools */ + /* TODO: Depletion mode */ + for (int i = 0; i < FMAN_PORT_MAX_POOLS; i++) { + /* Initialize the external pool info */ + if (sc->sc_bpools[i].size != 0) { + bus_write_4(sc->sc_mem, FMBM_REBMPI(i), + REBMPI_VAL | REBMPI_ACE | + (sc->sc_bpools[i].bpid << REBMPI_BPID_S) | + sc->sc_bpools[i].size); + } else + /* Mark invalid if zero */ + bus_write_4(sc->sc_mem, FMBM_REBMPI(i), 0); + } + + bus_write_4(sc->sc_mem, FMBM_RDA, RDA_WOPT); + + bus_write_4(sc->sc_mem, FMBM_RFCA, + RFCA_OR | RFCA_SYNC_REQ | RFCA_MR_DEF); + + bus_write_4(sc->sc_mem, FMBM_RFPNE, + NIA_ENG_BMI | NIA_BMI_AC_ENQ_FRAME); + bus_write_4(sc->sc_mem, FMBM_RFENE, + NIA_ENG_QMI_ENQ | NIA_ORDER_RESTORE); + + bus_write_4(sc->sc_mem, FMBM_RFQID, sc->sc_default_fqid); + bus_write_4(sc->sc_mem, FMBM_REFQID, sc->sc_err_fqid); + + if (sc->sc_revision_major < 6) + bus_write_4(sc->sc_mem, FMBM_RETH, RETH_ETHE); + + /* Errata A006320 makes CFED field bad */ + if (sc->sc_revision_major == 6 && (sc->sc_revision_minor == 0)) + /* These are under errata A006320 */; + else + bus_write_4(sc->sc_mem, FMBM_RFED, + DEFAULT_RX_CUT_END_BYTES << BMI_RX_FRAME_END_CUT_SHIFT); + + /* Insert internal context ahead of the frame */ + reg = sizeof(struct fman_internal_context) << REBM_BSM_S; + bus_write_4(sc->sc_mem, FMBM_REBM, reg); + reg = howmany(FMAN_PARSE_RESULT_OFF, 0x10) << RICP_ICIOF_S; + reg |= howmany(sizeof(struct fman_internal_context), 0x10); + bus_write_4(sc->sc_mem, FMBM_RICP, reg); + + bus_write_4(sc->sc_mem, FMBM_RFNE, NIA_ENG_HWP); + bus_write_4(sc->sc_mem, FMBM_RFSDM, FM_FD_ERR_DIS); + bus_write_4(sc->sc_mem, FMBM_RFSEM, BMI_RX_ERR & ~FM_FD_ERR_DIS); + + return (0); +} + +static int +fman_port_init_bmi_tx(struct fman_port_softc *sc) +{ + uint32_t reg; + int depth; + + bus_write_4(sc->sc_mem, FMBM_TCFG, 0); + bus_write_4(sc->sc_mem, FMBM_TDA, 0); + bus_write_4(sc->sc_mem, FMBM_TFED, 0); + if (sc->sc_port_speed == 10000) + depth = 4; + else if (sc->sc_revision_major >= 6) + depth = 2; + 
else + depth = 1; + sc->sc_tx_deq_pipeline_depth = depth; + reg = ((depth - 1) << BMI_FIFO_PIPELINE_DEPTH_SHIFT) | 0x13; + bus_write_4(sc->sc_mem, FMBM_TFP, reg); + + /* Default color: green */ + bus_write_4(sc->sc_mem, FMBM_TFCA, + TFCA_MR_DEF | TFCA_ATTR_ORDER); + + bus_write_4(sc->sc_mem, FMBM_TFDNE, NIA_ENG_QMI_DEQ); + bus_write_4(sc->sc_mem, FMBM_TFENE, + NIA_ENG_QMI_ENQ | NIA_ORDER_RESTORE); + + /* Insert internal context ahead of the frame */ + reg = howmany(FMAN_PARSE_RESULT_OFF, 0x10) << TICP_ICIOF_S; + reg |= howmany(sizeof(struct fman_internal_context), 0x10); + bus_write_4(sc->sc_mem, FMBM_TICP, reg); + + if (sc->sc_revision_major >= 6) + bus_write_4(sc->sc_mem, FMBM_TFNE, + (sc->sc_default_fqid == 0 ? TFNE_EBD : 0) | + NIA_BMI_AC_FETCH_ALLFRAME); + bus_write_4(sc->sc_mem, FMBM_TCFQID, sc->sc_default_fqid); + bus_write_4(sc->sc_mem, FMBM_TEFQID, sc->sc_err_fqid); + + return (0); +} + +static int +fman_port_init_hwp(struct fman_port_softc *sc) +{ + int i; + + /* Stop the parser so we can initialize it for our uses */ + bus_write_4(sc->sc_mem, HWP_PCAC, HWP_PCAC_PSTOP); + + for (i = 0; i < 100 && + (bus_read_4(sc->sc_mem, HWP_PCAC) & HWP_HXS_PCAC_PSTAT) != 0; i++) { + DELAY(10); + } + if (i == 100) { + device_printf(sc->sc_dev, "Timeout stopping HW parser\n"); + return (ENXIO); + } + + /* set the parser examination config */ + for (i = 0; i < HWP_HXS_COUNT; i++) { + bus_write_4(sc->sc_mem, HWP_HXS_SSA(i), 0); + bus_write_4(sc->sc_mem, HWP_HXS_LCV(i), 0xffffffff); + } + bus_write_4(sc->sc_mem, HWP_HXS_SSA(HWP_HXS_TCP), HXS_SH_PAD_REM); + bus_write_4(sc->sc_mem, HWP_HXS_SSA(HWP_HXS_UDP), HXS_SH_PAD_REM); + + /* Re-enable the parser */ + bus_write_4(sc->sc_mem, HWP_PCAC, 0); + + return (0); +} + +static int +fman_port_init_qmi(struct fman_port_softc *sc) +{ + uint32_t reg; + + if (sc->sc_port_type == FMAN_PORT_TYPE_RX) { + bus_write_4(sc->sc_mem, FMQM_PNEN, + NIA_ENG_BMI | NIA_BMI_AC_RELEASE); + return (0); + } + + /* TX port */ + bus_write_4(sc->sc_mem, 
FMQM_PNDN, + NIA_ENG_BMI | NIA_BMI_AC_TX); + /* TX port */ + bus_write_4(sc->sc_mem, FMQM_PNEN, + NIA_ENG_BMI | NIA_BMI_AC_TX_RELEASE); + + reg = 0; + + if (sc->sc_deq_high_priority) + reg |= QMI_DEQ_CFG_PRI; + + reg |= QMI_DEQ_CFG_TYPE1; + reg |= QMI_DEQ_CFG_PREFETCH_FULL; + reg |= (sc->sc_qman_channel_id & QMI_DEQ_CFG_SP_MASK) << QMI_DEQ_CFG_SP_SHIFT; + reg |= sc->sc_deq_byte_count; + bus_write_4(sc->sc_mem, FMQM_PNDC, reg); + + return (0); +} + +static int +fman_port_init(device_t dev) +{ + struct fman_port_init_params params; + struct fman_port_softc *sc = device_get_softc(dev); + int err; + + if (sc->sc_port_type == FMAN_PORT_TYPE_RX) { + /* Set up RX buffers and fifo */ + } + params.port_id = sc->sc_port_id; + params.is_rx_port = (sc->sc_port_type == FMAN_PORT_TYPE_RX); + params.num_tasks = sc->sc_tasks.num; + params.extra_tasks = sc->sc_tasks.extra; + params.open_dmas = sc->sc_open_dmas.num; + params.extra_dmas = sc->sc_open_dmas.extra; + params.fifo_size = sc->sc_fifo_bufs.num; + params.extra_fifo_size = sc->sc_fifo_bufs.extra; + params.max_frame_length = sc->sc_max_frame_length; + params.deq_pipeline_size = sc->sc_tx_deq_pipeline_depth; + + /* TODO: verify_size_of_fifo() from Linux driver */ + err = FMAN_SET_PORT_PARAMS(device_get_parent(dev), ¶ms); + + if (err != 0) + return (err); + + if (sc->sc_port_type == FMAN_PORT_TYPE_TX) + err = fman_port_init_bmi_tx(sc); + else { + err = fman_port_init_bmi_rx(sc); + if (err == 0) + fman_port_init_hwp(sc); + } + + if (err != 0) + return (err); + + err = fman_port_init_qmi(sc); + + /* TODO: keygen here */ + + return (err); +} + +static int +fman_port_disable(device_t dev) +{ + struct fman_port_softc *sc; + uint32_t reg; + int count; + + sc = device_get_softc(dev); + + switch (sc->sc_port_type) { + case FMAN_PORT_TYPE_TX: + reg = bus_read_4(sc->sc_mem, FMQM_PNC); + bus_write_4(sc->sc_mem, FMQM_PNC, reg & ~PNC_EN); + for (count = 0; count < 100; count++) { + DELAY(10); + reg = bus_read_4(sc->sc_mem, FMQM_PNS); + if 
(!(reg & PNS_DEQ_FD_BSY)) + break; + } + if (count == 100) + device_printf(sc->sc_dev, "Timeout stopping QMI\n"); + reg = bus_read_4(sc->sc_mem, FMBM_TCFG); + bus_write_4(sc->sc_mem, FMBM_TCFG, reg & ~BMI_PORT_CFG_EN); + for (count = 0; count < 100; count++) { + DELAY(10); + reg = bus_read_4(sc->sc_mem, FMBM_TST); + if (!(reg & PNS_DEQ_FD_BSY)) + break; + } + if (count == 100) + device_printf(sc->sc_dev, "Timeout stopping BMI"); + break; + case FMAN_PORT_TYPE_RX: + reg = bus_read_4(sc->sc_mem, FMBM_RCFG); + bus_write_4(sc->sc_mem, FMBM_RCFG, reg & ~BMI_PORT_CFG_EN); + for (count = 0; count < 100; count++) { + DELAY(10); + reg = bus_read_4(sc->sc_mem, FMBM_RST); + if (!(reg & PNS_DEQ_FD_BSY)) + break; + } + if (count == 100) + device_printf(sc->sc_dev, "Timeout stopping BMI"); + break; + } + + return (0); +} + +static int +fman_port_enable(device_t dev) +{ + struct fman_port_softc *sc; + uint32_t reg; + + sc = device_get_softc(dev); + switch (sc->sc_port_type) { + case FMAN_PORT_TYPE_TX: + reg = bus_read_4(sc->sc_mem, FMQM_PNC); + bus_write_4(sc->sc_mem, FMQM_PNC, reg | PNC_EN | PNC_STEN); + reg = bus_read_4(sc->sc_mem, FMBM_TCFG); + bus_write_4(sc->sc_mem, FMBM_TCFG, reg | BMI_PORT_CFG_EN); + break; + case FMAN_PORT_TYPE_RX: + reg = bus_read_4(sc->sc_mem, FMBM_RCFG); + bus_write_4(sc->sc_mem, FMQM_PNC, reg | PNC_EN | PNC_STEN); + bus_write_4(sc->sc_mem, FMBM_RCFG, reg | BMI_PORT_CFG_EN); + bus_write_4(sc->sc_mem, FMBM_RSTC, RSTC_EN); + break; + } + + return (0); +} + +static device_method_t fman_port_methods[] = { + DEVMETHOD(device_probe, fman_port_probe), + DEVMETHOD(device_attach, fman_port_attach), + DEVMETHOD(device_detach, fman_port_detach), + + DEVMETHOD(fman_port_config, fman_port_config), + DEVMETHOD(fman_port_init, fman_port_init), + DEVMETHOD(fman_port_enable, fman_port_enable), + DEVMETHOD(fman_port_disable, fman_port_disable), + + DEVMETHOD_END +}; + +DEFINE_CLASS_0(fman_port, fman_port_driver, fman_port_methods, + sizeof(struct fman_port_softc)); 
+EARLY_DRIVER_MODULE(fman_port, fman, fman_port_driver, 0, 0, + BUS_PASS_SUPPORTDEV + BUS_PASS_ORDER_MIDDLE); diff --git a/sys/dev/dpaa/fman_port.h b/sys/dev/dpaa/fman_port.h new file mode 100644 index 000000000000..8803c5e5f7c9 --- /dev/null +++ b/sys/dev/dpaa/fman_port.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef DPAA_FMAN_PORT_H +#define DPAA_FMAN_PORT_H + +#define FMAN_PORT_MAX_POOLS 4 +struct fman_port_buffer_pool { + uint8_t bpid; + uint16_t size; +}; + +struct fman_port_params { + uint32_t dflt_fqid; /* Must not be 0 */ + uint32_t err_fqid; + union { + struct { + int num_pools; + struct fman_port_buffer_pool bpools[FMAN_PORT_MAX_POOLS]; + } rx_params; + struct { + } tx_params; + }; +}; + +#endif diff --git a/sys/dev/dpaa/fman_port_if.m b/sys/dev/dpaa/fman_port_if.m new file mode 100644 index 000000000000..fe6159340c2c --- /dev/null +++ b/sys/dev/dpaa/fman_port_if.m @@ -0,0 +1,33 @@ +# +# Copyright (c) 2026 Justin Hibbits +# +# SPDX-License-Identifier: BSD-2-Clause + +#include <machine/bus.h> +#include <dev/dpaa/fman_port.h> + +/** + * @brief DPAA FMan Port interface + * + */ +INTERFACE fman_port; + +/** + * @brief Configure the port for a specific purpose + */ +METHOD int config { + device_t dev; + struct fman_port_params *params; +}; + +METHOD int init { + device_t dev; +}; + +METHOD int disable { + device_t dev; +}; + +METHOD int enable { + device_t dev; +}; diff --git a/sys/dev/dpaa/fman_xmdio.c b/sys/dev/dpaa/fman_xmdio.c new file mode 100644 index 000000000000..521c30860dc5 --- /dev/null +++ b/sys/dev/dpaa/fman_xmdio.c @@ -0,0 +1,284 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/mutex.h> +#include <sys/resource.h> +#include <sys/socket.h> + +#include <machine/bus.h> + +#include <net/if.h> 
+#include <net/if_media.h> +#include <net/if_types.h> +#include <net/if_var.h> + +#include <dev/mdio/mdio.h> +#include <dev/mii/mii.h> +#include <dev/mii/miivar.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include "fman.h" +#include "miibus_if.h" +#include "mdio_if.h" + +#define MDIO_LOCK() mtx_lock(&sc->sc_lock) +#define MDIO_UNLOCK() mtx_unlock(&sc->sc_lock) +#define MDIO_WRITE4(sc, r, v) \ + bus_write_4(sc->sc_res, r, v) +#define MDIO_READ4(sc, r) \ + bus_read_4(sc->sc_res, r) + +#define MDIO_CFG 0x30 +#define CFG_ENC45 0x00000040 +#define MDIO_STAT 0x30 +#define STAT_BUSY 0x80000000 +#define STAT_MDIO_RD_ER 0x00000002 +#define MDIO_CTL 0x34 +#define CTL_READ 0x00008000 +#define MDIO_DATA 0x38 +#define MDIO_ADDR 0x3c + +static int xmdio_fdt_probe(device_t dev); +static int xmdio_fdt_attach(device_t dev); +static int xmdio_detach(device_t dev); +static int xmdio_miibus_readreg(device_t dev, int phy, int reg); +static int xmdio_miibus_writereg(device_t dev, int phy, int reg, int value); +static int xmdio_mdio_readextreg(device_t dev, int phy, int devad, int reg); +static int xmdio_mdio_writeextreg(device_t dev, int phy, int devad, int reg, + int val); + +struct xmdio_softc { + struct mtx sc_lock; + struct resource *sc_res; +}; + +static struct ofw_compat_data mdio_compat_data[] = { + {"fsl,fman-memac-mdio", 0}, + {"fsl,fman-xmdio", 0}, + {NULL, 0} +}; + +static device_method_t xmdio_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xmdio_fdt_probe), + DEVMETHOD(device_attach, xmdio_fdt_attach), + DEVMETHOD(device_detach, xmdio_detach), + DEVMETHOD(bus_add_child, bus_generic_add_child), + + /* MII interface */ + DEVMETHOD(miibus_readreg, xmdio_miibus_readreg), + DEVMETHOD(miibus_writereg, xmdio_miibus_writereg), + + /* MDIO interface */ + DEVMETHOD(mdio_readreg, xmdio_miibus_readreg), + DEVMETHOD(mdio_writereg, xmdio_miibus_writereg), + DEVMETHOD(mdio_readextreg, xmdio_mdio_readextreg), + DEVMETHOD(mdio_writeextreg, 
xmdio_mdio_writeextreg), + + DEVMETHOD_END +}; + +static driver_t xmdio_driver = { + "xmdio", + xmdio_methods, + sizeof(struct xmdio_softc), +}; + +EARLY_DRIVER_MODULE(xmdio, fman, xmdio_driver, 0, 0, + BUS_PASS_SUPPORTDEV); +DRIVER_MODULE(miibus, xmdio, miibus_driver, 0, 0); +DRIVER_MODULE(mdio, xmdio, mdio_driver, 0, 0); +MODULE_DEPEND(xmdio, miibus, 1, 1, 1); + +static int +xmdio_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_search_compatible(dev, mdio_compat_data)->ocd_str) + return (ENXIO); + + device_set_desc(dev, "Freescale XGMAC MDIO"); + + return (BUS_PROBE_DEFAULT); +} + +static int +xmdio_fdt_attach(device_t dev) +{ + struct xmdio_softc *sc; + + sc = device_get_softc(dev); + + sc->sc_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 0, RF_ACTIVE); + + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + + mtx_init(&sc->sc_lock, device_get_nameunit(dev), "XMDIO lock", + MTX_DEF); + + return (0); +} + +static int +xmdio_detach(device_t dev) +{ + struct xmdio_softc *sc; + + sc = device_get_softc(dev); + + mtx_destroy(&sc->sc_lock); + + return (0); +} + +static void +set_clause45(struct xmdio_softc *sc) +{ + uint32_t reg; + + reg = MDIO_READ4(sc, MDIO_CFG); + MDIO_WRITE4(sc, MDIO_CFG, reg | CFG_ENC45); +} + +static void +set_clause22(struct xmdio_softc *sc) +{ + uint32_t reg; + + reg = MDIO_READ4(sc, MDIO_CFG); + MDIO_WRITE4(sc, MDIO_CFG, reg & ~CFG_ENC45); +} + +static int +xmdio_wait_no_busy(struct xmdio_softc *sc) +{ + uint32_t count, val; + + for (count = 1000; count > 0; count--) { + val = MDIO_READ4(sc, MDIO_CFG); + if ((val & STAT_BUSY) == 0) + break; + DELAY(1); + } + + if (count == 0) + return (0xffff); + + return (0); +} + +int +xmdio_miibus_readreg(device_t dev, int phy, int reg) +{ + struct xmdio_softc *sc; + int rv; + uint32_t ctl; + + sc = device_get_softc(dev); + + MDIO_LOCK(); + + set_clause22(sc); + ctl = (phy << 5) | reg; + MDIO_WRITE4(sc, MDIO_CTL, ctl | CTL_READ); + + 
MDIO_READ4(sc, MDIO_CTL); + + if (xmdio_wait_no_busy(sc)) + rv = 0xffff; + else + rv = MDIO_READ4(sc, MDIO_DATA); + + MDIO_WRITE4(sc, MDIO_CTL, 0); + MDIO_UNLOCK(); + + return (rv); +} + +int +xmdio_miibus_writereg(device_t dev, int phy, int reg, int value) +{ + struct xmdio_softc *sc; + + sc = device_get_softc(dev); + + MDIO_LOCK(); + set_clause22(sc); + /* Stop the MII management read cycle */ + MDIO_WRITE4(sc, MDIO_CTL, (phy << 5) | reg); + + MDIO_WRITE4(sc, MDIO_DATA, value); + + /* Wait till MII management write is complete */ + xmdio_wait_no_busy(sc); + MDIO_UNLOCK(); + + return (0); +} + +static int +xmdio_mdio_readextreg(device_t dev, int phy, int devad, int reg) +{ + struct xmdio_softc *sc; + int rv; + uint32_t ctl; + + sc = device_get_softc(dev); + + MDIO_LOCK(); + + set_clause45(sc); + ctl = (phy << 5) | devad; + MDIO_WRITE4(sc, MDIO_CTL, ctl); + MDIO_WRITE4(sc, MDIO_ADDR, reg); + xmdio_wait_no_busy(sc); + MDIO_WRITE4(sc, MDIO_CTL, ctl | CTL_READ); + MDIO_READ4(sc, MDIO_CTL); + + xmdio_wait_no_busy(sc); + + if (MDIO_READ4(sc, MDIO_STAT) & STAT_MDIO_RD_ER) + rv = 0xffff; + else + rv = MDIO_READ4(sc, MDIO_DATA); + + MDIO_WRITE4(sc, MDIO_CTL, 0); + MDIO_UNLOCK(); + + return (rv); +} + +static int +xmdio_mdio_writeextreg(device_t dev, int phy, int devad, int reg, int val) +{ + struct xmdio_softc *sc; + + sc = device_get_softc(dev); + + MDIO_LOCK(); + set_clause45(sc); + /* Stop the MII management read cycle */ + MDIO_WRITE4(sc, MDIO_CTL, (phy << 5) | devad); + + MDIO_WRITE4(sc, MDIO_DATA, val); + + /* Wait till MII management write is complete */ + xmdio_wait_no_busy(sc); + MDIO_UNLOCK(); + + return (0); +} + diff --git a/sys/dev/dpaa/if_dtsec.c b/sys/dev/dpaa/if_dtsec.c index a5f9955061a4..0d886abc5345 100644 --- a/sys/dev/dpaa/if_dtsec.c +++ b/sys/dev/dpaa/if_dtsec.c @@ -54,184 +54,54 @@ #include "miibus_if.h" -#include <contrib/ncsw/inc/integrations/dpaa_integration_ext.h> -#include <contrib/ncsw/inc/Peripherals/fm_mac_ext.h> -#include 
<contrib/ncsw/inc/Peripherals/fm_port_ext.h> -#include <contrib/ncsw/inc/flib/fsl_fman_dtsec.h> -#include <contrib/ncsw/inc/xx_ext.h> - +#include "dpaa_eth.h" #include "fman.h" +#include "fman_port.h" #include "if_dtsec.h" -#include "if_dtsec_im.h" -#include "if_dtsec_rm.h" + +#include "fman_if.h" +#include "fman_port_if.h" #define DTSEC_MIN_FRAME_SIZE 64 #define DTSEC_MAX_FRAME_SIZE 9600 #define DTSEC_REG_MAXFRM 0x110 -#define DTSEC_REG_GADDR(i) (0x0a0 + 4*(i)) +#define DTSEC_REG_IGADDR(i) (0x080 + 4 * (i)) +#define DTSEC_REG_GADDR(i) (0x0a0 + 4 * (i)) -/** - * @group dTSEC private defines. - * @{ - */ -/** - * dTSEC FMan MAC exceptions info struct. - */ -struct dtsec_fm_mac_ex_str { - const int num; - const char *str; -}; -/** @} */ +#define DTSEC_ECNTRL 0x014 +#define ECNTRL_R100M 0x00000008 +#define DTSEC_TCTRL 0x040 +#define TCTRL_GTS 0x00000020 +#define DTSEC_RCTRL 0x050 +#define RCTRL_CFA 0x00008000 +#define RCTRL_GHTX 0x00000400 +#define RCTRL_GRS 0x00000020 +#define RCTRL_MPROM 0x00000008 +#define DTSEC_MACCFG1 0x100 +#define DTSEC_MACCFG2 0x104 +#define MACCFG_IF_M 0x00000300 +#define MACCFG_IF_10_100 0x00000100 +#define MACCFG_IF_1G 0x00000200 +#define MACCFG_FULLDUPLEX 0x00000001 +#define DTSEC_MACSTNADDR1 0x140 +#define DTSEC_MACSTNADDR2 0x144 +static void dtsec_if_init_locked(struct dtsec_softc *sc); /** * @group FMan MAC routines. * @{ */ -#define DTSEC_MAC_EXCEPTIONS_END (-1) - -/** - * FMan MAC exceptions. 
- */ -static const struct dtsec_fm_mac_ex_str dtsec_fm_mac_exceptions[] = { - { e_FM_MAC_EX_10G_MDIO_SCAN_EVENTMDIO, "MDIO scan event" }, - { e_FM_MAC_EX_10G_MDIO_CMD_CMPL, "MDIO command completion" }, - { e_FM_MAC_EX_10G_REM_FAULT, "Remote fault" }, - { e_FM_MAC_EX_10G_LOC_FAULT, "Local fault" }, - { e_FM_MAC_EX_10G_1TX_ECC_ER, "Transmit frame ECC error" }, - { e_FM_MAC_EX_10G_TX_FIFO_UNFL, "Transmit FIFO underflow" }, - { e_FM_MAC_EX_10G_TX_FIFO_OVFL, "Receive FIFO overflow" }, - { e_FM_MAC_EX_10G_TX_ER, "Transmit frame error" }, - { e_FM_MAC_EX_10G_RX_FIFO_OVFL, "Receive FIFO overflow" }, - { e_FM_MAC_EX_10G_RX_ECC_ER, "Receive frame ECC error" }, - { e_FM_MAC_EX_10G_RX_JAB_FRM, "Receive jabber frame" }, - { e_FM_MAC_EX_10G_RX_OVRSZ_FRM, "Receive oversized frame" }, - { e_FM_MAC_EX_10G_RX_RUNT_FRM, "Receive runt frame" }, - { e_FM_MAC_EX_10G_RX_FRAG_FRM, "Receive fragment frame" }, - { e_FM_MAC_EX_10G_RX_LEN_ER, "Receive payload length error" }, - { e_FM_MAC_EX_10G_RX_CRC_ER, "Receive CRC error" }, - { e_FM_MAC_EX_10G_RX_ALIGN_ER, "Receive alignment error" }, - { e_FM_MAC_EX_1G_BAB_RX, "Babbling receive error" }, - { e_FM_MAC_EX_1G_RX_CTL, "Receive control (pause frame) interrupt" }, - { e_FM_MAC_EX_1G_GRATEFUL_TX_STP_COMPLET, "Graceful transmit stop " - "complete" }, - { e_FM_MAC_EX_1G_BAB_TX, "Babbling transmit error" }, - { e_FM_MAC_EX_1G_TX_CTL, "Transmit control (pause frame) interrupt" }, - { e_FM_MAC_EX_1G_TX_ERR, "Transmit error" }, - { e_FM_MAC_EX_1G_LATE_COL, "Late collision" }, - { e_FM_MAC_EX_1G_COL_RET_LMT, "Collision retry limit" }, - { e_FM_MAC_EX_1G_TX_FIFO_UNDRN, "Transmit FIFO underrun" }, - { e_FM_MAC_EX_1G_MAG_PCKT, "Magic Packet detected when dTSEC is in " - "Magic Packet detection mode" }, - { e_FM_MAC_EX_1G_MII_MNG_RD_COMPLET, "MII management read completion" }, - { e_FM_MAC_EX_1G_MII_MNG_WR_COMPLET, "MII management write completion" }, - { e_FM_MAC_EX_1G_GRATEFUL_RX_STP_COMPLET, "Graceful receive stop " - "complete" }, - { 
e_FM_MAC_EX_1G_TX_DATA_ERR, "Internal data error on transmit" }, - { e_FM_MAC_EX_1G_RX_DATA_ERR, "Internal data error on receive" }, - { e_FM_MAC_EX_1G_1588_TS_RX_ERR, "Time-Stamp Receive Error" }, - { e_FM_MAC_EX_1G_RX_MIB_CNT_OVFL, "MIB counter overflow" }, - { DTSEC_MAC_EXCEPTIONS_END, "" } -}; - -static const char * -dtsec_fm_mac_ex_to_str(e_FmMacExceptions exception) -{ - int i; - - for (i = 0; dtsec_fm_mac_exceptions[i].num != exception && - dtsec_fm_mac_exceptions[i].num != DTSEC_MAC_EXCEPTIONS_END; ++i) - ; - - if (dtsec_fm_mac_exceptions[i].num == DTSEC_MAC_EXCEPTIONS_END) - return ("<Unknown Exception>"); - - return (dtsec_fm_mac_exceptions[i].str); -} - -static void -dtsec_fm_mac_mdio_event_callback(t_Handle h_App, - e_FmMacExceptions exception) -{ - struct dtsec_softc *sc; - - sc = h_App; - device_printf(sc->sc_dev, "MDIO event %i: %s.\n", exception, - dtsec_fm_mac_ex_to_str(exception)); -} - -static void -dtsec_fm_mac_exception_callback(t_Handle app, e_FmMacExceptions exception) -{ - struct dtsec_softc *sc; - - sc = app; - device_printf(sc->sc_dev, "MAC exception %i: %s.\n", exception, - dtsec_fm_mac_ex_to_str(exception)); -} - -static void -dtsec_fm_mac_free(struct dtsec_softc *sc) -{ - if (sc->sc_mach == NULL) - return; - - FM_MAC_Disable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); - FM_MAC_Free(sc->sc_mach); - sc->sc_mach = NULL; -} static int dtsec_fm_mac_init(struct dtsec_softc *sc, uint8_t *mac) { - t_FmMacParams params; - t_Error error; - - memset(¶ms, 0, sizeof(params)); - memcpy(¶ms.addr, mac, sizeof(params.addr)); - - params.baseAddr = rman_get_bushandle(sc->sc_mem); - params.enetMode = sc->sc_mac_enet_mode; - params.macId = sc->sc_eth_id; - params.mdioIrq = sc->sc_mac_mdio_irq; - params.f_Event = dtsec_fm_mac_mdio_event_callback; - params.f_Exception = dtsec_fm_mac_exception_callback; - params.h_App = sc; - params.h_Fm = sc->sc_fmh; + FMAN_GET_REVISION(device_get_parent(sc->sc_base.sc_dev), &sc->sc_base.sc_rev_major, + &sc->sc_base.sc_rev_minor); - 
sc->sc_mach = FM_MAC_Config(¶ms); - if (sc->sc_mach == NULL) { - device_printf(sc->sc_dev, "couldn't configure FM_MAC module.\n" - ); + if (FMAN_RESET_MAC(device_get_parent(sc->sc_base.sc_dev), sc->sc_base.sc_eth_id) != 0) return (ENXIO); - } - - error = FM_MAC_ConfigResetOnInit(sc->sc_mach, TRUE); - if (error != E_OK) { - device_printf(sc->sc_dev, "couldn't enable reset on init " - "feature.\n"); - dtsec_fm_mac_free(sc); - return (ENXIO); - } - - /* Do not inform about pause frames */ - error = FM_MAC_ConfigException(sc->sc_mach, e_FM_MAC_EX_1G_RX_CTL, - FALSE); - if (error != E_OK) { - device_printf(sc->sc_dev, "couldn't disable pause frames " - "exception.\n"); - dtsec_fm_mac_free(sc); - return (ENXIO); - } - - error = FM_MAC_Init(sc->sc_mach); - if (error != E_OK) { - device_printf(sc->sc_dev, "couldn't initialize FM_MAC module." - "\n"); - dtsec_fm_mac_free(sc); - return (ENXIO); - } return (0); } @@ -239,87 +109,6 @@ dtsec_fm_mac_init(struct dtsec_softc *sc, uint8_t *mac) /** - * @group FMan PORT routines. 
- * @{ - */ -static const char * -dtsec_fm_port_ex_to_str(e_FmPortExceptions exception) -{ - - switch (exception) { - case e_FM_PORT_EXCEPTION_IM_BUSY: - return ("IM: RX busy"); - default: - return ("<Unknown Exception>"); - } -} - -void -dtsec_fm_port_rx_exception_callback(t_Handle app, - e_FmPortExceptions exception) -{ - struct dtsec_softc *sc; - - sc = app; - device_printf(sc->sc_dev, "RX exception: %i: %s.\n", exception, - dtsec_fm_port_ex_to_str(exception)); -} - -void -dtsec_fm_port_tx_exception_callback(t_Handle app, - e_FmPortExceptions exception) -{ - struct dtsec_softc *sc; - - sc = app; - device_printf(sc->sc_dev, "TX exception: %i: %s.\n", exception, - dtsec_fm_port_ex_to_str(exception)); -} - -e_FmPortType -dtsec_fm_port_rx_type(enum eth_dev_type type) -{ - switch (type) { - case ETH_DTSEC: - return (e_FM_PORT_TYPE_RX); - case ETH_10GSEC: - return (e_FM_PORT_TYPE_RX_10G); - default: - return (e_FM_PORT_TYPE_DUMMY); - } -} - -e_FmPortType -dtsec_fm_port_tx_type(enum eth_dev_type type) -{ - - switch (type) { - case ETH_DTSEC: - return (e_FM_PORT_TYPE_TX); - case ETH_10GSEC: - return (e_FM_PORT_TYPE_TX_10G); - default: - return (e_FM_PORT_TYPE_DUMMY); - } -} - -static void -dtsec_fm_port_free_both(struct dtsec_softc *sc) -{ - if (sc->sc_rxph) { - FM_PORT_Free(sc->sc_rxph); - sc->sc_rxph = NULL; - } - - if (sc->sc_txph) { - FM_PORT_Free(sc->sc_txph); - sc->sc_txph = NULL; - } -} -/** @} */ - - -/** * @group IFnet routines. 
* @{ */ @@ -332,7 +121,7 @@ dtsec_set_mtu(struct dtsec_softc *sc, unsigned int mtu) DTSEC_LOCK_ASSERT(sc); if (mtu >= DTSEC_MIN_FRAME_SIZE && mtu <= DTSEC_MAX_FRAME_SIZE) { - bus_write_4(sc->sc_mem, DTSEC_REG_MAXFRM, mtu); + bus_write_4(sc->sc_base.sc_mem, DTSEC_REG_MAXFRM, mtu); return (mtu); } @@ -342,9 +131,10 @@ dtsec_set_mtu(struct dtsec_softc *sc, unsigned int mtu) static u_int dtsec_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) { - struct dtsec_softc *sc = arg; + uint32_t h, *hashtable = arg; - FM_MAC_AddHashMacAddr(sc->sc_mach, (t_EnetAddr *)LLADDR(sdl)); + h = (ether_crc32_be(LLADDR(sdl), ETHER_ADDR_LEN) >> 24) & 0xFF; + hashtable[(h >> 5)] |= 1 << (0x1F - (h & 0x1F)); return (1); } @@ -352,18 +142,52 @@ dtsec_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) static void dtsec_setup_multicast(struct dtsec_softc *sc) { + uint32_t hashtable[8] = {}; int i; - if (if_getflags(sc->sc_ifnet) & IFF_ALLMULTI) { + if (if_getflags(sc->sc_base.sc_ifnet) & IFF_ALLMULTI) { for (i = 0; i < 8; i++) - bus_write_4(sc->sc_mem, DTSEC_REG_GADDR(i), 0xFFFFFFFF); + bus_write_4(sc->sc_base.sc_mem, DTSEC_REG_GADDR(i), 0xFFFFFFFF); + bus_write_4(sc->sc_base.sc_mem, DTSEC_RCTRL, + bus_read_4(sc->sc_base.sc_mem, DTSEC_RCTRL) | RCTRL_MPROM); return; } + bus_write_4(sc->sc_base.sc_mem, DTSEC_RCTRL, + bus_read_4(sc->sc_base.sc_mem, DTSEC_RCTRL) & ~RCTRL_MPROM); + + if_foreach_llmaddr(sc->sc_base.sc_ifnet, dtsec_hash_maddr, hashtable); + for (i = 0; i < 8; i++) + bus_write_4(sc->sc_base.sc_mem, DTSEC_REG_GADDR(i), + hashtable[i]); +} + +static void +dtsec_if_graceful_stop(struct dtsec_softc *sc) +{ + bus_write_4(sc->sc_base.sc_mem, DTSEC_RCTRL, + bus_read_4(sc->sc_base.sc_mem, DTSEC_RCTRL) | RCTRL_GRS); + if (sc->sc_base.sc_rev_major == 2) + DELAY(100); + else + DELAY(10); - fman_dtsec_reset_filter_table(rman_get_virtual(sc->sc_mem), - true, false); - if_foreach_llmaddr(sc->sc_ifnet, dtsec_hash_maddr, sc); + bus_write_4(sc->sc_base.sc_mem, DTSEC_TCTRL, + 
bus_read_4(sc->sc_base.sc_mem, DTSEC_TCTRL) | TCTRL_GTS); +} + +static void +dtsec_if_graceful_start(struct dtsec_softc *sc) +{ + bus_write_4(sc->sc_base.sc_mem, DTSEC_RCTRL, + bus_read_4(sc->sc_base.sc_mem, DTSEC_RCTRL) & ~RCTRL_GRS); + if (sc->sc_base.sc_rev_major == 2) + DELAY(100); + else + DELAY(10); + + bus_write_4(sc->sc_base.sc_mem, DTSEC_TCTRL, + bus_read_4(sc->sc_base.sc_mem, DTSEC_TCTRL) & ~TCTRL_GTS); } static int @@ -373,24 +197,22 @@ dtsec_if_enable_locked(struct dtsec_softc *sc) DTSEC_LOCK_ASSERT(sc); - error = FM_MAC_Enable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); - if (error != E_OK) - return (EIO); + dtsec_if_graceful_start(sc); - error = FM_PORT_Enable(sc->sc_rxph); - if (error != E_OK) + error = FMAN_PORT_ENABLE(sc->sc_base.sc_rx_port); + if (error != 0) return (EIO); - error = FM_PORT_Enable(sc->sc_txph); - if (error != E_OK) + error = FMAN_PORT_ENABLE(sc->sc_base.sc_tx_port); + if (error != 0) return (EIO); dtsec_setup_multicast(sc); - if_setdrvflagbits(sc->sc_ifnet, IFF_DRV_RUNNING, 0); + if_setdrvflagbits(sc->sc_base.sc_ifnet, IFF_DRV_RUNNING, 0); /* Refresh link state */ - dtsec_miibus_statchg(sc->sc_dev); + dtsec_miibus_statchg(sc->sc_base.sc_dev); return (0); } @@ -402,19 +224,17 @@ dtsec_if_disable_locked(struct dtsec_softc *sc) DTSEC_LOCK_ASSERT(sc); - error = FM_MAC_Disable(sc->sc_mach, e_COMM_MODE_RX_AND_TX); - if (error != E_OK) - return (EIO); + dtsec_if_graceful_stop(sc); - error = FM_PORT_Disable(sc->sc_rxph); - if (error != E_OK) + error = FMAN_PORT_DISABLE(sc->sc_base.sc_rx_port); + if (error != 0) return (EIO); - error = FM_PORT_Disable(sc->sc_txph); - if (error != E_OK) + error = FMAN_PORT_DISABLE(sc->sc_base.sc_tx_port); + if (error != 0) return (EIO); - if_setdrvflagbits(sc->sc_ifnet, 0, IFF_DRV_RUNNING); + if_setdrvflagbits(sc->sc_base.sc_ifnet, 0, IFF_DRV_RUNNING); return (0); } @@ -443,9 +263,10 @@ dtsec_if_ioctl(if_t ifp, u_long command, caddr_t data) case SIOCSIFFLAGS: DTSEC_LOCK(sc); - if (if_getflags(sc->sc_ifnet) & 
IFF_UP) - error = dtsec_if_enable_locked(sc); - else + if (if_getflags(ifp) & IFF_UP) { + if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) + dtsec_if_init_locked(sc); + } else error = dtsec_if_disable_locked(sc); DTSEC_UNLOCK(sc); @@ -453,7 +274,7 @@ dtsec_if_ioctl(if_t ifp, u_long command, caddr_t data) case SIOCGIFMEDIA: case SIOCSIFMEDIA: - error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, + error = ifmedia_ioctl(ifp, ifr, &sc->sc_base.sc_mii->mii_media, command); break; @@ -474,8 +295,8 @@ dtsec_if_tick(void *arg) /* TODO */ DTSEC_LOCK(sc); - mii_tick(sc->sc_mii); - callout_reset(&sc->sc_tick_callout, hz, dtsec_if_tick, sc); + mii_tick(sc->sc_base.sc_mii); + callout_reset(&sc->sc_base.sc_tick_callout, hz, dtsec_if_tick, sc); DTSEC_UNLOCK(sc); } @@ -487,30 +308,38 @@ dtsec_if_deinit_locked(struct dtsec_softc *sc) DTSEC_LOCK_ASSERT(sc); DTSEC_UNLOCK(sc); - callout_drain(&sc->sc_tick_callout); + callout_drain(&sc->sc_base.sc_tick_callout); DTSEC_LOCK(sc); } static void +dtsec_if_set_macaddr(struct dtsec_softc *sc, const char *addr) +{ + uint32_t reg; + + reg = (addr[5] << 24) | (addr[4] << 16) | (addr[3] << 8) | addr[2]; + bus_write_4(sc->sc_base.sc_mem, DTSEC_MACSTNADDR1, reg); + reg = (addr[1] << 24) | (addr[0] << 16); + bus_write_4(sc->sc_base.sc_mem, DTSEC_MACSTNADDR2, reg); +} + +static void dtsec_if_init_locked(struct dtsec_softc *sc) { int error; + const char *macaddr; DTSEC_LOCK_ASSERT(sc); - /* Set MAC address */ - error = FM_MAC_ModifyMacAddr(sc->sc_mach, - (t_EnetAddr *)if_getlladdr(sc->sc_ifnet)); - if (error != E_OK) { - device_printf(sc->sc_dev, "couldn't set MAC address.\n"); - goto err; - } + macaddr = if_getlladdr(sc->sc_base.sc_ifnet); + dtsec_if_set_macaddr(sc, macaddr); /* Start MII polling */ - if (sc->sc_mii) - callout_reset(&sc->sc_tick_callout, hz, dtsec_if_tick, sc); + if (sc->sc_base.sc_mii) + callout_reset(&sc->sc_base.sc_tick_callout, hz, + dtsec_if_tick, sc); - if (if_getflags(sc->sc_ifnet) & IFF_UP) { + if 
(if_getflags(sc->sc_base.sc_ifnet) & IFF_UP) { error = dtsec_if_enable_locked(sc); if (error != 0) goto err; @@ -524,7 +353,7 @@ dtsec_if_init_locked(struct dtsec_softc *sc) err: dtsec_if_deinit_locked(sc); - device_printf(sc->sc_dev, "initialization error.\n"); + device_printf(sc->sc_base.sc_dev, "initialization error.\n"); return; } @@ -547,7 +376,7 @@ dtsec_if_start(if_t ifp) sc = if_getsoftc(ifp); DTSEC_LOCK(sc); - sc->sc_start_locked(sc); + dpaa_eth_if_start_locked(&sc->sc_base); DTSEC_UNLOCK(sc); } @@ -569,7 +398,7 @@ dtsec_ifmedia_upd(if_t ifp) struct dtsec_softc *sc = if_getsoftc(ifp); DTSEC_LOCK(sc); - mii_mediachg(sc->sc_mii); + mii_mediachg(sc->sc_base.sc_mii); DTSEC_UNLOCK(sc); return (0); @@ -582,10 +411,10 @@ dtsec_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) DTSEC_LOCK(sc); - mii_pollstat(sc->sc_mii); + mii_pollstat(sc->sc_base.sc_mii); - ifmr->ifm_active = sc->sc_mii->mii_media_active; - ifmr->ifm_status = sc->sc_mii->mii_media_status; + ifmr->ifm_active = sc->sc_base.sc_mii->mii_media_active; + ifmr->ifm_status = sc->sc_base.sc_mii->mii_media_status; DTSEC_UNLOCK(sc); } @@ -596,118 +425,75 @@ dtsec_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) * @group dTSEC bus interface. * @{ */ -static void -dtsec_configure_mode(struct dtsec_softc *sc) -{ - char tunable[64]; - - snprintf(tunable, sizeof(tunable), "%s.independent_mode", - device_get_nameunit(sc->sc_dev)); - - sc->sc_mode = DTSEC_MODE_REGULAR; - TUNABLE_INT_FETCH(tunable, &sc->sc_mode); - - if (sc->sc_mode == DTSEC_MODE_REGULAR) { - sc->sc_port_rx_init = dtsec_rm_fm_port_rx_init; - sc->sc_port_tx_init = dtsec_rm_fm_port_tx_init; - sc->sc_start_locked = dtsec_rm_if_start_locked; - } else { - sc->sc_port_rx_init = dtsec_im_fm_port_rx_init; - sc->sc_port_tx_init = dtsec_im_fm_port_tx_init; - sc->sc_start_locked = dtsec_im_if_start_locked; - } - - device_printf(sc->sc_dev, "Configured for %s mode.\n", - (sc->sc_mode == DTSEC_MODE_REGULAR) ? 
"regular" : "independent"); -} int dtsec_attach(device_t dev) { struct dtsec_softc *sc; - device_t parent; + cell_t ports[2]; + phandle_t node; int error; if_t ifp; sc = device_get_softc(dev); - parent = device_get_parent(dev); - sc->sc_dev = dev; - sc->sc_mac_mdio_irq = NO_IRQ; - - /* Check if MallocSmart allocator is ready */ - if (XX_MallocSmartInit() != E_OK) - return (ENXIO); + sc->sc_base.sc_dev = dev; + node = ofw_bus_get_node(dev); /* Init locks */ - mtx_init(&sc->sc_lock, device_get_nameunit(dev), + mtx_init(&sc->sc_base.sc_lock, device_get_nameunit(dev), "DTSEC Global Lock", MTX_DEF); - mtx_init(&sc->sc_mii_lock, device_get_nameunit(dev), + mtx_init(&sc->sc_base.sc_mii_lock, device_get_nameunit(dev), "DTSEC MII Lock", MTX_DEF); /* Init callouts */ - callout_init(&sc->sc_tick_callout, CALLOUT_MPSAFE); + callout_init(&sc->sc_base.sc_tick_callout, CALLOUT_MPSAFE); - /* Read configuraton */ - if ((error = fman_get_handle(parent, &sc->sc_fmh)) != 0) - return (error); - - if ((error = fman_get_muram_handle(parent, &sc->sc_muramh)) != 0) - return (error); - - if ((error = fman_get_bushandle(parent, &sc->sc_fm_base)) != 0) - return (error); - - /* Configure working mode */ - dtsec_configure_mode(sc); + /* Create RX buffer pool */ + error = dpaa_eth_pool_rx_init(&sc->sc_base); + if (error != 0) + return (EIO); - /* If we are working in regular mode configure BMAN and QMAN */ - if (sc->sc_mode == DTSEC_MODE_REGULAR) { - /* Create RX buffer pool */ - error = dtsec_rm_pool_rx_init(sc); - if (error != 0) - return (EIO); + /* Create RX frame queue range */ + error = dpaa_eth_fq_rx_init(&sc->sc_base); + if (error != 0) + return (EIO); - /* Create RX frame queue range */ - error = dtsec_rm_fqr_rx_init(sc); - if (error != 0) - return (EIO); + /* Create frame info pool */ + error = dpaa_eth_fi_pool_init(&sc->sc_base); + if (error != 0) + return (EIO); - /* Create frame info pool */ - error = dtsec_rm_fi_pool_init(sc); - if (error != 0) - return (EIO); + /* Create TX frame 
queue range */ + error = dpaa_eth_fq_tx_init(&sc->sc_base); + if (error != 0) + return (EIO); - /* Create TX frame queue range */ - error = dtsec_rm_fqr_tx_init(sc); - if (error != 0) - return (EIO); + if (OF_getencprop(node, "fsl,fman-ports", ports, sizeof(ports)) < 0) { + device_printf(dev, "missing ports in device tree\n"); + return (ENXIO); } - /* Init FMan MAC module. */ - error = dtsec_fm_mac_init(sc, sc->sc_mac_addr); + error = dtsec_fm_mac_init(sc, sc->sc_base.sc_mac_addr); if (error != 0) { dtsec_detach(dev); return (ENXIO); } - /* Init FMan TX port */ - error = sc->sc_port_tx_init(sc, device_get_unit(sc->sc_dev)); - if (error != 0) { - dtsec_detach(dev); - return (ENXIO); - } + sc->sc_base.sc_rx_port = OF_device_from_xref(ports[0]); + sc->sc_base.sc_tx_port = OF_device_from_xref(ports[1]); + dpaa_eth_fm_port_rx_init(&sc->sc_base); + dpaa_eth_fm_port_tx_init(&sc->sc_base); - /* Init FMan RX port */ - error = sc->sc_port_rx_init(sc, device_get_unit(sc->sc_dev)); - if (error != 0) { + if (sc->sc_base.sc_rx_port == NULL || sc->sc_base.sc_tx_port == NULL) { + device_printf(dev, "invalid ports"); dtsec_detach(dev); return (ENXIO); } /* Create network interface for upper layers */ - ifp = sc->sc_ifnet = if_alloc(IFT_ETHER); + ifp = sc->sc_base.sc_ifnet = if_alloc(IFT_ETHER); if_setsoftc(ifp, sc); if_setflags(ifp, IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST); @@ -716,11 +502,12 @@ dtsec_attach(device_t dev) if_setioctlfn(ifp, dtsec_if_ioctl); if_setsendqlen(ifp, IFQ_MAXLEN); - if (sc->sc_phy_addr >= 0) - if_initname(ifp, device_get_name(sc->sc_dev), - device_get_unit(sc->sc_dev)); + if (sc->sc_base.sc_phy_addr >= 0) + if_initname(ifp, device_get_name(sc->sc_base.sc_dev), + device_get_unit(sc->sc_base.sc_dev)); else - if_initname(ifp, "dtsec_phy", device_get_unit(sc->sc_dev)); + if_initname(ifp, "dtsec_phy", + device_get_unit(sc->sc_base.sc_dev)); /* TODO */ #if 0 @@ -732,18 +519,18 @@ dtsec_attach(device_t dev) if_setcapenable(ifp, if_getcapabilities(ifp)); /* 
Attach PHY(s) */ - error = mii_attach(sc->sc_dev, &sc->sc_mii_dev, ifp, dtsec_ifmedia_upd, - dtsec_ifmedia_sts, BMSR_DEFCAPMASK, sc->sc_phy_addr, - MII_OFFSET_ANY, 0); + error = mii_attach(sc->sc_base.sc_dev, &sc->sc_base.sc_mii_dev, + ifp, dtsec_ifmedia_upd, dtsec_ifmedia_sts, BMSR_DEFCAPMASK, + sc->sc_base.sc_phy_addr, MII_OFFSET_ANY, 0); if (error) { - device_printf(sc->sc_dev, "attaching PHYs failed: %d\n", error); - dtsec_detach(sc->sc_dev); + device_printf(sc->sc_base.sc_dev, + "attaching PHYs failed: %d\n", error); + dtsec_detach(sc->sc_base.sc_dev); return (error); } - sc->sc_mii = device_get_softc(sc->sc_mii_dev); /* Attach to stack */ - ether_ifattach(ifp, sc->sc_mac_addr); + ether_ifattach(ifp, sc->sc_base.sc_mac_addr); return (0); } @@ -755,7 +542,7 @@ dtsec_detach(device_t dev) if_t ifp; sc = device_get_softc(dev); - ifp = sc->sc_ifnet; + ifp = sc->sc_base.sc_ifnet; if (device_is_attached(dev)) { ether_ifdetach(ifp); @@ -765,28 +552,23 @@ dtsec_detach(device_t dev) DTSEC_UNLOCK(sc); } - if (sc->sc_ifnet) { - if_free(sc->sc_ifnet); - sc->sc_ifnet = NULL; + if (sc->sc_base.sc_ifnet) { + if_free(sc->sc_base.sc_ifnet); + sc->sc_base.sc_ifnet = NULL; } - if (sc->sc_mode == DTSEC_MODE_REGULAR) { - /* Free RX/TX FQRs */ - dtsec_rm_fqr_rx_free(sc); - dtsec_rm_fqr_tx_free(sc); + /* Free RX/TX FQRs */ + dpaa_eth_fq_rx_free(&sc->sc_base); + dpaa_eth_fq_tx_free(&sc->sc_base); - /* Free frame info pool */ - dtsec_rm_fi_pool_free(sc); + /* Free frame info pool */ + dpaa_eth_fi_pool_free(&sc->sc_base); - /* Free RX buffer pool */ - dtsec_rm_pool_rx_free(sc); - } - - dtsec_fm_mac_free(sc); - dtsec_fm_port_free_both(sc); + /* Free RX buffer pool */ + dpaa_eth_pool_rx_free(&sc->sc_base); /* Destroy lock */ - mtx_destroy(&sc->sc_lock); + mtx_destroy(&sc->sc_base.sc_lock); return (0); } @@ -825,7 +607,7 @@ dtsec_miibus_readreg(device_t dev, int phy, int reg) sc = device_get_softc(dev); - return (MIIBUS_READREG(sc->sc_mdio, phy, reg)); + return 
(MIIBUS_READREG(sc->sc_base.sc_mdio, phy, reg)); } int @@ -836,43 +618,49 @@ dtsec_miibus_writereg(device_t dev, int phy, int reg, int value) sc = device_get_softc(dev); - return (MIIBUS_WRITEREG(sc->sc_mdio, phy, reg, value)); + return (MIIBUS_WRITEREG(sc->sc_base.sc_mdio, phy, reg, value)); } void dtsec_miibus_statchg(device_t dev) { struct dtsec_softc *sc; - e_EnetSpeed speed; + uint32_t reg; bool duplex; - int error; + int speed; sc = device_get_softc(dev); DTSEC_LOCK_ASSERT(sc); - duplex = ((sc->sc_mii->mii_media_active & IFM_GMASK) == IFM_FDX); + duplex = ((sc->sc_base.sc_mii->mii_media_active & IFM_GMASK) == IFM_FDX); - switch (IFM_SUBTYPE(sc->sc_mii->mii_media_active)) { + switch (IFM_SUBTYPE(sc->sc_base.sc_mii->mii_media_active)) { case IFM_1000_T: case IFM_1000_SX: - speed = e_ENET_SPEED_1000; - break; - - case IFM_100_TX: - speed = e_ENET_SPEED_100; - break; - - case IFM_10_T: - speed = e_ENET_SPEED_10; + if (!duplex) { + device_printf(sc->sc_base.sc_dev, + "Only full-duplex supported for 1Gbps speeds"); + return; + } + speed = MACCFG_IF_1G; break; default: - speed = e_ENET_SPEED_10; + speed = MACCFG_IF_10_100; } - error = FM_MAC_AdjustLink(sc->sc_mach, speed, duplex); - if (error != E_OK) - device_printf(sc->sc_dev, "error while adjusting MAC speed.\n"); + reg = bus_read_4(sc->sc_base.sc_mem, DTSEC_MACCFG2); + reg &= ~(MACCFG_IF_M | MACCFG_FULLDUPLEX); + + if (duplex) + reg |= MACCFG_FULLDUPLEX; + reg |= speed; + bus_write_4(sc->sc_base.sc_mem, DTSEC_MACCFG2, reg); + + reg = bus_read_4(sc->sc_base.sc_mem, DTSEC_ECNTRL) & ~ECNTRL_R100M; + if (IFM_SUBTYPE(sc->sc_base.sc_mii->mii_media_active) == IFM_100_TX) + reg |= ECNTRL_R100M; + bus_write_4(sc->sc_base.sc_mem, DTSEC_ECNTRL, reg); } /** @} */ diff --git a/sys/dev/dpaa/if_dtsec.h b/sys/dev/dpaa/if_dtsec.h index 4de0b776e9f5..7e0cb52a2400 100644 --- a/sys/dev/dpaa/if_dtsec.h +++ b/sys/dev/dpaa/if_dtsec.h @@ -34,11 +34,11 @@ #define DTSEC_MODE_REGULAR 0 #define DTSEC_MODE_INDEPENDENT 1 -#define 
DTSEC_LOCK(sc) mtx_lock(&(sc)->sc_lock) -#define DTSEC_UNLOCK(sc) mtx_unlock(&(sc)->sc_lock) -#define DTSEC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_lock, MA_OWNED) -#define DTSEC_MII_LOCK(sc) mtx_lock(&(sc)->sc_mii_lock) -#define DTSEC_MII_UNLOCK(sc) mtx_unlock(&(sc)->sc_mii_lock) +#define DTSEC_LOCK(sc) mtx_lock(&(sc)->sc_base.sc_lock) +#define DTSEC_UNLOCK(sc) mtx_unlock(&(sc)->sc_base.sc_lock) +#define DTSEC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_base.sc_lock, MA_OWNED) +#define DTSEC_MII_LOCK(sc) mtx_lock(&(sc)->sc_base.sc_mii_lock) +#define DTSEC_MII_UNLOCK(sc) mtx_unlock(&(sc)->sc_base.sc_mii_lock) enum eth_dev_type { ETH_DTSEC = 0x1, @@ -46,97 +46,13 @@ enum eth_dev_type { }; struct dtsec_softc { - /* XXX MII bus requires that struct ifnet is first!!! */ - if_t sc_ifnet; - - device_t sc_dev; - struct resource *sc_mem; - struct mtx sc_lock; - int sc_mode; - - /* Methods */ - int (*sc_port_rx_init) - (struct dtsec_softc *sc, int unit); - int (*sc_port_tx_init) - (struct dtsec_softc *sc, int unit); - void (*sc_start_locked) - (struct dtsec_softc *sc); - - /* dTSEC data */ + struct dpaa_eth_softc sc_base; enum eth_dev_type sc_eth_dev_type; - uint8_t sc_eth_id; /* Ethernet ID within its frame manager */ - uintptr_t sc_mac_mem_offset; - e_EnetMode sc_mac_enet_mode; - int sc_mac_mdio_irq; - uint8_t sc_mac_addr[6]; - int sc_port_rx_hw_id; - int sc_port_tx_hw_id; - uint32_t sc_port_tx_qman_chan; - int sc_phy_addr; - bool sc_hidden; - device_t sc_mdio; - - /* Params from fman_bus driver */ - vm_offset_t sc_fm_base; - t_Handle sc_fmh; - t_Handle sc_muramh; - - t_Handle sc_mach; - t_Handle sc_rxph; - t_Handle sc_txph; - - /* MII data */ - struct mii_data *sc_mii; - device_t sc_mii_dev; - struct mtx sc_mii_lock; - - struct callout sc_tick_callout; - - /* RX Pool */ - t_Handle sc_rx_pool; - uint8_t sc_rx_bpid; - uma_zone_t sc_rx_zone; - char sc_rx_zname[64]; - - /* RX Frame Queue */ - t_Handle sc_rx_fqr; - uint32_t sc_rx_fqid; - - /* TX Frame Queue */ - t_Handle sc_tx_fqr; - 
bool sc_tx_fqr_full; - t_Handle sc_tx_conf_fqr; - uint32_t sc_tx_conf_fqid; - - /* Frame Info Zone */ - uma_zone_t sc_fi_zone; - char sc_fi_zname[64]; }; /** @} */ /** - * @group dTSEC FMan PORT API. - * @{ - */ -enum dtsec_fm_port_params { - FM_PORT_LIODN_BASE = 0, - FM_PORT_LIODN_OFFSET = 0, - FM_PORT_MEM_ID = 0, - FM_PORT_MEM_ATTR = MEMORY_ATTR_CACHEABLE, - FM_PORT_BUFFER_SIZE = MCLBYTES, -}; - -e_FmPortType dtsec_fm_port_rx_type(enum eth_dev_type type); -void dtsec_fm_port_rx_exception_callback(t_Handle app, - e_FmPortExceptions exception); -void dtsec_fm_port_tx_exception_callback(t_Handle app, - e_FmPortExceptions exception); -e_FmPortType dtsec_fm_port_tx_type(enum eth_dev_type type); -/** @} */ - - -/** * @group dTSEC bus interface. * @{ */ diff --git a/sys/dev/dpaa/if_dtsec_fdt.c b/sys/dev/dpaa/if_dtsec_fdt.c index 441ff9c96c50..85b1998015d5 100644 --- a/sys/dev/dpaa/if_dtsec_fdt.c +++ b/sys/dev/dpaa/if_dtsec_fdt.c @@ -48,9 +48,7 @@ #include "miibus_if.h" -#include <contrib/ncsw/inc/Peripherals/fm_port_ext.h> -#include <contrib/ncsw/inc/xx_ext.h> - +#include "dpaa_eth.h" #include "if_dtsec.h" #include "fman.h" @@ -123,7 +121,7 @@ dtsec_fdt_attach(device_t dev) enet_node = ofw_bus_get_node(dev); if (OF_getprop(enet_node, "local-mac-address", - (void *)sc->sc_mac_addr, 6) == -1) { + (void *)sc->sc_base.sc_mac_addr, 6) == -1) { device_printf(dev, "Could not load local-mac-addr property from DTS\n"); return (ENXIO); @@ -135,17 +133,17 @@ dtsec_fdt_attach(device_t dev) else if (ofw_bus_is_compatible(dev, "fsl,fman-xgec") != 0) sc->sc_eth_dev_type = ETH_10GSEC; else - return(ENXIO); + return (ENXIO); /* Get PHY address */ - if (OF_getprop(enet_node, "phy-handle", (void *)&phy_node, + if (OF_getprop(enet_node, "tbi-handle", (void *)&phy_node, sizeof(phy_node)) <= 0) return (ENXIO); phy_node = OF_node_from_xref(phy_node); - if (OF_getprop(phy_node, "reg", (void *)&sc->sc_phy_addr, - sizeof(sc->sc_phy_addr)) <= 0) + if (OF_getprop(phy_node, "reg", (void 
*)&sc->sc_base.sc_phy_addr, + sizeof(sc->sc_base.sc_phy_addr)) <= 0) return (ENXIO); phy_dev = OF_device_from_xref(OF_parent(phy_node)); @@ -155,12 +153,12 @@ dtsec_fdt_attach(device_t dev) return (ENXIO); } - sc->sc_mdio = phy_dev; + sc->sc_base.sc_mdio = phy_dev; /* Get MAC memory offset in SoC */ rid = 0; - sc->sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); - if (sc->sc_mem == NULL) + sc->sc_base.sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); + if (sc->sc_base.sc_mem == NULL) return (ENXIO); /* Get PHY connection type */ @@ -168,20 +166,10 @@ dtsec_fdt_attach(device_t dev) sizeof(phy_type)) <= 0) return (ENXIO); - if (!strcmp(phy_type, "sgmii")) - sc->sc_mac_enet_mode = e_ENET_MODE_SGMII_1000; - else if (!strcmp(phy_type, "rgmii")) - sc->sc_mac_enet_mode = e_ENET_MODE_RGMII_1000; - else if (!strcmp(phy_type, "xgmii")) - /* We set 10 Gigabit mode flag however we don't support it */ - sc->sc_mac_enet_mode = e_ENET_MODE_XGMII_10000; - else - return (ENXIO); - if (OF_getencprop(enet_node, "cell-index", (void *)&mac_id, sizeof(mac_id)) <= 0) return (ENXIO); - sc->sc_eth_id = mac_id; + sc->sc_base.sc_eth_id = mac_id; /* Get RX/TX port handles */ if (OF_getprop(enet_node, "fsl,fman-ports", (void *)fman_rxtx_node, @@ -194,32 +182,17 @@ dtsec_fdt_attach(device_t dev) if (fman_rxtx_node[1] == 0) return (ENXIO); - fman_rxtx_node[0] = OF_instance_to_package(fman_rxtx_node[0]); - fman_rxtx_node[1] = OF_instance_to_package(fman_rxtx_node[1]); - - if (ofw_bus_node_is_compatible(fman_rxtx_node[0], - "fsl,fman-v2-port-rx") == 0) - return (ENXIO); - - if (ofw_bus_node_is_compatible(fman_rxtx_node[1], - "fsl,fman-v2-port-tx") == 0) - return (ENXIO); - - /* Get RX port HW id */ - if (OF_getprop(fman_rxtx_node[0], "reg", (void *)&sc->sc_port_rx_hw_id, - sizeof(sc->sc_port_rx_hw_id)) <= 0) - return (ENXIO); + sc->sc_base.sc_rx_port = OF_device_from_xref(fman_rxtx_node[0]); + sc->sc_base.sc_tx_port = 
OF_device_from_xref(fman_rxtx_node[1]); - /* Get TX port HW id */ - if (OF_getprop(fman_rxtx_node[1], "reg", (void *)&sc->sc_port_tx_hw_id, - sizeof(sc->sc_port_tx_hw_id)) <= 0) + if (sc->sc_base.sc_rx_port == NULL || sc->sc_base.sc_tx_port == NULL) return (ENXIO); if (OF_getprop(fman_rxtx_node[1], "cell-index", &fman_tx_cell, sizeof(fman_tx_cell)) <= 0) return (ENXIO); /* Get QMan channel */ - sc->sc_port_tx_qman_chan = fman_qman_channel_id(device_get_parent(dev), + sc->sc_base.sc_port_tx_qman_chan = fman_qman_channel_id(device_get_parent(dev), fman_tx_cell); return (dtsec_attach(dev)); diff --git a/sys/dev/dpaa/if_dtsec_im.c b/sys/dev/dpaa/if_dtsec_im.c deleted file mode 100644 index 0711275790c3..000000000000 --- a/sys/dev/dpaa/if_dtsec_im.c +++ /dev/null @@ -1,260 +0,0 @@ -/*- - * Copyright (c) 2012 Semihalf. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/module.h> -#include <sys/bus.h> -#include <sys/rman.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/socket.h> -#include <sys/sysctl.h> -#include <sys/sockio.h> - -#include <net/ethernet.h> -#include <net/if.h> -#include <net/if_dl.h> -#include <net/if_media.h> -#include <net/if_types.h> -#include <net/if_arp.h> - -#include <dev/mii/mii.h> -#include <dev/mii/miivar.h> - -#include "miibus_if.h" - -#include <contrib/ncsw/inc/integrations/dpaa_integration_ext.h> -#include <contrib/ncsw/inc/Peripherals/fm_mac_ext.h> -#include <contrib/ncsw/inc/Peripherals/fm_port_ext.h> -#include <contrib/ncsw/inc/xx_ext.h> - -#include "fman.h" -#include "if_dtsec.h" -#include "if_dtsec_im.h" - - -/** - * @group dTSEC FMan PORT routines. 
- * @{ - */ -static e_RxStoreResponse -dtsec_im_fm_port_rx_callback(t_Handle app, uint8_t *data, uint16_t length, - uint16_t status, uint8_t position, t_Handle buf_context) -{ - struct dtsec_softc *sc; - struct mbuf *m; - - /* TODO STATUS / Position checking */ - sc = app; - - m = m_devget(data, length, 0, sc->sc_ifnet, NULL); - if (m) - if_input(sc->sc_ifnet, m); - - XX_FreeSmart(data); - - return (e_RX_STORE_RESPONSE_CONTINUE); -} - -static void -dtsec_im_fm_port_tx_conf_callback(t_Handle app, uint8_t *data, uint16_t status, - t_Handle buf_context) -{ - - /* TODO: Check status */ - XX_FreeSmart(data); -} - -static uint8_t * -dtsec_im_fm_port_rx_get_buf(t_Handle buffer_pool, t_Handle *buf_context_handle) -{ - struct dtsec_softc *sc; - uint8_t *buffer; - - sc = buffer_pool; - - buffer = XX_MallocSmart(FM_PORT_BUFFER_SIZE, 0, sizeof(void *)); - if (!buffer) - device_printf(sc->sc_dev, "couldn't allocate RX buffer.\n"); - - return (buffer); -} - -static t_Error -dtsec_im_fm_port_rx_put_buf(t_Handle buffer_pool, uint8_t *buffer, - t_Handle buf_context) -{ - - XX_FreeSmart(buffer); - return (E_OK); -} - -int -dtsec_im_fm_port_rx_init(struct dtsec_softc *sc, int unit) -{ - t_FmPortParams params; - t_BufferPoolInfo *pool_params; - t_FmPortImRxTxParams *im_params; - t_Error error; - - memset(¶ms, 0, sizeof(params)); - - params.baseAddr = sc->sc_fm_base + sc->sc_port_rx_hw_id; - params.h_Fm = sc->sc_fmh; - params.portType = dtsec_fm_port_rx_type(sc->sc_eth_dev_type); - params.portId = sc->sc_eth_id; - params.independentModeEnable = TRUE; - params.liodnBase = FM_PORT_LIODN_BASE; - params.f_Exception = dtsec_fm_port_rx_exception_callback; - params.h_App = sc; - - im_params = ¶ms.specificParams.imRxTxParams; - im_params->h_FmMuram = sc->sc_muramh; - im_params->liodnOffset = FM_PORT_LIODN_OFFSET; - im_params->dataMemId = FM_PORT_MEM_ID; - im_params->dataMemAttributes = FM_PORT_MEM_ATTR; - im_params->f_RxStore = dtsec_im_fm_port_rx_callback; - - pool_params = 
¶ms.specificParams.imRxTxParams.rxPoolParams; - pool_params->h_BufferPool = sc; - pool_params->f_GetBuf = dtsec_im_fm_port_rx_get_buf; - pool_params->f_PutBuf = dtsec_im_fm_port_rx_put_buf; - pool_params->bufferSize = FM_PORT_BUFFER_SIZE; - - sc->sc_rxph = FM_PORT_Config(¶ms); - if (sc->sc_rxph == NULL) { - device_printf(sc->sc_dev, "couldn't configure FM Port RX.\n"); - return (ENXIO); - } - - error = FM_PORT_Init(sc->sc_rxph); - if (error != E_OK) { - device_printf(sc->sc_dev, "couldn't initialize FM Port RX.\n"); - FM_PORT_Free(sc->sc_rxph); - return (ENXIO); - } - - if (bootverbose) - device_printf(sc->sc_dev, "RX hw port 0x%02x initialized.\n", - sc->sc_port_rx_hw_id); - - return (0); -} - -int -dtsec_im_fm_port_tx_init(struct dtsec_softc *sc, int unit) -{ - t_FmPortParams params; - t_FmPortImRxTxParams *im_params; - t_Error error; - - memset(¶ms, 0, sizeof(params)); - - params.baseAddr = sc->sc_fm_base + sc->sc_port_tx_hw_id; - params.h_Fm = sc->sc_fmh; - params.portType = dtsec_fm_port_tx_type(sc->sc_eth_dev_type); - params.portId = unit; - params.independentModeEnable = TRUE; - params.liodnBase = FM_PORT_LIODN_BASE; - params.f_Exception = dtsec_fm_port_tx_exception_callback; - params.h_App = sc; - - im_params = ¶ms.specificParams.imRxTxParams; - im_params->h_FmMuram = sc->sc_muramh; - im_params->liodnOffset = FM_PORT_LIODN_OFFSET; - im_params->dataMemId = FM_PORT_MEM_ID; - im_params->dataMemAttributes = FM_PORT_MEM_ATTR; - im_params->f_TxConf = dtsec_im_fm_port_tx_conf_callback; - - sc->sc_txph = FM_PORT_Config(¶ms); - if (sc->sc_txph == NULL) { - device_printf(sc->sc_dev, "couldn't configure FM Port TX.\n"); - return (ENXIO); - } - - error = FM_PORT_Init(sc->sc_txph); - if (error != E_OK) { - device_printf(sc->sc_dev, "couldn't initialize FM Port TX.\n"); - FM_PORT_Free(sc->sc_txph); - return (ENXIO); - } - - if (bootverbose) - device_printf(sc->sc_dev, "TX hw port 0x%02x initialized.\n", - sc->sc_port_tx_hw_id); - - return (0); -} -/** @} */ - - -/** - * 
@group dTSEC IFnet routines. - * @{ - */ -void -dtsec_im_if_start_locked(struct dtsec_softc *sc) -{ - uint8_t *buffer; - uint16_t length; - struct mbuf *m; - int error; - - DTSEC_LOCK_ASSERT(sc); - /* TODO: IFF_DRV_OACTIVE */ - - if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) == 0) - return; - - if ((if_getdrvflags(sc->sc_ifnet) & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) - return; - - while (!if_sendq_empty(sc->sc_ifnet)) { - m = if_dequeue(sc->sc_ifnet); - if (m == NULL) - break; - - length = m_length(m, NULL); - buffer = XX_MallocSmart(length, 0, sizeof(void *)); - if (!buffer) { - m_freem(m); - break; - } - - m_copydata(m, 0, length, buffer); - m_freem(m); - - error = FM_PORT_ImTx(sc->sc_txph, buffer, length, TRUE, buffer); - if (error != E_OK) { - /* TODO: Ring full */ - XX_FreeSmart(buffer); - break; - } - } -} -/** @} */ diff --git a/sys/dev/dpaa/if_dtsec_im.h b/sys/dev/dpaa/if_dtsec_im.h deleted file mode 100644 index e1c8f2a3c0c2..000000000000 --- a/sys/dev/dpaa/if_dtsec_im.h +++ /dev/null @@ -1,39 +0,0 @@ -/*- - * Copyright (c) 2012 Semihalf. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef IF_DTSEC_IM_H_ -#define IF_DTSEC_IM_H_ - -/** - * @group dTSEC Independent Mode API. - * @{ - */ -int dtsec_im_fm_port_tx_init(struct dtsec_softc *sc, int unit); -int dtsec_im_fm_port_rx_init(struct dtsec_softc *sc, int unit); -void dtsec_im_if_start_locked(struct dtsec_softc *sc); -/** @} */ - -#endif /* IF_DTSEC_IM_H_ */ diff --git a/sys/dev/dpaa/if_dtsec_rm.c b/sys/dev/dpaa/if_dtsec_rm.c deleted file mode 100644 index 0b9f8e0ae6c7..000000000000 --- a/sys/dev/dpaa/if_dtsec_rm.c +++ /dev/null @@ -1,651 +0,0 @@ -/*- - * Copyright (c) 2012 Semihalf. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/module.h> -#include <sys/bus.h> -#include <sys/rman.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/socket.h> -#include <sys/sysctl.h> -#include <sys/sockio.h> - -#include <net/ethernet.h> -#include <net/if.h> -#include <net/if_dl.h> -#include <net/if_media.h> -#include <net/if_types.h> -#include <net/if_arp.h> - -#include <dev/mii/mii.h> -#include <dev/mii/miivar.h> - -#include "miibus_if.h" - -#include <contrib/ncsw/inc/integrations/dpaa_integration_ext.h> -#include <contrib/ncsw/inc/Peripherals/fm_ext.h> -#include <contrib/ncsw/inc/Peripherals/fm_mac_ext.h> -#include <contrib/ncsw/inc/Peripherals/fm_port_ext.h> -#include <contrib/ncsw/inc/xx_ext.h> - -#include "fman.h" -#include "bman.h" -#include "qman.h" -#include "if_dtsec.h" -#include "if_dtsec_rm.h" - - -/** - * @group dTSEC RM private defines. 
- * @{ - */ -#define DTSEC_BPOOLS_USED (1) -#define DTSEC_MAX_TX_QUEUE_LEN 256 - -struct dtsec_rm_frame_info { - struct mbuf *fi_mbuf; - t_DpaaSGTE fi_sgt[DPAA_NUM_OF_SG_TABLE_ENTRY]; -}; - -enum dtsec_rm_pool_params { - DTSEC_RM_POOL_RX_LOW_MARK = 16, - DTSEC_RM_POOL_RX_HIGH_MARK = 64, - DTSEC_RM_POOL_RX_MAX_SIZE = 256, - - DTSEC_RM_POOL_FI_LOW_MARK = 16, - DTSEC_RM_POOL_FI_HIGH_MARK = 64, - DTSEC_RM_POOL_FI_MAX_SIZE = 256, -}; - -#define DTSEC_RM_FQR_RX_CHANNEL e_QM_FQ_CHANNEL_POOL1 -#define DTSEC_RM_FQR_TX_CONF_CHANNEL e_QM_FQ_CHANNEL_SWPORTAL0 -enum dtsec_rm_fqr_params { - DTSEC_RM_FQR_RX_WQ = 1, - DTSEC_RM_FQR_TX_WQ = 1, - DTSEC_RM_FQR_TX_CONF_WQ = 1 -}; -/** @} */ - - -/** - * @group dTSEC Frame Info routines. - * @{ - */ -void -dtsec_rm_fi_pool_free(struct dtsec_softc *sc) -{ - - if (sc->sc_fi_zone != NULL) - uma_zdestroy(sc->sc_fi_zone); -} - -int -dtsec_rm_fi_pool_init(struct dtsec_softc *sc) -{ - - snprintf(sc->sc_fi_zname, sizeof(sc->sc_fi_zname), "%s: Frame Info", - device_get_nameunit(sc->sc_dev)); - - sc->sc_fi_zone = uma_zcreate(sc->sc_fi_zname, - sizeof(struct dtsec_rm_frame_info), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - - return (0); -} - -static struct dtsec_rm_frame_info * -dtsec_rm_fi_alloc(struct dtsec_softc *sc) -{ - struct dtsec_rm_frame_info *fi; - - fi = uma_zalloc(sc->sc_fi_zone, M_NOWAIT); - - return (fi); -} - -static void -dtsec_rm_fi_free(struct dtsec_softc *sc, struct dtsec_rm_frame_info *fi) -{ - - uma_zfree(sc->sc_fi_zone, fi); -} -/** @} */ - - -/** - * @group dTSEC FMan PORT routines. 
- * @{ - */ -int -dtsec_rm_fm_port_rx_init(struct dtsec_softc *sc, int unit) -{ - t_FmPortParams params; - t_FmPortRxParams *rx_params; - t_FmExtPools *pool_params; - t_Error error; - - memset(¶ms, 0, sizeof(params)); - - params.baseAddr = sc->sc_fm_base + sc->sc_port_rx_hw_id; - params.h_Fm = sc->sc_fmh; - params.portType = dtsec_fm_port_rx_type(sc->sc_eth_dev_type); - params.portId = sc->sc_eth_id; - params.independentModeEnable = false; - params.liodnBase = FM_PORT_LIODN_BASE; - params.f_Exception = dtsec_fm_port_rx_exception_callback; - params.h_App = sc; - - rx_params = ¶ms.specificParams.rxParams; - rx_params->errFqid = sc->sc_rx_fqid; - rx_params->dfltFqid = sc->sc_rx_fqid; - rx_params->liodnOffset = 0; - - pool_params = &rx_params->extBufPools; - pool_params->numOfPoolsUsed = DTSEC_BPOOLS_USED; - pool_params->extBufPool->id = sc->sc_rx_bpid; - pool_params->extBufPool->size = FM_PORT_BUFFER_SIZE; - - sc->sc_rxph = FM_PORT_Config(¶ms); - if (sc->sc_rxph == NULL) { - device_printf(sc->sc_dev, "couldn't configure FM Port RX.\n"); - return (ENXIO); - } - - error = FM_PORT_Init(sc->sc_rxph); - if (error != E_OK) { - device_printf(sc->sc_dev, "couldn't initialize FM Port RX.\n"); - FM_PORT_Free(sc->sc_rxph); - return (ENXIO); - } - - if (bootverbose) - device_printf(sc->sc_dev, "RX hw port 0x%02x initialized.\n", - sc->sc_port_rx_hw_id); - - return (0); -} - -int -dtsec_rm_fm_port_tx_init(struct dtsec_softc *sc, int unit) -{ - t_FmPortParams params; - t_FmPortNonRxParams *tx_params; - t_Error error; - - memset(¶ms, 0, sizeof(params)); - - params.baseAddr = sc->sc_fm_base + sc->sc_port_tx_hw_id; - params.h_Fm = sc->sc_fmh; - params.portType = dtsec_fm_port_tx_type(sc->sc_eth_dev_type); - params.portId = sc->sc_eth_id; - params.independentModeEnable = false; - params.liodnBase = FM_PORT_LIODN_BASE; - params.f_Exception = dtsec_fm_port_tx_exception_callback; - params.h_App = sc; - - tx_params = ¶ms.specificParams.nonRxParams; - tx_params->errFqid = 
sc->sc_tx_conf_fqid; - tx_params->dfltFqid = sc->sc_tx_conf_fqid; - tx_params->qmChannel = sc->sc_port_tx_qman_chan; -#ifdef FM_OP_PARTITION_ERRATA_FMANx8 - tx_params->opLiodnOffset = 0; -#endif - - sc->sc_txph = FM_PORT_Config(¶ms); - if (sc->sc_txph == NULL) { - device_printf(sc->sc_dev, "couldn't configure FM Port TX.\n"); - return (ENXIO); - } - - error = FM_PORT_Init(sc->sc_txph); - if (error != E_OK) { - device_printf(sc->sc_dev, "couldn't initialize FM Port TX.\n"); - FM_PORT_Free(sc->sc_txph); - return (ENXIO); - } - - if (bootverbose) - device_printf(sc->sc_dev, "TX hw port 0x%02x initialized.\n", - sc->sc_port_tx_hw_id); - - return (0); -} -/** @} */ - - -/** - * @group dTSEC buffer pools routines. - * @{ - */ -static t_Error -dtsec_rm_pool_rx_put_buffer(t_Handle h_BufferPool, uint8_t *buffer, - t_Handle context) -{ - struct dtsec_softc *sc; - - sc = h_BufferPool; - uma_zfree(sc->sc_rx_zone, buffer); - - return (E_OK); -} - -static uint8_t * -dtsec_rm_pool_rx_get_buffer(t_Handle h_BufferPool, t_Handle *context) -{ - struct dtsec_softc *sc; - uint8_t *buffer; - - sc = h_BufferPool; - buffer = uma_zalloc(sc->sc_rx_zone, M_NOWAIT); - - return (buffer); -} - -static void -dtsec_rm_pool_rx_depleted(t_Handle h_App, bool in) -{ - struct dtsec_softc *sc; - unsigned int count; - - sc = h_App; - - if (!in) - return; - - while (1) { - count = bman_count(sc->sc_rx_pool); - if (count > DTSEC_RM_POOL_RX_HIGH_MARK) - return; - - bman_pool_fill(sc->sc_rx_pool, DTSEC_RM_POOL_RX_HIGH_MARK); - } -} - -void -dtsec_rm_pool_rx_free(struct dtsec_softc *sc) -{ - - if (sc->sc_rx_pool != NULL) - bman_pool_destroy(sc->sc_rx_pool); - - if (sc->sc_rx_zone != NULL) - uma_zdestroy(sc->sc_rx_zone); -} - -int -dtsec_rm_pool_rx_init(struct dtsec_softc *sc) -{ - - /* FM_PORT_BUFFER_SIZE must be less than PAGE_SIZE */ - CTASSERT(FM_PORT_BUFFER_SIZE < PAGE_SIZE); - - snprintf(sc->sc_rx_zname, sizeof(sc->sc_rx_zname), "%s: RX Buffers", - device_get_nameunit(sc->sc_dev)); - - sc->sc_rx_zone = 
uma_zcreate(sc->sc_rx_zname, FM_PORT_BUFFER_SIZE, NULL, - NULL, NULL, NULL, FM_PORT_BUFFER_SIZE - 1, 0); - - sc->sc_rx_pool = bman_pool_create(&sc->sc_rx_bpid, FM_PORT_BUFFER_SIZE, - 0, 0, DTSEC_RM_POOL_RX_MAX_SIZE, dtsec_rm_pool_rx_get_buffer, - dtsec_rm_pool_rx_put_buffer, DTSEC_RM_POOL_RX_LOW_MARK, - DTSEC_RM_POOL_RX_HIGH_MARK, 0, 0, dtsec_rm_pool_rx_depleted, sc, NULL, - NULL); - if (sc->sc_rx_pool == NULL) { - device_printf(sc->sc_dev, "NULL rx pool somehow\n"); - dtsec_rm_pool_rx_free(sc); - return (EIO); - } - - return (0); -} -/** @} */ - - -/** - * @group dTSEC Frame Queue Range routines. - * @{ - */ -static void -dtsec_rm_fqr_mext_free(struct mbuf *m) -{ - struct dtsec_softc *sc; - void *buffer; - - buffer = m->m_ext.ext_arg1; - sc = m->m_ext.ext_arg2; - if (bman_count(sc->sc_rx_pool) <= DTSEC_RM_POOL_RX_MAX_SIZE) - bman_put_buffer(sc->sc_rx_pool, buffer); - else - dtsec_rm_pool_rx_put_buffer(sc, buffer, NULL); -} - -static e_RxStoreResponse -dtsec_rm_fqr_rx_callback(t_Handle app, t_Handle fqr, t_Handle portal, - uint32_t fqid_off, t_DpaaFD *frame) -{ - struct dtsec_softc *sc; - struct mbuf *m; - void *frame_va; - - m = NULL; - sc = app; - - frame_va = DPAA_FD_GET_ADDR(frame); - KASSERT(DPAA_FD_GET_FORMAT(frame) == e_DPAA_FD_FORMAT_TYPE_SHORT_SBSF, - ("%s(): Got unsupported frame format 0x%02X!", __func__, - DPAA_FD_GET_FORMAT(frame))); - - KASSERT(DPAA_FD_GET_OFFSET(frame) == 0, - ("%s(): Only offset 0 is supported!", __func__)); - - if (DPAA_FD_GET_STATUS(frame) != 0) { - device_printf(sc->sc_dev, "RX error: 0x%08X\n", - DPAA_FD_GET_STATUS(frame)); - goto err; - } - - m = m_gethdr(M_NOWAIT, MT_HEADER); - if (m == NULL) - goto err; - - m_extadd(m, frame_va, FM_PORT_BUFFER_SIZE, - dtsec_rm_fqr_mext_free, frame_va, sc, 0, - EXT_NET_DRV); - - m->m_pkthdr.rcvif = sc->sc_ifnet; - m->m_len = DPAA_FD_GET_LENGTH(frame); - m_fixhdr(m); - - if_input(sc->sc_ifnet, m); - - return (e_RX_STORE_RESPONSE_CONTINUE); - -err: - bman_put_buffer(sc->sc_rx_pool, frame_va); - 
if (m != NULL) - m_freem(m); - - return (e_RX_STORE_RESPONSE_CONTINUE); -} - -static e_RxStoreResponse -dtsec_rm_fqr_tx_confirm_callback(t_Handle app, t_Handle fqr, t_Handle portal, - uint32_t fqid_off, t_DpaaFD *frame) -{ - struct dtsec_rm_frame_info *fi; - struct dtsec_softc *sc; - unsigned int qlen; - t_DpaaSGTE *sgt0; - - sc = app; - - if (DPAA_FD_GET_STATUS(frame) != 0) - device_printf(sc->sc_dev, "TX error: 0x%08X\n", - DPAA_FD_GET_STATUS(frame)); - - /* - * We are storing struct dtsec_rm_frame_info in first entry - * of scatter-gather table. - */ - sgt0 = DPAA_FD_GET_ADDR(frame); - fi = DPAA_SGTE_GET_ADDR(sgt0); - - /* Free transmitted frame */ - m_freem(fi->fi_mbuf); - dtsec_rm_fi_free(sc, fi); - - qlen = qman_fqr_get_counter(sc->sc_tx_conf_fqr, 0, - e_QM_FQR_COUNTERS_FRAME); - - if (qlen == 0) { - DTSEC_LOCK(sc); - - if (sc->sc_tx_fqr_full) { - sc->sc_tx_fqr_full = 0; - dtsec_rm_if_start_locked(sc); - } - - DTSEC_UNLOCK(sc); - } - - return (e_RX_STORE_RESPONSE_CONTINUE); -} - -void -dtsec_rm_fqr_rx_free(struct dtsec_softc *sc) -{ - - if (sc->sc_rx_fqr) - qman_fqr_free(sc->sc_rx_fqr); -} - -int -dtsec_rm_fqr_rx_init(struct dtsec_softc *sc) -{ - t_Error error; - t_Handle fqr; - - /* Default Frame Queue */ - fqr = qman_fqr_create(1, DTSEC_RM_FQR_RX_CHANNEL, DTSEC_RM_FQR_RX_WQ, - false, 0, false, false, true, false, 0, 0, 0); - if (fqr == NULL) { - device_printf(sc->sc_dev, "could not create default RX queue" - "\n"); - return (EIO); - } - - sc->sc_rx_fqr = fqr; - sc->sc_rx_fqid = qman_fqr_get_base_fqid(fqr); - - error = qman_fqr_register_cb(fqr, dtsec_rm_fqr_rx_callback, sc); - if (error != E_OK) { - device_printf(sc->sc_dev, "could not register RX callback\n"); - dtsec_rm_fqr_rx_free(sc); - return (EIO); - } - - return (0); -} - -void -dtsec_rm_fqr_tx_free(struct dtsec_softc *sc) -{ - - if (sc->sc_tx_fqr) - qman_fqr_free(sc->sc_tx_fqr); - - if (sc->sc_tx_conf_fqr) - qman_fqr_free(sc->sc_tx_conf_fqr); -} - -int -dtsec_rm_fqr_tx_init(struct dtsec_softc *sc) -{ 
- t_Error error; - t_Handle fqr; - - /* TX Frame Queue */ - fqr = qman_fqr_create(1, sc->sc_port_tx_qman_chan, - DTSEC_RM_FQR_TX_WQ, false, 0, false, false, true, false, 0, 0, 0); - if (fqr == NULL) { - device_printf(sc->sc_dev, "could not create default TX queue" - "\n"); - return (EIO); - } - - sc->sc_tx_fqr = fqr; - - /* TX Confirmation Frame Queue */ - fqr = qman_fqr_create(1, DTSEC_RM_FQR_TX_CONF_CHANNEL, - DTSEC_RM_FQR_TX_CONF_WQ, false, 0, false, false, true, false, 0, 0, - 0); - if (fqr == NULL) { - device_printf(sc->sc_dev, "could not create TX confirmation " - "queue\n"); - dtsec_rm_fqr_tx_free(sc); - return (EIO); - } - - sc->sc_tx_conf_fqr = fqr; - sc->sc_tx_conf_fqid = qman_fqr_get_base_fqid(fqr); - - error = qman_fqr_register_cb(fqr, dtsec_rm_fqr_tx_confirm_callback, sc); - if (error != E_OK) { - device_printf(sc->sc_dev, "could not register TX confirmation " - "callback\n"); - dtsec_rm_fqr_tx_free(sc); - return (EIO); - } - - return (0); -} -/** @} */ - - -/** - * @group dTSEC IFnet routines. 
- * @{ - */ -void -dtsec_rm_if_start_locked(struct dtsec_softc *sc) -{ - vm_size_t dsize, psize, ssize; - struct dtsec_rm_frame_info *fi; - unsigned int qlen, i; - struct mbuf *m0, *m; - vm_offset_t vaddr; - t_DpaaFD fd; - - DTSEC_LOCK_ASSERT(sc); - /* TODO: IFF_DRV_OACTIVE */ - - if ((sc->sc_mii->mii_media_status & IFM_ACTIVE) == 0) - return; - - if ((if_getdrvflags(sc->sc_ifnet) & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) - return; - - while (!if_sendq_empty(sc->sc_ifnet)) { - /* Check length of the TX queue */ - qlen = qman_fqr_get_counter(sc->sc_tx_fqr, 0, - e_QM_FQR_COUNTERS_FRAME); - - if (qlen >= DTSEC_MAX_TX_QUEUE_LEN) { - sc->sc_tx_fqr_full = 1; - return; - } - - fi = dtsec_rm_fi_alloc(sc); - if (fi == NULL) - return; - - m0 = if_dequeue(sc->sc_ifnet); - if (m0 == NULL) { - dtsec_rm_fi_free(sc, fi); - return; - } - - i = 0; - m = m0; - psize = 0; - dsize = 0; - fi->fi_mbuf = m0; - while (m && i < DPAA_NUM_OF_SG_TABLE_ENTRY) { - if (m->m_len == 0) - continue; - - /* - * First entry in scatter-gather table is used to keep - * pointer to frame info structure. 
- */ - DPAA_SGTE_SET_ADDR(&fi->fi_sgt[i], (void *)fi); - DPAA_SGTE_SET_LENGTH(&fi->fi_sgt[i], 0); - - DPAA_SGTE_SET_EXTENSION(&fi->fi_sgt[i], 0); - DPAA_SGTE_SET_FINAL(&fi->fi_sgt[i], 0); - DPAA_SGTE_SET_BPID(&fi->fi_sgt[i], 0); - DPAA_SGTE_SET_OFFSET(&fi->fi_sgt[i], 0); - i++; - - dsize = m->m_len; - vaddr = (vm_offset_t)m->m_data; - while (dsize > 0 && i < DPAA_NUM_OF_SG_TABLE_ENTRY) { - ssize = PAGE_SIZE - (vaddr & PAGE_MASK); - if (m->m_len < ssize) - ssize = m->m_len; - - DPAA_SGTE_SET_ADDR(&fi->fi_sgt[i], - (void *)vaddr); - DPAA_SGTE_SET_LENGTH(&fi->fi_sgt[i], ssize); - - DPAA_SGTE_SET_EXTENSION(&fi->fi_sgt[i], 0); - DPAA_SGTE_SET_FINAL(&fi->fi_sgt[i], 0); - DPAA_SGTE_SET_BPID(&fi->fi_sgt[i], 0); - DPAA_SGTE_SET_OFFSET(&fi->fi_sgt[i], 0); - - dsize -= ssize; - vaddr += ssize; - psize += ssize; - i++; - } - - if (dsize > 0) - break; - - m = m->m_next; - } - - /* Check if SG table was constructed properly */ - if (m != NULL || dsize != 0) { - dtsec_rm_fi_free(sc, fi); - m_freem(m0); - continue; - } - - DPAA_SGTE_SET_FINAL(&fi->fi_sgt[i-1], 1); - - DPAA_FD_SET_ADDR(&fd, fi->fi_sgt); - DPAA_FD_SET_LENGTH(&fd, psize); - DPAA_FD_SET_FORMAT(&fd, e_DPAA_FD_FORMAT_TYPE_SHORT_MBSF); - - fd.liodn = 0; - fd.bpid = 0; - fd.elion = 0; - DPAA_FD_SET_OFFSET(&fd, 0); - DPAA_FD_SET_STATUS(&fd, 0); - - DTSEC_UNLOCK(sc); - if (qman_fqr_enqueue(sc->sc_tx_fqr, 0, &fd) != E_OK) { - dtsec_rm_fi_free(sc, fi); - m_freem(m0); - } - DTSEC_LOCK(sc); - } -} -/** @} */ diff --git a/sys/dev/dpaa/if_dtsec_rm.h b/sys/dev/dpaa/if_dtsec_rm.h deleted file mode 100644 index 28f779a11386..000000000000 --- a/sys/dev/dpaa/if_dtsec_rm.h +++ /dev/null @@ -1,51 +0,0 @@ -/*- - * Copyright (c) 2012 Semihalf. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#ifndef IF_DTSEC_RM_H_ -#define IF_DTSEC_RM_H_ - -/** - * @group dTSEC Regular Mode API. 
- * @{ - */ -int dtsec_rm_fm_port_rx_init(struct dtsec_softc *sc, int unit); -int dtsec_rm_fm_port_tx_init(struct dtsec_softc *sc, int unit); - -void dtsec_rm_if_start_locked(struct dtsec_softc *sc); - -int dtsec_rm_pool_rx_init(struct dtsec_softc *sc); -void dtsec_rm_pool_rx_free(struct dtsec_softc *sc); - -int dtsec_rm_fi_pool_init(struct dtsec_softc *sc); -void dtsec_rm_fi_pool_free(struct dtsec_softc *sc); - -int dtsec_rm_fqr_rx_init(struct dtsec_softc *sc); -int dtsec_rm_fqr_tx_init(struct dtsec_softc *sc); -void dtsec_rm_fqr_rx_free(struct dtsec_softc *sc); -void dtsec_rm_fqr_tx_free(struct dtsec_softc *sc); -/** @} */ - -#endif /* IF_DTSEC_RM_H_ */ diff --git a/sys/dev/dpaa/if_memac.c b/sys/dev/dpaa/if_memac.c new file mode 100644 index 000000000000..6d0fa3f5e337 --- /dev/null +++ b/sys/dev/dpaa/if_memac.c @@ -0,0 +1,820 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/rman.h> +#include <sys/socket.h> +#include <sys/sysctl.h> +#include <sys/sockio.h> + +#include <machine/bus.h> +#include <machine/resource.h> + +#include <net/ethernet.h> +#include <net/if.h> +#include <net/if_dl.h> +#include <net/if_media.h> +#include <net/if_types.h> +#include <net/if_arp.h> + +#include <dev/mii/mii.h> +#include <dev/mii/miivar.h> +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/ofw/openfirm.h> + +#include "miibus_if.h" + +#include "dpaa_eth.h" +#include "fman.h" +#include "fman_port.h" +#include "if_memac.h" + +#include "fman_if.h" +#include "fman_port_if.h" + +#define MEMAC_MIN_FRAME_SIZE 64 +#define MEMAC_MAX_FRAME_SIZE 32736 + +#define MEMAC_COMMAND_CONFIG 0x008 +#define COMMAND_CONFIG_RXSTP 0x20000000 +#define COMMAND_CONFIG_NO_LEN_CHK 0x00020000 +#define COMMAND_CONFIG_SWR 0x00001000 +#define COMMAND_CONFIG_TXP 
0x00000800 +#define COMMAND_CONFIG_CRC 0x00000040 +#define COMMAND_CONFIG_PROMISC 0x00000010 +#define COMMAND_CONFIG_RX_EN 0x00000002 +#define COMMAND_CONFIG_TX_EN 0x00000001 +#define MEMAC_MAC_ADDR_0 0x00c +#define MEMAC_MAC_ADDR_1 0x010 +#define MEMAC_REG_MAXFRM 0x14 +#define MEMAC_REG_TX_FIFO_SECTIONS 0x020 +#define TX_FIFO_SECTIONS_TX_EMPTY_M 0xffff0000 +#define TX_FIFO_SECTIONS_TX_EMPTY_S 16 +#define TX_FIFO_SECTIONS_TX_AVAIL_M 0x0000ffff + +#define HASHTABLE_CTRL 0x02c +#define CTRL_MCAST 0x00000100 +#define CTRL_HASH_ADDR_M 0x0000003f +#define HASHTABLE_SIZE 64 +#define MEMAC_IEVENT 0x040 +#define IEVENT_RX_EMPTY 0x00000040 +#define IEVENT_TX_EMPTY 0x00000020 +#define MEMAC_CL01_PAUSE_QUANTA 0x054 +#define MEMAC_IF_MODE 0x300 +#define IF_MODE_ENA 0x00008000 +#define IF_MODE_SSP_M 0x00006000 +#define IF_MODE_SSP_100MB 0x00000000 +#define IF_MODE_SSP_10MB 0x00002000 +#define IF_MODE_SSP_1GB 0x00004000 +#define IF_MODE_SFD 0x00001000 +#define IF_MODE_MSG 0x00000200 +#define IF_MODE_HG 0x00000100 +#define IF_MODE_HD 0x00000040 +#define IF_MODE_RLP 0x00000020 +#define IF_MODE_RG 0x00000004 +#define IF_MODE_IFMODE_M 0x00000003 +#define IF_MODE_IFMODE_XGMII 0x00000000 +#define IF_MODE_IFMODE_MII 0x00000001 +#define IF_MODE_IFMODE_GMII 0x00000002 + +#define DEFAULT_PAUSE_QUANTA 0xf000 + +#define DPAA_CSUM_TX_OFFLOAD (CSUM_IP | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6) + + +/** + * @group FMan MAC routines. 
+ * @{ + */ +#define MEMAC_MAC_EXCEPTIONS_END (-1) + +static void memac_if_init_locked(struct memac_softc *sc); + +static int +memac_fm_mac_init(struct memac_softc *sc, uint8_t *mac) +{ + uint32_t reg; + + FMAN_GET_REVISION(device_get_parent(sc->sc_base.sc_dev), &sc->sc_base.sc_rev_major, + &sc->sc_base.sc_rev_minor); + + if (FMAN_RESET_MAC(device_get_parent(sc->sc_base.sc_dev), sc->sc_base.sc_eth_id) != 0) + return (ENXIO); + + reg = bus_read_4(sc->sc_base.sc_mem, MEMAC_COMMAND_CONFIG); + reg |= COMMAND_CONFIG_SWR; + bus_write_4(sc->sc_base.sc_mem, MEMAC_COMMAND_CONFIG, reg); + + while (bus_read_4(sc->sc_base.sc_mem, MEMAC_COMMAND_CONFIG) & COMMAND_CONFIG_SWR) + ; + + /* TODO: TX_FIFO_SECTIONS */ + /* TODO: CL01 pause quantum */ + bus_write_4(sc->sc_base.sc_mem, MEMAC_COMMAND_CONFIG, + COMMAND_CONFIG_NO_LEN_CHK | COMMAND_CONFIG_TXP | COMMAND_CONFIG_CRC); + + reg = bus_read_4(sc->sc_base.sc_mem, MEMAC_IF_MODE); + reg &= ~(IF_MODE_IFMODE_M | IF_MODE_RG); + switch (sc->sc_base.sc_mac_enet_mode) { + case MII_CONTYPE_RGMII: + reg |= IF_MODE_RG; + /* FALLTHROUGH */ + case MII_CONTYPE_GMII: + case MII_CONTYPE_SGMII: + case MII_CONTYPE_QSGMII: + reg |= IF_MODE_IFMODE_GMII; + break; + case MII_CONTYPE_RMII: + reg |= IF_MODE_RG; + /* FALLTHROUGH */ + case MII_CONTYPE_MII: + reg |= IF_MODE_IFMODE_MII; + break; + } + + bus_write_4(sc->sc_base.sc_mem, MEMAC_IF_MODE, reg); + + return (0); +} +/** @} */ + + +/** + * @group IFnet routines. 
+ * @{ + */ +static int +memac_set_mtu(struct memac_softc *sc, unsigned int mtu) +{ + + mtu += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN; + + MEMAC_LOCK_ASSERT(sc); + + if (mtu >= MEMAC_MIN_FRAME_SIZE && mtu <= MEMAC_MAX_FRAME_SIZE) { + bus_write_4(sc->sc_base.sc_mem, MEMAC_REG_MAXFRM, mtu); + return (mtu); + } + + return (0); +} + +static u_int +memac_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt) +{ + struct memac_softc *sc = arg; + uint8_t *addr = LLADDR(sdl); + uint32_t hash = 0; + uint8_t a, h; + + /* Hash is 6 bits, composed if [XOR{47:40},XOR{39:32},....] */ + for (int i = 0; i < 6; i++) { + a = addr[i]; + h = 0; + for (int j = 0; j < 8; j++, a >>= 1) { + h ^= (a & 0x1); + } + hash |= (h << i); + } + bus_write_4(sc->sc_base.sc_mem, HASHTABLE_CTRL, hash | CTRL_MCAST); + + return (1); +} + +static void +memac_setup_multicast(struct memac_softc *sc) +{ + + if (if_getflags(sc->sc_base.sc_ifnet) & IFF_ALLMULTI) { + for (int i = 0; i < HASHTABLE_SIZE; i++) + bus_write_4(sc->sc_base.sc_mem, + HASHTABLE_CTRL, CTRL_MCAST | i); + } else { + /* Clear the hash table */ + for (int i = 0; i < HASHTABLE_SIZE; i++) + bus_write_4(sc->sc_base.sc_mem, + HASHTABLE_CTRL, i); + } + + if_foreach_llmaddr(sc->sc_base.sc_ifnet, memac_hash_maddr, sc); +} + +static void +memac_setup_promisc(struct memac_softc *sc) +{ + uint32_t reg; + + reg = bus_read_4(sc->sc_base.sc_mem, MEMAC_COMMAND_CONFIG); + reg &= ~COMMAND_CONFIG_PROMISC; + + if ((if_getflags(sc->sc_base.sc_ifnet) & IFF_PROMISC) != 0) + bus_write_4(sc->sc_base.sc_mem, MEMAC_COMMAND_CONFIG, + reg | COMMAND_CONFIG_PROMISC); +} + +static void +memac_if_graceful_stop(struct memac_softc *sc) +{ + struct resource *regs = sc->sc_base.sc_mem; + uint32_t reg; + + reg = bus_read_4(regs, MEMAC_COMMAND_CONFIG); + reg |= COMMAND_CONFIG_RXSTP; + + bus_write_4(regs, MEMAC_COMMAND_CONFIG, reg); + while ((bus_read_4(regs, MEMAC_IEVENT) & IEVENT_RX_EMPTY) == 0) + ; + reg &= COMMAND_CONFIG_RX_EN; + bus_write_4(regs, 
MEMAC_COMMAND_CONFIG, reg); + + while ((bus_read_4(regs, MEMAC_IEVENT) & IEVENT_TX_EMPTY) == 0) + ; + bus_write_4(regs, MEMAC_COMMAND_CONFIG, reg & ~COMMAND_CONFIG_TX_EN); +} + +static void +memac_mac_enable(struct memac_softc *sc) +{ + uint32_t reg = bus_read_4(sc->sc_base.sc_mem, MEMAC_COMMAND_CONFIG); + + reg |= (COMMAND_CONFIG_RX_EN | COMMAND_CONFIG_TX_EN); + + bus_write_4(sc->sc_base.sc_mem, MEMAC_COMMAND_CONFIG, reg); +} + +static int +memac_if_enable_locked(struct memac_softc *sc) +{ + int error; + + MEMAC_LOCK_ASSERT(sc); + + memac_set_mtu(sc, if_getmtu(sc->sc_base.sc_ifnet)); + memac_mac_enable(sc); + + error = FMAN_PORT_ENABLE(sc->sc_base.sc_rx_port); + if (error != 0) + return (EIO); + + error = FMAN_PORT_ENABLE(sc->sc_base.sc_tx_port); + if (error != 0) + return (EIO); + + bus_write_4(sc->sc_base.sc_mem, MEMAC_IEVENT, 0); + memac_setup_multicast(sc); + memac_setup_promisc(sc); + + if_setdrvflagbits(sc->sc_base.sc_ifnet, IFF_DRV_RUNNING, 0); + + /* Refresh link state */ + memac_miibus_statchg(sc->sc_base.sc_dev); + + return (0); +} + +static int +memac_if_disable_locked(struct memac_softc *sc) +{ + int error; + + MEMAC_LOCK_ASSERT(sc); + + error = FMAN_PORT_DISABLE(sc->sc_base.sc_tx_port); + if (error != 0) + return (EIO); + + memac_if_graceful_stop(sc); + + error = FMAN_PORT_DISABLE(sc->sc_base.sc_rx_port); + if (error != 0) + return (EIO); + + if_setdrvflagbits(sc->sc_base.sc_ifnet, 0, IFF_DRV_RUNNING); + + return (0); +} + +static int +memac_if_ioctl(if_t ifp, u_long command, caddr_t data) +{ + struct memac_softc *sc; + struct ifreq *ifr; + uint32_t changed; + int error; + + sc = if_getsoftc(ifp); + ifr = (struct ifreq *)data; + error = 0; + + /* Basic functionality to achieve media status reports */ + switch (command) { + case SIOCSIFMTU: + MEMAC_LOCK(sc); + if (memac_set_mtu(sc, ifr->ifr_mtu)) + if_setmtu(ifp, ifr->ifr_mtu); + else + error = EINVAL; + MEMAC_UNLOCK(sc); + break; + case SIOCSIFFLAGS: + MEMAC_LOCK(sc); + if (if_getflags(ifp) & IFF_UP) 
{ + if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) + memac_if_init_locked(sc); + } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) + error = memac_if_disable_locked(sc); + + MEMAC_UNLOCK(sc); + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + if (if_getflags(sc->sc_base.sc_ifnet) & IFF_UP) { + MEMAC_LOCK(sc); + memac_setup_multicast(sc); + MEMAC_UNLOCK(sc); + } + break; + + case SIOCSIFCAP: + changed = if_getcapenable(ifp) ^ ifr->ifr_reqcap; + if ((changed & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) != 0) + if_togglecapenable(ifp, + IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); + if ((changed & (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6)) != 0) { + if_togglecapenable(ifp, + IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); + if_togglehwassist(ifp, DPAA_CSUM_TX_OFFLOAD); + } + break; + + case SIOCGIFMEDIA: + case SIOCSIFMEDIA: + error = ifmedia_ioctl(ifp, ifr, &sc->sc_base.sc_mii->mii_media, + command); + break; + + default: + error = ether_ioctl(ifp, command, data); + } + + return (error); +} + +static void +memac_if_tick(void *arg) +{ + struct memac_softc *sc; + + sc = arg; + + /* TODO */ + MEMAC_LOCK(sc); + + mii_tick(sc->sc_base.sc_mii); + callout_reset(&sc->sc_base.sc_tick_callout, hz, memac_if_tick, sc); + + MEMAC_UNLOCK(sc); +} + +static void +memac_if_deinit_locked(struct memac_softc *sc) +{ + + MEMAC_LOCK_ASSERT(sc); + + MEMAC_UNLOCK(sc); + callout_drain(&sc->sc_base.sc_tick_callout); + MEMAC_LOCK(sc); +} + +static void +memac_if_set_macaddr(struct memac_softc *sc, const char *addr) +{ + uint32_t reg; + + reg = (addr[3] << 24) | (addr[2] << 16) | (addr[1] << 8) | addr[0]; + bus_write_4(sc->sc_base.sc_mem, MEMAC_MAC_ADDR_0, reg); + reg = (addr[5] << 8) | (addr[4]); + bus_write_4(sc->sc_base.sc_mem, MEMAC_MAC_ADDR_1, reg); +} + +static void +memac_if_init_locked(struct memac_softc *sc) +{ + int error; + const char *macaddr; + + MEMAC_LOCK_ASSERT(sc); + + macaddr = if_getlladdr(sc->sc_base.sc_ifnet); + memac_if_set_macaddr(sc, macaddr); + + /* Start MII polling */ + if (sc->sc_base.sc_mii) + 
callout_reset(&sc->sc_base.sc_tick_callout, hz, + memac_if_tick, sc); + + if (if_getflags(sc->sc_base.sc_ifnet) & IFF_UP) { + error = memac_if_enable_locked(sc); + if (error != 0) + goto err; + } else { + error = memac_if_disable_locked(sc); + if (error != 0) + goto err; + } + + if_link_state_change(sc->sc_base.sc_ifnet, LINK_STATE_UP); + + bus_write_4(sc->sc_base.sc_mem, MEMAC_CL01_PAUSE_QUANTA, + DEFAULT_PAUSE_QUANTA); + + return; + +err: + memac_if_deinit_locked(sc); + device_printf(sc->sc_base.sc_dev, "initialization error.\n"); + return; +} + +static void +memac_if_init(void *data) +{ + struct memac_softc *sc; + + sc = data; + + MEMAC_LOCK(sc); + memac_if_init_locked(sc); + MEMAC_UNLOCK(sc); +} + +static void +memac_if_start(if_t ifp) +{ + struct memac_softc *sc; + + sc = if_getsoftc(ifp); + MEMAC_LOCK(sc); + dpaa_eth_if_start_locked(&sc->sc_base); + MEMAC_UNLOCK(sc); +} + +static void +memac_if_watchdog(if_t ifp) +{ + /* TODO */ +} +/** @} */ + + +/** + * @group IFmedia routines. + * @{ + */ +static int +memac_ifmedia_upd(if_t ifp) +{ + struct memac_softc *sc = if_getsoftc(ifp); + + return (0); + MEMAC_LOCK(sc); + mii_mediachg(sc->sc_base.sc_mii); + MEMAC_UNLOCK(sc); + + return (0); +} + +static void +memac_ifmedia_fixed_sts(if_t ifp, struct ifmediareq *ifmr) +{ + struct memac_softc *sc = if_getsoftc(ifp); + + MEMAC_LOCK(sc); + ifmr->ifm_count = 1; + ifmr->ifm_mask = 0; + ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; + ifmr->ifm_current = ifmr->ifm_active = + sc->sc_base.sc_mii->mii_media.ifm_cur->ifm_media; + ifmr->ifm_active = ifmr->ifm_current; + + /* + * In non-PHY usecases, we need to signal link state up, otherwise + * certain things requiring a link event (e.g async DHCP client) from + * devd do not happen. + */ + if (if_getlinkstate(ifp) == LINK_STATE_UNKNOWN) { + if_link_state_change(ifp, LINK_STATE_UP); + } + + /* We assume the link is static, as in a peer switch. 
*/ + + MEMAC_UNLOCK(sc); + + return; +} + +static void +memac_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) +{ + struct memac_softc *sc = if_getsoftc(ifp); + + MEMAC_LOCK(sc); + + mii_pollstat(sc->sc_base.sc_mii); + + ifmr->ifm_active = sc->sc_base.sc_mii->mii_media_active; + ifmr->ifm_status = sc->sc_base.sc_mii->mii_media_status; + + MEMAC_UNLOCK(sc); +} +/** @} */ + + +/** + * @group dTSEC bus interface. + * @{ + */ + +int +memac_attach(device_t dev) +{ + struct memac_softc *sc; + int error; + if_t ifp; + + sc = device_get_softc(dev); + + sc->sc_base.sc_dev = dev; + + /* Init locks */ + mtx_init(&sc->sc_base.sc_lock, device_get_nameunit(dev), + "mEMAC Global Lock", MTX_DEF); + + mtx_init(&sc->sc_base.sc_mii_lock, device_get_nameunit(dev), + "mEMAC MII Lock", MTX_DEF); + + /* Init callouts */ + callout_init(&sc->sc_base.sc_tick_callout, CALLOUT_MPSAFE); + + /* Create RX buffer pool */ + error = dpaa_eth_pool_rx_init(&sc->sc_base); + if (error != 0) + return (EIO); + + /* Create RX frame queue range */ + error = dpaa_eth_fq_rx_init(&sc->sc_base); + if (error != 0) + return (EIO); + + /* Create frame info pool */ + error = dpaa_eth_fi_pool_init(&sc->sc_base); + if (error != 0) + return (EIO); + + /* Create TX frame queue range */ + error = dpaa_eth_fq_tx_init(&sc->sc_base); + if (error != 0) + return (EIO); + + /* Init FMan MAC module. 
*/ + error = memac_fm_mac_init(sc, sc->sc_base.sc_mac_addr); + if (error != 0) { + memac_detach(dev); + return (ENXIO); + } + + dpaa_eth_fm_port_rx_init(&sc->sc_base); + dpaa_eth_fm_port_tx_init(&sc->sc_base); + + /* Create network interface for upper layers */ + ifp = sc->sc_base.sc_ifnet = if_alloc(IFT_ETHER); + if_setsoftc(ifp, sc); + + if_setflags(ifp, IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST); + if_setinitfn(ifp, memac_if_init); + if_setstartfn(ifp, memac_if_start); + if_setioctlfn(ifp, memac_if_ioctl); + if_setsendqlen(ifp, IFQ_MAXLEN); + if_setsendqready(ifp); + + if (sc->sc_base.sc_phy_addr >= 0) + if_initname(ifp, device_get_name(sc->sc_base.sc_dev), + device_get_unit(sc->sc_base.sc_dev)); + else + if_initname(ifp, "memac_phy", + device_get_unit(sc->sc_base.sc_dev)); + + + if_setcapabilities(ifp, IFCAP_JUMBO_MTU | + IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | + IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | + IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6); + if_setcapenable(ifp, if_getcapabilities(ifp)); + if_sethwassist(ifp, DPAA_CSUM_TX_OFFLOAD); + + /* Attach PHY(s) */ + if (!sc->sc_fixed_link) { + error = mii_attach(sc->sc_base.sc_dev, &sc->sc_base.sc_mii_dev, + ifp, memac_ifmedia_upd, memac_ifmedia_sts, BMSR_DEFCAPMASK, + sc->sc_base.sc_phy_addr, MII_OFFSET_ANY, 0); + if (error) { + device_printf(sc->sc_base.sc_dev, + "attaching PHYs failed: %d\n", error); + memac_detach(sc->sc_base.sc_dev); + return (error); + } + sc->sc_base.sc_mii = device_get_softc(sc->sc_base.sc_mii_dev); + } else { + phandle_t node; + uint32_t type = IFM_ETHER; + uint32_t speed; + + node = ofw_bus_find_child(ofw_bus_get_node(dev), "fixed-link"); + if (OF_getencprop(node, "speed", &speed, sizeof(speed)) <= 0) { + device_printf(dev, + "fixed link has no speed property\n"); + memac_detach(sc->sc_base.sc_dev); + return (ENXIO); + } + switch (speed) { + case 10: + type |= IFM_10_T; + break; + case 100: + type |= IFM_100_TX; + break; + case 1000: + type |= IFM_1000_T; + break; + case 2500: + type |= IFM_2500_T; + 
break; + case 5000: + type |= IFM_5000_T; + break; + case 10000: + type |= IFM_10G_T; + break; + } + if (OF_hasprop(node, "full-duplex")) + type |= IFM_FDX; + sc->sc_base.sc_mii = malloc(sizeof(*sc->sc_base.sc_mii), + M_DEVBUF, M_WAITOK | M_ZERO); + ifmedia_init(&sc->sc_base.sc_mii->mii_media, 0, + memac_ifmedia_upd, memac_ifmedia_fixed_sts); + ifmedia_add(&sc->sc_base.sc_mii->mii_media, type, 0, NULL); + ifmedia_set(&sc->sc_base.sc_mii->mii_media, type); + } + + /* Attach to stack */ + ether_ifattach(ifp, sc->sc_base.sc_mac_addr); + + return (0); +} + +int +memac_detach(device_t dev) +{ + struct memac_softc *sc; + if_t ifp; + + sc = device_get_softc(dev); + ifp = sc->sc_base.sc_ifnet; + + if (device_is_attached(dev)) { + ether_ifdetach(ifp); + /* Shutdown interface */ + MEMAC_LOCK(sc); + memac_if_deinit_locked(sc); + MEMAC_UNLOCK(sc); + } + + if (sc->sc_base.sc_ifnet) { + if_free(sc->sc_base.sc_ifnet); + sc->sc_base.sc_ifnet = NULL; + } + + /* Free RX/TX FQRs */ + dpaa_eth_fq_rx_free(&sc->sc_base); + dpaa_eth_fq_tx_free(&sc->sc_base); + + /* Free frame info pool */ + dpaa_eth_fi_pool_free(&sc->sc_base); + + /* Free RX buffer pool */ + dpaa_eth_pool_rx_free(&sc->sc_base); + + /* Destroy lock */ + mtx_destroy(&sc->sc_base.sc_lock); + + return (0); +} + +int +memac_suspend(device_t dev) +{ + + return (0); +} + +int +memac_resume(device_t dev) +{ + + return (0); +} + +int +memac_shutdown(device_t dev) +{ + + return (0); +} +/** @} */ + + +/** + * @group MII bus interface. 
+ * @{ + */ +int +memac_miibus_readreg(device_t dev, int phy, int reg) +{ + struct memac_softc *sc; + + sc = device_get_softc(dev); + + return (MIIBUS_READREG(sc->sc_base.sc_mdio, phy, reg)); +} + +int +memac_miibus_writereg(device_t dev, int phy, int reg, int value) +{ + + struct memac_softc *sc; + + sc = device_get_softc(dev); + + return (MIIBUS_WRITEREG(sc->sc_base.sc_mdio, phy, reg, value)); +} + +void +memac_miibus_statchg(device_t dev) +{ + struct memac_softc *sc; + uint32_t reg; + bool duplex; + int speed; + + sc = device_get_softc(dev); + + MEMAC_LOCK_ASSERT(sc); + + duplex = ((sc->sc_base.sc_mii->mii_media_active & IFM_GMASK) == IFM_FDX); + + switch (IFM_SUBTYPE(sc->sc_base.sc_mii->mii_media_active)) { + case IFM_AUTO: + speed = IF_MODE_ENA; + break; + case IFM_1000_T: + case IFM_1000_SX: + if (!duplex) { + device_printf(sc->sc_base.sc_dev, + "Only full-duplex supported for 1Gbps speeds"); + return; + } + speed = IF_MODE_SSP_1GB; + break; + + case IFM_100_TX: + speed = IF_MODE_SSP_100MB; + break; + default: + speed = IF_MODE_SSP_10MB; + break; + } + + reg = bus_read_4(sc->sc_base.sc_mem, MEMAC_IF_MODE); + reg &= ~(IF_MODE_ENA | IF_MODE_SSP_M | IF_MODE_SFD); + reg |= 0x2; + + if (duplex) + reg |= IF_MODE_SFD; + else + reg |= IF_MODE_HD; + reg |= speed; + bus_write_4(sc->sc_base.sc_mem, MEMAC_IF_MODE, reg); +} +/** @} */ diff --git a/sys/dev/dpaa/if_memac.h b/sys/dev/dpaa/if_memac.h new file mode 100644 index 000000000000..98942abaf79c --- /dev/null +++ b/sys/dev/dpaa/if_memac.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * Copyright (c) 2011-2012 Semihalf. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef IF_MEMAC_H_ +#define IF_MEMAC_H_ + +/** + * @group dTSEC common API. 
+ * @{ + */ +#define MEMAC_MODE_REGULAR 0 + +#define MEMAC_LOCK(sc) mtx_lock(&(sc)->sc_base.sc_lock) +#define MEMAC_UNLOCK(sc) mtx_unlock(&(sc)->sc_base.sc_lock) +#define MEMAC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_base.sc_lock, MA_OWNED) +#define MEMAC_MII_LOCK(sc) mtx_lock(&(sc)->sc_base.sc_mii_lock) +#define MEMAC_MII_UNLOCK(sc) mtx_unlock(&(sc)->sc_base.sc_mii_lock) + +enum eth_dev_type { + ETH_MEMAC = 0x1, + ETH_10GSEC = 0x2 +}; + +struct memac_softc { + struct dpaa_eth_softc sc_base; + bool sc_fixed_link; +}; +/** @} */ + + +/** + * @group dTSEC bus interface. + * @{ + */ +int memac_attach(device_t dev); +int memac_detach(device_t dev); +int memac_suspend(device_t dev); +int memac_resume(device_t dev); +int memac_shutdown(device_t dev); +int memac_miibus_readreg(device_t dev, int phy, int reg); +int memac_miibus_writereg(device_t dev, int phy, int reg, + int value); +void memac_miibus_statchg(device_t dev); +/** @} */ + +#endif /* IF_MEMAC_H_ */ diff --git a/sys/dev/dpaa/if_memac_fdt.c b/sys/dev/dpaa/if_memac_fdt.c new file mode 100644 index 000000000000..f136608a906c --- /dev/null +++ b/sys/dev/dpaa/if_memac_fdt.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * Copyright (c) 2012 Semihalf. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/module.h> +#include <sys/rman.h> +#include <sys/socket.h> + +#include <machine/bus.h> + +#include <powerpc/mpc85xx/mpc85xx.h> + +#include <net/if.h> +#include <net/if_media.h> + +#include <dev/mii/mii.h> +#include <dev/mii/miivar.h> +#include <dev/mii/mii_fdt.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> +#include <dev/ofw/openfirm.h> + +#include "miibus_if.h" + +#include "dpaa_eth.h" +#include "if_memac.h" +#include "fman.h" + + +static int memac_fdt_probe(device_t dev); +static int memac_fdt_attach(device_t dev); + +static device_method_t memac_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, memac_fdt_probe), + DEVMETHOD(device_attach, memac_fdt_attach), + DEVMETHOD(device_detach, memac_detach), + + DEVMETHOD(device_shutdown, memac_shutdown), + DEVMETHOD(device_suspend, memac_suspend), + DEVMETHOD(device_resume, memac_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_driver_added, bus_generic_driver_added), + + /* MII interface */ + DEVMETHOD(miibus_readreg, memac_miibus_readreg), + DEVMETHOD(miibus_writereg, memac_miibus_writereg), + DEVMETHOD(miibus_statchg, memac_miibus_statchg), + + DEVMETHOD_END +}; + +DEFINE_CLASS_0(memac, memac_driver, memac_methods, sizeof(struct memac_softc)); + +DRIVER_MODULE(memac, fman, memac_driver, 0, 0); +DRIVER_MODULE(miibus, memac, miibus_driver, 0, 0); +MODULE_DEPEND(memac, ether, 1, 1, 1); +MODULE_DEPEND(memac, miibus, 1, 1, 1); + +static int +memac_fdt_probe(device_t dev) +{ + + if (!ofw_bus_status_okay(dev)) + return (ENXIO); + + if (!ofw_bus_is_compatible(dev, "fsl,fman-memac")) + return (ENXIO); + + device_set_desc(dev, + "Freescale Multirate Ethernet Media Access Controller"); + + return (BUS_PROBE_DEFAULT); +} + +static int +memac_fdt_attach(device_t dev) +{ + struct 
memac_softc *sc; + device_t phy_dev; + phandle_t enet_node, phy_node; + phandle_t fman_rxtx_node[2]; + pcell_t fman_tx_cell, mac_id; + + sc = device_get_softc(dev); + enet_node = ofw_bus_get_node(dev); + + if (OF_getprop(enet_node, "local-mac-address", + (void *)sc->sc_base.sc_mac_addr, 6) == -1) { + device_printf(dev, + "Could not load local-mac-addr property from DTS\n"); + return (ENXIO); + } + + /* Get PHY connection type */ + sc->sc_base.sc_mac_enet_mode = mii_fdt_get_contype(enet_node); + + sc->sc_fixed_link = OF_hasprop(enet_node, "fixed-link") || + (ofw_bus_find_child(enet_node, "fixed-link") != 0); + if (!sc->sc_fixed_link) { + OF_getprop(enet_node, "phy-handle", &phy_node, sizeof(phy_node)); + phy_node = OF_node_from_xref(phy_node); + + if (OF_getencprop(phy_node, "reg", (void *)&sc->sc_base.sc_phy_addr, + sizeof(sc->sc_base.sc_phy_addr)) <= 0) + return (ENXIO); + + phy_dev = OF_device_from_xref(OF_xref_from_node(OF_parent(phy_node))); + + if (phy_dev == NULL) { + device_printf(dev, "No PHY found.\n"); + return (ENXIO); + } + + sc->sc_base.sc_mdio = phy_dev; + } + + /* Get MAC memory offset in SoC */ + sc->sc_base.sc_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 0, RF_ACTIVE); + if (sc->sc_base.sc_mem == NULL) + return (ENXIO); + + sc->sc_base.sc_mac_enet_mode = mii_fdt_get_contype(enet_node); + + if (sc->sc_base.sc_mac_enet_mode == MII_CONTYPE_UNKNOWN) { + device_printf(dev, "unknown MII type, defaulting to SGMII\n"); + sc->sc_base.sc_mac_enet_mode = MII_CONTYPE_SGMII; + } + + if (OF_getencprop(enet_node, "cell-index", + (void *)&mac_id, sizeof(mac_id)) <= 0) + return (ENXIO); + sc->sc_base.sc_eth_id = mac_id; + + /* Get RX/TX port handles */ + if (OF_getencprop(enet_node, "fsl,fman-ports", (void *)fman_rxtx_node, + sizeof(fman_rxtx_node)) <= 0) + return (ENXIO); + + if (fman_rxtx_node[0] == 0) + return (ENXIO); + + if (fman_rxtx_node[1] == 0) + return (ENXIO); + + sc->sc_base.sc_rx_port = OF_device_from_xref(fman_rxtx_node[0]); + 
sc->sc_base.sc_tx_port = OF_device_from_xref(fman_rxtx_node[1]); + + if (sc->sc_base.sc_rx_port == NULL || sc->sc_base.sc_tx_port == NULL) + return (ENXIO); + + fman_rxtx_node[1] = OF_node_from_xref(fman_rxtx_node[1]); + if (OF_getencprop(fman_rxtx_node[1], "cell-index", &fman_tx_cell, + sizeof(fman_tx_cell)) <= 0) + return (ENXIO); + /* Get QMan channel */ + sc->sc_base.sc_port_tx_qman_chan = fman_qman_channel_id(device_get_parent(dev), + fman_tx_cell); + + return (memac_attach(dev)); +} diff --git a/sys/dev/dpaa/portals.h b/sys/dev/dpaa/portals.h index a2d6294f3d8e..ef550b09a726 100644 --- a/sys/dev/dpaa/portals.h +++ b/sys/dev/dpaa/portals.h @@ -24,39 +24,30 @@ * SUCH DAMAGE. */ -typedef struct dpaa_portal { - int dp_irid; /* interrupt rid */ - struct resource *dp_ires; /* interrupt resource */ +#ifndef DPAA_PORTALS_H +#define DPAA_PORTALS_H - bool dp_regs_mapped; /* register mapping status */ - - t_Handle dp_ph; /* portal's handle */ - vm_paddr_t dp_ce_pa; /* portal's CE area PA */ - vm_paddr_t dp_ci_pa; /* portal's CI area PA */ - uint32_t dp_ce_size; /* portal's CE area size */ - uint32_t dp_ci_size; /* portal's CI area size */ - uintptr_t dp_intr_num; /* portal's intr. 
number */ -} dpaa_portal_t; - -struct dpaa_portals_softc { +struct dpaa_portal_softc { device_t sc_dev; /* device handle */ - vm_paddr_t sc_dp_pa; /* portal's PA */ - uint32_t sc_dp_size; /* portal's size */ - int sc_rrid[2]; /* memory rid */ - struct resource *sc_rres[2]; /* memory resource */ - dpaa_portal_t sc_dp[MAXCPU]; -}; - -struct dpaa_portals_devinfo { - struct resource_list di_res; - int di_intr_rid; + vm_paddr_t sc_ce_pa; /* portal's CE PA */ + vm_offset_t sc_ce_va; + vm_paddr_t sc_ci_pa; /* portal's CI PA */ + vm_offset_t sc_ci_va; + int sc_cpu; + uint32_t sc_ce_size; /* portal's CE size */ + uint32_t sc_ci_size; /* portal's CI size */ + struct resource *sc_mres[2]; /* memory resource */ + struct resource *sc_ires; /* Interrupt */ + void *sc_intr_cookie; + bool sc_regs_mapped; /* register mapping status */ }; -int bman_portals_attach(device_t); -int bman_portals_detach(device_t); +int bman_portal_attach(device_t, int); +int bman_portal_detach(device_t); -int qman_portals_attach(device_t); -int qman_portals_detach(device_t); +int qman_portal_attach(device_t, int); +int qman_portal_detach(device_t); -int dpaa_portal_alloc_res(device_t, struct dpaa_portals_devinfo *, int); -void dpaa_portal_map_registers(struct dpaa_portals_softc *); +int dpaa_portal_alloc_res(device_t, int); +void dpaa_portal_map_registers(struct dpaa_portal_softc *); +#endif diff --git a/sys/dev/dpaa/portals_common.c b/sys/dev/dpaa/portals_common.c index ed8e577694f2..c4501af05889 100644 --- a/sys/dev/dpaa/portals_common.c +++ b/sys/dev/dpaa/portals_common.c @@ -41,127 +41,46 @@ #include <machine/resource.h> #include <machine/tlb.h> -#include <contrib/ncsw/inc/error_ext.h> -#include <contrib/ncsw/inc/xx_ext.h> - #include "portals.h" int -dpaa_portal_alloc_res(device_t dev, struct dpaa_portals_devinfo *di, int cpu) +dpaa_portal_alloc_res(device_t dev, int cpu) { - struct dpaa_portals_softc *sc = device_get_softc(dev); - struct resource_list_entry *rle; - int err; - struct resource_list 
*res; + struct dpaa_portal_softc *sc = device_get_softc(dev); - /* Check if MallocSmart allocator is ready */ - if (XX_MallocSmartInit() != E_OK) + sc->sc_mres[0] = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + 0, RF_ACTIVE); + if (sc->sc_mres[0] == NULL) { + device_printf(dev, + "Could not allocate cache enabled memory.\n"); return (ENXIO); - - res = &di->di_res; - - /* - * Allocate memory. - * Reserve only one pair of CE/CI virtual memory regions - * for all CPUs, in order to save the space. - */ - if (sc->sc_rres[0] == NULL) { - /* Cache enabled area */ - rle = resource_list_find(res, SYS_RES_MEMORY, 0); - sc->sc_rrid[0] = 0; - sc->sc_rres[0] = bus_alloc_resource(dev, - SYS_RES_MEMORY, &sc->sc_rrid[0], rle->start + sc->sc_dp_pa, - rle->end + sc->sc_dp_pa, rle->count, RF_ACTIVE); - if (sc->sc_rres[0] == NULL) { - device_printf(dev, - "Could not allocate cache enabled memory.\n"); - return (ENXIO); - } - tlb1_set_entry(rman_get_bushandle(sc->sc_rres[0]), - rle->start + sc->sc_dp_pa, rle->count, _TLB_ENTRY_MEM); - /* Cache inhibited area */ - rle = resource_list_find(res, SYS_RES_MEMORY, 1); - sc->sc_rrid[1] = 1; - sc->sc_rres[1] = bus_alloc_resource(dev, - SYS_RES_MEMORY, &sc->sc_rrid[1], rle->start + sc->sc_dp_pa, - rle->end + sc->sc_dp_pa, rle->count, RF_ACTIVE); - if (sc->sc_rres[1] == NULL) { - device_printf(dev, - "Could not allocate cache inhibited memory.\n"); - bus_release_resource(dev, SYS_RES_MEMORY, - sc->sc_rrid[0], sc->sc_rres[0]); - return (ENXIO); - } - tlb1_set_entry(rman_get_bushandle(sc->sc_rres[1]), - rle->start + sc->sc_dp_pa, rle->count, _TLB_ENTRY_IO); - sc->sc_dp[cpu].dp_regs_mapped = 1; } - /* Acquire portal's CE_PA and CI_PA */ - rle = resource_list_find(res, SYS_RES_MEMORY, 0); - sc->sc_dp[cpu].dp_ce_pa = rle->start + sc->sc_dp_pa; - sc->sc_dp[cpu].dp_ce_size = rle->count; - rle = resource_list_find(res, SYS_RES_MEMORY, 1); - sc->sc_dp[cpu].dp_ci_pa = rle->start + sc->sc_dp_pa; - sc->sc_dp[cpu].dp_ci_size = rle->count; - - /* Allocate 
interrupts */ - rle = resource_list_find(res, SYS_RES_IRQ, 0); - sc->sc_dp[cpu].dp_irid = 0; - sc->sc_dp[cpu].dp_ires = bus_alloc_resource(dev, - SYS_RES_IRQ, &sc->sc_dp[cpu].dp_irid, rle->start, rle->end, - rle->count, RF_ACTIVE); - /* Save interrupt number for later use */ - sc->sc_dp[cpu].dp_intr_num = rle->start; - - if (sc->sc_dp[cpu].dp_ires == NULL) { - device_printf(dev, "Could not allocate irq.\n"); + /* Cache inhibited area */ + sc->sc_mres[1] = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + 1, RF_ACTIVE); + if (sc->sc_mres[1] == NULL) { + device_printf(dev, + "Could not allocate cache inhibited memory.\n"); + bus_release_resource(dev, SYS_RES_MEMORY, 0, sc->sc_mres[0]); return (ENXIO); } - err = XX_PreallocAndBindIntr(dev, (uintptr_t)sc->sc_dp[cpu].dp_ires, cpu); + sc->sc_dev = dev; + sc->sc_ce_va = rman_get_bushandle(sc->sc_mres[0]); + sc->sc_ce_size = rman_get_size(sc->sc_mres[0]); + sc->sc_ce_pa = pmap_kextract(sc->sc_ce_va); + sc->sc_ci_va = rman_get_bushandle(sc->sc_mres[1]); + sc->sc_ci_size = rman_get_size(sc->sc_mres[1]); + sc->sc_ci_pa = pmap_kextract(sc->sc_ci_va); + tlb1_set_entry(sc->sc_ce_va, sc->sc_ce_pa, sc->sc_ce_size, + _TLB_ENTRY_MEM | _TLB_ENTRY_SHARED); + sc->sc_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ, 0, RF_ACTIVE); - if (err != E_OK) { - device_printf(dev, "Could not prealloc and bind interrupt\n"); - bus_release_resource(dev, SYS_RES_IRQ, - sc->sc_dp[cpu].dp_irid, sc->sc_dp[cpu].dp_ires); - sc->sc_dp[cpu].dp_ires = NULL; + /* Allocate interrupts */ + if (sc->sc_ires == NULL) { + device_printf(dev, "Could not allocate irq.\n"); return (ENXIO); } -#if 0 - err = bus_generic_config_intr(dev, rle->start, di->di_intr_trig, - di->di_intr_pol); - if (err != 0) { - device_printf(dev, "Could not configure interrupt\n"); - bus_release_resource(dev, SYS_RES_IRQ, - sc->sc_dp[cpu].dp_irid, sc->sc_dp[cpu].dp_ires); - sc->sc_dp[cpu].dp_ires = NULL; - return (err); - } -#endif - return (0); } - -void -dpaa_portal_map_registers(struct 
dpaa_portals_softc *sc) -{ - unsigned int cpu; - - sched_pin(); - cpu = PCPU_GET(cpuid); - if (sc->sc_dp[cpu].dp_regs_mapped) - goto out; - - tlb1_set_entry(rman_get_bushandle(sc->sc_rres[0]), - sc->sc_dp[cpu].dp_ce_pa, sc->sc_dp[cpu].dp_ce_size, - _TLB_ENTRY_MEM); - tlb1_set_entry(rman_get_bushandle(sc->sc_rres[1]), - sc->sc_dp[cpu].dp_ci_pa, sc->sc_dp[cpu].dp_ci_size, - _TLB_ENTRY_IO); - - sc->sc_dp[cpu].dp_regs_mapped = 1; - -out: - sched_unpin(); -} diff --git a/sys/dev/dpaa/qman.c b/sys/dev/dpaa/qman.c index fccaa853918b..9143ebde5cb4 100644 --- a/sys/dev/dpaa/qman.c +++ b/sys/dev/dpaa/qman.c @@ -1,3 +1,8 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ /*- * Copyright (c) 2011-2012 Semihalf. * All rights reserved. @@ -29,6 +34,7 @@ #include <sys/kernel.h> #include <sys/bus.h> #include <sys/lock.h> +#include <sys/malloc.h> #include <sys/module.h> #include <sys/mutex.h> #include <sys/proc.h> @@ -41,139 +47,276 @@ #include <machine/resource.h> #include <machine/tlb.h> -#include "qman.h" +#include "dpaa_common.h" #include "portals.h" +#include "qman.h" +#include "qman_var.h" +#include "qman_portal_if.h" + +/* Registers */ +#define QCSP_IO_CFG(n) (0x004 + (n) * 16) +#define IO_CFG_SDEST_M 0x00ff0000 +#define IO_CFG_SDEST_S 16 +#define QMAN_DCP_CFG(n) (0x300 + (n) * 0x10) +#define DCP_CFG_ED 0x00000100 +#define DCP_CFG_ED_3 0x00001000 +#define QMAN_PFDR_FP_LWIT 0x410 +#define QMAN_PFDR_CFG 0x414 +#define QMAN_SFDR_CFG 0x500 +#define QMAN_MCR 0xb00 +#define MCR_INIT_PFDR 0x01000000 +#define MCR_READ_PFDR 0x02000000 +#define MCR_READ_SFDR 0x03000000 +#define MCR_QUERY_FQD_FILL 0x10000000 +#define MCR_QUERY_FQD_TAGS 0x11000000 +#define MCR_QUERY_FQD_CACHE 0x12000000 +#define MCR_QUERY_WQ 0x20000000 +#define MCR_RSLT_OK 0xf0000000 +#define MCR_RSLT_OK_DATA 0xf1000000 +#define MCR_RSLT_ABRT_INV 0xf4000000 +#define MCR_RSLT_ABRT_DIS 0xf8000000 +#define MCR_RSLT_ABRT_IDX 0xff000000 +#define MCR_RSLT_ABRT_MASK 0xff000000 
+#define QMAN_MCP0 0xb04 +#define QMAN_MCP1 0xb08 +#define QMAN_IP_REV_1 0xbf8 +#define IP_MJ_M 0x0000ff00 +#define IP_MJ_S 8 +#define IP_MN_M 0x000000ff +#define QMAN_FQD_BARE 0xc00 +#define QMAN_FQD_BAR 0xc04 +#define QMAN_FQD_AR 0xc10 +#define AR_EN 0x80000000 +#define QMAN_PFDR_BARE 0xc20 +#define QMAN_PFDR_BAR 0xc24 +#define QMAN_PFDR_AR 0xc30 +#define QMAN_QCSP_BARE 0xc80 +#define QMAN_QCSP_BAR 0xc84 +#define QMAN_QCSP_AR 0xc90 +#define QMAN_CI_SCHED_CFG 0xd00 +#define CI_SCHED_CFG_SW 0x80000000 +#define CI_SCHED_CFG_SRCCIV 0x04000000 /* Recommended */ +#define CI_SCHED_CFG_SRQ_W_M 0x00000700 +#define CI_SCHED_CFG_SRQ_W_S 8 +#define CI_SCHED_CFG_RW_W_M 0x00000070 +#define CI_SCHED_CFG_RW_W_S 4 +#define CI_SCHED_CFG_BMAN_W_M 0x00000007 +#define QMAN_ERR_ISR 0xe00 +#define QMAN_ERR_IER 0xe04 +#define QCSP_IO_CFG_3(n) (0x1004 + (n) * 16) + +/* Software portals. Cache-inhibited registers */ + +#define QCSP_DQRR_PDQCR 0x05c + +/* Software portals. Cache-enabled registers */ + +#define QCSP_VERB_INIT_FQ_PARK 0x40 +#define QCSP_VERB_INIT_FQ_SCHED 0x41 +#define QCSP_VERB_QUERY_FQ 0x44 +#define QCSP_VERB_QUERY_FQ_NP 0x45 +#define QCSP_VERB_ALTER_FQ_SCHED 0x48 +#define QCSP_VERB_ALTER_FQ_FE 0x49 +#define QCSP_VERB_ALTER_FQ_RETIRE 0x4a +#define QCSP_VERB_ALTER_FQ_TAKE_OUT 0x4b +#define QCSP_VERB_ALTER_FQ_RETIRE_CTXB 0x4c +#define QCSP_VERB_ALTER_FQ_XON 0x4d +#define QCSP_VERB_ALTER_FQ_XOFF 0x4e + +/* Init FQ */ +#define QCSP_INIT_FQ_WE_OAC 0x0100 +#define QCSP_INIT_FQ_WE_ORPC 0x0080 +#define QCSP_INIT_FQ_WE_CGID 0x0040 +#define QCSP_INIT_FQ_WE_FQ_CTRL 0x0020 +#define QCSP_INIT_FQ_WE_DEST_WQ 0x0010 +#define QCSP_INIT_FQ_WE_ICS_CRED 0x0008 +#define QCSP_INIT_FQ_WE_TD_THRESH 0x0004 +#define QCSP_INIT_FQ_WE_CONTEXT_B 0x0002 +#define QCSP_INIT_FQ_WE_CONTEXT_A 0x0001 + +#define QMAN_MC_RES_OK 0xf0 + +#define QMAN_MC_AFQS_NE 0x01 + +/* Init FQ options */ +#define QM_FQCTRL_CGE 0x0400 +#define QM_FQCTRL_TDE 0x0200 +#define QM_FQCTRL_ORP 0x0100 +#define QM_FQCTRL_CTXASTASH 
0x0080 +#define QM_FQCTRL_CPCSTASH 0x0040 +#define QM_FQCTRL_FORCESFDR 0x0008 +#define QM_FQCTRL_AVOIDBLOCK 0x0004 +#define QM_FQCTRL_HOLDACTIVE 0x0002 +#define QM_FQCTRL_LIC 0x0001 + +#define QMAN_CHANNEL_POOL1_REV1 0x21 +#define QMAN_CHANNEL_POOL1_REV3 0x401 + +#define QMAN_PFDR_MAX 0xfffeff + +/* P1023 has only 3 pool channels, but we don't support that SoC. */ +#define QMAN_POOL_CHANNELS 15 + +/* P1023 only supports 64 congestion groups... */ +#define QMAN_CGRS 256 -extern struct dpaa_portals_softc *qp_sc; static struct qman_softc *qman_sc; -extern t_Handle qman_portal_setup(struct qman_softc *qsc); +static MALLOC_DEFINE(M_QMAN, "qman", "DPAA Queue Manager structures"); + +int qman_channel_base; +int qman_total_fqids; +struct qman_fq **qman_fq_list; + +/* Entries sorted right-to-left in bit order of the ISR */ +static const char * const qman_errors[] = { + "Invalid enqueue queue", + "Invalid enqueue channel!", + "Invalid enqueue state", + "Invalid enqueue overflow", + "Invalid enqueue configuration", + NULL, + NULL, + NULL, + "Invalid dequeue queue", + "Invalid dequeue source", + "Invalid dequeue FQ", + "Invalid dequeue direct connect portal", + NULL, + NULL, + NULL, + NULL, + "Invalid command verb", + "Invalid FQ flow control state", + NULL, + NULL, + NULL, + NULL, + NULL, + "Insufficient free PFDRs", + "Single-bit ECC error", + "Multi-bit ECC error", + "PFDR low watermark", + "Invalid target transaction", + "Initiator data error", + NULL, + NULL +}; static void -qman_exception(t_Handle app, e_QmExceptions exception) +qman_isr(void *arg) { - struct qman_softc *sc; - const char *message; + struct qman_softc *sc = arg; + uint32_t ier, isr, isr_bit; + int i; - sc = app; + ier = bus_read_4(sc->sc_rres, QMAN_ERR_IER); + isr = bus_read_4(sc->sc_rres, QMAN_ERR_ISR); - switch (exception) { - case e_QM_EX_CORENET_INITIATOR_DATA: - message = "Initiator Data Error"; - break; - case e_QM_EX_CORENET_TARGET_DATA: - message = "CoreNet Target Data Error"; - break; - case 
e_QM_EX_CORENET_INVALID_TARGET_TRANSACTION: - message = "Invalid Target Transaction"; - break; - case e_QM_EX_PFDR_THRESHOLD: - message = "PFDR Low Watermark Interrupt"; - break; - case e_QM_EX_PFDR_ENQUEUE_BLOCKED: - message = "PFDR Enqueues Blocked Interrupt"; - break; - case e_QM_EX_SINGLE_ECC: - message = "Single Bit ECC Error Interrupt"; - break; - case e_QM_EX_MULTI_ECC: - message = "Multi Bit ECC Error Interrupt"; - break; - case e_QM_EX_INVALID_COMMAND: - message = "Invalid Command Verb Interrupt"; - break; - case e_QM_EX_DEQUEUE_DCP: - message = "Invalid Dequeue Direct Connect Portal Interrupt"; - break; - case e_QM_EX_DEQUEUE_FQ: - message = "Invalid Dequeue FQ Interrupt"; - break; - case e_QM_EX_DEQUEUE_SOURCE: - message = "Invalid Dequeue Source Interrupt"; - break; - case e_QM_EX_DEQUEUE_QUEUE: - message = "Invalid Dequeue Queue Interrupt"; - break; - case e_QM_EX_ENQUEUE_OVERFLOW: - message = "Invalid Enqueue Overflow Interrupt"; - break; - case e_QM_EX_ENQUEUE_STATE: - message = "Invalid Enqueue State Interrupt"; - break; - case e_QM_EX_ENQUEUE_CHANNEL: - message = "Invalid Enqueue Channel Interrupt"; - break; - case e_QM_EX_ENQUEUE_QUEUE: - message = "Invalid Enqueue Queue Interrupt"; - break; - case e_QM_EX_CG_STATE_CHANGE: - message = "CG change state notification"; - break; - default: - message = "Unknown error"; + if ((ier & isr) == 0) + return; + + isr_bit = (isr & ier); + for (i = 0; isr_bit != 0; i++, isr_bit >>= 1) { + if (isr_bit & 1) + device_printf(sc->sc_dev, "%s", qman_errors[i]); } - device_printf(sc->sc_dev, "QMan Exception: %s.\n", message); + bus_write_4(sc->sc_rres, QMAN_ERR_ISR, isr); } -/** - * General received frame callback. - * This is called, when user did not register his own callback for a given - * frame queue range (fqr). 
- */ -e_RxStoreResponse -qman_received_frame_callback(t_Handle app, t_Handle qm_fqr, t_Handle qm_portal, - uint32_t fqid_offset, t_DpaaFD *frame) + +/* Set up reserved memory configuration for PFDR and FQD, per `off`. */ +static int +qman_set_memory(struct qman_softc *sc, vm_paddr_t pa, + vm_size_t size, bus_size_t off) { - struct qman_softc *sc; + uint32_t bar, bare; + vm_paddr_t old_bar; + + /* + * Register offsets: + * 0 - BARE + * 4 - BAR + * 0x10 - AR + */ + bare = bus_read_4(sc->sc_rres, off); + bar = bus_read_4(sc->sc_rres, off + 4); + old_bar = (vm_paddr_t)bare << 32 | bar; - sc = app; + if (old_bar != 0 && old_bar != pa) { + device_printf(sc->sc_dev, "QMan BAR already initialized!\n"); + return (ENOMEM); + } else if (old_bar == pa) + return (EEXIST); - device_printf(sc->sc_dev, "dummy callback for received frame.\n"); - return (e_RX_STORE_RESPONSE_CONTINUE); + /* + * Zero the memory and flush cache through DMAP. QMan accesses the + * memory as non-coherent. + */ + memset((void *)PHYS_TO_DMAP(pa), 0, size); + cpu_flush_dcache((void *)PHYS_TO_DMAP(pa), size); + + bus_write_4(sc->sc_rres, off, pa >> 32); + bus_write_4(sc->sc_rres, off + 4, (uint32_t)pa); + bus_write_4(sc->sc_rres, off + 0x10, AR_EN | (ilog2(size) - 1)); + + return (0); } -/** - * General rejected frame callback. - * This is called, when user did not register his own callback for a given - * frame queue range (fqr). +/* + * Set up PFDR structures. Some things to keep in mind: + * - npfdr is the total number of PFDRs in the private memory. PFDRs are 64 + * bytes in size, so npfdr is (pfdr_sz/64). + * - PFDR 0-7 are reserved, so the base starts at 8, not 0, so we adjust + * internally. + * - The second parameter is the last PFDR, not the number of PFDRs, so needs to + * be adjusted down one more, so subtract 9. 
*/ -e_RxStoreResponse -qman_rejected_frame_callback(t_Handle app, t_Handle qm_fqr, t_Handle qm_portal, - uint32_t fqid_offset, t_DpaaFD *frame, - t_QmRejectedFrameInfo *qm_rejected_frame_info) +static int +qman_setup_pfdr(struct qman_softc *sc, int npfdr) { - struct qman_softc *sc; + uint32_t res; + + npfdr = min(npfdr, QMAN_PFDR_MAX); + bus_write_4(sc->sc_rres, QMAN_MCP0, 8); + bus_write_4(sc->sc_rres, QMAN_MCP1, npfdr - 9); + bus_write_4(sc->sc_rres, QMAN_MCR, MCR_INIT_PFDR); + + for (int timeout = 100000; timeout > 0; timeout--) { + DELAY(1); + res = bus_read_4(sc->sc_rres, QMAN_MCR); + if (res >= MCR_RSLT_OK) + break; + } - sc = app; + if (res < MCR_RSLT_OK) + return (EBUSY); + if (res == MCR_RSLT_OK) + return (0); - device_printf(sc->sc_dev, "dummy callback for rejected frame.\n"); - return (e_RX_STORE_RESPONSE_CONTINUE); + return (ENXIO); } int qman_attach(device_t dev) { struct qman_softc *sc; - t_QmParam qp; - t_Error error; - t_QmRevisionInfo rev; + int error; + vm_paddr_t fqd_pa, pfdr_pa; + vm_size_t fqd_sz, pfdr_sz; + int qman_channel_pool1 = QMAN_CHANNEL_POOL1_REV1; + uint32_t ver; + uint32_t nfqd; + bool qman3 = false; sc = device_get_softc(dev); sc->sc_dev = dev; qman_sc = sc; - if (XX_MallocSmartInit() != E_OK) { - device_printf(dev, "could not initialize smart allocator.\n"); - return (ENXIO); - } - - sched_pin(); - /* Allocate resources */ sc->sc_rrid = 0; - sc->sc_rres = bus_alloc_resource(dev, SYS_RES_MEMORY, - &sc->sc_rrid, 0, ~0, QMAN_CCSR_SIZE, RF_ACTIVE); + sc->sc_rres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 0, RF_ACTIVE); if (sc->sc_rres == NULL) { device_printf(dev, "could not allocate memory.\n"); goto err; @@ -186,57 +329,79 @@ qman_attach(device_t dev) device_printf(dev, "could not allocate error interrupt.\n"); goto err; } + error = dpaa_map_private_memory(dev, 0, "fsl,qman-fqd", + &fqd_pa, &fqd_sz); + error = dpaa_map_private_memory(dev, 1, "fsl,qman-pfdr", + &pfdr_pa, &pfdr_sz); + + bzero((void *)PHYS_TO_DMAP(fqd_pa), fqd_sz); + 
cpu_flush_dcache((void *)PHYS_TO_DMAP(fqd_pa), fqd_sz); + /* + * FQDs are 64 bytes in size, with 24 bit pointers, so FQIDs are 24 + * bits, fits fine in a uint32_t. + */ + nfqd = fqd_sz / 64; + qman_total_fqids = nfqd; + qman_channel_base = qman_channel_pool1; + qman_fq_list = malloc(nfqd * sizeof(struct qman_fq *), M_QMAN, + M_WAITOK); - if (qp_sc == NULL) + error = qman_set_memory(sc, fqd_pa, fqd_sz, QMAN_FQD_BARE); + if (error != 0 && error != EEXIST) goto err; + error = qman_set_memory(sc, pfdr_pa, pfdr_sz, QMAN_PFDR_BARE); + if (error != 0 && error != EEXIST) + goto err; + if (error == 0) { + /* Initialize PFDRs if it hasn't been initialized before */ + error = qman_setup_pfdr(sc, pfdr_sz / 64); + if (error != 0) + goto err; + /* Magic constant from documentation */ + bus_write_4(sc->sc_rres, QMAN_PFDR_CFG, 64); + } - dpaa_portal_map_registers(qp_sc); + bus_write_4(sc->sc_rres, QMAN_ERR_ISR, 0xffffffff); + bus_write_4(sc->sc_rres, QMAN_ERR_IER, 0xffffffff); - /* Initialize QMan */ - qp.guestId = NCSW_MASTER_ID; - qp.baseAddress = rman_get_bushandle(sc->sc_rres); - qp.swPortalsBaseAddress = rman_get_bushandle(qp_sc->sc_rres[0]); - qp.liodn = 0; - qp.totalNumOfFqids = QMAN_MAX_FQIDS; - qp.fqdMemPartitionId = NCSW_MASTER_ID; - qp.pfdrMemPartitionId = NCSW_MASTER_ID; - qp.f_Exception = qman_exception; - qp.h_App = sc; - qp.errIrq = (uintptr_t)sc->sc_ires; - qp.partFqidBase = QMAN_FQID_BASE; - qp.partNumOfFqids = QMAN_MAX_FQIDS; - qp.partCgsBase = 0; - qp.partNumOfCgs = 0; + ver = bus_read_4(sc->sc_rres, QMAN_IP_REV_1); + sc->sc_qman_major = ((ver & IP_MJ_M) >> IP_MJ_S); + if (sc->sc_qman_major >= 3) + qman3 = true; - sc->sc_qh = QM_Config(&qp); - if (sc->sc_qh == NULL) { - device_printf(dev, "could not be configured\n"); - goto err; - } + if (qman3) + qman_channel_pool1 = QMAN_CHANNEL_POOL1_REV3; - error = QM_Init(sc->sc_qh); - if (error != E_OK) { - device_printf(dev, "could not be initialized\n"); + sc->sc_qman_base_channel = qman_channel_pool1; + + 
sc->sc_fqalloc = + vmem_create("qman-fqalloc", 1, nfqd - 1, 1, 0, M_WAITOK); + sc->sc_qpalloc = + vmem_create("qman-fqalloc", qman_channel_pool1, + QMAN_POOL_CHANNELS, 1, 0, M_WAITOK); + sc->sc_cgalloc = vmem_create("qman->cgalloc", 0, QMAN_CGRS, + 1, 0, M_WAITOK); + + if (bus_setup_intr(dev, sc->sc_ires, INTR_TYPE_NET, NULL, qman_isr, + sc, &sc->sc_intr_cookie) != 0) goto err; - } - error = QM_GetRevision(sc->sc_qh, &rev); - if (error != E_OK) { - device_printf(dev, "could not get QMan revision\n"); + if (error != 0) { + device_printf(dev, "could not be initialized\n"); goto err; } + bus_write_4(sc->sc_rres, QMAN_DCP_CFG(0), + qman3 ? DCP_CFG_ED_3 : DCP_CFG_ED); + bus_write_4(sc->sc_rres, QMAN_DCP_CFG(1), + qman3 ? DCP_CFG_ED_3 : DCP_CFG_ED); - device_printf(dev, "Hardware version: %d.%d.\n", - rev.majorRev, rev.minorRev); + bus_write_4(sc->sc_rres, 0xd00, 0x80000322); - sched_unpin(); - - qman_portal_setup(sc); + /* TODO: DO we need a taskqueue? Allocate here if so */ return (0); err: - sched_unpin(); qman_detach(dev); return (ENXIO); } @@ -248,11 +413,15 @@ qman_detach(device_t dev) sc = device_get_softc(dev); - if (sc->sc_qh) - QM_Free(sc->sc_qh); + if (sc->sc_fqalloc != NULL) + vmem_destroy(sc->sc_fqalloc); + if (sc->sc_qpalloc != NULL) + vmem_destroy(sc->sc_qpalloc); + if (sc->sc_cgalloc != NULL) + vmem_destroy(sc->sc_cgalloc); - if (sc->sc_ires != NULL) - XX_DeallocIntr((uintptr_t)sc->sc_ires); + if (sc->sc_intr_cookie != NULL) + bus_teardown_intr(dev, sc->sc_ires, sc->sc_intr_cookie); if (sc->sc_ires != NULL) bus_release_resource(dev, SYS_RES_IRQ, @@ -262,6 +431,9 @@ qman_detach(device_t dev) bus_release_resource(dev, SYS_RES_MEMORY, sc->sc_rrid, sc->sc_rres); + free(qman_fq_list, M_QMAN); + qman_fq_list = NULL; + return (0); } @@ -286,261 +458,233 @@ qman_shutdown(device_t dev) return (0); } +int +qman_alloc_channel(void) +{ + struct qman_softc *sc = qman_sc; + vmem_addr_t channel; + + vmem_alloc(sc->sc_qpalloc, 1, M_BESTFIT | M_WAITOK, &channel); + + 
return (channel); +} + +void +qman_free_channel(int channel) +{ + struct qman_softc *sc = qman_sc; + + vmem_free(sc->sc_qpalloc, channel, 1); +} /** * @group QMan API functions implementation. * @{ */ -t_Handle -qman_fqr_create(uint32_t fqids_num, e_QmFQChannel channel, uint8_t wq, +struct qman_fq * +qman_fq_from_index(uint32_t fqid) +{ + if (fqid > qman_total_fqids) + return (NULL); + return (qman_fq_list[fqid]); +} + +/* Allocate and initialize an FQ Range */ +struct qman_fq * +qman_fq_create(uint32_t fqids_num, int channel, uint8_t wq, bool force_fqid, uint32_t fqid_or_align, bool init_parked, bool hold_active, bool prefer_in_cache, bool congst_avoid_ena, - t_Handle congst_group, int8_t overhead_accounting_len, + void *congst_group, int8_t overhead_accounting_len, uint32_t tail_drop_threshold) { + union qman_mc_command cmd; struct qman_softc *sc; - t_QmFqrParams fqr; - t_Handle fqrh, portal; + union qman_mc_result *res; + struct qman_fq *fqh; + device_t portal; + vmem_addr_t fqid_base; + uint8_t rslt; sc = qman_sc; - sched_pin(); - - /* Ensure we have got QMan port initialized */ - portal = qman_portal_setup(sc); - if (portal == NULL) { - device_printf(sc->sc_dev, "could not setup QMan portal\n"); - goto err; + if (fqids_num != 1) { + device_printf(sc->sc_dev, + "Only one fq allocation allowed currently\n"); + return (NULL); } - fqr.h_Qm = sc->sc_qh; - fqr.h_QmPortal = portal; - fqr.initParked = init_parked; - fqr.holdActive = hold_active; - fqr.preferInCache = prefer_in_cache; + bzero(&cmd, sizeof(cmd)); + vmem_alloc(sc->sc_fqalloc, fqids_num, M_BESTFIT | M_WAITOK, &fqid_base); + cmd.init_fq.fqid = fqid_base; + cmd.init_fq.count = fqids_num - 1; + cmd.init_fq.dest_chan = channel; + cmd.init_fq.dest_wq = wq; + cmd.init_fq.we_mask = QCSP_INIT_FQ_WE_DEST_WQ | QCSP_INIT_FQ_WE_FQ_CTRL; + if (init_parked) + cmd.init_fq.verb = QCSP_VERB_INIT_FQ_PARK; + else + cmd.init_fq.verb = QCSP_VERB_INIT_FQ_SCHED; + cmd.init_fq.fq_ctrl = (prefer_in_cache ? 
QM_FQCTRL_LIC : 0) | + (hold_active ? QM_FQCTRL_HOLDACTIVE : 0) | + (congst_avoid_ena ? QM_FQCTRL_AVOIDBLOCK : 0); - /* We do not support stashing */ - fqr.useContextAForStash = FALSE; - fqr.p_ContextA = 0; - fqr.p_ContextB = 0; + critical_enter(); - fqr.channel = channel; - fqr.wq = wq; - fqr.shadowMode = FALSE; - fqr.numOfFqids = fqids_num; + /* Ensure we have got QMan port initialized */ + portal = DPCPU_GET(qman_affine_portal); + res = QMAN_PORTAL_MC_SEND_RAW(portal, &cmd); - /* FQID */ - fqr.useForce = force_fqid; - if (force_fqid) { - fqr.qs.frcQ.fqid = fqid_or_align; - } else { - fqr.qs.nonFrcQs.align = fqid_or_align; - } + rslt = 0; + if (res != NULL) + rslt = res->init_fq.rslt; - /* Congestion Avoidance */ - fqr.congestionAvoidanceEnable = congst_avoid_ena; - if (congst_avoid_ena) { - fqr.congestionAvoidanceParams.h_QmCg = congst_group; - fqr.congestionAvoidanceParams.overheadAccountingLength = - overhead_accounting_len; - fqr.congestionAvoidanceParams.fqTailDropThreshold = - tail_drop_threshold; - } else { - fqr.congestionAvoidanceParams.h_QmCg = 0; - fqr.congestionAvoidanceParams.overheadAccountingLength = 0; - fqr.congestionAvoidanceParams.fqTailDropThreshold = 0; - } - - fqrh = QM_FQR_Create(&fqr); - if (fqrh == NULL) { - device_printf(sc->sc_dev, "could not create Frame Queue Range" - "\n"); + critical_exit(); + if (res == NULL || rslt != QMAN_MC_RES_OK) { + vmem_free(sc->sc_fqalloc, fqid_base, fqids_num); goto err; } - sc->sc_fqr_cpu[QM_FQR_GetFqid(fqrh)] = PCPU_GET(cpuid); + fqh = malloc(sizeof(*fqh), M_QMAN, M_WAITOK | M_ZERO); + fqh->fqid = fqid_base; - sched_unpin(); + qman_fq_list[fqid_base] = fqh; - return (fqrh); + return (fqh); err: - sched_unpin(); return (NULL); } -t_Error -qman_fqr_free(t_Handle fqr) +static int +qman_fq_retire(device_t portal, struct qman_fq *fq) { - struct qman_softc *sc; - t_Error error; - - sc = qman_sc; - thread_lock(curthread); - sched_bind(curthread, sc->sc_fqr_cpu[QM_FQR_GetFqid(fqr)]); - thread_unlock(curthread); 
+ union qman_mc_command cmd; + union qman_mc_result *rr; - error = QM_FQR_Free(fqr); + bzero(&cmd, sizeof(cmd)); - thread_lock(curthread); - sched_unbind(curthread); - thread_unlock(curthread); + cmd.alter_fqs.verb = QCSP_VERB_ALTER_FQ_RETIRE; + cmd.alter_fqs.fqid = fq->fqid; + rr = QMAN_PORTAL_MC_SEND_RAW(portal, &cmd); + if (rr == NULL) + return (ETIMEDOUT); - return (error); -} - -t_Error -qman_fqr_register_cb(t_Handle fqr, t_QmReceivedFrameCallback *callback, - t_Handle app) -{ - struct qman_softc *sc; - t_Error error; - t_Handle portal; - - sc = qman_sc; - sched_pin(); - - /* Ensure we have got QMan port initialized */ - portal = qman_portal_setup(sc); - if (portal == NULL) { - device_printf(sc->sc_dev, "could not setup QMan portal\n"); - sched_unpin(); - return (E_NOT_SUPPORTED); + if (rr->alter_fqs.rslt == QMAN_MC_RES_OK) { + if (rr->alter_fqs.fqs & QMAN_MC_AFQS_NE) { + /* TODO: Drain.... */ + } + return (0); } - error = QM_FQR_RegisterCB(fqr, callback, app); - - sched_unpin(); - - return (error); + return (0); } -t_Error -qman_fqr_enqueue(t_Handle fqr, uint32_t fqid_off, t_DpaaFD *frame) +int +qman_fq_free(struct qman_fq *fq) { struct qman_softc *sc; - t_Error error; - t_Handle portal; + int error; sc = qman_sc; - sched_pin(); - - /* Ensure we have got QMan port initialized */ - portal = qman_portal_setup(sc); - if (portal == NULL) { - device_printf(sc->sc_dev, "could not setup QMan portal\n"); - sched_unpin(); - return (E_NOT_SUPPORTED); - } - - error = QM_FQR_Enqueue(fqr, portal, fqid_off, frame); - sched_unpin(); + critical_enter(); + error = qman_fq_retire(DPCPU_GET(qman_affine_portal), fq); + /* TODO: Take FQ out of service. 
*/ + critical_exit(); + if (error != 0) + return (error); + vmem_free(sc->sc_fqalloc, fq->fqid, 1); + qman_fq_list[fq->fqid] = NULL; + free(fq, M_QMAN); - return (error); + return (0); } -uint32_t -qman_fqr_get_counter(t_Handle fqr, uint32_t fqid_off, - e_QmFqrCounters counter) +int +qman_fq_register_cb(struct qman_fq *fq, qman_cb_dqrr callback, + void *ctx) { - struct qman_softc *sc; - uint32_t val; - t_Handle portal; - - sc = qman_sc; - sched_pin(); - - /* Ensure we have got QMan port initialized */ - portal = qman_portal_setup(sc); - if (portal == NULL) { - device_printf(sc->sc_dev, "could not setup QMan portal\n"); - sched_unpin(); - return (0); - } + fq->cb.dqrr = callback; + fq->cb.ctx = ctx; - val = QM_FQR_GetCounter(fqr, portal, fqid_off, counter); - - sched_unpin(); - - return (val); + return (0); } -t_Error -qman_fqr_pull_frame(t_Handle fqr, uint32_t fqid_off, t_DpaaFD *frame) +int +qman_fq_enqueue(struct qman_fq *fq, struct dpaa_fd *frame) { struct qman_softc *sc; - t_Error error; - t_Handle portal; + int error; + void *portal; sc = qman_sc; - sched_pin(); + critical_enter(); /* Ensure we have got QMan port initialized */ - portal = qman_portal_setup(sc); + portal = DPCPU_GET(qman_affine_portal); if (portal == NULL) { device_printf(sc->sc_dev, "could not setup QMan portal\n"); - sched_unpin(); - return (E_NOT_SUPPORTED); + critical_exit(); + return (ENXIO); } - error = QM_FQR_PullFrame(fqr, portal, fqid_off, frame); + error = QMAN_PORTAL_ENQUEUE(portal, fq, frame); - sched_unpin(); + critical_exit(); return (error); } uint32_t -qman_fqr_get_base_fqid(t_Handle fqr) +qman_fq_get_fqid(struct qman_fq *fq) { - struct qman_softc *sc; - uint32_t val; - t_Handle portal; + return (fq->fqid); +} - sc = qman_sc; - sched_pin(); - /* Ensure we have got QMan port initialized */ - portal = qman_portal_setup(sc); - if (portal == NULL) { - device_printf(sc->sc_dev, "could not setup QMan portal\n"); - sched_unpin(); - return (0); - } +uint32_t +qman_fq_get_counter(struct 
qman_fq *fq, int counter) +{ + union qman_mc_result *cmd_res; + union qman_mc_command command; + device_t portal; + u_int ret = 0; - val = QM_FQR_GetFqid(fqr); + bzero(&command, sizeof(command)); + command.query_fq_np.verb = QCSP_VERB_QUERY_FQ_NP; + command.query_fq_np.fqid = fq->fqid; + critical_enter(); + portal = DPCPU_GET(qman_affine_portal); + cmd_res = QMAN_PORTAL_MC_SEND_RAW(portal, &command); + if (counter == QMAN_COUNTER_FRAME) + ret = cmd_res->query_fq_np.frm_cnt; + else if (counter == QMAN_COUNTER_BYTES) + ret = cmd_res->query_fq_np.byte_cnt; - sched_unpin(); + critical_exit(); - return (val); + return (ret); } -t_Error -qman_poll(e_QmPortalPollSource source) +void +qman_set_sdest(uint16_t channel, int cpu) { - struct qman_softc *sc; - t_Error error; - t_Handle portal; - - sc = qman_sc; - sched_pin(); + struct qman_softc *sc = qman_sc; + uint32_t reg; - /* Ensure we have got QMan port initialized */ - portal = qman_portal_setup(sc); - if (portal == NULL) { - device_printf(sc->sc_dev, "could not setup QMan portal\n"); - sched_unpin(); - return (E_NOT_SUPPORTED); + if (sc->sc_qman_major >= 3) { + reg = bus_read_4(sc->sc_rres, QCSP_IO_CFG_3(channel)); + reg &= IO_CFG_SDEST_M; + reg |= (cpu << IO_CFG_SDEST_S); + bus_write_4(sc->sc_rres, QCSP_IO_CFG_3(channel), reg); + } else { + reg = bus_read_4(sc->sc_rres, QCSP_IO_CFG(channel)); + reg &= IO_CFG_SDEST_M; + reg |= (cpu << IO_CFG_SDEST_S); + bus_write_4(sc->sc_rres, QCSP_IO_CFG(channel), reg); } - - error = QM_Poll(sc->sc_qh, source); - - sched_unpin(); - - return (error); } /* diff --git a/sys/dev/dpaa/qman.h b/sys/dev/dpaa/qman.h index 815ef1f6d33a..0e841dbc6ae6 100644 --- a/sys/dev/dpaa/qman.h +++ b/sys/dev/dpaa/qman.h @@ -27,10 +27,13 @@ #ifndef _QMAN_H #define _QMAN_H +#include <sys/vmem.h> #include <machine/vmparam.h> -#include <contrib/ncsw/inc/Peripherals/qm_ext.h> - +struct qman_fq; +struct qman_fq; +struct dpaa_fd; +struct qman_portal; /** * @group QMan private defines/declarations @@ -44,13 +47,15 
@@ /** * Pool channel common to all software portals. * @note Value of 0 reflects the e_QM_FQ_CHANNEL_POOL1 from e_QmFQChannel - * type used in qman_fqr_create(). + * type used in qman_fq_create(). */ #define QMAN_COMMON_POOL_CHANNEL 0 #define QMAN_FQID_BASE 1 -#define QMAN_CCSR_SIZE 0x1000 +/* Counters */ +#define QMAN_COUNTER_FRAME 0 +#define QMAN_COUNTER_BYTES 1 /* * Portal defines @@ -69,18 +74,103 @@ struct qman_softc { struct resource *sc_rres; /* register resource */ int sc_irid; /* interrupt rid */ struct resource *sc_ires; /* interrupt resource */ + vmem_t *sc_fqalloc; + vmem_t *sc_qpalloc; + vmem_t *sc_cgalloc; + void *sc_intr_cookie; + int sc_qman_base_channel; + int sc_qman_major; - bool sc_regs_mapped[MAXCPU]; - - t_Handle sc_qh; /* QMAN handle */ - t_Handle sc_qph[MAXCPU]; /* QMAN portal handles */ vm_paddr_t sc_qp_pa; /* QMAN portal PA */ - int sc_fqr_cpu[QMAN_MAX_FQIDS]; + int sc_fq_cpu[QMAN_MAX_FQIDS]; +}; + +struct qman_fd { + uint64_t dd:2; + uint64_t liodn_off:6; + uint64_t bpid:8; + uint64_t eliodn_off:4; + uint64_t _rsvd0:4; + uint64_t addr:40; + union { + struct { + uint32_t format:3; + uint32_t offset:9; + uint32_t length:20; + }; + struct { + uint32_t format2:3; + uint32_t wlength:29; + }; + }; + uint32_t cmd_stat; +}; + +_Static_assert(sizeof(struct qman_fd) == 16, "qman_fd size mismatch"); + +struct qman_dqrr_entry { + uint8_t verb; + uint8_t stat; + uint16_t seqnum; + uint8_t tok; + uint8_t _rsvd0[3]; + uint32_t fqid; + uint32_t ctxb; + struct qman_fd fd; + uint8_t _rsvd1[32]; +}; + +/* Bits for qman_dqrr_entry fields */ +#define QMAN_DQRR_STAT_FQ_EMPTY 0x80 +#define QMAN_DQRR_STAT_FQ_HELD_ACTIVE 0x40 +#define QMAN_DQRR_STAT_FQ_FORCED 0x20 +#define QMAN_DQRR_STAT_HAS_FRAME 0x10 +#define QMAN_DQRR_STAT_VDQCR 0x02 +#define QMAN_DQRR_STAT_EXPIRED 0x01 + +struct qman_mr_entry { + union { + struct { + uint8_t verb; + uint8_t data[63]; + }; + struct { + uint8_t verb; + uint8_t dca; + uint16_t seqnum; + uint32_t rc:8; + uint32_t orp:24; + 
uint32_t fqid; + uint32_t tag; + struct qman_fd fd; + uint8_t _rsvd[32]; + } ern; + struct { + uint8_t verb; + uint8_t fqs; + uint8_t _rsvd0[6]; + uint32_t fqid; + uint32_t ctxb; + uint8_t _rsvd1[48]; + } fqscn; + }; }; + +_Static_assert(sizeof(struct qman_mr_entry) == 64, "bad sizeof qman_mr"); /** @> */ +typedef int (*qman_cb_dqrr)(device_t, struct qman_fq *, + struct qman_fd *, void *); +typedef void (*qman_cb_mr)(device_t, struct qman_fq *, + struct qman_mr_entry *); +struct qman_cb { + qman_cb_dqrr dqrr; + qman_cb_mr ern; + qman_cb_mr fqscn; + void *ctx; +}; /** * @group QMan bus interface * @{ @@ -91,6 +181,8 @@ int qman_suspend(device_t dev); int qman_resume(device_t dev); int qman_shutdown(device_t dev); /** @> */ +int qman_create_affine_portal(device_t, vm_offset_t, vm_offset_t, int); +void qman_set_sdest(uint16_t, int); /** @@ -149,69 +241,77 @@ int qman_shutdown(device_t dev); * * @return A handle to newly created FQR object. */ -t_Handle qman_fqr_create(uint32_t fqids_num, e_QmFQChannel channel, uint8_t wq, - bool force_fqid, uint32_t fqid_or_align, bool init_parked, +struct qman_fq *qman_fq_create(uint32_t fqids_num, int channel, + uint8_t wq, bool force_fqid, uint32_t fqid_or_align, bool init_parked, bool hold_active, bool prefer_in_cache, bool congst_avoid_ena, - t_Handle congst_group, int8_t overhead_accounting_len, + void *congst_group, int8_t overhead_accounting_len, uint32_t tail_drop_threshold); /** * Free Frame Queue Range. * - * @param fqr A handle to FQR to be freed. + * @param fq A handle to FQR to be freed. * @return E_OK on success; error code otherwise. */ -t_Error qman_fqr_free(t_Handle fqr); +int qman_fq_free(struct qman_fq *fq); /** * Register the callback function. * The callback function will be called when a frame comes from this FQR. * - * @param fqr A handle to FQR. + * @param fq A handle to FQR. * @param callback A pointer to the callback function. * @param app A pointer to the user's data. 
* @return E_OK on success; error code otherwise. */ -t_Error qman_fqr_register_cb(t_Handle fqr, t_QmReceivedFrameCallback *callback, - t_Handle app); +int qman_fq_register_cb(struct qman_fq *fq, qman_cb_dqrr callback, + void *ctx); /** - * Enqueue a frame on a given FQR. + * Enqueue a frame on a given FQ. * - * @param fqr A handle to FQR. - * @param fqid_off FQID offset wihin the FQR. + * @param fq A handle to FQ. * @param frame A frame to be enqueued to the transmission. * @return E_OK on success; error code otherwise. */ -t_Error qman_fqr_enqueue(t_Handle fqr, uint32_t fqid_off, t_DpaaFD *frame); +int qman_fq_enqueue(struct qman_fq *fq, struct dpaa_fd *frame); /** - * Get one of the FQR counter's value. + * Get one of the FQ counter's value. * - * @param fqr A handle to FQR. - * @param fqid_off FQID offset within the FQR. + * @param fq A handle to FQ. * @param counter The requested counter. * @return Counter's current value. */ -uint32_t qman_fqr_get_counter(t_Handle fqr, uint32_t fqid_off, - e_QmFqrCounters counter); +uint32_t qman_fq_get_counter(struct qman_fq *fq, int counter); /** - * Pull frame from FQR. + * Pull frame from FQ. * - * @param fqr A handle to FQR. - * @param fqid_off FQID offset within the FQR. + * @param fq A handle to FQ. * @param frame The received frame. * @return E_OK on success; error code otherwise. */ -t_Error qman_fqr_pull_frame(t_Handle fqr, uint32_t fqid_off, t_DpaaFD *frame); +int qman_fq_pull_frame(struct qman_fq *fq, struct dpaa_fd *frame); /** - * Get base FQID of the FQR. - * @param fqr A handle to FQR. - * @return Base FQID of the FQR. + * Get FQID of the FQ. + * @param fq A handle to FQ. + * @return FQID of the FQ. + */ +uint32_t qman_fq_get_fqid(struct qman_fq *fq); + +/* + * Allocate a QMan channel to be used with an FQ. + * @return Channel ID + */ +int qman_alloc_channel(void); + +/* + * Free a channel + * @param chan Channel ID returned from qman_alloc_channel(). 
*/ -uint32_t qman_fqr_get_base_fqid(t_Handle fqr); +void qman_free_channel(int); /** * Poll frames from QMan. @@ -220,24 +320,24 @@ uint32_t qman_fqr_get_base_fqid(t_Handle fqr); * @param source Type of frames to be polled. * @return E_OK on success; error otherwise. */ -t_Error qman_poll(e_QmPortalPollSource source); +int qman_poll(int source); /** * General received frame callback. * This is called, when user did not register his own callback for a given - * frame queue range (fqr). + * frame queue range (fq). */ -e_RxStoreResponse qman_received_frame_callback(t_Handle app, t_Handle qm_fqr, - t_Handle qm_portal, uint32_t fqid_offset, t_DpaaFD *frame); +int qman_received_frame_callback(void *ctx, struct qman_fq *fq, + void *qm_portal, uint32_t fqid_offset, struct dpaa_fd *frame); /** * General rejected frame callback. * This is called, when user did not register his own callback for a given - * frame queue range (fqr). + * frame queue range (fq). */ -e_RxStoreResponse qman_rejected_frame_callback(t_Handle app, t_Handle qm_fqr, - t_Handle qm_portal, uint32_t fqid_offset, t_DpaaFD *frame, - t_QmRejectedFrameInfo *qm_rejected_frame_info); +int qman_rejected_frame_callback(void *ctx, struct qman_fq *fq, + void *qm_portal, uint32_t fqid_offset, struct dpaa_fd *frame, + void *qm_rejected_frame_info); /** @} */ diff --git a/sys/dev/dpaa/qman_fdt.c b/sys/dev/dpaa/qman_fdt.c index 89bf802e0067..3536042abf9c 100644 --- a/sys/dev/dpaa/qman_fdt.c +++ b/sys/dev/dpaa/qman_fdt.c @@ -41,6 +41,8 @@ #include "qman.h" #include "portals.h" +#include "qman_var.h" +#include "qman_portal_if.h" #define FQMAN_DEVSTR "Freescale Queue Manager" @@ -59,12 +61,7 @@ static device_method_t qman_methods[] = { DEVMETHOD_END }; -static driver_t qman_driver = { - "qman", - qman_methods, - sizeof(struct qman_softc), -}; - +DEFINE_CLASS_0(qman, qman_driver, qman_methods, sizeof(struct qman_softc)); EARLY_DRIVER_MODULE(qman, simplebus, qman_driver, 0, 0, BUS_PASS_SUPPORTDEV); static int @@ -82,53 
+79,38 @@ qman_fdt_probe(device_t dev) /* * QMAN Portals */ -#define QMAN_PORT_DEVSTR "Freescale Queue Manager - Portals" +#define QMAN_PORT_DEVSTR "Freescale Queue Manager - Portal" -static device_probe_t qman_portals_fdt_probe; -static device_attach_t qman_portals_fdt_attach; +static int portal_ncpus; +static device_probe_t qman_portal_fdt_probe; +static device_attach_t qman_portal_fdt_attach; -static device_method_t qm_portals_methods[] = { +static device_method_t qman_portal_methods[] = { /* Device interface */ - DEVMETHOD(device_probe, qman_portals_fdt_probe), - DEVMETHOD(device_attach, qman_portals_fdt_attach), - DEVMETHOD(device_detach, qman_portals_detach), + DEVMETHOD(device_probe, qman_portal_fdt_probe), + DEVMETHOD(device_attach, qman_portal_fdt_attach), + DEVMETHOD(device_detach, qman_portal_detach), - DEVMETHOD_END -}; + DEVMETHOD(qman_portal_enqueue, qman_portal_fq_enqueue), + DEVMETHOD(qman_portal_mc_send_raw, qman_portal_mc_send_raw), + DEVMETHOD(qman_portal_static_dequeue_channel, + qman_portal_static_dequeue_channel), + DEVMETHOD(qman_portal_static_dequeue_rm_channel, + qman_portal_static_dequeue_rm_channel), -static driver_t qm_portals_driver = { - "qman-portals", - qm_portals_methods, - sizeof(struct dpaa_portals_softc), + DEVMETHOD_END }; -EARLY_DRIVER_MODULE(qman_portals, ofwbus, qm_portals_driver, 0, 0, - BUS_PASS_BUS); - -static void -get_addr_props(phandle_t node, uint32_t *addrp, uint32_t *sizep) -{ - - *addrp = 2; - *sizep = 1; - OF_getencprop(node, "#address-cells", addrp, sizeof(*addrp)); - OF_getencprop(node, "#size-cells", sizep, sizeof(*sizep)); -} +DEFINE_CLASS_0(qman_portal, qman_portal_driver, qman_portal_methods, + sizeof(struct qman_softc)); +EARLY_DRIVER_MODULE(qman_portal, simplebus, qman_portal_driver, 0, 0, + BUS_PASS_SUPPORTDEV + BUS_PASS_ORDER_MIDDLE); static int -qman_portals_fdt_probe(device_t dev) +qman_portal_fdt_probe(device_t dev) { - phandle_t node; - if (ofw_bus_is_compatible(dev, "simple-bus")) { - node = 
ofw_bus_get_node(dev); - for (node = OF_child(node); node > 0; node = OF_peer(node)) { - if (ofw_bus_node_is_compatible(node, "fsl,qman-portal")) - break; - } - if (node <= 0) - return (ENXIO); - } else if (!ofw_bus_is_compatible(dev, "fsl,qman-portals")) + if (!ofw_bus_is_compatible(dev, "fsl,qman-portal")) return (ENXIO); device_set_desc(dev, QMAN_PORT_DEVSTR); @@ -137,105 +119,15 @@ qman_portals_fdt_probe(device_t dev) } static int -qman_portals_fdt_attach(device_t dev) +qman_portal_fdt_attach(device_t dev) { - struct dpaa_portals_softc *sc; - phandle_t node, child, cpu_node; - vm_paddr_t portal_pa, portal_par_pa; - vm_size_t portal_size; - uint32_t addr, paddr, size; - ihandle_t cpu; - int cpu_num, cpus, intr_rid; - struct dpaa_portals_devinfo di; - struct ofw_bus_devinfo ofw_di = {}; - cell_t *range; - int nrange; - int i; - - cpus = 0; - sc = device_get_softc(dev); - sc->sc_dev = dev; - - node = ofw_bus_get_node(dev); - - /* Get this node's range */ - get_addr_props(ofw_bus_get_node(device_get_parent(dev)), &paddr, &size); - get_addr_props(node, &addr, &size); + int portal_cpu = portal_ncpus; - nrange = OF_getencprop_alloc_multi(node, "ranges", - sizeof(*range), (void **)&range); - if (nrange < addr + paddr + size) + /* Don't attach to more portals than we have CPUs */ + if (mp_ncpus == portal_ncpus) return (ENXIO); - portal_pa = portal_par_pa = 0; - portal_size = 0; - for (i = 0; i < addr; i++) { - portal_pa <<= 32; - portal_pa |= range[i]; - } - for (; i < paddr + addr; i++) { - portal_par_pa <<= 32; - portal_par_pa |= range[i]; - } - portal_pa += portal_par_pa; - for (; i < size + paddr + addr; i++) { - portal_size = (uintmax_t)portal_size << 32; - portal_size |= range[i]; - } - OF_prop_free(range); - sc->sc_dp_size = portal_size; - sc->sc_dp_pa = portal_pa; - - /* Find portals tied to CPUs */ - for (child = OF_child(node); child != 0; child = OF_peer(child)) { - if (cpus >= mp_ncpus) - break; - if (!ofw_bus_node_is_compatible(child, "fsl,qman-portal")) { 
- continue; - } - /* Checkout related cpu */ - if (OF_getprop(child, "cpu-handle", (void *)&cpu, - sizeof(cpu)) > 0) { - cpu_node = OF_instance_to_package(cpu); - /* Acquire cpu number */ - if (OF_getencprop(cpu_node, "reg", &cpu_num, sizeof(cpu_num)) <= 0) { - device_printf(dev, "Could not retrieve CPU number.\n"); - return (ENXIO); - } - } else - cpu_num = cpus; - cpus++; - - if (ofw_bus_gen_setup_devinfo(&ofw_di, child) != 0) { - device_printf(dev, "could not set up devinfo\n"); - continue; - } - - resource_list_init(&di.di_res); - if (ofw_bus_reg_to_rl(dev, child, addr, size, &di.di_res)) { - device_printf(dev, "%s: could not process 'reg' " - "property\n", ofw_di.obd_name); - ofw_bus_gen_destroy_devinfo(&ofw_di); - continue; - } - if (ofw_bus_intr_to_rl(dev, child, &di.di_res, &intr_rid)) { - device_printf(dev, "%s: could not process " - "'interrupts' property\n", ofw_di.obd_name); - resource_list_free(&di.di_res); - ofw_bus_gen_destroy_devinfo(&ofw_di); - continue; - } - di.di_intr_rid = intr_rid; - - if (dpaa_portal_alloc_res(dev, &di, cpu_num)) - goto err; - } - ofw_bus_gen_destroy_devinfo(&ofw_di); + portal_ncpus++; - return (qman_portals_attach(dev)); -err: - resource_list_free(&di.di_res); - ofw_bus_gen_destroy_devinfo(&ofw_di); - qman_portals_detach(dev); - return (ENXIO); + return (qman_portal_attach(dev, portal_cpu)); } diff --git a/sys/dev/dpaa/qman_portal_if.m b/sys/dev/dpaa/qman_portal_if.m new file mode 100644 index 000000000000..2efe31dd6980 --- /dev/null +++ b/sys/dev/dpaa/qman_portal_if.m @@ -0,0 +1,37 @@ +# +# Copyright (c) 2026 Justin Hibbits +# +# SPDX-License-Identifier: BSD-2-Clause + +#include <sys/pcpu.h> +#include <machine/bus.h> +#include <dev/dpaa/portals.h> +#include <dev/dpaa/qman.h> +#include <dev/dpaa/qman_var.h> + +/** + * @brief DPAA QMan portal interface + * + */ +INTERFACE qman_portal; + +METHOD int enqueue { + device_t dev; + struct qman_fq *fq; + struct dpaa_fd *fd; +}; + +METHOD union qman_mc_result * mc_send_raw { + 
	device_t dev; + union qman_mc_command *cmd; +}; + +METHOD void static_dequeue_channel { + device_t dev; + int channel; +}; + +METHOD void static_dequeue_rm_channel { + device_t dev; + int channel; +}; diff --git a/sys/dev/dpaa/qman_portals.c b/sys/dev/dpaa/qman_portals.c index 0f00a9f1a173..3b64aca71cb7 100644 --- a/sys/dev/dpaa/qman_portals.c +++ b/sys/dev/dpaa/qman_portals.c @@ -1,27 +1,7 @@ -/*- - * Copyright (c) 2012 Semihalf. - * All rights reserved. +/* + * Copyright (c) 2026 Justin Hibbits + * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE.
+ * SPDX-License-Identifier: BSD-2-Clause */ #include "opt_platform.h" @@ -36,6 +16,7 @@ #include <sys/proc.h> #include <sys/pcpu.h> #include <sys/sched.h> +#include <ddb/ddb.h> #include <machine/bus.h> #include <machine/tlb.h> @@ -47,143 +28,412 @@ #include "qman.h" #include "portals.h" +#include "qman_var.h" + + +/* Cache-enabled registers */ +#define QCSP_EQCR_N(n) (0x0000 + (n * 64)) +#define QMAN_EQCR_COUNT 8 +#define QCSP_DQRR_N(n) (0x1000 + (n * 64)) +#define QMAN_DQRR_COUNT 16 +#define QCSP_MR_N(n) (0x2000 + (n * 64)) +#define QMAN_MR_COUNT 8 +#define QCSP_CR 0x3800 +#define QCSP_RR(n) (0x3900 + 0x40 * (n)) + +#define QCSP_EQCR_PI_CENA 0x0000 +#define EQCR_PI_VP 0x00000008 +#define EQCR_PI_PI_M 0x00000007 +#define QCSP_EQCR_CI_CENA 0x0004 +#define EQCR_CI_C 0x00000008 +#define EQCR_CI_CI_M 0x00000007 +#define QCSP_DQRR_PI_CENA 0x0000 +#define DQRR_PI_VP 0x00000010 +#define DQRR_PI_PI_M 0x0000000f +#define QCSP_DQRR_CI_CENA 0x0004 +#define DQRR_CI_C 0x00000010 +#define DQRR_CI_CI_M 0x0000000f + +#define QMAN_MC_VERB_VBIT 0x80 + + +/* Cache-inhibited registers */ +#define QCSP_EQCR_PI_CINH 0x0000 +#define QCSP_EQCR_CI_CINH 0x0004 +#define QCSP_DQRR_PI_CINH 0x0040 +#define QCSP_DQRR_CI_CINH 0x0044 +#define QCSP_EQCR_ITR 0x0008 +#define QCSP_DQRR_ITR 0x0048 +#define QCSP_DQRR_SDQCR 0x0054 +#define SDQCR_SS 0x40000000 +#define SDQCR_FC 0x20000000 +#define SDQCR_DP 0x10000000 +#define SDQCR_DCT_NUL 0x00000000 +#define SDQCR_DCT_PRI_PREC 0x01000000 +#define SDQCR_DCT_ACTIVE_WQ 0x02000000 +#define SDQCR_DCT_ACTIVE_FQ_O 0x03000000 +#define SDQCR_DCT_M 0x03000000 +#define SDQCR_TOKEN_M 0x00ff0000 +#define SDQCR_TOKEN_S 16 +#define DQRR_DQ_SRC_M 0x0000ffff +#define DQRR_DQ_SRC_DCP 0x00008000 +#define SDQCR_DQ_SRC_CHAN(n) (0x8000 >> (n + 1)) +#define QCSP_DQRR_VDQCR 0x0058 +#define QCSP_DQRR_PDQCR 0x005c +#define QCSP_MR_ITR 0x0088 +#define QCSP_CFG 0x0100 +#define CFG_EST_M 0x70000000 +#define CFG_EST_S 28 +#define CFG_EP 0x04000000 +#define CFG_EPM_M 0x03000000 
+#define CFG_EPM_PI_CI 0x00000000 +#define CFG_EPM_PI_CE 0x01000000 +#define CFG_EPM_VB1 0x02000000 +#define CFG_EPM_VB2 0x03000000 +#define CFG_DQRR_MF_M 0x00f00000 +#define CFG_DQRR_MF_S 20 +#define CFG_DP 0x00040000 +#define CFG_DCM_C_M 0x00030000 +#define CFG_DCM_CI_CI 0x00000000 +#define CFG_DCM_CI_CE 0x00010000 +#define CFG_DCM_DCA1 0x00020000 +#define CFG_DCM_DCA2 0x00030000 +#define CFG_SD 0x00000200 +#define CFG_MM 0x00000100 +#define CFG_RE 0x00000080 +#define CFG_RP 0x00000040 +#define CFG_SE 0x00000020 +#define CFG_SP 0x00000010 +#define CFG_SDEST_M 0x00000007 +#define QCSP_ISR 0x0e00 +#define QM_PIRQ_CSCI 0x00100000 +#define QM_PIRQ_EQCI 0x00080000 +#define QM_PIRQ_EQRI 0x00040000 +#define QM_PIRQ_DQRI 0x00020000 +#define QM_PIRQ_MRI 0x00010000 +#define QM_PIRQ_DQ_AVAIL_M 0x0000ffff +#define QCSP_IER 0x0e04 +#define QCSP_ISDR 0x0e08 +#define QCSP_IIR 0xe0c + +#define QM_EQCR_VERB_CMD_ENQUEUE 0x01 +#define QM_EQCR_VERB_BIT_INT 0x04 + +#define DEF_SDQCR_TOKEN 0xab -extern e_RxStoreResponse qman_received_frame_callback(t_Handle, t_Handle, - t_Handle, uint32_t, t_DpaaFD *); -extern e_RxStoreResponse qman_rejected_frame_callback(t_Handle, t_Handle, - t_Handle, uint32_t, t_DpaaFD *, t_QmRejectedFrameInfo *); +static void qman_portal_loop_rings(struct qman_portal_softc *sc); +static void qman_portal_isr(void *); -t_Handle qman_portal_setup(struct qman_softc *); +DPCPU_DEFINE(device_t, qman_affine_portal); +DPAA_RING(qman_eqcr, QMAN_EQCR_COUNT, QCSP_EQCR_PI_CENA, QCSP_EQCR_CI_CENA, + QCSP_EQCR_PI_CINH, QCSP_EQCR_CI_CINH); +DPAA_RING(qman_dqrr, QMAN_DQRR_COUNT, QCSP_DQRR_PI_CENA, QCSP_DQRR_CI_CENA, + QCSP_DQRR_PI_CINH, QCSP_DQRR_CI_CINH); -struct dpaa_portals_softc *qp_sc; +/* + * pmode: one of the CFG_EPM constants. 
+ * stash_prio: 0 or CFG_EP + * stash_thresh: 0-7 + */ +static int +qman_eqcr_init(struct qman_portal_softc *sc, int pmode, u_int stash_thresh, + u_int stash_prio) +{ + struct resource *regs = sc->sc_base.sc_mres[1]; + uint32_t reg; + + sc->sc_eqcr.ring = + (struct qman_eqcr_entry *)(sc->sc_base.sc_ce_va + QCSP_EQCR_N(0)); + qman_eqcr_ring_init(&sc->sc_eqcr, &sc->sc_base); + reg = bus_read_4(regs, QCSP_CFG); + reg &= 0x00ffffff; + reg |= pmode; + reg |= ((stash_thresh << CFG_EST_S) & CFG_EST_M); + reg |= stash_prio; + + bus_write_4(regs, QCSP_CFG, reg); + return (0); +} + +static int +qman_dqrr_init(struct qman_portal_softc *sc) +{ + struct resource *regs = sc->sc_base.sc_mres[1]; + uint32_t reg; + + /* Dequeue from the direct-connect channel and pool 0, up to 3 frames */ + bus_write_4(regs, QCSP_DQRR_SDQCR, + SDQCR_FC | SDQCR_DP | SDQCR_DCT_PRI_PREC | + (DEF_SDQCR_TOKEN << SDQCR_TOKEN_S) | + DQRR_DQ_SRC_DCP | SDQCR_DQ_SRC_CHAN(0)); + bus_write_4(regs, QCSP_DQRR_VDQCR, 0); + bus_write_4(regs, QCSP_DQRR_PDQCR, 0); + + sc->sc_dqrr.ring = + (struct qman_dqrr_entry *)(sc->sc_base.sc_ce_va + QCSP_DQRR_N(0)); + qman_dqrr_ring_init(&sc->sc_dqrr, &sc->sc_base); + + /* Set DQRR max fill to 15 */ + reg = bus_read_4(regs, QCSP_CFG); + reg |= (0xf << CFG_DQRR_MF_S); + bus_write_4(regs, QCSP_CFG, reg); + + for (int i = 0; i < QMAN_DQRR_COUNT; i++) + __asm __volatile ("dcbi 0,%0" :: "r"(&sc->sc_dqrr.ring[i]) : "memory"); + + return (0); +} int -qman_portals_attach(device_t dev) +qman_portal_attach(device_t dev, int cpu) { - struct dpaa_portals_softc *sc; + struct qman_portal_softc *sc = device_get_softc(dev); + union qman_mc_command *cr; + pcell_t cell; + phandle_t node; + + sc->sc_base.sc_cpu = cpu; + dpaa_portal_alloc_res(dev, cpu); - sc = qp_sc = device_get_softc(dev); - - /* Map bman portal to physical address space */ - if (law_enable(OCP85XX_TGTIF_QMAN, sc->sc_dp_pa, sc->sc_dp_size)) { - qman_portals_detach(dev); + qman_eqcr_init(sc, CFG_EPM_VB1, 0, 0); + 
qman_dqrr_init(sc); + bus_setup_intr(dev, sc->sc_base.sc_ires, INTR_TYPE_NET | INTR_MPSAFE, + NULL, qman_portal_isr, sc, &sc->sc_base.sc_intr_cookie); + bus_bind_intr(dev, sc->sc_base.sc_ires, cpu); + + node = ofw_bus_get_node(dev); + if (OF_getencprop(node, "cell-index", &cell, sizeof(cell)) <= 0) { + device_printf(dev, "missing 'cell-index' property\n"); return (ENXIO); } - /* Set portal properties for XX_VirtToPhys() */ - XX_PortalSetInfo(dev); + sc->sc_affine_channel = cell; + DPCPU_ID_SET(cpu, qman_affine_portal, dev); + bus_write_4(sc->sc_base.sc_mres[1], QCSP_IER, + QM_PIRQ_EQCI | QM_PIRQ_EQRI | QM_PIRQ_MRI | QM_PIRQ_CSCI | + QM_PIRQ_DQRI); + bus_write_4(sc->sc_base.sc_mres[1], QCSP_ISDR, 0); + + /* Initialize the MC polarity bit, it may not be 0. */ + cr = (union qman_mc_command *)(sc->sc_base.sc_ce_va + QCSP_CR); + sc->sc_mc.polarity = + (cr->common.verb & QMAN_MC_VERB_VBIT) ^ QMAN_MC_VERB_VBIT; + /* TODO: LIODN. Fake it for now */ + + qman_set_sdest(sc->sc_affine_channel, cpu); - bus_attach_children(dev); return (0); } + int -qman_portals_detach(device_t dev) +qman_portal_detach(device_t dev) { - struct dpaa_portals_softc *sc; + struct qman_portal_softc *sc; int i; - qp_sc = NULL; sc = device_get_softc(dev); - for (i = 0; i < ARRAY_SIZE(sc->sc_dp); i++) { - if (sc->sc_dp[i].dp_ph != NULL) { - thread_lock(curthread); - sched_bind(curthread, i); - thread_unlock(curthread); + /* TODO: Unmap TLB regions */ + thread_lock(curthread); + sched_bind(curthread, sc->sc_base.sc_cpu); + thread_unlock(curthread); - QM_PORTAL_Free(sc->sc_dp[i].dp_ph); + if (sc->sc_base.sc_ires != NULL) + bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_base.sc_ires); - thread_lock(curthread); - sched_unbind(curthread); - thread_unlock(curthread); - } - - if (sc->sc_dp[i].dp_ires != NULL) { - XX_DeallocIntr((uintptr_t)sc->sc_dp[i].dp_ires); - bus_release_resource(dev, SYS_RES_IRQ, - sc->sc_dp[i].dp_irid, sc->sc_dp[i].dp_ires); - } - } - for (i = 0; i < ARRAY_SIZE(sc->sc_rres); i++) { - 
if (sc->sc_rres[i] != NULL) + for (i = 0; i < nitems(sc->sc_base.sc_mres); i++) { + if (sc->sc_base.sc_mres[i] != NULL) bus_release_resource(dev, SYS_RES_MEMORY, - sc->sc_rrid[i], - sc->sc_rres[i]); + i, sc->sc_base.sc_mres[i]); } + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); + return (0); } -t_Handle -qman_portal_setup(struct qman_softc *qsc) +static void +qman_portal_isr(void *arg) { - struct dpaa_portals_softc *sc; - t_QmPortalParam qpp; - unsigned int cpu; - uintptr_t p; - t_Handle portal; + struct qman_portal_softc *sc = arg; - /* Return NULL if we're not ready or while detach */ - if (qp_sc == NULL) - return (NULL); + qman_portal_loop_rings(sc); +} + +int +qman_portal_fq_enqueue(device_t dev, struct qman_fq *fq, struct dpaa_fd *frame) +{ + struct qman_portal_softc *sc = device_get_softc(dev); + struct qman_eqcr_entry *eqcr; + + /* Get available... */ + eqcr = qman_eqcr_start(&sc->sc_eqcr, &sc->sc_base); + if (eqcr == NULL) + return (EBUSY); + eqcr->fd = *frame; + eqcr->fqid = fq->fqid; + qman_eqcr_commit(&sc->sc_eqcr, QM_EQCR_VERB_CMD_ENQUEUE); + + return (0); +} + +static int +qman_portal_loop_dqrr(struct qman_portal_softc *sc) +{ + struct qman_dqrr_entry *dqrr; + struct qman_dqrr_entry *base; + struct qman_fq *fq; + int ci = bus_read_4(sc->sc_base.sc_mres[1], QCSP_DQRR_CI_CINH) & + DQRR_CI_CI_M; + int pi = bus_read_4(sc->sc_base.sc_mres[1], QCSP_DQRR_PI_CINH) & + DQRR_PI_PI_M; + + base = sc->sc_dqrr.ring; + do { + dqrr = &base[ci]; + dpaa_flush_line(dqrr); + dpaa_touch_line(dqrr); + if ((dqrr->stat & QMAN_DQRR_STAT_HAS_FRAME)) { + fq = qman_fq_from_index(dqrr->fqid); + if (fq != NULL && fq->cb.dqrr != NULL) { + fq->cb.dqrr(sc->sc_base.sc_dev, fq, + &dqrr->fd, fq->cb.ctx); + } + } else + break; + ci = (ci + 1) & DQRR_CI_CI_M; + bus_write_4(sc->sc_base.sc_mres[1], QCSP_DQRR_CI_CINH, ci); + } while (ci != pi); + + return (0); +} + +static void +qman_portal_loop_rings(struct qman_portal_softc *sc) +{ + uint32_t isr; - sc = 
qp_sc; + isr = bus_read_4(sc->sc_base.sc_mres[1], QCSP_ISR); - sched_pin(); - portal = NULL; - cpu = PCPU_GET(cpuid); + /* Handle DQRR first. */ + if ((isr & QM_PIRQ_DQRI)) { + qman_portal_loop_dqrr(sc); + } + if ((isr & QM_PIRQ_CSCI)) { + } + if ((isr & QM_PIRQ_EQRI)) { + qman_eqcr_update(&sc->sc_eqcr, &sc->sc_base); + } + bus_write_4(sc->sc_base.sc_mres[1], QCSP_ISR, isr); +} - /* Check if portal is ready */ - while (atomic_cmpset_acq_ptr((uintptr_t *)&sc->sc_dp[cpu].dp_ph, - 0, -1) == 0) { - p = atomic_load_acq_ptr((uintptr_t *)&sc->sc_dp[cpu].dp_ph); +/* MC commands */ + +/* Assumes pinned */ +union qman_mc_result * +qman_portal_mc_send_raw(device_t dev, union qman_mc_command *c) +{ + struct qman_portal_softc *sc; + int res_idx; + union qman_mc_result *rr; + union qman_mc_command *cr; + int timeout = 10000; + uint8_t verb; + + sc = device_get_softc(dev); - /* Return if portal is already initialized */ - if (p != 0 && p != -1) { - sched_unpin(); - return ((t_Handle)p); - } + verb = c->common.verb; + c->common.verb = 0; + cr = (union qman_mc_command *)(sc->sc_base.sc_ce_va + QCSP_CR); + dpaa_zero_line(cr); + *cr = *c; + dpaa_lw_barrier(); + cr->common.verb = verb | sc->sc_mc.polarity; + res_idx = (sc->sc_mc.polarity ? 
1 : 0); + sc->sc_mc.polarity ^= QMAN_MC_VERB_VBIT; + dpaa_flush_line(cr); + dpaa_touch_line(cr); - /* Not inititialized and "owned" by another thread */ - sched_relinquish(curthread); + rr = (union qman_mc_result *)(sc->sc_base.sc_ce_va + QCSP_RR(res_idx)); + for (; timeout > 0; --timeout) { + dpaa_flush_line(rr); + if (rr->common.verb != 0) + break; } + if (timeout == 0) + return (NULL); + return (rr); +} - /* Map portal registers */ - dpaa_portal_map_registers(sc); +void +qman_portal_static_dequeue_channel(device_t dev, int channel) +{ + struct qman_portal_softc *sc = device_get_softc(dev); + uint32_t reg; - /* Configure and initialize portal */ - qpp.ceBaseAddress = rman_get_bushandle(sc->sc_rres[0]); - qpp.ciBaseAddress = rman_get_bushandle(sc->sc_rres[1]); - qpp.h_Qm = qsc->sc_qh; - qpp.swPortalId = cpu; - qpp.irq = (uintptr_t)sc->sc_dp[cpu].dp_ires; - qpp.fdLiodnOffset = 0; - qpp.f_DfltFrame = qman_received_frame_callback; - qpp.f_RejectedFrame = qman_rejected_frame_callback; - qpp.h_App = qsc; + reg = bus_read_4(sc->sc_base.sc_mres[1], QCSP_DQRR_SDQCR); + reg |= (1 << (15 - (channel - qman_channel_base))); + bus_write_4(sc->sc_base.sc_mres[1], QCSP_DQRR_SDQCR, reg); +} - portal = QM_PORTAL_Config(&qpp); - if (portal == NULL) - goto err; +void +qman_portal_static_dequeue_rm_channel(device_t dev, int channel) +{ + struct qman_portal_softc *sc = device_get_softc(dev); + uint32_t reg; - if (QM_PORTAL_Init(portal) != E_OK) - goto err; + reg = bus_read_4(sc->sc_base.sc_mres[1], QCSP_DQRR_SDQCR); + reg &= ~(1 << (15 - (channel - qman_channel_base))); + bus_write_4(sc->sc_base.sc_mres[1], QCSP_DQRR_SDQCR, reg); +} - if (QM_PORTAL_AddPoolChannel(portal, QMAN_COMMON_POOL_CHANNEL) != E_OK) - goto err; +DB_SHOW_COMMAND(fqid, qman_show_fqid) +{ + union qman_mc_command cmd; + union qman_mc_result *res; + union qman_mc_result save_res; + device_t portal; - atomic_store_rel_ptr((uintptr_t *)&sc->sc_dp[cpu].dp_ph, - (uintptr_t)portal); - sched_unpin(); + if (!have_addr) + 
return; - return (portal); + bzero(&cmd, sizeof(cmd)); + cmd.query_fq_np.fqid = addr; -err: - if (portal != NULL) - QM_PORTAL_Free(portal); + /* Ensure we have got QMan port initialized */ + portal = DPCPU_GET(qman_affine_portal); + res = qman_portal_mc_send_raw(portal, &cmd); - atomic_store_rel_32((uint32_t *)&sc->sc_dp[cpu].dp_ph, 0); - sched_unpin(); + if (res != NULL) + save_res = *res; - return (NULL); + /* Dump all NP fields */ + if (res != NULL && save_res.query_fq_np.rslt == 0xf0) { + db_printf("FQID: %d\n", (int)addr); + db_printf(" State: %x\n", save_res.query_fq_np.state); + db_printf(" Link: %x\n", save_res.query_fq_np.fqd_link); + db_printf(" ODP_SEQ: %x\n", save_res.query_fq_np.odp_seq); + db_printf(" ORP_NESN: %x\n", save_res.query_fq_np.orp_nesn); + db_printf(" ORP_EA_HSEQ: %x\n", + save_res.query_fq_np.orp_ea_hseq); + db_printf(" ORP_EA_TSEQ: %x\n", + save_res.query_fq_np.orp_ea_tseq); + db_printf(" ORP_EA_HPTR: %x\n", + save_res.query_fq_np.orp_ea_hptr); + db_printf(" ORP_EA_TPTR: %x\n", + save_res.query_fq_np.orp_ea_tptr); + db_printf(" pfdr_hptr: %x\n", save_res.query_fq_np.pfdr_hptr); + db_printf(" pfdr_tptr: %x\n", save_res.query_fq_np.pfdr_tptr); + db_printf(" IS: %x\n", save_res.query_fq_np.is); + db_printf(" ICS_SURP: %x\n", save_res.query_fq_np.ics_surp); + db_printf(" byte_cnt: %x\n", save_res.query_fq_np.byte_cnt); + db_printf(" frm_cnt: %x\n", save_res.query_fq_np.frm_cnt); + db_printf(" ra1_sfdr: %x\n", save_res.query_fq_np.ra1_sfdr); + db_printf(" ra2_sfdr: %x\n", save_res.query_fq_np.ra2_sfdr); + db_printf(" od1_sfdr: %x\n", save_res.query_fq_np.od1_sfdr); + db_printf(" od2_sfdr: %x\n", save_res.query_fq_np.od2_sfdr); + db_printf(" od3_sfdr: %x\n", save_res.query_fq_np.od3_sfdr); + } } diff --git a/sys/dev/dpaa/qman_var.h b/sys/dev/dpaa/qman_var.h new file mode 100644 index 000000000000..8ed36bbb6a35 --- /dev/null +++ b/sys/dev/dpaa/qman_var.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * 
SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef QMAN_VAR_H +#define QMAN_VAR_H + +#include "dpaa_common.h" +#include "portals.h" + +struct qman_eqcr_entry { + uint8_t verb; + uint8_t dca; + uint16_t seqnum; + uint32_t orp; + uint32_t fqid; + uint32_t tag; + struct dpaa_fd fd; + uint8_t _rsvd[32]; +}; +_Static_assert(sizeof(struct qman_eqcr_entry) == 64, "EQCR entry mis-sized"); +DPAA_RING_DECLARE(qman_eqcr); +DPAA_RING_DECLARE(qman_dqrr); +DPAA_RING_DECLARE(qman_mr); + +union qman_mc_command { + struct { + uint8_t verb; + uint8_t data[63]; + } common; + struct { + uint8_t verb; + uint8_t _rsvd0; + uint16_t we_mask; + uint32_t fqid; /* Only bottom 24 bits allowed */ + uint16_t count; + uint8_t orpc; + uint8_t cgid; + uint16_t fq_ctrl; + uint16_t dest_chan:13; + uint16_t dest_wq:3; + uint16_t ics_cred; + uint16_t td_thresh_oac; + uint32_t context_b; + uint32_t context_a; + uint8_t _rsvd1[32]; + } init_fq; + struct { + uint8_t verb; + uint8_t _rsvd0[3]; + uint32_t fqid; /* Only bottom 24 bits used */ + uint8_t _rsvd1[56]; + } query_fq; + struct { + uint8_t verb; + uint8_t _rsvd0[3]; + uint32_t fqid; + uint8_t _rsvd1[56]; + } query_fq_np; + struct { + uint8_t verb; + uint8_t _rsvd0[3]; + uint32_t fqid; + uint8_t _rsvd1; + uint8_t count; + uint8_t _rsvd2[10]; + uint32_t context_b; + uint8_t _rsvd3[40]; + } alter_fqs; +}; + +union qman_mc_result { + struct { + uint8_t verb; + uint8_t data[63]; + } common; + struct { + uint8_t verb; + uint8_t rslt; + uint8_t _rsvd[62]; + } init_fq; + struct { + uint8_t verb; + uint8_t rslt; + uint8_t _rsvd0[8]; + uint8_t orpc; + uint8_t cgid; + uint16_t fq_ctrl; + uint16_t dest_wq; + uint16_t ics_cred; + uint16_t td_thresh; + uint32_t context_b; + uint32_t context_a; + uint16_t oac; + uint8_t _rsvd1[30]; + } query_fq; + struct { + uint8_t verb; + uint8_t rslt; + uint8_t _rsvd0; + uint8_t state; + uint32_t fqd_link; + uint16_t odp_seq; + uint16_t orp_nesn; + uint16_t orp_ea_hseq; + uint16_t orp_ea_tseq; + uint32_t orp_ea_hptr; + 
uint32_t orp_ea_tptr; + uint32_t pfdr_hptr; + uint32_t pfdr_tptr; + uint8_t _rsvd1[5]; + uint8_t is; + uint16_t ics_surp; + uint32_t byte_cnt; + uint32_t frm_cnt; + uint32_t _rsvd2; + uint16_t ra1_sfdr; + uint16_t ra2_sfdr; + uint16_t _rsvd3; + uint16_t od1_sfdr; + uint16_t od2_sfdr; + uint16_t od3_sfdr; + } query_fq_np; + struct { + uint8_t verb; + uint8_t rslt; + uint8_t fqs; + uint8_t _rsvd[61]; + } alter_fqs; +}; + +struct qman_mc { + uint8_t polarity; + bool busy; +}; + +struct qman_fq { + uint32_t fqid; + struct qman_cb cb; +}; + +struct qman_portal_softc { + struct dpaa_portal_softc sc_base; + + /* Rings (Enqueue, Dequeue, Message */ + struct qman_eqcr_ring sc_eqcr; + struct qman_dqrr_ring sc_dqrr; + struct qman_mr_ring sc_mr; + struct qman_mc sc_mc; + + int sc_affine_channel; +}; + +struct qman_fq *qman_fq_from_index(uint32_t fqid); + +union qman_mc_result * +qman_portal_mc_send_raw(device_t, union qman_mc_command *); +int qman_portal_fq_enqueue(device_t, struct qman_fq *, struct dpaa_fd *); +void qman_portal_static_dequeue_channel(device_t, int); +void qman_portal_static_dequeue_rm_channel(device_t dev, int channel); + +extern int qman_channel_base; +DPCPU_DECLARE(device_t, qman_affine_portal); + +#endif /* QMAN_VAR_H */ diff --git a/sys/dev/ichsmb/ichsmb_pci.c b/sys/dev/ichsmb/ichsmb_pci.c index af3e0f004b88..7f9409e4452c 100644 --- a/sys/dev/ichsmb/ichsmb_pci.c +++ b/sys/dev/ichsmb/ichsmb_pci.c @@ -110,7 +110,8 @@ #define ID_ELKHARTLAKE 0x4b23 #define ID_GEMINILAKE 0x31d4 #define ID_CEDARFORK 0x18df -#define ID_ICELAKE 0x34a3 +#define ID_ICELAKELP 0x34a3 +#define ID_ICELAKEN 0x38a3 #define ID_ALDERLAKE 0x7aa3 #define ID_ALDERLAKE2 0x51a3 #define ID_ALDERLAKE3 0x54a3 @@ -255,8 +256,10 @@ static const struct pci_device_table ichsmb_devices[] = { PCI_DESCR("Intel Gemini Lake SMBus controller") }, { PCI_DEV(PCI_VENDOR_INTEL, ID_CEDARFORK), PCI_DESCR("Intel Cedar Fork SMBus controller") }, - { PCI_DEV(PCI_VENDOR_INTEL, ID_ICELAKE), - PCI_DESCR("Intel Ice Lake 
SMBus controller") }, + { PCI_DEV(PCI_VENDOR_INTEL, ID_ICELAKELP), + PCI_DESCR("Intel Ice Lake-LP SMBus controller") }, + { PCI_DEV(PCI_VENDOR_INTEL, ID_ICELAKEN), + PCI_DESCR("Intel Ice Lake-N SMBus controller") }, { PCI_DEV(PCI_VENDOR_INTEL, ID_ALDERLAKE), .driver_data = (uintptr_t)ICHSMB_FEATURE_BLOCK_BUFFER, PCI_DESCR("Intel Alder Lake SMBus controller") }, diff --git a/sys/dev/iicbus/rtc/rs5c372a.c b/sys/dev/iicbus/rtc/rs5c372a.c new file mode 100644 index 000000000000..1511025801f5 --- /dev/null +++ b/sys/dev/iicbus/rtc/rs5c372a.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2026 Justin Hibbits + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bus.h> +#include <sys/clock.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/module.h> + +#include <dev/ofw/ofw_bus.h> +#include <dev/ofw/ofw_bus_subr.h> + +#include <dev/iicbus/iiconf.h> +#include <dev/iicbus/iicbus.h> + +#include "clock_if.h" +#include "iicbus_if.h" + +/* + * Driver for the Richo rs5c372a RTC. The chip itself includes 2 alarm clocks + * in addition to the clock component, but this driver offers only the RTC + * component. + * + * Like many other RTCs, this reports the date and time in BCD. + * + * The `Hour' register uses bit 5 in a dual role: In 24-hour time, it's a part + * of the first digit (0, 1, 2). In 12-hour time it denotes PM, so 12PM is + * reported as 0x32, 1PM is 0x21, etc. 
+ */ +#define RS5C372_REG_SEC 0x0 +#define RS5C372_REG_MIN 0x1 +#define RS5C372_REG_HOUR 0x2 +#define HOUR_HR_M 0x1f +#define HOUR_PM 0x20 +#define RS5C372_REG_DOW 0x3 +#define RS5C372_REG_DAY 0x4 +#define RS5C372_REG_MON 0x5 +#define RS5C372_REG_YEAR 0x6 +#define RS5C372_REG_CTRL1 0xe +#define RS5C372_REG_CTRL2 0xf +#define CTRL_PM 0x20 + +static struct ofw_compat_data compat_data[] = { + { "ricoh,rs5c372a", 1 }, + { NULL, 0 } +}; + +static int +rs5c372a_gettime(device_t dev, struct timespec *ts) +{ + struct bcd_clocktime ct = {}; + uint8_t clock_regs[7]; + int err; + uint8_t ctrl2; + bool is_12hr = true; + + err = iicdev_readfrom(dev, RS5C372_REG_CTRL2, &ctrl2, + sizeof(ctrl2), IIC_WAIT); + if (err != 0) + return (err); + err = iicdev_readfrom(dev, RS5C372_REG_SEC, clock_regs, + sizeof(clock_regs), IIC_WAIT); + if (err != 0) + return (err); + + if (ctrl2 & CTRL_PM) + is_12hr = false; + ct.sec = clock_regs[RS5C372_REG_SEC]; + ct.min = clock_regs[RS5C372_REG_MIN]; + ct.hour = clock_regs[RS5C372_REG_HOUR]; + ct.dow = clock_regs[RS5C372_REG_DOW]; + ct.day = clock_regs[RS5C372_REG_DAY]; + ct.mon = clock_regs[RS5C372_REG_MON]; + ct.year = clock_regs[RS5C372_REG_YEAR]; + + if (is_12hr) { + ct.ispm = ct.hour & HOUR_PM; + ct.hour &= HOUR_HR_M; + } + clock_bcd_to_ts(&ct, ts, ct.ispm); + + return (0); +} + +static int +rs5c372a_settime(device_t dev, struct timespec *ts) +{ + struct bcd_clocktime ct; + uint8_t clock_regs[7]; + uint8_t ctrl2; + int err; + bool is_12hr = true; + + err = iicdev_readfrom(dev, RS5C372_REG_CTRL2, &ctrl2, + sizeof(ctrl2), IIC_WAIT); + if (err != 0) + return (err); + if (ctrl2 & CTRL_PM) + is_12hr = false; + clock_ts_to_bcd(ts, &ct, is_12hr); + clock_regs[RS5C372_REG_SEC] = ct.sec; + clock_regs[RS5C372_REG_MIN] = ct.min; + clock_regs[RS5C372_REG_HOUR] = ct.hour; + clock_regs[RS5C372_REG_DAY] = ct.day; + clock_regs[RS5C372_REG_DOW] = ct.dow; + clock_regs[RS5C372_REG_MON] = ct.mon; + clock_regs[RS5C372_REG_YEAR] = ct.year & 0xff; + + if (is_12hr) { + 
if (ct.ispm) + clock_regs[RS5C372_REG_HOUR] |= HOUR_PM; + } + + err = iicdev_writeto(dev, RS5C372_REG_SEC, clock_regs, + sizeof(clock_regs), IIC_WAIT); + + return (err); +} + +static int +rs5c372a_probe(device_t dev) +{ + if (ofw_bus_search_compatible(dev, compat_data)->ocd_data == 0) + return (ENXIO); + + device_set_desc(dev, "Ricoh RS5C372A RTC"); + + return (BUS_PROBE_DEFAULT); +} + +static int +rs5c372a_attach(device_t dev) +{ + + /* Register with 1s resolution */ + clock_register(dev, 1000000); + clock_schedule(dev, 1); + return (0); +} + +static device_method_t rs5c372a_methods[] = { + /* Device methods */ + DEVMETHOD(device_probe, rs5c372a_probe), + DEVMETHOD(device_attach, rs5c372a_attach), + + /* Clock methods */ + DEVMETHOD(clock_gettime, rs5c372a_gettime), + DEVMETHOD(clock_settime, rs5c372a_settime), + DEVMETHOD_END +}; + + +DEFINE_CLASS_0(rs5c372a, rs5c372a_driver, rs5c372a_methods, 0); +DRIVER_MODULE(rs5c372a, iicbus, rs5c372a_driver, NULL, NULL); +MODULE_VERSION(rs5c372a, 1); +MODULE_DEPEND(rs5c372a, iicbus, IICBUS_MINVER, IICBUS_PREFVER, IICBUS_MAXVER); +IICBUS_FDT_PNP_INFO(compat_data); diff --git a/sys/dev/mii/miidevs b/sys/dev/mii/miidevs index ca7e78a1cdd5..89d8e54a1843 100644 --- a/sys/dev/mii/miidevs +++ b/sys/dev/mii/miidevs @@ -72,7 +72,7 @@ oui RDC 0x00d02d RDC Semiconductor oui REALTEK 0x00e04c Realtek Semicondctor oui SEEQ 0x00a07d Seeq Technology oui SIS 0x00e006 Silicon Integrated Systems -oui SMC 0x00800f SMC +oui SMSC 0x00800f Microchip (formerly SMSC) oui TI 0x080028 Texas Instruments oui TSC 0x00c039 TDK Semiconductor oui VITESSE 0x0001c1 Vitesse Semiconductor @@ -361,6 +361,6 @@ model xxVITESSE VSC8514 0x0027 Vitesse VSC8514 10/100/1000TX PHY /* XaQti Corp. PHYs */ model xxXAQTI XMACII 0x0000 XaQti Corp.
XMAC II gigabit interface -/* SMC */ -model SMC LAN8710A 0x000F SMC LAN8710A 10/100 interface -model SMC LAN8700 0x000C SMC LAN8700 10/100 interface +/* Microchip (formerly SMSC) */ +model SMSC LAN8710A 0x000F Microchip LAN8710A 10/100 interface +model SMSC LAN8700 0x000C Microchip LAN8700 10/100 interface diff --git a/sys/dev/mii/smscphy.c b/sys/dev/mii/smscphy.c index 4e0d3cd3e18e..d578242f5a61 100644 --- a/sys/dev/mii/smscphy.c +++ b/sys/dev/mii/smscphy.c @@ -74,8 +74,8 @@ static driver_t smscphy_driver = { DRIVER_MODULE(smscphy, miibus, smscphy_driver, 0, 0); static const struct mii_phydesc smscphys[] = { - MII_PHY_DESC(SMC, LAN8710A), - MII_PHY_DESC(SMC, LAN8700), + MII_PHY_DESC(SMSC, LAN8710A), + MII_PHY_DESC(SMSC, LAN8700), MII_PHY_END }; diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c index 89d2010656c5..b6a9a0c01d09 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls_rx.c @@ -596,14 +596,21 @@ mlx5e_tls_rx_work(struct work_struct *work) if (ptag->flow_rule != NULL) mlx5e_accel_fs_del_inpcb(ptag->flow_rule); + /* + * Destroy TIR before DEK. DESTROY_TIR for a TLS- + * enabled TIR issues a TRA RX fence that drains all + * in-flight packets from the crypto pipeline. If the + * DEK were destroyed first, packets still in flight + * would hit a TPT encryption error (vendor syndrome + * 0x55) because the key they reference is already gone. 
+ */ + if (ptag->tirn != 0) + mlx5_tls_close_tir(priv->mdev, ptag->tirn); + /* try to destroy DEK context by ID */ if (ptag->dek_index_ok) mlx5_encryption_key_destroy(priv->mdev, ptag->dek_index); - /* try to destroy TIR context by ID */ - if (ptag->tirn != 0) - mlx5_tls_close_tir(priv->mdev, ptag->tirn); - /* free tag */ mlx5e_tls_rx_tag_zfree(ptag); break; diff --git a/sys/dev/smartpqi/smartpqi_cam.c b/sys/dev/smartpqi/smartpqi_cam.c index 690b38c9f855..6ded8aa97e39 100644 --- a/sys/dev/smartpqi/smartpqi_cam.c +++ b/sys/dev/smartpqi/smartpqi_cam.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -1300,7 +1300,7 @@ register_sim(struct pqisrc_softstate *softs, int card_index) csa.callback_arg = softs; xpt_action((union ccb *)&csa); if (csa.ccb_h.status != CAM_REQ_CMP) { - DBG_ERR("Unable to register smartpqi_aysnc handler: %d!\n", + DBG_ERR("Unable to register smartpqi_async handler: %d!\n", csa.ccb_h.status); } diff --git a/sys/dev/smartpqi/smartpqi_defines.h b/sys/dev/smartpqi/smartpqi_defines.h index 0277abd3e318..c4084f069588 100644 --- a/sys/dev/smartpqi/smartpqi_defines.h +++ b/sys/dev/smartpqi/smartpqi_defines.h @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -944,12 +944,12 @@ typedef uint8_t *passthru_buf_type_t; #define PQISRC_DRIVER_MAJOR __FreeBSD__ #if __FreeBSD__ <= 14 -#define PQISRC_DRIVER_MINOR 4690 +#define PQISRC_DRIVER_MINOR 4691 #else #define PQISRC_DRIVER_MINOR 2 #endif -#define PQISRC_DRIVER_RELEASE 0 -#define PQISRC_DRIVER_REVISION 2008 +#define PQISRC_DRIVER_RELEASE 1 +#define PQISRC_DRIVER_REVISION 2000 #define STR(s) # s #define PQISRC_VERSION(a, b, c, d) STR(a.b.c-d) diff --git a/sys/dev/smartpqi/smartpqi_discovery.c b/sys/dev/smartpqi/smartpqi_discovery.c index a7de5a149810..8682e6cabd7e 100644 --- a/sys/dev/smartpqi/smartpqi_discovery.c +++ b/sys/dev/smartpqi/smartpqi_discovery.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -155,7 +155,7 @@ pqisrc_remove_target_bit(pqisrc_softstate_t *softs, int target) softs->bit_map.bit_vector[target] = SLOT_AVAILABLE; } -/* Use bit map to find availible targets */ +/* Use bit map to find available targets */ int pqisrc_find_avail_target(pqisrc_softstate_t *softs) { @@ -1423,7 +1423,11 @@ pqisrc_add_device(pqisrc_softstate_t *softs, pqi_scsi_dev_t *device) if(device->expose_device) { pqisrc_init_device_active_io(softs, device); - /* TBD: Call OS upper layer function to add the device entry */ + device_printf(softs->os_specific.pqi_dev, + "device added: vendor=%s model=%s B%d:T%d:L%d type=%s\n", + device->vendor, device->model, + device->bus, device->target, device->lun, + device->is_physical_device ? 
"physical" : "logical"); os_add_device(softs,device); } DBG_FUNC("OUT\n"); @@ -1451,6 +1455,11 @@ pqisrc_remove_device(pqisrc_softstate_t *softs, pqi_scsi_dev_t *device) } /* Wait for device outstanding Io's */ pqisrc_wait_for_device_commands_to_complete(softs, device); + device_printf(softs->os_specific.pqi_dev, + "device removed: vendor=%s model=%s B%d:T%d:L%d type=%s\n", + device->vendor, device->model, + device->bus, device->target, device->lun, + device->is_physical_device ? "physical" : "logical"); /* Call OS upper layer function to remove the exposed device entry */ os_remove_device(softs,device); DBG_FUNC("OUT\n"); @@ -1674,10 +1683,14 @@ pqisrc_update_device_list(pqisrc_softstate_t *softs, case DEVICE_NOT_FOUND: /* Device not found in existing list */ device->new_device = true; + DBG_DISC("new device found B%d:T%d:L%d\n", + device->bus, device->target, device->lun); break; case DEVICE_CHANGED: /* Actual device gone need to add device to list*/ device->new_device = true; + DBG_DISC("device changed B%d:T%d:L%d\n", + device->bus, device->target, device->lun); break; case DEVICE_IN_REMOVE: /*Older device with same target/lun is in removal stage*/ @@ -1686,6 +1699,8 @@ pqisrc_update_device_list(pqisrc_softstate_t *softs, * free call*/ device->new_device = false; same_device->schedule_rescan = true; + DBG_DISC("device in removal B%d:T%d:L%d, scheduling rescan\n", + device->bus, device->target, device->lun); break; default: break; diff --git a/sys/dev/smartpqi/smartpqi_event.c b/sys/dev/smartpqi/smartpqi_event.c index 77a70f9fb031..c3c27c9e1c0b 100644 --- a/sys/dev/smartpqi/smartpqi_event.c +++ b/sys/dev/smartpqi/smartpqi_event.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -204,6 +204,17 @@ pqisrc_process_event_intr_src(pqisrc_softstate_t *softs,int obq_id) } if (event_index >= 0) { + static const char *event_names[] = { + [PQI_EVENT_HOTPLUG] = "hotplug", + [PQI_EVENT_HARDWARE] = "hardware", + [PQI_EVENT_PHYSICAL_DEVICE] = "physical device", + [PQI_EVENT_LOGICAL_DEVICE] = "logical device", + [PQI_EVENT_AIO_STATE_CHANGE] = "AIO state change", + [PQI_EVENT_AIO_CONFIG_CHANGE] = "AIO config change", + }; + device_printf(softs->os_specific.pqi_dev, + "event: %s (type=0x%x)\n", + event_names[event_index], response.event_type); if(response.request_acknowledge) { pending_event = &softs->pending_events[event_index]; pending_event->pending = true; @@ -385,7 +396,7 @@ pqisrc_report_event_config(pqisrc_softstate_t *softs) pqi_event_config_request_t request; pqi_event_config_t *event_config_p ; dma_mem_t buf_report_event ; - /*bytes to be allocaed for report event config data-in buffer */ + /*bytes to be allocated for report event config data-in buffer */ uint32_t alloc_size = sizeof(pqi_event_config_t) ; memset(&request, 0 , sizeof(request)); @@ -446,7 +457,7 @@ pqisrc_set_event_config(pqisrc_softstate_t *softs) pqi_event_config_request_t request; pqi_event_config_t *event_config_p; dma_mem_t buf_set_event; - /*bytes to be allocaed for set event config data-out buffer */ + /*bytes to be allocated for set event config data-out buffer */ uint32_t alloc_size = sizeof(pqi_event_config_t); memset(&request, 0 , sizeof(request)); diff --git a/sys/dev/smartpqi/smartpqi_main.c b/sys/dev/smartpqi/smartpqi_main.c index 1f006939bf7c..fbfbcc962f35 100644 --- a/sys/dev/smartpqi/smartpqi_main.c +++ b/sys/dev/smartpqi/smartpqi_main.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -427,6 +427,16 @@ smartpqi_attach(device_t dev) goto out; } + /* Register sysctl for runtime debug_level changes */ + { + struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); + struct sysctl_oid *tree = device_get_sysctl_tree(dev); + + SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, + "debug_level", CTLFLAG_RW, &logging_level, + "Debug logging bitmask"); + } + goto out; dma_out: diff --git a/sys/dev/smartpqi/smartpqi_misc.c b/sys/dev/smartpqi/smartpqi_misc.c index 6db0d80ed993..fd0b907aa252 100644 --- a/sys/dev/smartpqi/smartpqi_misc.c +++ b/sys/dev/smartpqi/smartpqi_misc.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -252,7 +252,7 @@ bsd_set_hint_scatter_gather_config(struct pqisrc_softstate *softs) DBG_FUNC("IN\n"); - /* At least > 16 sg's required to wotk hint correctly. + /* At least > 16 sg's required to work hint correctly. * Default the sg count set by driver/controller. */ if ((!softs->hint.sg_segments) || (softs->hint.sg_segments > diff --git a/sys/dev/smartpqi/smartpqi_queue.c b/sys/dev/smartpqi/smartpqi_queue.c index e8a467531aa4..679d956f6f36 100644 --- a/sys/dev/smartpqi/smartpqi_queue.c +++ b/sys/dev/smartpqi/smartpqi_queue.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -729,7 +729,7 @@ pqisrc_create_op_ibq(pqisrc_softstate_t *softs, op_ib_q->pi_register_offset); } else { int i = 0; - DBG_WARN("Error Status Decsriptors\n"); + DBG_WARN("Error Status Descriptors\n"); for (i = 0; i < 4; i++) DBG_WARN(" %x\n",admin_resp.resp_type.create_op_iq.status_desc[i]); } diff --git a/sys/dev/smartpqi/smartpqi_request.c b/sys/dev/smartpqi/smartpqi_request.c index c5f8ac3c41ba..655660615797 100644 --- a/sys/dev/smartpqi/smartpqi_request.c +++ b/sys/dev/smartpqi/smartpqi_request.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -1998,7 +1998,7 @@ pqisrc_send_raid_tmf(pqisrc_softstate_t *softs, pqi_scsi_dev_t const *devp, if (softs->timeout_in_tmf && tmf_type == SOP_TASK_MANAGEMENT_LUN_RESET) { - /* OS_TMF_TIMEOUT_SEC - 1 to accomodate driver processing */ + /* OS_TMF_TIMEOUT_SEC - 1 to accommodate driver processing */ tmf_req.timeout_in_sec = OS_TMF_TIMEOUT_SEC - 1; } diff --git a/sys/dev/smartpqi/smartpqi_response.c b/sys/dev/smartpqi/smartpqi_response.c index 38695860e520..1ae74a5b95ff 100644 --- a/sys/dev/smartpqi/smartpqi_response.c +++ b/sys/dev/smartpqi/smartpqi_response.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -275,7 +275,7 @@ pqisrc_is_innocuous_error(pqisrc_softstate_t *softs, rcb_t *rcb, void *err_info) if (raid_err->data_out_result == PQI_RAID_DATA_IN_OUT_UNDERFLOW) return true; - /* We get these a alot: leave a tiny breadcrumb about the error, + /* We get these a lot: leave a tiny breadcrumb about the error, but don't do full spew about it */ if (raid_err->status == PQI_AIO_STATUS_CHECK_CONDITION) { diff --git a/sys/dev/smartpqi/smartpqi_sis.c b/sys/dev/smartpqi/smartpqi_sis.c index 82eb999ca4b8..99aa952eb149 100644 --- a/sys/dev/smartpqi/smartpqi_sis.c +++ b/sys/dev/smartpqi/smartpqi_sis.c @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -26,7 +26,7 @@ #include "smartpqi_includes.h" -/* Function for disabling msix interrupots */ +/* Function for disabling msix interrupts */ void sis_disable_msix(pqisrc_softstate_t *softs) { @@ -96,7 +96,7 @@ sis_disable_interrupt(pqisrc_softstate_t *softs) sis_disable_msix(softs); break; default: - DBG_ERR("Inerrupt mode none!\n"); + DBG_ERR("Interrupt mode none!\n"); break; } diff --git a/sys/dev/smartpqi/smartpqi_structures.h b/sys/dev/smartpqi/smartpqi_structures.h index 0c9ad375823d..ada6676ada8f 100644 --- a/sys/dev/smartpqi/smartpqi_structures.h +++ b/sys/dev/smartpqi/smartpqi_structures.h @@ -1,5 +1,5 @@ /*- - * Copyright 2016-2025 Microchip Technology, Inc. and/or its subsidiaries. + * Copyright 2016-2026 Microchip Technology, Inc. and/or its subsidiaries. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -1168,7 +1168,7 @@ typedef struct bmic_sense_feature_page_header { uint8_t page; uint8_t sub_page; uint16_t total_length; /** Total length of the page. - * The length is the same wheteher the request buffer is too short or not. + * The length is the same whether the request buffer is too short or not. * When printing out the page, only print the buffer length. */ } OS_ATTRIBUTE_PACKED bmic_sense_feature_page_header_t; diff --git a/sys/dev/usb/net/if_smsc.c b/sys/dev/usb/net/if_smsc.c index 8e16b8609144..c3c21fd22472 100644 --- a/sys/dev/usb/net/if_smsc.c +++ b/sys/dev/usb/net/if_smsc.c @@ -1296,7 +1296,7 @@ smsc_phy_init(struct smsc_softc *sc) } while ((bmcr & BMCR_RESET) && ((ticks - start_ticks) < max_ticks)); if (((usb_ticks_t)(ticks - start_ticks)) >= max_ticks) { - smsc_err_printf(sc, "PHY reset timed-out"); + smsc_err_printf(sc, "PHY reset timed-out\n"); return (EIO); } diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c index 40792482672c..ef01833b9e03 100644 --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -96,17 +96,6 @@ #define VTNET_ETHER_ALIGN ETHER_ALIGN #endif -/* - * Worst case offset to ensure header doesn't share any cache lines with - * payload. 
- */ -#define VTNET_RX_BUFFER_HEADER_OFFSET 128 - -struct vtnet_rx_buffer_header { - bus_addr_t addr; - bus_dmamap_t dmap; -}; - static int vtnet_modevent(module_t, int, void *); static int vtnet_probe(device_t); @@ -219,7 +208,7 @@ static void vtnet_init_locked(struct vtnet_softc *, int); static void vtnet_init(void *); static void vtnet_free_ctrl_vq(struct vtnet_softc *); -static int vtnet_exec_ctrl_cmd(struct vtnet_softc *, uint8_t *, +static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); static int vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t); @@ -395,17 +384,6 @@ MODULE_DEPEND(vtnet, netmap, 1, 1, 1); VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter"); -static struct vtnet_rx_buffer_header * -vtnet_mbuf_to_rx_buffer_header(struct vtnet_softc *sc, struct mbuf *m) -{ - if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) - return (struct vtnet_rx_buffer_header *)((uintptr_t)m->m_data - - VTNET_RX_BUFFER_HEADER_OFFSET - VTNET_ETHER_ALIGN); - else - return (struct vtnet_rx_buffer_header *)((uintptr_t)m->m_data - - VTNET_RX_BUFFER_HEADER_OFFSET); -} - static int vtnet_modevent(module_t mod __unused, int type, void *unused __unused) { @@ -479,106 +457,6 @@ vtnet_attach(device_t dev) goto fail; } - mtx_init(&sc->vtnet_rx_mtx, device_get_nameunit(dev), - "VirtIO Net RX lock", MTX_DEF); - - error = bus_dma_tag_create( - bus_get_dma_tag(dev), /* parent */ - sizeof(uint16_t), /* alignment */ - 0, /* boundary */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - MJUM9BYTES, /* max request size */ - 1, /* max # segments */ - MJUM9BYTES, /* maxsegsize - worst case */ - BUS_DMA_COHERENT, /* flags */ - busdma_lock_mutex, /* lockfunc */ - &sc->vtnet_rx_mtx, /* lockarg */ - &sc->vtnet_rx_dmat); - if (error) { - device_printf(dev, "cannot create bus_dma_tag\n"); - goto fail; - 
} - - mtx_init(&sc->vtnet_tx_mtx, device_get_nameunit(dev), - "VirtIO Net TX lock", MTX_DEF); - - error = bus_dma_tag_create( - bus_get_dma_tag(dev), /* parent */ - sizeof(uint16_t), /* alignment */ - 0, /* boundary */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - sc->vtnet_tx_nsegs * MJUM9BYTES, /* max request size */ - sc->vtnet_tx_nsegs, /* max # segments */ - MJUM9BYTES, /* maxsegsize */ - BUS_DMA_COHERENT, /* flags */ - busdma_lock_mutex, /* lockfunc */ - &sc->vtnet_tx_mtx, /* lockarg */ - &sc->vtnet_tx_dmat); - if (error) { - device_printf(dev, "cannot create bus_dma_tag\n"); - goto fail; - } - - mtx_init(&sc->vtnet_hdr_mtx, device_get_nameunit(dev), - "VirtIO Net header lock", MTX_DEF); - - error = bus_dma_tag_create( - bus_get_dma_tag(dev), /* parent */ - sizeof(uint16_t), /* alignment */ - 0, /* boundary */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - PAGE_SIZE, /* max request size */ - 1, /* max # segments */ - PAGE_SIZE, /* maxsegsize */ - BUS_DMA_COHERENT, /* flags */ - busdma_lock_mutex, /* lockfunc */ - &sc->vtnet_hdr_mtx, /* lockarg */ - &sc->vtnet_hdr_dmat); - if (error) { - device_printf(dev, "cannot create bus_dma_tag\n"); - goto fail; - } - - mtx_init(&sc->vtnet_ack_mtx, device_get_nameunit(dev), - "VirtIO Net ACK lock", MTX_DEF); - - error = bus_dma_tag_create( - bus_get_dma_tag(dev), /* parent */ - sizeof(uint8_t), /* alignment */ - 0, /* boundary */ - BUS_SPACE_MAXADDR, /* lowaddr */ - BUS_SPACE_MAXADDR, /* highaddr */ - NULL, NULL, /* filter, filterarg */ - sizeof(uint8_t), /* max request size */ - 1, /* max # segments */ - sizeof(uint8_t), /* maxsegsize */ - BUS_DMA_COHERENT, /* flags */ - busdma_lock_mutex, /* lockfunc */ - &sc->vtnet_ack_mtx, /* lockarg */ - &sc->vtnet_ack_dmat); - if (error) { - device_printf(dev, "cannot create bus_dma_tag\n"); - goto fail; - } - -#ifdef __powerpc__ - /* - * Virtio uses 
physical addresses rather than bus addresses, so we - * need to ask busdma to skip the iommu physical->bus mapping. At - * present, this is only a thing on the powerpc architectures. - */ - bus_dma_tag_set_iommu(sc->vtnet_rx_dmat, NULL, NULL); - bus_dma_tag_set_iommu(sc->vtnet_tx_dmat, NULL, NULL); - bus_dma_tag_set_iommu(sc->vtnet_hdr_dmat, NULL, NULL); - bus_dma_tag_set_iommu(sc->vtnet_ack_dmat, NULL, NULL); -#endif - error = vtnet_alloc_rx_filters(sc); if (error) { device_printf(dev, "cannot allocate Rx filters\n"); @@ -1667,11 +1545,6 @@ static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { struct mbuf *m_head, *m_tail, *m; - struct vtnet_rx_buffer_header *vthdr; - bus_dma_segment_t segs[1]; - bus_dmamap_t dmap; - int nsegs; - int err; int i, size; m_head = NULL; @@ -1689,43 +1562,13 @@ vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) } m->m_len = size; - vthdr = (struct vtnet_rx_buffer_header *)m->m_data; - - /* Reserve space for header */ - m_adj(m, VTNET_RX_BUFFER_HEADER_OFFSET); - /* * Need to offset the mbuf if the header we're going to add * will misalign. 
*/ - if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) + if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) { m_adj(m, VTNET_ETHER_ALIGN); - - err = bus_dmamap_create(sc->vtnet_rx_dmat, 0, &dmap); - if (err) { - printf("Failed to create dmamap, err :%d\n", - err); - m_freem(m); - return (NULL); - } - - nsegs = 0; - err = bus_dmamap_load_mbuf_sg(sc->vtnet_rx_dmat, dmap, m, segs, - &nsegs, BUS_DMA_NOWAIT); - if (err != 0) { - printf("Failed to map mbuf into DMA visible memory, err: %d\n", - err); - m_freem(m); - bus_dmamap_destroy(sc->vtnet_rx_dmat, dmap); - return (NULL); } - KASSERT(nsegs == 1, - ("%s: unexpected number of DMA segments for rx buffer: %d", - __func__, nsegs)); - - vthdr->addr = segs[0].ds_addr; - vthdr->dmap = dmap; - if (m_head != NULL) { m_tail->m_next = m; m_tail = m; @@ -1751,7 +1594,7 @@ vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0, int len, clustersz, nreplace, error; sc = rxq->vtnrx_sc; - clustersz = sc->vtnet_rx_clustersz - VTNET_RX_BUFFER_HEADER_OFFSET; + clustersz = sc->vtnet_rx_clustersz; /* * Need to offset the mbuf if the header we're going to add will * misalign, account for that here. @@ -1866,12 +1709,9 @@ vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) { - struct vtnet_rx_buffer_header *hdr; struct vtnet_softc *sc; struct sglist *sg; int header_inlined, error; - bus_addr_t paddr; - struct mbuf *mp; sc = rxq->vtnrx_sc; sg = rxq->vtnrx_sg; @@ -1884,38 +1724,28 @@ vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) header_inlined = vtnet_modern(sc) || (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */ - hdr = vtnet_mbuf_to_rx_buffer_header(sc, m); - paddr = hdr->addr; - /* * Note: The mbuf has been already adjusted when we allocate it if we * have to do strict alignment. 
*/ - if (header_inlined) { - error = sglist_append_phys(sg, paddr, m->m_len); - } else { + if (header_inlined) + error = sglist_append_mbuf(sg, m); + else { + struct vtnet_rx_header *rxhdr = + mtod(m, struct vtnet_rx_header *); MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); /* Append the header and remaining mbuf data. */ - error = sglist_append_phys(sg, paddr, sc->vtnet_hdr_size); + error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); if (error) return (error); - error = sglist_append_phys(sg, - paddr + sizeof(struct vtnet_rx_header), + error = sglist_append(sg, &rxhdr[1], m->m_len - sizeof(struct vtnet_rx_header)); if (error) return (error); - mp = m->m_next; - while (mp) { - hdr = vtnet_mbuf_to_rx_buffer_header(sc, mp); - paddr = hdr->addr; - error = sglist_append_phys(sg, paddr, mp->m_len); - if (error) - return (error); - - mp = mp->m_next; - } + if (m->m_next != NULL) + error = sglist_append_mbuf(sg, m->m_next); } if (error) @@ -2101,7 +1931,6 @@ vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) m_tail = m_head; while (--nbufs > 0) { - struct vtnet_rx_buffer_header *vthdr; struct mbuf *m; uint32_t len; @@ -2111,10 +1940,6 @@ vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) goto fail; } - vthdr = vtnet_mbuf_to_rx_buffer_header(sc, m); - bus_dmamap_sync(sc->vtnet_rx_dmat, vthdr->dmap, - BUS_DMASYNC_POSTREAD); - if (vtnet_rxq_new_buf(rxq) != 0) { rxq->vtnrx_stats.vrxs_iqdrops++; vtnet_rxq_discard_buf(rxq, m); @@ -2235,7 +2060,6 @@ static int vtnet_rxq_eof(struct vtnet_rxq *rxq) { struct virtio_net_hdr lhdr, *hdr; - struct vtnet_rx_buffer_header *vthdr; struct vtnet_softc *sc; if_t ifp; struct virtqueue *vq; @@ -2251,31 +2075,14 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq) CURVNET_SET(if_getvnet(ifp)); while (count-- > 0) { - struct mbuf *m, *mp; + struct mbuf *m; uint32_t len, nbufs, adjsz; - uint32_t synced; m = virtqueue_dequeue(vq, &len); if (m == NULL) break; deq++; - mp = m; - - /* - * 
Sync all mbufs in this packet. There will only be a single - * mbuf unless LRO is in use. - */ - synced = 0; - while (mp && synced < len) { - vthdr = vtnet_mbuf_to_rx_buffer_header(sc, mp); - bus_dmamap_sync(sc->vtnet_rx_dmat, vthdr->dmap, - BUS_DMASYNC_POSTREAD); - - synced += mp->m_len; - mp = mp->m_next; - } - if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { rxq->vtnrx_stats.vrxs_ierrors++; vtnet_rxq_discard_buf(rxq, m); @@ -2535,14 +2342,6 @@ vtnet_txq_free_mbufs(struct vtnet_txq *txq) while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { if (kring == NULL) { - bus_dmamap_unload(txq->vtntx_sc->vtnet_tx_dmat, - txhdr->dmap); - bus_dmamap_destroy(txq->vtntx_sc->vtnet_tx_dmat, - txhdr->dmap); - bus_dmamap_unload(txq->vtntx_sc->vtnet_tx_dmat, - txhdr->hdr_dmap); - bus_dmamap_destroy(txq->vtntx_sc->vtnet_tx_dmat, - txhdr->hdr_dmap); m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } @@ -2712,36 +2511,15 @@ drop: return (NULL); } -static void -vtnet_txq_enqueue_callback(void *arg, bus_dma_segment_t *segs, - int nsegs, int error) -{ - vm_paddr_t *hdr_paddr; - - if (error != 0) - return; - - KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); - - hdr_paddr = (vm_paddr_t *)arg; - *hdr_paddr = segs[0].ds_addr; -} - static int vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { - bus_dma_segment_t segs[VTNET_TX_SEGS_MAX]; - int nsegs; struct vtnet_softc *sc; struct virtqueue *vq; struct sglist *sg; struct mbuf *m; int error; - vm_paddr_t hdr_paddr; - bus_dmamap_t hdr_dmap; - bus_dmamap_t dmap; - int i; sc = txq->vtntx_sc; vq = txq->vtntx_vq; @@ -2749,55 +2527,15 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, m = *m_head; sglist_reset(sg); - - error = bus_dmamap_create(sc->vtnet_tx_dmat, 0, &hdr_dmap); - if (error) - goto fail; - - error = bus_dmamap_load(sc->vtnet_tx_dmat, hdr_dmap, &txhdr->vth_uhdr, - sc->vtnet_hdr_size, vtnet_txq_enqueue_callback, &hdr_paddr, - 
BUS_DMA_NOWAIT); - if (error) - goto fail_hdr_dmamap_destroy; - - error = sglist_append_phys(sg, hdr_paddr, sc->vtnet_hdr_size); + error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); if (error != 0 || sg->sg_nseg != 1) { KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d", __func__, error, sg->sg_nseg)); - goto fail_hdr_dmamap_unload; + goto fail; } - bus_dmamap_sync(sc->vtnet_tx_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE); - - error = bus_dmamap_create(sc->vtnet_tx_dmat, 0, &dmap); - if (error) - goto fail_hdr_dmamap_unload; - - nsegs = 0; - error = bus_dmamap_load_mbuf_sg(sc->vtnet_tx_dmat, dmap, m, segs, - &nsegs, BUS_DMA_NOWAIT); - if (error != 0) - goto fail_dmamap_destroy; - KASSERT(nsegs <= sc->vtnet_tx_nsegs, - ("%s: unexpected number of DMA segments for tx buffer: %d (max %d)", - __func__, nsegs, sc->vtnet_tx_nsegs)); - - bus_dmamap_sync(sc->vtnet_tx_dmat, dmap, BUS_DMASYNC_PREWRITE); - - for (i = 0; i < nsegs && !error; i++) - error = sglist_append_phys(sg, segs[i].ds_addr, segs[i].ds_len); - + error = sglist_append_mbuf(sg, m); if (error) { - sglist_reset(sg); - bus_dmamap_unload(sc->vtnet_tx_dmat, dmap); - - error = sglist_append_phys(sg, hdr_paddr, sc->vtnet_hdr_size); - if (error != 0 || sg->sg_nseg != 1) { - KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d", - __func__, error, sg->sg_nseg)); - goto fail_dmamap_destroy; - } - m = m_defrag(m, M_NOWAIT); if (m == NULL) { sc->vtnet_stats.tx_defrag_failed++; @@ -2807,41 +2545,16 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, *m_head = m; sc->vtnet_stats.tx_defragged++; - nsegs = 0; - error = bus_dmamap_load_mbuf_sg(sc->vtnet_tx_dmat, dmap, m, - segs, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) - goto fail_dmamap_destroy; - KASSERT(nsegs <= sc->vtnet_tx_nsegs, - ("%s: unexpected number of DMA segments for tx buffer: %d (max %d)", - __func__, nsegs, sc->vtnet_tx_nsegs)); - - bus_dmamap_sync(sc->vtnet_tx_dmat, dmap, BUS_DMASYNC_PREWRITE); - - for (i = 0; i 
< nsegs && !error; i++) - error = sglist_append_phys(sg, segs[i].ds_addr, - segs[i].ds_len); - + error = sglist_append_mbuf(sg, m); if (error) - goto fail_dmamap_unload; + goto fail; } txhdr->vth_mbuf = m; - txhdr->dmap = dmap; - txhdr->hdr_dmap = hdr_dmap; - error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); return (error); -fail_dmamap_unload: - bus_dmamap_unload(sc->vtnet_tx_dmat, dmap); -fail_dmamap_destroy: - bus_dmamap_destroy(sc->vtnet_tx_dmat, dmap); -fail_hdr_dmamap_unload: - bus_dmamap_unload(sc->vtnet_tx_dmat, hdr_dmap); -fail_hdr_dmamap_destroy: - bus_dmamap_destroy(sc->vtnet_tx_dmat, hdr_dmap); fail: m_freem(*m_head); *m_head = NULL; @@ -3797,43 +3510,10 @@ vtnet_free_ctrl_vq(struct vtnet_softc *sc) } static void -vtnet_load_callback(void *arg, bus_dma_segment_t *segs, int nsegs, - int error) -{ - bus_addr_t *paddr; - - if (error != 0) - return; - - KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); - - paddr = (bus_addr_t *)arg; - *paddr = segs[0].ds_addr; -} - -static int -vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, uint8_t *ack, struct sglist *sg, - int readable, int writable) +vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, + struct sglist *sg, int readable, int writable) { - bus_dmamap_t ack_dmap; - bus_addr_t ack_paddr; struct virtqueue *vq; - int error; - - error = bus_dmamap_create(sc->vtnet_ack_dmat, 0, &ack_dmap); - if (error) - goto error_out; - - error = bus_dmamap_load(sc->vtnet_ack_dmat, ack_dmap, ack, - sizeof(uint8_t), vtnet_load_callback, &ack_paddr, BUS_DMA_NOWAIT); - if (error) - goto error_destroy; - - bus_dmamap_sync(sc->vtnet_ack_dmat, ack_dmap, BUS_DMASYNC_PREWRITE); - - error = sglist_append_phys(sg, ack_paddr, sizeof(uint8_t)); - if (error) - goto error_unload; vq = sc->vtnet_ctrl_vq; @@ -3841,237 +3521,152 @@ vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, uint8_t *ack, struct sglist *sg, VTNET_CORE_LOCK_ASSERT(sc); if (!virtqueue_empty(vq)) - goto error_unload; + return; /* * Poll for the 
response, but the command is likely completed before * returning from the notify. */ - if (virtqueue_enqueue(vq, (void *)ack, sg, readable, writable) == 0) { + if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) { virtqueue_notify(vq); virtqueue_poll(vq, NULL); } - - bus_dmamap_sync(sc->vtnet_ack_dmat, ack_dmap, BUS_DMASYNC_POSTREAD); - -error_unload: - bus_dmamap_unload(sc->vtnet_ack_dmat, ack_dmap); -error_destroy: - bus_dmamap_destroy(sc->vtnet_ack_dmat, ack_dmap); -error_out: - return (error); } static int vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { struct sglist_seg segs[3]; - bus_dmamap_t hdr_dmap; - bus_addr_t hdr_paddr; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint8_t addr[ETHER_ADDR_LEN] __aligned(8); uint8_t pad2; + uint8_t ack; } s; - uint8_t ack; int error; - error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap); - if (error) - goto error_out; - - error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s, - sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT); - if (error) - goto error_destroy_hdr; - + error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC); s.hdr.class = VIRTIO_NET_CTRL_MAC; s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN); - ack = VIRTIO_NET_ERR; - bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE); + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); - error |= sglist_append_phys(&sg, hdr_paddr, - sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append_phys(&sg, - hdr_paddr + ((uintptr_t)&s.addr - (uintptr_t)&s), - ETHER_ADDR_LEN); - MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1); + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) - error = vtnet_exec_ctrl_cmd(sc, &ack, 
&sg, sg.sg_nseg, 1); - if (error == 0) - error = (ack == VIRTIO_NET_OK ? 0 : EIO); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap); -error_destroy_hdr: - bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap); -error_out: - return (error); + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads) { struct sglist_seg segs[3]; - bus_dmamap_t hdr_dmap; - bus_addr_t hdr_paddr; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint64_t offloads __aligned(8); uint8_t pad2; + uint8_t ack; } s; - uint8_t ack; int error; - error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap); - if (error) - goto error_out; - - error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s, - sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT); - if (error) - goto error_destroy_hdr; - + error = 0; MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; s.offloads = vtnet_gtoh64(sc, offloads); - ack = VIRTIO_NET_ERR; - bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE); + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); - error |= sglist_append_phys(&sg, hdr_paddr, - sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append_phys(&sg, - hdr_paddr + ((uintptr_t)&s.offloads - (uintptr_t)&s), - sizeof(uint64_t)); - MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1); + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) - error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1); - if (error == 0) - error = (ack == VIRTIO_NET_OK ? 
0 : EIO); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap); -error_destroy_hdr: - bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap); -error_out: - return (error); + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { struct sglist_seg segs[3]; - bus_dmamap_t hdr_dmap; - bus_addr_t hdr_paddr; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; struct virtio_net_ctrl_mq mq __aligned(2); uint8_t pad2; + uint8_t ack; } s; - uint8_t ack; int error; - error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap); - if (error) - goto error_out; - - error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s, - sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT); - if (error) - goto error_destroy_hdr; - + error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_MQ); s.hdr.class = VIRTIO_NET_CTRL_MQ; s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs); - ack = VIRTIO_NET_ERR; - bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE); + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); - error |= sglist_append_phys(&sg, hdr_paddr, - sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append_phys(&sg, - hdr_paddr + ((uintptr_t)&s.mq - (uintptr_t)&s), - sizeof(struct virtio_net_ctrl_mq)); - MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1); + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) - error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1); - if (error == 0) - error = (ack == VIRTIO_NET_OK ? 
0 : EIO); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap); -error_destroy_hdr: - bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap); -error_out: - return (error); + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on) { struct sglist_seg segs[3]; - bus_dmamap_t hdr_dmap; - bus_addr_t hdr_paddr; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint8_t onoff; uint8_t pad2; + uint8_t ack; } s; - uint8_t ack; int error; - error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap); - if (error) - goto error_out; - - error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s, - sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT); - if (error) - goto error_destroy_hdr; - + error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); s.hdr.class = VIRTIO_NET_CTRL_RX; s.hdr.cmd = cmd; s.onoff = on; - ack = VIRTIO_NET_ERR; - bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE); + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); - error |= sglist_append_phys(&sg, hdr_paddr, - sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append_phys(&sg, - hdr_paddr + ((uintptr_t)&s.onoff - (uintptr_t)&s), - sizeof(uint8_t)); - MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1); + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) - error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1); - if (error == 0) - error = (ack == VIRTIO_NET_OK ? 0 : EIO); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap); -error_destroy_hdr: - bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap); -error_out: - return (error); + return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); } static int @@ -4142,10 +3737,6 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) struct virtio_net_ctrl_hdr hdr __aligned(2); struct vtnet_mac_filter *filter; struct sglist_seg segs[4]; - bus_dmamap_t filter_dmap; - bus_addr_t filter_paddr; - bus_dmamap_t hdr_dmap; - bus_addr_t hdr_paddr; struct sglist sg; if_t ifp; bool promisc, allmulti; @@ -4185,25 +3776,6 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) if (promisc && allmulti) goto out; - error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap); - if (error) - goto out_error; - - error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &hdr, - sizeof(hdr), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT); - if (error) - goto out_destroy_hdr; - - error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &filter_dmap); - if (error) - goto out_unload_hdr; - - error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, filter, - sizeof(*filter), vtnet_load_callback, &filter_paddr, - BUS_DMA_NOWAIT); - if (error) - goto out_destroy_filter; - filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt); filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt); @@ -4212,33 +3784,19 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); - error |= sglist_append_phys(&sg, hdr_paddr, - sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append_phys(&sg, - filter_paddr + ((uintptr_t)&filter->vmf_unicast - - (uintptr_t)filter), + error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN); - error |= sglist_append_phys(&sg, - filter_paddr + ((uintptr_t)&filter->vmf_multicast - - (uintptr_t)filter), + error |= sglist_append(&sg, &filter->vmf_multicast, sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN); - MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1); + error |= sglist_append(&sg, &ack, sizeof(uint8_t)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) - error = 
vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1); - if (error == 0) - error = (ack == VIRTIO_NET_OK ? 0 : EIO); - - bus_dmamap_unload(sc->vtnet_hdr_dmat, filter_dmap); -out_destroy_filter: - bus_dmamap_destroy(sc->vtnet_hdr_dmat, filter_dmap); -out_unload_hdr: - bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap); -out_destroy_hdr: - bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap); -out_error: - if (error != 0) + vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); + if (ack != VIRTIO_NET_OK) if_printf(ifp, "error setting host MAC filter table\n"); + out: if (promisc && vtnet_set_promisc(sc, true) != 0) if_printf(ifp, "cannot enable promiscuous mode\n"); @@ -4250,53 +3808,34 @@ static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct sglist_seg segs[3]; - bus_dmamap_t hdr_dmap; - bus_addr_t hdr_paddr; struct sglist sg; struct { struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint16_t tag __aligned(2); uint8_t pad2; + uint8_t ack; } s; - uint8_t ack; int error; - error = bus_dmamap_create(sc->vtnet_hdr_dmat, 0, &hdr_dmap); - if (error) - goto error_out; - - error = bus_dmamap_load(sc->vtnet_hdr_dmat, hdr_dmap, &s, - sizeof(s), vtnet_load_callback, &hdr_paddr, BUS_DMA_NOWAIT); - if (error) - goto error_destroy_hdr; - + error = 0; MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); s.hdr.class = VIRTIO_NET_CTRL_VLAN; s.hdr.cmd = add ? 
VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; s.tag = vtnet_gtoh16(sc, tag); - ack = VIRTIO_NET_ERR; - bus_dmamap_sync(sc->vtnet_hdr_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE); + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, nitems(segs), segs); - error |= sglist_append_phys(&sg, hdr_paddr, - sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append_phys(&sg, - hdr_paddr + ((uintptr_t)&s.tag - (uintptr_t)&s), - sizeof(uint16_t)); - MPASS(error == 0 && sg.sg_nseg == nitems(segs) - 1); + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); if (error == 0) - error = vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg, 1); - if (error == 0) - error = (ack == VIRTIO_NET_OK ? 0 : EIO); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - bus_dmamap_unload(sc->vtnet_hdr_dmat, hdr_dmap); -error_destroy_hdr: - bus_dmamap_destroy(sc->vtnet_hdr_dmat, hdr_dmap); -error_out: - return (error); + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static void diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h index e445bdf6d6cb..eb5e6784b07f 100644 --- a/sys/dev/virtio/network/if_vtnetvar.h +++ b/sys/dev/virtio/network/if_vtnetvar.h @@ -190,18 +190,6 @@ struct vtnet_softc { struct mtx vtnet_mtx; char vtnet_mtx_name[16]; uint8_t vtnet_hwaddr[ETHER_ADDR_LEN]; - - bus_dma_tag_t vtnet_rx_dmat; - struct mtx vtnet_rx_mtx; - - bus_dma_tag_t vtnet_tx_dmat; - struct mtx vtnet_tx_mtx; - - bus_dma_tag_t vtnet_hdr_dmat; - struct mtx vtnet_hdr_mtx; - - bus_dma_tag_t vtnet_ack_dmat; - struct mtx vtnet_ack_mtx; }; /* vtnet flag descriptions for use with printf(9) %b identifier. 
*/ #define VTNET_FLAGS_BITS \ @@ -285,10 +273,6 @@ struct vtnet_tx_header { } vth_uhdr; struct mbuf *vth_mbuf; - - bus_dmamap_t dmap; - - bus_dmamap_t hdr_dmap; }; /* diff --git a/sys/dev/virtio/p9fs/virtio_p9fs.c b/sys/dev/virtio/p9fs/virtio_p9fs.c index 2b276a60aa9a..19a32fea458e 100644 --- a/sys/dev/virtio/p9fs/virtio_p9fs.c +++ b/sys/dev/virtio/p9fs/virtio_p9fs.c @@ -464,16 +464,20 @@ static int vt9p_modevent(module_t mod, int type, void *unused) { int error; + static int loaded = 0; error = 0; switch (type) { case MOD_LOAD: - p9_init_zones(); - p9_register_trans(&vt9p_trans); + if (loaded++ == 0) { + p9_register_trans(&vt9p_trans); + } break; case MOD_UNLOAD: - p9_destroy_zones(); + if (--loaded == 0) { + p9_unregister_trans(&vt9p_trans); + } break; case MOD_SHUTDOWN: break; @@ -481,6 +485,7 @@ vt9p_modevent(module_t mod, int type, void *unused) error = EOPNOTSUPP; break; } + return (error); } diff --git a/sys/dev/vt/vt_core.c b/sys/dev/vt/vt_core.c index 68a9a71c3d72..f7cffcea5b92 100644 --- a/sys/dev/vt/vt_core.c +++ b/sys/dev/vt/vt_core.c @@ -1684,6 +1684,9 @@ vtterm_splash(struct vt_device *vd) uintptr_t image; vt_axis_t top, left; + if (KERNEL_PANICKED()) + return; + if ((vd->vd_flags & VDF_TEXTMODE) != 0 || (boothowto & RB_MUTE) == 0) return; diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c index c1188c3819e7..267e80918d1b 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -469,7 +469,6 @@ fdesc_getattr(struct vop_getattr_args *ap) break; } - vp->v_type = vap->va_type; return (0); } diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c index 1e4e8506790f..9ea4e5f4c9df 100644 --- a/sys/fs/nfs/nfs_commonkrpc.c +++ b/sys/fs/nfs/nfs_commonkrpc.c @@ -561,7 +561,9 @@ newnfs_disconnect(struct nfsmount *nmp, struct nfssockreq *nrp) } } mtx_unlock(&nrp->nr_mtx); + CURVNET_SET_QUIET(CRED_TO_VNET(nrp->nr_cred)); rpc_gss_secpurge_call(client); + CURVNET_RESTORE(); CLNT_CLOSE(client); 
CLNT_RELEASE(client); if (nmp != NULL && nmp->nm_aconnect > 0) { @@ -685,7 +687,7 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, struct nfsreq *rep = NULL; char *srv_principal = NULL, *clnt_principal = NULL; sigset_t oldset; - struct ucred *authcred; + struct ucred *authcred, *savcred; struct nfsclsession *sep; uint8_t sessionid[NFSX_V4SESSIONID]; bool nextconn_set; @@ -832,6 +834,11 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, ((nmp->nm_tprintf_delay)-(nmp->nm_tprintf_initial_delay)); } + /* + * For Kerberos, the upcall needs to be done to the gssd daemon + * running in the correct vnet. + */ + CURVNET_SET_QUIET(CRED_TO_VNET(authcred)); if (nd->nd_procnum == NFSPROC_NULL) auth = authnone_create(); else if (usegssname) { @@ -849,8 +856,9 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp, } else auth = nfs_getauth(nrp, secflavour, NULL, srv_principal, NULL, authcred); - crfree(authcred); + CURVNET_RESTORE(); if (auth == NULL) { + crfree(authcred); m_freem(nd->nd_mreq); if (set_sigset) newnfs_restore_sigmask(td, &oldset); @@ -967,6 +975,13 @@ tryagain: } } + /* + * In case CLNT_CALL_MBUF()/clnt_bck_call() does an AUTH_REFRESH(), + * the thread's credentials need to be set to authcred, so that the + * correct vnet will be set. 
+ */ + savcred = curthread->td_ucred; + curthread->td_ucred = authcred; nd->nd_mrep = NULL; if (clp != NULL && sep != NULL) stat = clnt_bck_call(nrp->nr_client, &ext, procnum, @@ -988,6 +1003,7 @@ tryagain: stat = CLNT_CALL_MBUF(nrp->nr_client, &ext, procnum, nd->nd_mreq, &nd->nd_mrep, timo); NFSCL_DEBUG(2, "clnt call=%d\n", stat); + curthread->td_ucred = savcred; if (rep != NULL) { /* @@ -1069,6 +1085,7 @@ tryagain: error = EACCES; } if (error) { + crfree(authcred); m_freem(nd->nd_mreq); if (usegssname == 0) AUTH_DESTROY(auth); @@ -1429,6 +1446,7 @@ tryagain: } } out: + crfree(authcred); #ifdef KDTRACE_HOOKS if (nmp != NULL && dtrace_nfscl_nfs234_done_probe != NULL) { @@ -1460,6 +1478,7 @@ out: newnfs_restore_sigmask(td, &oldset); return (0); nfsmout: + crfree(authcred); m_freem(nd->nd_mrep); m_freem(nd->nd_mreq); if (usegssname == 0) diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c index 91d9188d30c5..03c6688b1406 100644 --- a/sys/fs/nfs/nfs_commonport.c +++ b/sys/fs/nfs/nfs_commonport.c @@ -71,10 +71,10 @@ vop_advlock_t *nfs_advlock_p = NULL; vop_reclaim_t *nfs_reclaim_p = NULL; uint32_t nfs_srvmaxio = NFS_SRVMAXIO; -NFSD_VNET_DEFINE(struct nfsstatsv1 *, nfsstatsv1_p); +VNET_DEFINE(struct nfsstatsv1 *, nfsstatsv1_p); -NFSD_VNET_DECLARE(struct nfssockreq, nfsrv_nfsuserdsock); -NFSD_VNET_DECLARE(nfsuserd_state, nfsrv_nfsuserd); +VNET_DECLARE(struct nfssockreq, nfsrv_nfsuserdsock); +VNET_DECLARE(nfsuserd_state, nfsrv_nfsuserd); int nfs_pnfsio(task_fn_t *, void *); @@ -428,9 +428,9 @@ nfssvc_nfscommon(struct thread *td, struct nfssvc_args *uap) { int error; - NFSD_CURVNET_SET(NFSD_TD_TO_VNET(td)); + CURVNET_SET(TD_TO_VNET(td)); error = nfssvc_call(td, uap, td->td_ucred); - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); NFSEXITCODE(error); return (error); } @@ -470,105 +470,105 @@ nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) if ((uap->flag & NFSSVC_NEWSTRUCT) == 0) { /* Copy fields to the old ext_nfsstat structure. 
*/ oldnfsstats.attrcache_hits = - NFSD_VNET(nfsstatsv1_p)->attrcache_hits; + VNET(nfsstatsv1_p)->attrcache_hits; oldnfsstats.attrcache_misses = - NFSD_VNET(nfsstatsv1_p)->attrcache_misses; + VNET(nfsstatsv1_p)->attrcache_misses; oldnfsstats.lookupcache_hits = - NFSD_VNET(nfsstatsv1_p)->lookupcache_hits; + VNET(nfsstatsv1_p)->lookupcache_hits; oldnfsstats.lookupcache_misses = - NFSD_VNET(nfsstatsv1_p)->lookupcache_misses; + VNET(nfsstatsv1_p)->lookupcache_misses; oldnfsstats.direofcache_hits = - NFSD_VNET(nfsstatsv1_p)->direofcache_hits; + VNET(nfsstatsv1_p)->direofcache_hits; oldnfsstats.direofcache_misses = - NFSD_VNET(nfsstatsv1_p)->direofcache_misses; + VNET(nfsstatsv1_p)->direofcache_misses; oldnfsstats.accesscache_hits = - NFSD_VNET(nfsstatsv1_p)->accesscache_hits; + VNET(nfsstatsv1_p)->accesscache_hits; oldnfsstats.accesscache_misses = - NFSD_VNET(nfsstatsv1_p)->accesscache_misses; + VNET(nfsstatsv1_p)->accesscache_misses; oldnfsstats.biocache_reads = - NFSD_VNET(nfsstatsv1_p)->biocache_reads; + VNET(nfsstatsv1_p)->biocache_reads; oldnfsstats.read_bios = - NFSD_VNET(nfsstatsv1_p)->read_bios; + VNET(nfsstatsv1_p)->read_bios; oldnfsstats.read_physios = - NFSD_VNET(nfsstatsv1_p)->read_physios; + VNET(nfsstatsv1_p)->read_physios; oldnfsstats.biocache_writes = - NFSD_VNET(nfsstatsv1_p)->biocache_writes; + VNET(nfsstatsv1_p)->biocache_writes; oldnfsstats.write_bios = - NFSD_VNET(nfsstatsv1_p)->write_bios; + VNET(nfsstatsv1_p)->write_bios; oldnfsstats.write_physios = - NFSD_VNET(nfsstatsv1_p)->write_physios; + VNET(nfsstatsv1_p)->write_physios; oldnfsstats.biocache_readlinks = - NFSD_VNET(nfsstatsv1_p)->biocache_readlinks; + VNET(nfsstatsv1_p)->biocache_readlinks; oldnfsstats.readlink_bios = - NFSD_VNET(nfsstatsv1_p)->readlink_bios; + VNET(nfsstatsv1_p)->readlink_bios; oldnfsstats.biocache_readdirs = - NFSD_VNET(nfsstatsv1_p)->biocache_readdirs; + VNET(nfsstatsv1_p)->biocache_readdirs; oldnfsstats.readdir_bios = - NFSD_VNET(nfsstatsv1_p)->readdir_bios; + 
VNET(nfsstatsv1_p)->readdir_bios; for (i = 0; i < NFSV4_NPROCS; i++) oldnfsstats.rpccnt[i] = - NFSD_VNET(nfsstatsv1_p)->rpccnt[i]; + VNET(nfsstatsv1_p)->rpccnt[i]; oldnfsstats.rpcretries = - NFSD_VNET(nfsstatsv1_p)->rpcretries; + VNET(nfsstatsv1_p)->rpcretries; for (i = 0; i < NFSV4OP_NOPS; i++) oldnfsstats.srvrpccnt[i] = - NFSD_VNET(nfsstatsv1_p)->srvrpccnt[i]; + VNET(nfsstatsv1_p)->srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV4OP_NOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) oldnfsstats.srvrpccnt[j] = - NFSD_VNET(nfsstatsv1_p)->srvrpccnt[i]; + VNET(nfsstatsv1_p)->srvrpccnt[i]; oldnfsstats.reserved_0 = 0; oldnfsstats.reserved_1 = 0; oldnfsstats.rpcrequests = - NFSD_VNET(nfsstatsv1_p)->rpcrequests; + VNET(nfsstatsv1_p)->rpcrequests; oldnfsstats.rpctimeouts = - NFSD_VNET(nfsstatsv1_p)->rpctimeouts; + VNET(nfsstatsv1_p)->rpctimeouts; oldnfsstats.rpcunexpected = - NFSD_VNET(nfsstatsv1_p)->rpcunexpected; + VNET(nfsstatsv1_p)->rpcunexpected; oldnfsstats.rpcinvalid = - NFSD_VNET(nfsstatsv1_p)->rpcinvalid; + VNET(nfsstatsv1_p)->rpcinvalid; oldnfsstats.srvcache_inproghits = - NFSD_VNET(nfsstatsv1_p)->srvcache_inproghits; + VNET(nfsstatsv1_p)->srvcache_inproghits; oldnfsstats.reserved_2 = 0; oldnfsstats.srvcache_nonidemdonehits = - NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits; + VNET(nfsstatsv1_p)->srvcache_nonidemdonehits; oldnfsstats.srvcache_misses = - NFSD_VNET(nfsstatsv1_p)->srvcache_misses; + VNET(nfsstatsv1_p)->srvcache_misses; oldnfsstats.srvcache_tcppeak = - NFSD_VNET(nfsstatsv1_p)->srvcache_tcppeak; + VNET(nfsstatsv1_p)->srvcache_tcppeak; oldnfsstats.srvcache_size = - NFSD_VNET(nfsstatsv1_p)->srvcache_size; + VNET(nfsstatsv1_p)->srvcache_size; oldnfsstats.srvclients = - NFSD_VNET(nfsstatsv1_p)->srvclients; + VNET(nfsstatsv1_p)->srvclients; oldnfsstats.srvopenowners = - NFSD_VNET(nfsstatsv1_p)->srvopenowners; + VNET(nfsstatsv1_p)->srvopenowners; oldnfsstats.srvopens = - NFSD_VNET(nfsstatsv1_p)->srvopens; + VNET(nfsstatsv1_p)->srvopens; 
oldnfsstats.srvlockowners = - NFSD_VNET(nfsstatsv1_p)->srvlockowners; + VNET(nfsstatsv1_p)->srvlockowners; oldnfsstats.srvlocks = - NFSD_VNET(nfsstatsv1_p)->srvlocks; + VNET(nfsstatsv1_p)->srvlocks; oldnfsstats.srvdelegates = - NFSD_VNET(nfsstatsv1_p)->srvdelegates; + VNET(nfsstatsv1_p)->srvdelegates; for (i = 0; i < NFSV4OP_CBNOPS; i++) oldnfsstats.cbrpccnt[i] = - NFSD_VNET(nfsstatsv1_p)->cbrpccnt[i]; + VNET(nfsstatsv1_p)->cbrpccnt[i]; oldnfsstats.clopenowners = - NFSD_VNET(nfsstatsv1_p)->clopenowners; - oldnfsstats.clopens = NFSD_VNET(nfsstatsv1_p)->clopens; + VNET(nfsstatsv1_p)->clopenowners; + oldnfsstats.clopens = VNET(nfsstatsv1_p)->clopens; oldnfsstats.cllockowners = - NFSD_VNET(nfsstatsv1_p)->cllockowners; - oldnfsstats.cllocks = NFSD_VNET(nfsstatsv1_p)->cllocks; + VNET(nfsstatsv1_p)->cllockowners; + oldnfsstats.cllocks = VNET(nfsstatsv1_p)->cllocks; oldnfsstats.cldelegates = - NFSD_VNET(nfsstatsv1_p)->cldelegates; + VNET(nfsstatsv1_p)->cldelegates; oldnfsstats.cllocalopenowners = - NFSD_VNET(nfsstatsv1_p)->cllocalopenowners; + VNET(nfsstatsv1_p)->cllocalopenowners; oldnfsstats.cllocalopens = - NFSD_VNET(nfsstatsv1_p)->cllocalopens; + VNET(nfsstatsv1_p)->cllocalopens; oldnfsstats.cllocallockowners = - NFSD_VNET(nfsstatsv1_p)->cllocallockowners; + VNET(nfsstatsv1_p)->cllocallockowners; oldnfsstats.cllocallocks = - NFSD_VNET(nfsstatsv1_p)->cllocallocks; + VNET(nfsstatsv1_p)->cllocallocks; error = copyout(&oldnfsstats, uap->argp, sizeof (oldnfsstats)); } else { @@ -578,174 +578,174 @@ nfssvc_call(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) if (nfsstatver.vers == NFSSTATS_OV1) { /* Copy nfsstatsv1 to nfsstatsov1. 
*/ nfsstatsov1.attrcache_hits = - NFSD_VNET(nfsstatsv1_p)->attrcache_hits; + VNET(nfsstatsv1_p)->attrcache_hits; nfsstatsov1.attrcache_misses = - NFSD_VNET(nfsstatsv1_p)->attrcache_misses; + VNET(nfsstatsv1_p)->attrcache_misses; nfsstatsov1.lookupcache_hits = - NFSD_VNET(nfsstatsv1_p)->lookupcache_hits; + VNET(nfsstatsv1_p)->lookupcache_hits; nfsstatsov1.lookupcache_misses = - NFSD_VNET(nfsstatsv1_p)->lookupcache_misses; + VNET(nfsstatsv1_p)->lookupcache_misses; nfsstatsov1.direofcache_hits = - NFSD_VNET(nfsstatsv1_p)->direofcache_hits; + VNET(nfsstatsv1_p)->direofcache_hits; nfsstatsov1.direofcache_misses = - NFSD_VNET(nfsstatsv1_p)->direofcache_misses; + VNET(nfsstatsv1_p)->direofcache_misses; nfsstatsov1.accesscache_hits = - NFSD_VNET(nfsstatsv1_p)->accesscache_hits; + VNET(nfsstatsv1_p)->accesscache_hits; nfsstatsov1.accesscache_misses = - NFSD_VNET(nfsstatsv1_p)->accesscache_misses; + VNET(nfsstatsv1_p)->accesscache_misses; nfsstatsov1.biocache_reads = - NFSD_VNET(nfsstatsv1_p)->biocache_reads; + VNET(nfsstatsv1_p)->biocache_reads; nfsstatsov1.read_bios = - NFSD_VNET(nfsstatsv1_p)->read_bios; + VNET(nfsstatsv1_p)->read_bios; nfsstatsov1.read_physios = - NFSD_VNET(nfsstatsv1_p)->read_physios; + VNET(nfsstatsv1_p)->read_physios; nfsstatsov1.biocache_writes = - NFSD_VNET(nfsstatsv1_p)->biocache_writes; + VNET(nfsstatsv1_p)->biocache_writes; nfsstatsov1.write_bios = - NFSD_VNET(nfsstatsv1_p)->write_bios; + VNET(nfsstatsv1_p)->write_bios; nfsstatsov1.write_physios = - NFSD_VNET(nfsstatsv1_p)->write_physios; + VNET(nfsstatsv1_p)->write_physios; nfsstatsov1.biocache_readlinks = - NFSD_VNET(nfsstatsv1_p)->biocache_readlinks; + VNET(nfsstatsv1_p)->biocache_readlinks; nfsstatsov1.readlink_bios = - NFSD_VNET(nfsstatsv1_p)->readlink_bios; + VNET(nfsstatsv1_p)->readlink_bios; nfsstatsov1.biocache_readdirs = - NFSD_VNET(nfsstatsv1_p)->biocache_readdirs; + VNET(nfsstatsv1_p)->biocache_readdirs; nfsstatsov1.readdir_bios = - NFSD_VNET(nfsstatsv1_p)->readdir_bios; + 
VNET(nfsstatsv1_p)->readdir_bios; for (i = 0; i < NFSV42_OLDNPROCS; i++) nfsstatsov1.rpccnt[i] = - NFSD_VNET(nfsstatsv1_p)->rpccnt[i]; + VNET(nfsstatsv1_p)->rpccnt[i]; nfsstatsov1.rpcretries = - NFSD_VNET(nfsstatsv1_p)->rpcretries; + VNET(nfsstatsv1_p)->rpcretries; for (i = 0; i < NFSV42_PURENOPS; i++) nfsstatsov1.srvrpccnt[i] = - NFSD_VNET(nfsstatsv1_p)->srvrpccnt[i]; + VNET(nfsstatsv1_p)->srvrpccnt[i]; for (i = NFSV42_NOPS, j = NFSV42_PURENOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) nfsstatsov1.srvrpccnt[j] = - NFSD_VNET(nfsstatsv1_p)->srvrpccnt[i]; + VNET(nfsstatsv1_p)->srvrpccnt[i]; nfsstatsov1.reserved_0 = 0; nfsstatsov1.reserved_1 = 0; nfsstatsov1.rpcrequests = - NFSD_VNET(nfsstatsv1_p)->rpcrequests; + VNET(nfsstatsv1_p)->rpcrequests; nfsstatsov1.rpctimeouts = - NFSD_VNET(nfsstatsv1_p)->rpctimeouts; + VNET(nfsstatsv1_p)->rpctimeouts; nfsstatsov1.rpcunexpected = - NFSD_VNET(nfsstatsv1_p)->rpcunexpected; + VNET(nfsstatsv1_p)->rpcunexpected; nfsstatsov1.rpcinvalid = - NFSD_VNET(nfsstatsv1_p)->rpcinvalid; + VNET(nfsstatsv1_p)->rpcinvalid; nfsstatsov1.srvcache_inproghits = - NFSD_VNET(nfsstatsv1_p)->srvcache_inproghits; + VNET(nfsstatsv1_p)->srvcache_inproghits; nfsstatsov1.reserved_2 = 0; nfsstatsov1.srvcache_nonidemdonehits = - NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits; + VNET(nfsstatsv1_p)->srvcache_nonidemdonehits; nfsstatsov1.srvcache_misses = - NFSD_VNET(nfsstatsv1_p)->srvcache_misses; + VNET(nfsstatsv1_p)->srvcache_misses; nfsstatsov1.srvcache_tcppeak = - NFSD_VNET(nfsstatsv1_p)->srvcache_tcppeak; + VNET(nfsstatsv1_p)->srvcache_tcppeak; nfsstatsov1.srvcache_size = - NFSD_VNET(nfsstatsv1_p)->srvcache_size; + VNET(nfsstatsv1_p)->srvcache_size; nfsstatsov1.srvclients = - NFSD_VNET(nfsstatsv1_p)->srvclients; + VNET(nfsstatsv1_p)->srvclients; nfsstatsov1.srvopenowners = - NFSD_VNET(nfsstatsv1_p)->srvopenowners; + VNET(nfsstatsv1_p)->srvopenowners; nfsstatsov1.srvopens = - NFSD_VNET(nfsstatsv1_p)->srvopens; + VNET(nfsstatsv1_p)->srvopens; 
nfsstatsov1.srvlockowners = - NFSD_VNET(nfsstatsv1_p)->srvlockowners; + VNET(nfsstatsv1_p)->srvlockowners; nfsstatsov1.srvlocks = - NFSD_VNET(nfsstatsv1_p)->srvlocks; + VNET(nfsstatsv1_p)->srvlocks; nfsstatsov1.srvdelegates = - NFSD_VNET(nfsstatsv1_p)->srvdelegates; + VNET(nfsstatsv1_p)->srvdelegates; for (i = 0; i < NFSV42_CBNOPS; i++) nfsstatsov1.cbrpccnt[i] = - NFSD_VNET(nfsstatsv1_p)->cbrpccnt[i]; + VNET(nfsstatsv1_p)->cbrpccnt[i]; nfsstatsov1.clopenowners = - NFSD_VNET(nfsstatsv1_p)->clopenowners; + VNET(nfsstatsv1_p)->clopenowners; nfsstatsov1.clopens = - NFSD_VNET(nfsstatsv1_p)->clopens; + VNET(nfsstatsv1_p)->clopens; nfsstatsov1.cllockowners = - NFSD_VNET(nfsstatsv1_p)->cllockowners; + VNET(nfsstatsv1_p)->cllockowners; nfsstatsov1.cllocks = - NFSD_VNET(nfsstatsv1_p)->cllocks; + VNET(nfsstatsv1_p)->cllocks; nfsstatsov1.cldelegates = - NFSD_VNET(nfsstatsv1_p)->cldelegates; + VNET(nfsstatsv1_p)->cldelegates; nfsstatsov1.cllocalopenowners = - NFSD_VNET(nfsstatsv1_p)->cllocalopenowners; + VNET(nfsstatsv1_p)->cllocalopenowners; nfsstatsov1.cllocalopens = - NFSD_VNET(nfsstatsv1_p)->cllocalopens; + VNET(nfsstatsv1_p)->cllocalopens; nfsstatsov1.cllocallockowners = - NFSD_VNET(nfsstatsv1_p)->cllocallockowners; + VNET(nfsstatsv1_p)->cllocallockowners; nfsstatsov1.cllocallocks = - NFSD_VNET(nfsstatsv1_p)->cllocallocks; + VNET(nfsstatsv1_p)->cllocallocks; nfsstatsov1.srvstartcnt = - NFSD_VNET(nfsstatsv1_p)->srvstartcnt; + VNET(nfsstatsv1_p)->srvstartcnt; nfsstatsov1.srvdonecnt = - NFSD_VNET(nfsstatsv1_p)->srvdonecnt; + VNET(nfsstatsv1_p)->srvdonecnt; for (i = NFSV42_NOPS, j = NFSV42_PURENOPS; i < NFSV42_NOPS + NFSV4OP_FAKENOPS; i++, j++) { nfsstatsov1.srvbytes[j] = - NFSD_VNET(nfsstatsv1_p)->srvbytes[i]; + VNET(nfsstatsv1_p)->srvbytes[i]; nfsstatsov1.srvops[j] = - NFSD_VNET(nfsstatsv1_p)->srvops[i]; + VNET(nfsstatsv1_p)->srvops[i]; nfsstatsov1.srvduration[j] = - NFSD_VNET(nfsstatsv1_p)->srvduration[i]; + VNET(nfsstatsv1_p)->srvduration[i]; } nfsstatsov1.busyfrom = - 
NFSD_VNET(nfsstatsv1_p)->busyfrom; + VNET(nfsstatsv1_p)->busyfrom; nfsstatsov1.busyfrom = - NFSD_VNET(nfsstatsv1_p)->busyfrom; + VNET(nfsstatsv1_p)->busyfrom; error = copyout(&nfsstatsov1, uap->argp, sizeof(nfsstatsov1)); } else if (nfsstatver.vers != NFSSTATS_V1) error = EPERM; else - error = copyout(NFSD_VNET(nfsstatsv1_p), + error = copyout(VNET(nfsstatsv1_p), uap->argp, sizeof(nfsstatsv1)); } } if (error == 0) { if ((uap->flag & NFSSVC_ZEROCLTSTATS) != 0) { - NFSD_VNET(nfsstatsv1_p)->attrcache_hits = 0; - NFSD_VNET(nfsstatsv1_p)->attrcache_misses = 0; - NFSD_VNET(nfsstatsv1_p)->lookupcache_hits = 0; - NFSD_VNET(nfsstatsv1_p)->lookupcache_misses = 0; - NFSD_VNET(nfsstatsv1_p)->direofcache_hits = 0; - NFSD_VNET(nfsstatsv1_p)->direofcache_misses = 0; - NFSD_VNET(nfsstatsv1_p)->accesscache_hits = 0; - NFSD_VNET(nfsstatsv1_p)->accesscache_misses = 0; - NFSD_VNET(nfsstatsv1_p)->biocache_reads = 0; - NFSD_VNET(nfsstatsv1_p)->read_bios = 0; - NFSD_VNET(nfsstatsv1_p)->read_physios = 0; - NFSD_VNET(nfsstatsv1_p)->biocache_writes = 0; - NFSD_VNET(nfsstatsv1_p)->write_bios = 0; - NFSD_VNET(nfsstatsv1_p)->write_physios = 0; - NFSD_VNET(nfsstatsv1_p)->biocache_readlinks = 0; - NFSD_VNET(nfsstatsv1_p)->readlink_bios = 0; - NFSD_VNET(nfsstatsv1_p)->biocache_readdirs = 0; - NFSD_VNET(nfsstatsv1_p)->readdir_bios = 0; - NFSD_VNET(nfsstatsv1_p)->rpcretries = 0; - NFSD_VNET(nfsstatsv1_p)->rpcrequests = 0; - NFSD_VNET(nfsstatsv1_p)->rpctimeouts = 0; - NFSD_VNET(nfsstatsv1_p)->rpcunexpected = 0; - NFSD_VNET(nfsstatsv1_p)->rpcinvalid = 0; - bzero(NFSD_VNET(nfsstatsv1_p)->rpccnt, - sizeof(NFSD_VNET(nfsstatsv1_p)->rpccnt)); + VNET(nfsstatsv1_p)->attrcache_hits = 0; + VNET(nfsstatsv1_p)->attrcache_misses = 0; + VNET(nfsstatsv1_p)->lookupcache_hits = 0; + VNET(nfsstatsv1_p)->lookupcache_misses = 0; + VNET(nfsstatsv1_p)->direofcache_hits = 0; + VNET(nfsstatsv1_p)->direofcache_misses = 0; + VNET(nfsstatsv1_p)->accesscache_hits = 0; + VNET(nfsstatsv1_p)->accesscache_misses = 0; + 
VNET(nfsstatsv1_p)->biocache_reads = 0; + VNET(nfsstatsv1_p)->read_bios = 0; + VNET(nfsstatsv1_p)->read_physios = 0; + VNET(nfsstatsv1_p)->biocache_writes = 0; + VNET(nfsstatsv1_p)->write_bios = 0; + VNET(nfsstatsv1_p)->write_physios = 0; + VNET(nfsstatsv1_p)->biocache_readlinks = 0; + VNET(nfsstatsv1_p)->readlink_bios = 0; + VNET(nfsstatsv1_p)->biocache_readdirs = 0; + VNET(nfsstatsv1_p)->readdir_bios = 0; + VNET(nfsstatsv1_p)->rpcretries = 0; + VNET(nfsstatsv1_p)->rpcrequests = 0; + VNET(nfsstatsv1_p)->rpctimeouts = 0; + VNET(nfsstatsv1_p)->rpcunexpected = 0; + VNET(nfsstatsv1_p)->rpcinvalid = 0; + bzero(VNET(nfsstatsv1_p)->rpccnt, + sizeof(VNET(nfsstatsv1_p)->rpccnt)); } if ((uap->flag & NFSSVC_ZEROSRVSTATS) != 0) { - NFSD_VNET(nfsstatsv1_p)->srvcache_inproghits = 0; - NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits = 0; - NFSD_VNET(nfsstatsv1_p)->srvcache_misses = 0; - NFSD_VNET(nfsstatsv1_p)->srvcache_tcppeak = 0; - bzero(NFSD_VNET(nfsstatsv1_p)->srvrpccnt, - sizeof(NFSD_VNET(nfsstatsv1_p)->srvrpccnt)); - bzero(NFSD_VNET(nfsstatsv1_p)->cbrpccnt, - sizeof(NFSD_VNET(nfsstatsv1_p)->cbrpccnt)); + VNET(nfsstatsv1_p)->srvcache_inproghits = 0; + VNET(nfsstatsv1_p)->srvcache_nonidemdonehits = 0; + VNET(nfsstatsv1_p)->srvcache_misses = 0; + VNET(nfsstatsv1_p)->srvcache_tcppeak = 0; + bzero(VNET(nfsstatsv1_p)->srvrpccnt, + sizeof(VNET(nfsstatsv1_p)->srvrpccnt)); + bzero(VNET(nfsstatsv1_p)->cbrpccnt, + sizeof(VNET(nfsstatsv1_p)->cbrpccnt)); } } goto out; @@ -891,11 +891,11 @@ nfs_vnetinit(const void *unused __unused) { if (IS_DEFAULT_VNET(curvnet)) - NFSD_VNET(nfsstatsv1_p) = &nfsstatsv1; + VNET(nfsstatsv1_p) = &nfsstatsv1; else - NFSD_VNET(nfsstatsv1_p) = malloc(sizeof(struct nfsstatsv1), + VNET(nfsstatsv1_p) = malloc(sizeof(struct nfsstatsv1), M_TEMP, M_WAITOK | M_ZERO); - mtx_init(&NFSD_VNET(nfsrv_nfsuserdsock).nr_mtx, "nfsuserd", + mtx_init(&VNET(nfsrv_nfsuserdsock).nr_mtx, "nfsuserd", NULL, MTX_DEF); } VNET_SYSINIT(nfs_vnetinit, SI_SUB_VNET_DONE, SI_ORDER_FIRST, 
@@ -905,10 +905,10 @@ static void nfs_cleanup(void *unused __unused) { - mtx_destroy(&NFSD_VNET(nfsrv_nfsuserdsock).nr_mtx); + mtx_destroy(&VNET(nfsrv_nfsuserdsock).nr_mtx); if (!IS_DEFAULT_VNET(curvnet)) { - free(NFSD_VNET(nfsstatsv1_p), M_TEMP); - NFSD_VNET(nfsstatsv1_p) = NULL; + free(VNET(nfsstatsv1_p), M_TEMP); + VNET(nfsstatsv1_p) = NULL; } /* Clean out the name<-->id cache. */ nfsrv_cleanusergroup(); @@ -945,7 +945,7 @@ nfscommon_modevent(module_t mod, int type, void *data) case MOD_UNLOAD: if (newnfs_numnfsd != 0 || - NFSD_VNET(nfsrv_nfsuserd) != NOTRUNNING || + VNET(nfsrv_nfsuserd) != NOTRUNNING || nfs_numnfscbd != 0) { error = EBUSY; break; diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 3bff2737b687..b5f83a98b307 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -96,19 +96,19 @@ struct nfs_prime_userd nfs_prime_userd[] = { { 0, 0, 0, NULL }, }; -NFSD_VNET_DEFINE(int, nfsd_enable_stringtouid) = 0; -NFSD_VNET_DEFINE(struct nfssockreq, nfsrv_nfsuserdsock); -NFSD_VNET_DEFINE(nfsuserd_state, nfsrv_nfsuserd) = NOTRUNNING; -NFSD_VNET_DEFINE(uid_t, nfsrv_defaultuid) = UID_NOBODY; -NFSD_VNET_DEFINE(gid_t, nfsrv_defaultgid) = GID_NOGROUP; +VNET_DEFINE(int, nfsd_enable_stringtouid) = 0; +VNET_DEFINE(struct nfssockreq, nfsrv_nfsuserdsock); +VNET_DEFINE(nfsuserd_state, nfsrv_nfsuserd) = NOTRUNNING; +VNET_DEFINE(uid_t, nfsrv_defaultuid) = UID_NOBODY; +VNET_DEFINE(gid_t, nfsrv_defaultgid) = GID_NOGROUP; -NFSD_VNET_DEFINE_STATIC(int, nfsrv_userdupcalls) = 0; +VNET_DEFINE_STATIC(int, nfsrv_userdupcalls) = 0; SYSCTL_DECL(_vfs_nfs); -NFSD_VNET_DEFINE_STATIC(int, nfs_enable_uidtostring) = 0; +VNET_DEFINE_STATIC(int, nfs_enable_uidtostring) = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, enable_uidtostring, - CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfs_enable_uidtostring), 0, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nfs_enable_uidtostring), 0, "Make nfs always send numeric owner_names"); int nfsrv_maxpnfsmirror = 1; @@ 
-216,14 +216,14 @@ struct nfsrv_lughash { struct nfsuserhashhead lughead; }; -NFSD_VNET_DEFINE_STATIC(int, nfsrv_usercnt) = 0; -NFSD_VNET_DEFINE_STATIC(int, nfsrv_dnsnamelen) = 0; -NFSD_VNET_DEFINE_STATIC(int, nfsrv_usermax) = 999999999; -NFSD_VNET_DEFINE_STATIC(struct nfsrv_lughash *, nfsuserhash) = NULL; -NFSD_VNET_DEFINE_STATIC(struct nfsrv_lughash *, nfsusernamehash) = NULL; -NFSD_VNET_DEFINE_STATIC(struct nfsrv_lughash *, nfsgrouphash) = NULL; -NFSD_VNET_DEFINE_STATIC(struct nfsrv_lughash *, nfsgroupnamehash) = NULL; -NFSD_VNET_DEFINE_STATIC(u_char *, nfsrv_dnsname) = NULL; +VNET_DEFINE_STATIC(int, nfsrv_usercnt) = 0; +VNET_DEFINE_STATIC(int, nfsrv_dnsnamelen) = 0; +VNET_DEFINE_STATIC(int, nfsrv_usermax) = 999999999; +VNET_DEFINE_STATIC(struct nfsrv_lughash *, nfsuserhash) = NULL; +VNET_DEFINE_STATIC(struct nfsrv_lughash *, nfsusernamehash) = NULL; +VNET_DEFINE_STATIC(struct nfsrv_lughash *, nfsgrouphash) = NULL; +VNET_DEFINE_STATIC(struct nfsrv_lughash *, nfsgroupnamehash) = NULL; +VNET_DEFINE_STATIC(u_char *, nfsrv_dnsname) = NULL; /* * This static array indicates whether or not the RPC generates a large @@ -1369,7 +1369,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, #endif CTASSERT(sizeof(ino_t) == sizeof(uint64_t)); - NFSD_CURVNET_SET_QUIET(NFSD_TD_TO_VNET(curthread)); + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); if (compare) { retnotsup = 0; error = nfsrv_getattrbits(nd, &attrbits, NULL, &retnotsup); @@ -2064,7 +2064,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, } else if (nap != NULL) { if (nfsv4_strtouid(nd, cp, j, &uid)) nap->na_uid = - NFSD_VNET(nfsrv_defaultuid); + VNET(nfsrv_defaultuid); else nap->na_uid = uid; } @@ -2098,7 +2098,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, } else if (nap != NULL) { if (nfsv4_strtogid(nd, cp, j, &gid)) nap->na_gid = - NFSD_VNET(nfsrv_defaultgid); + VNET(nfsrv_defaultgid); else nap->na_gid = gid; } @@ -2602,7 +2602,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, error 
= nfsm_advance(nd, attrsize - attrsum, -1); } nfsmout: - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); NFSEXITCODE2(error, nd); return (error); } @@ -3652,16 +3652,16 @@ nfsv4_uidtostr(uid_t uid, u_char **cpp, int *retlenp) int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; - NFSD_CURVNET_SET_QUIET(NFSD_TD_TO_VNET(curthread)); + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); cnt = 0; tryagain: - if (NFSD_VNET(nfsrv_dnsnamelen) > 0 && - !NFSD_VNET(nfs_enable_uidtostring)) { + if (VNET(nfsrv_dnsnamelen) > 0 && + !VNET(nfs_enable_uidtostring)) { /* * Always map nfsrv_defaultuid to "nobody". */ - if (uid == NFSD_VNET(nfsrv_defaultuid)) { - i = NFSD_VNET(nfsrv_dnsnamelen) + 7; + if (uid == VNET(nfsrv_defaultuid)) { + i = VNET(nfsrv_dnsnamelen) + 7; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); @@ -3673,9 +3673,9 @@ tryagain: *retlenp = i; NFSBCOPY("nobody@", cp, 7); cp += 7; - NFSBCOPY(NFSD_VNET(nfsrv_dnsname), cp, - NFSD_VNET(nfsrv_dnsnamelen)); - NFSD_CURVNET_RESTORE(); + NFSBCOPY(VNET(nfsrv_dnsname), cp, + VNET(nfsrv_dnsnamelen)); + CURVNET_RESTORE(); return; } hasampersand = 0; @@ -3700,7 +3700,7 @@ tryagain: i = usrp->lug_namelen; else i = usrp->lug_namelen + - NFSD_VNET(nfsrv_dnsnamelen) + 1; + VNET(nfsrv_dnsnamelen) + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) @@ -3715,14 +3715,14 @@ tryagain: if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; - NFSBCOPY(NFSD_VNET(nfsrv_dnsname), cp, - NFSD_VNET(nfsrv_dnsnamelen)); + NFSBCOPY(VNET(nfsrv_dnsname), cp, + VNET(nfsrv_dnsnamelen)); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); return; } } @@ -3750,7 +3750,7 @@ tryagain: *cp-- = '0' + (tmp % 10); tmp /= 10; } - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); return; } @@ -3771,7 +3771,7 @@ nfsrv_getgrpscred(struct ucred *oldcred) cnt = 0; uid = oldcred->cr_uid; tryagain: - if 
(NFSD_VNET(nfsrv_dnsnamelen) > 0) { + if (VNET(nfsrv_dnsnamelen) > 0) { hp = NFSUSERHASH(uid); mtx_lock(&hp->mtx); TAILQ_FOREACH(usrp, &hp->lughead, lug_numhash) { @@ -3819,7 +3819,7 @@ nfsv4_strtouid(struct nfsrv_descript *nd, u_char *str, int len, uid_t *uidp) uid_t tuid; struct nfsrv_lughash *hp, *hp2; - NFSD_CURVNET_SET_QUIET(NFSD_TD_TO_VNET(curthread)); + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); if (len == 0) { error = NFSERR_BADOWNER; goto out; @@ -3831,7 +3831,7 @@ nfsv4_strtouid(struct nfsrv_descript *nd, u_char *str, int len, uid_t *uidp) /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || - NFSD_VNET(nfsd_enable_stringtouid) != 0)) + VNET(nfsd_enable_stringtouid) != 0)) *uidp = tuid; else error = NFSERR_BADOWNER; @@ -3848,7 +3848,7 @@ nfsv4_strtouid(struct nfsrv_descript *nd, u_char *str, int len, uid_t *uidp) cnt = 0; tryagain: - if (NFSD_VNET(nfsrv_dnsnamelen) > 0) { + if (VNET(nfsrv_dnsnamelen) > 0) { /* * If an '@' is found and the domain name matches, search for * the name with dns stripped off. @@ -3856,10 +3856,10 @@ tryagain: * since RFC8881 defines this string as a DNS domain name. */ if (cnt == 0 && i < len && i > 0 && - (len - 1 - i) == NFSD_VNET(nfsrv_dnsnamelen) && - strncasecmp(cp, NFSD_VNET(nfsrv_dnsname), - NFSD_VNET(nfsrv_dnsnamelen)) == 0) { - len -= (NFSD_VNET(nfsrv_dnsnamelen) + 1); + (len - 1 - i) == VNET(nfsrv_dnsnamelen) && + strncasecmp(cp, VNET(nfsrv_dnsname), + VNET(nfsrv_dnsnamelen)) == 0) { + len -= (VNET(nfsrv_dnsnamelen) + 1); *(cp - 1) = '\0'; } @@ -3867,7 +3867,7 @@ tryagain: * Check for the special case of "nobody". 
*/ if (len == 6 && !NFSBCMP(str, "nobody", 6)) { - *uidp = NFSD_VNET(nfsrv_defaultuid); + *uidp = VNET(nfsrv_defaultuid); error = 0; goto out; } @@ -3902,7 +3902,7 @@ tryagain: error = NFSERR_BADOWNER; out: - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); NFSEXITCODE(error); return (error); } @@ -3924,16 +3924,16 @@ nfsv4_gidtostr(gid_t gid, u_char **cpp, int *retlenp) int cnt, hasampersand, len = NFSV4_SMALLSTR, ret; struct nfsrv_lughash *hp; - NFSD_CURVNET_SET_QUIET(NFSD_TD_TO_VNET(curthread)); + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); cnt = 0; tryagain: - if (NFSD_VNET(nfsrv_dnsnamelen) > 0 && - !NFSD_VNET(nfs_enable_uidtostring)) { + if (VNET(nfsrv_dnsnamelen) > 0 && + !VNET(nfs_enable_uidtostring)) { /* * Always map nfsrv_defaultgid to "nogroup". */ - if (gid == NFSD_VNET(nfsrv_defaultgid)) { - i = NFSD_VNET(nfsrv_dnsnamelen) + 8; + if (gid == VNET(nfsrv_defaultgid)) { + i = VNET(nfsrv_dnsnamelen) + 8; if (i > len) { if (len > NFSV4_SMALLSTR) free(cp, M_NFSSTRING); @@ -3945,9 +3945,9 @@ tryagain: *retlenp = i; NFSBCOPY("nogroup@", cp, 8); cp += 8; - NFSBCOPY(NFSD_VNET(nfsrv_dnsname), cp, - NFSD_VNET(nfsrv_dnsnamelen)); - NFSD_CURVNET_RESTORE(); + NFSBCOPY(VNET(nfsrv_dnsname), cp, + VNET(nfsrv_dnsnamelen)); + CURVNET_RESTORE(); return; } hasampersand = 0; @@ -3972,7 +3972,7 @@ tryagain: i = usrp->lug_namelen; else i = usrp->lug_namelen + - NFSD_VNET(nfsrv_dnsnamelen) + 1; + VNET(nfsrv_dnsnamelen) + 1; if (i > len) { mtx_unlock(&hp->mtx); if (len > NFSV4_SMALLSTR) @@ -3987,14 +3987,14 @@ tryagain: if (!hasampersand) { cp += usrp->lug_namelen; *cp++ = '@'; - NFSBCOPY(NFSD_VNET(nfsrv_dnsname), cp, - NFSD_VNET(nfsrv_dnsnamelen)); + NFSBCOPY(VNET(nfsrv_dnsname), cp, + VNET(nfsrv_dnsnamelen)); } TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); TAILQ_INSERT_TAIL(&hp->lughead, usrp, lug_numhash); mtx_unlock(&hp->mtx); - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); return; } } @@ -4022,7 +4022,7 @@ tryagain: *cp-- = '0' + (tmp % 10); tmp /= 10; } - NFSD_CURVNET_RESTORE(); 
+ CURVNET_RESTORE(); return; } @@ -4045,7 +4045,7 @@ nfsv4_strtogid(struct nfsrv_descript *nd, u_char *str, int len, gid_t *gidp) gid_t tgid; struct nfsrv_lughash *hp, *hp2; - NFSD_CURVNET_SET_QUIET(NFSD_TD_TO_VNET(curthread)); + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); if (len == 0) { error = NFSERR_BADOWNER; goto out; @@ -4057,7 +4057,7 @@ nfsv4_strtogid(struct nfsrv_descript *nd, u_char *str, int len, gid_t *gidp) /* A numeric string. */ if ((nd->nd_flag & ND_KERBV) == 0 && ((nd->nd_flag & ND_NFSCL) != 0 || - NFSD_VNET(nfsd_enable_stringtouid) != 0)) + VNET(nfsd_enable_stringtouid) != 0)) *gidp = tgid; else error = NFSERR_BADOWNER; @@ -4074,16 +4074,16 @@ nfsv4_strtogid(struct nfsrv_descript *nd, u_char *str, int len, gid_t *gidp) cnt = 0; tryagain: - if (NFSD_VNET(nfsrv_dnsnamelen) > 0) { + if (VNET(nfsrv_dnsnamelen) > 0) { /* * If an '@' is found and the dns name matches, search for the * name with the dns stripped off. */ if (cnt == 0 && i < len && i > 0 && - (len - 1 - i) == NFSD_VNET(nfsrv_dnsnamelen) && - strncasecmp(cp, NFSD_VNET(nfsrv_dnsname), - NFSD_VNET(nfsrv_dnsnamelen)) == 0) { - len -= (NFSD_VNET(nfsrv_dnsnamelen) + 1); + (len - 1 - i) == VNET(nfsrv_dnsnamelen) && + strncasecmp(cp, VNET(nfsrv_dnsname), + VNET(nfsrv_dnsnamelen)) == 0) { + len -= (VNET(nfsrv_dnsnamelen) + 1); *(cp - 1) = '\0'; } @@ -4091,7 +4091,7 @@ tryagain: * Check for the special case of "nogroup". 
*/ if (len == 7 && !NFSBCMP(str, "nogroup", 7)) { - *gidp = NFSD_VNET(nfsrv_defaultgid); + *gidp = VNET(nfsrv_defaultgid); error = 0; goto out; } @@ -4126,7 +4126,7 @@ tryagain: error = NFSERR_BADOWNER; out: - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); NFSEXITCODE(error); return (error); } @@ -4148,12 +4148,12 @@ nfsrv_nfsuserdport(struct nfsuserd_args *nargs, NFSPROC_T *p) int error; NFSLOCKNAMEID(); - if (NFSD_VNET(nfsrv_nfsuserd) != NOTRUNNING) { + if (VNET(nfsrv_nfsuserd) != NOTRUNNING) { NFSUNLOCKNAMEID(); error = EPERM; goto out; } - NFSD_VNET(nfsrv_nfsuserd) = STARTSTOP; + VNET(nfsrv_nfsuserd) = STARTSTOP; /* * Set up the socket record and connect. * Set nr_client NULL before unlocking, just to ensure that no other @@ -4161,13 +4161,13 @@ nfsrv_nfsuserdport(struct nfsuserd_args *nargs, NFSPROC_T *p) * occur if the use of the nameid lock to protect nfsrv_nfsuserd is * broken. */ - rp = &NFSD_VNET(nfsrv_nfsuserdsock); + rp = &VNET(nfsrv_nfsuserdsock); rp->nr_client = NULL; NFSUNLOCKNAMEID(); rp->nr_sotype = SOCK_DGRAM; rp->nr_soproto = IPPROTO_UDP; rp->nr_lock = (NFSR_RESERVEDPORT | NFSR_LOCALHOST); - rp->nr_cred = NULL; + rp->nr_cred = crhold(curthread->td_ucred); rp->nr_prog = RPCPROG_NFSUSERD; error = 0; switch (nargs->nuserd_family) { @@ -4202,12 +4202,12 @@ nfsrv_nfsuserdport(struct nfsuserd_args *nargs, NFSPROC_T *p) &rp->nr_client); if (error == 0) { NFSLOCKNAMEID(); - NFSD_VNET(nfsrv_nfsuserd) = RUNNING; + VNET(nfsrv_nfsuserd) = RUNNING; NFSUNLOCKNAMEID(); } else { free(rp->nr_nam, M_SONAME); NFSLOCKNAMEID(); - NFSD_VNET(nfsrv_nfsuserd) = NOTRUNNING; + VNET(nfsrv_nfsuserd) = NOTRUNNING; NFSUNLOCKNAMEID(); } out: @@ -4223,20 +4223,21 @@ nfsrv_nfsuserddelport(void) { NFSLOCKNAMEID(); - if (NFSD_VNET(nfsrv_nfsuserd) != RUNNING) { + if (VNET(nfsrv_nfsuserd) != RUNNING) { NFSUNLOCKNAMEID(); return; } - NFSD_VNET(nfsrv_nfsuserd) = STARTSTOP; + VNET(nfsrv_nfsuserd) = STARTSTOP; /* Wait for all upcalls to complete. 
*/ - while (NFSD_VNET(nfsrv_userdupcalls) > 0) - msleep(&NFSD_VNET(nfsrv_userdupcalls), NFSNAMEIDMUTEXPTR, PVFS, + while (VNET(nfsrv_userdupcalls) > 0) + msleep(&VNET(nfsrv_userdupcalls), NFSNAMEIDMUTEXPTR, PVFS, "nfsupcalls", 0); NFSUNLOCKNAMEID(); - newnfs_disconnect(NULL, &NFSD_VNET(nfsrv_nfsuserdsock)); - free(NFSD_VNET(nfsrv_nfsuserdsock).nr_nam, M_SONAME); + newnfs_disconnect(NULL, &VNET(nfsrv_nfsuserdsock)); + free(VNET(nfsrv_nfsuserdsock).nr_nam, M_SONAME); + crfree(VNET(nfsrv_nfsuserdsock).nr_cred); NFSLOCKNAMEID(); - NFSD_VNET(nfsrv_nfsuserd) = NOTRUNNING; + VNET(nfsrv_nfsuserd) = NOTRUNNING; NFSUNLOCKNAMEID(); } @@ -4256,7 +4257,7 @@ nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name) int error; NFSLOCKNAMEID(); - if (NFSD_VNET(nfsrv_nfsuserd) != RUNNING) { + if (VNET(nfsrv_nfsuserd) != RUNNING) { NFSUNLOCKNAMEID(); error = EPERM; goto out; @@ -4265,9 +4266,9 @@ nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name) * Maintain a count of upcalls in progress, so that nfsrv_X() * can wait until no upcalls are in progress. 
*/ - NFSD_VNET(nfsrv_userdupcalls)++; + VNET(nfsrv_userdupcalls)++; NFSUNLOCKNAMEID(); - KASSERT(NFSD_VNET(nfsrv_userdupcalls) > 0, + KASSERT(VNET(nfsrv_userdupcalls) > 0, ("nfsrv_getuser: non-positive upcalls")); nd = &nfsd; cred = newnfs_getcred(); @@ -4285,13 +4286,13 @@ nfsrv_getuser(int procnum, uid_t uid, gid_t gid, char *name) len = strlen(name); (void) nfsm_strtom(nd, name, len); } - error = newnfs_request(nd, NULL, NULL, &NFSD_VNET(nfsrv_nfsuserdsock), + error = newnfs_request(nd, NULL, NULL, &VNET(nfsrv_nfsuserdsock), NULL, NULL, cred, RPCPROG_NFSUSERD, RPCNFSUSERD_VERS, NULL, 0, NULL, NULL); NFSLOCKNAMEID(); - if (--NFSD_VNET(nfsrv_userdupcalls) == 0 && - NFSD_VNET(nfsrv_nfsuserd) == STARTSTOP) - wakeup(&NFSD_VNET(nfsrv_userdupcalls)); + if (--VNET(nfsrv_userdupcalls) == 0 && + VNET(nfsrv_nfsuserd) == STARTSTOP) + wakeup(&VNET(nfsrv_userdupcalls)); NFSUNLOCKNAMEID(); NFSFREECRED(cred); if (!error) { @@ -4352,7 +4353,7 @@ nfssvc_idname(struct nfsd_idargs *nidp) free(cp, M_NFSSTRING); goto out; } - if (atomic_cmpset_acq_int(&NFSD_VNET(nfsrv_dnsnamelen), 0, 0) == + if (atomic_cmpset_acq_int(&VNET(nfsrv_dnsnamelen), 0, 0) == 0) { /* * Free up all the old stuff and reinitialize hash @@ -4361,80 +4362,80 @@ nfssvc_idname(struct nfsd_idargs *nidp) * ones, to avoid a LOR. 
*/ for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsusernamehash)[i].mtx); + mtx_lock(&VNET(nfsusernamehash)[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsuserhash)[i].mtx); + mtx_lock(&VNET(nfsuserhash)[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, - &NFSD_VNET(nfsuserhash)[i].lughead, lug_numhash, nusrp) + &VNET(nfsuserhash)[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 1); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_unlock(&NFSD_VNET(nfsuserhash)[i].mtx); + mtx_unlock(&VNET(nfsuserhash)[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_unlock(&NFSD_VNET(nfsusernamehash)[i].mtx); + mtx_unlock(&VNET(nfsusernamehash)[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsgroupnamehash)[i].mtx); + mtx_lock(&VNET(nfsgroupnamehash)[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsgrouphash)[i].mtx); + mtx_lock(&VNET(nfsgrouphash)[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) TAILQ_FOREACH_SAFE(usrp, - &NFSD_VNET(nfsgrouphash)[i].lughead, lug_numhash, + &VNET(nfsgrouphash)[i].lughead, lug_numhash, nusrp) nfsrv_removeuser(usrp, 0); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_unlock(&NFSD_VNET(nfsgrouphash)[i].mtx); + mtx_unlock(&VNET(nfsgrouphash)[i].mtx); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_unlock(&NFSD_VNET(nfsgroupnamehash)[i].mtx); - free(NFSD_VNET(nfsrv_dnsname), M_NFSSTRING); - NFSD_VNET(nfsrv_dnsname) = NULL; + mtx_unlock(&VNET(nfsgroupnamehash)[i].mtx); + free(VNET(nfsrv_dnsname), M_NFSSTRING); + VNET(nfsrv_dnsname) = NULL; } - if (NFSD_VNET(nfsuserhash) == NULL) { + if (VNET(nfsuserhash) == NULL) { /* Allocate the hash tables. 
*/ - NFSD_VNET(nfsuserhash) = malloc(sizeof(struct nfsrv_lughash) * + VNET(nfsuserhash) = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_init(&NFSD_VNET(nfsuserhash)[i].mtx, "nfsuidhash", + mtx_init(&VNET(nfsuserhash)[i].mtx, "nfsuidhash", NULL, MTX_DEF | MTX_DUPOK); - NFSD_VNET(nfsusernamehash) = malloc(sizeof(struct nfsrv_lughash) * + VNET(nfsusernamehash) = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_init(&NFSD_VNET(nfsusernamehash)[i].mtx, + mtx_init(&VNET(nfsusernamehash)[i].mtx, "nfsusrhash", NULL, MTX_DEF | MTX_DUPOK); - NFSD_VNET(nfsgrouphash) = malloc(sizeof(struct nfsrv_lughash) * + VNET(nfsgrouphash) = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_init(&NFSD_VNET(nfsgrouphash)[i].mtx, "nfsgidhash", + mtx_init(&VNET(nfsgrouphash)[i].mtx, "nfsgidhash", NULL, MTX_DEF | MTX_DUPOK); - NFSD_VNET(nfsgroupnamehash) = malloc(sizeof(struct nfsrv_lughash) * + VNET(nfsgroupnamehash) = malloc(sizeof(struct nfsrv_lughash) * nfsrv_lughashsize, M_NFSUSERGROUP, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_init(&NFSD_VNET(nfsgroupnamehash)[i].mtx, + mtx_init(&VNET(nfsgroupnamehash)[i].mtx, "nfsgrphash", NULL, MTX_DEF | MTX_DUPOK); } /* (Re)initialize the list heads. 
*/ for (i = 0; i < nfsrv_lughashsize; i++) - TAILQ_INIT(&NFSD_VNET(nfsuserhash)[i].lughead); + TAILQ_INIT(&VNET(nfsuserhash)[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) - TAILQ_INIT(&NFSD_VNET(nfsusernamehash)[i].lughead); + TAILQ_INIT(&VNET(nfsusernamehash)[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) - TAILQ_INIT(&NFSD_VNET(nfsgrouphash)[i].lughead); + TAILQ_INIT(&VNET(nfsgrouphash)[i].lughead); for (i = 0; i < nfsrv_lughashsize; i++) - TAILQ_INIT(&NFSD_VNET(nfsgroupnamehash)[i].lughead); + TAILQ_INIT(&VNET(nfsgroupnamehash)[i].lughead); /* * Put name in "DNS" string. */ - NFSD_VNET(nfsrv_dnsname) = cp; - NFSD_VNET(nfsrv_defaultuid) = nidp->nid_uid; - NFSD_VNET(nfsrv_defaultgid) = nidp->nid_gid; - NFSD_VNET(nfsrv_usercnt) = 0; - NFSD_VNET(nfsrv_usermax) = nidp->nid_usermax; - atomic_store_rel_int(&NFSD_VNET(nfsrv_dnsnamelen), + VNET(nfsrv_dnsname) = cp; + VNET(nfsrv_defaultuid) = nidp->nid_uid; + VNET(nfsrv_defaultgid) = nidp->nid_gid; + VNET(nfsrv_usercnt) = 0; + VNET(nfsrv_usermax) = nidp->nid_usermax; + atomic_store_rel_int(&VNET(nfsrv_dnsnamelen), nidp->nid_namelen); goto out; } @@ -4504,7 +4505,7 @@ nfssvc_idname(struct nfsd_idargs *nidp) if (nidp->nid_flag & (NFSID_DELUID | NFSID_ADDUID)) { /* Must lock all username hash lists first, to avoid a LOR. */ for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsusernamehash)[i].mtx); + mtx_lock(&VNET(nfsusernamehash)[i].mtx); username_locked = 1; hp_idnum = NFSUSERHASH(nidp->nid_uid); mtx_lock(&hp_idnum->mtx); @@ -4533,7 +4534,7 @@ nfssvc_idname(struct nfsd_idargs *nidp) } else if (nidp->nid_flag & (NFSID_DELGID | NFSID_ADDGID)) { /* Must lock all groupname hash lists first, to avoid a LOR. 
*/ for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsgroupnamehash)[i].mtx); + mtx_lock(&VNET(nfsgroupnamehash)[i].mtx); groupname_locked = 1; hp_idnum = NFSGROUPHASH(nidp->nid_gid); mtx_lock(&hp_idnum->mtx); @@ -4580,7 +4581,7 @@ nfssvc_idname(struct nfsd_idargs *nidp) thp = NFSUSERNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); - atomic_add_int(&NFSD_VNET(nfsrv_usercnt), 1); + atomic_add_int(&VNET(nfsrv_usercnt), 1); } else if (nidp->nid_flag & (NFSID_ADDGID | NFSID_ADDGROUPNAME)) { if (nfs_nfsv4root && nfs_in_prime(NFSID_ADDGID, nidp->nid_uid, nidp->nid_gid)) @@ -4592,7 +4593,7 @@ nfssvc_idname(struct nfsd_idargs *nidp) thp = NFSGROUPNAMEHASH(newusrp->lug_name, newusrp->lug_namelen); mtx_assert(&thp->mtx, MA_OWNED); TAILQ_INSERT_TAIL(&thp->lughead, newusrp, lug_namehash); - atomic_add_int(&NFSD_VNET(nfsrv_usercnt), 1); + atomic_add_int(&VNET(nfsrv_usercnt), 1); } else { if (newusrp->lug_cred != NULL) crfree(newusrp->lug_cred); @@ -4621,17 +4622,17 @@ nfssvc_idname(struct nfsd_idargs *nidp) NFSID_DELUSERNAME | NFSID_ADDUSERNAME)) != 0) { if (username_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsusernamehash)[i].mtx); + mtx_lock(&VNET(nfsusernamehash)[i].mtx); username_locked = 1; } KASSERT(user_locked == 0, ("nfssvc_idname: user_locked")); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsuserhash)[i].mtx); + mtx_lock(&VNET(nfsuserhash)[i].mtx); user_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, - &NFSD_VNET(nfsuserhash)[i].lughead, lug_numhash, + &VNET(nfsuserhash)[i].lughead, lug_numhash, nusrp) if (!usrp->lug_wired && usrp->lug_expiry < NFSD_MONOSEC) @@ -4643,26 +4644,26 @@ nfssvc_idname(struct nfsd_idargs *nidp) * algorithm. This code deletes the least * recently used entry on each hash list. 
*/ - if (NFSD_VNET(nfsrv_usercnt) <= NFSD_VNET(nfsrv_usermax)) + if (VNET(nfsrv_usercnt) <= VNET(nfsrv_usermax)) break; - usrp = TAILQ_FIRST(&NFSD_VNET(nfsuserhash)[i].lughead); + usrp = TAILQ_FIRST(&VNET(nfsuserhash)[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 1); } } else { if (groupname_locked == 0) { for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsgroupnamehash)[i].mtx); + mtx_lock(&VNET(nfsgroupnamehash)[i].mtx); groupname_locked = 1; } KASSERT(group_locked == 0, ("nfssvc_idname: group_locked")); for (i = 0; i < nfsrv_lughashsize; i++) - mtx_lock(&NFSD_VNET(nfsgrouphash)[i].mtx); + mtx_lock(&VNET(nfsgrouphash)[i].mtx); group_locked = 1; for (i = 0; i < nfsrv_lughashsize; i++) { TAILQ_FOREACH_SAFE(usrp, - &NFSD_VNET(nfsgrouphash)[i].lughead, lug_numhash, + &VNET(nfsgrouphash)[i].lughead, lug_numhash, nusrp) if (!usrp->lug_wired && usrp->lug_expiry < NFSD_MONOSEC) @@ -4674,9 +4675,9 @@ nfssvc_idname(struct nfsd_idargs *nidp) * algorithm. This code deletes the least * recently user entry on each hash list. 
*/ - if (NFSD_VNET(nfsrv_usercnt) <= NFSD_VNET(nfsrv_usermax)) + if (VNET(nfsrv_usercnt) <= VNET(nfsrv_usermax)) break; - usrp = TAILQ_FIRST(&NFSD_VNET(nfsgrouphash)[i].lughead); + usrp = TAILQ_FIRST(&VNET(nfsgrouphash)[i].lughead); if (usrp != NULL) nfsrv_removeuser(usrp, 0); } @@ -4692,16 +4693,16 @@ nfssvc_idname(struct nfsd_idargs *nidp) mtx_unlock(&hp_name->mtx); if (user_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) - mtx_unlock(&NFSD_VNET(nfsuserhash)[i].mtx); + mtx_unlock(&VNET(nfsuserhash)[i].mtx); if (username_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) - mtx_unlock(&NFSD_VNET(nfsusernamehash)[i].mtx); + mtx_unlock(&VNET(nfsusernamehash)[i].mtx); if (group_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) - mtx_unlock(&NFSD_VNET(nfsgrouphash)[i].mtx); + mtx_unlock(&VNET(nfsgrouphash)[i].mtx); if (groupname_locked != 0) for (i = 0; i < nfsrv_lughashsize; i++) - mtx_unlock(&NFSD_VNET(nfsgroupnamehash)[i].mtx); + mtx_unlock(&VNET(nfsgroupnamehash)[i].mtx); out: NFSEXITCODE(error); return (error); @@ -4730,7 +4731,7 @@ nfsrv_removeuser(struct nfsusrgrp *usrp, int isuser) mtx_assert(&hp->mtx, MA_OWNED); TAILQ_REMOVE(&hp->lughead, usrp, lug_namehash); } - atomic_add_int(&NFSD_VNET(nfsrv_usercnt), -1); + atomic_add_int(&VNET(nfsrv_usercnt), -1); if (usrp->lug_cred != NULL) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); @@ -4749,11 +4750,11 @@ nfsrv_cleanusergroup(void) struct nfsusrgrp *nusrp, *usrp; int i; - if (NFSD_VNET(nfsuserhash) == NULL) + if (VNET(nfsuserhash) == NULL) return; for (i = 0; i < nfsrv_lughashsize; i++) { - hp = &NFSD_VNET(nfsuserhash)[i]; + hp = &VNET(nfsuserhash)[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSUSERNAMEHASH(usrp->lug_name, @@ -4763,7 +4764,7 @@ nfsrv_cleanusergroup(void) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } - hp = &NFSD_VNET(nfsgrouphash)[i]; + hp = &VNET(nfsgrouphash)[i]; TAILQ_FOREACH_SAFE(usrp, &hp->lughead, 
lug_numhash, nusrp) { TAILQ_REMOVE(&hp->lughead, usrp, lug_numhash); hp2 = NFSGROUPNAMEHASH(usrp->lug_name, @@ -4773,16 +4774,16 @@ nfsrv_cleanusergroup(void) crfree(usrp->lug_cred); free(usrp, M_NFSUSERGROUP); } - mtx_destroy(&NFSD_VNET(nfsuserhash)[i].mtx); - mtx_destroy(&NFSD_VNET(nfsusernamehash)[i].mtx); - mtx_destroy(&NFSD_VNET(nfsgroupnamehash)[i].mtx); - mtx_destroy(&NFSD_VNET(nfsgrouphash)[i].mtx); + mtx_destroy(&VNET(nfsuserhash)[i].mtx); + mtx_destroy(&VNET(nfsusernamehash)[i].mtx); + mtx_destroy(&VNET(nfsgroupnamehash)[i].mtx); + mtx_destroy(&VNET(nfsgrouphash)[i].mtx); } - free(NFSD_VNET(nfsuserhash), M_NFSUSERGROUP); - free(NFSD_VNET(nfsusernamehash), M_NFSUSERGROUP); - free(NFSD_VNET(nfsgrouphash), M_NFSUSERGROUP); - free(NFSD_VNET(nfsgroupnamehash), M_NFSUSERGROUP); - free(NFSD_VNET(nfsrv_dnsname), M_NFSSTRING); + free(VNET(nfsuserhash), M_NFSUSERGROUP); + free(VNET(nfsusernamehash), M_NFSUSERGROUP); + free(VNET(nfsgrouphash), M_NFSUSERGROUP); + free(VNET(nfsgroupnamehash), M_NFSUSERGROUP); + free(VNET(nfsrv_dnsname), M_NFSSTRING); } /* diff --git a/sys/fs/nfs/nfsdport.h b/sys/fs/nfs/nfsdport.h index 6439ef921d29..33190274eb86 100644 --- a/sys/fs/nfs/nfsdport.h +++ b/sys/fs/nfs/nfsdport.h @@ -92,7 +92,7 @@ struct nfsexstuff { bcmp(&(f1)->fh_fid, &(f2)->fh_fid, sizeof(struct fid)) == 0) #define NFSLOCKHASH(f) \ - (&NFSD_VNET(nfslockhash)[nfsrv_hashfh(f) % nfsrv_lockhashsize]) + (&VNET(nfslockhash)[nfsrv_hashfh(f) % nfsrv_lockhashsize]) #define NFSFPVNODE(f) ((f)->f_vnode) #define NFSFPCRED(f) ((f)->f_cred) diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h index f6b6cfb22908..91345bde3441 100644 --- a/sys/fs/nfs/nfsport.h +++ b/sys/fs/nfs/nfsport.h @@ -179,20 +179,6 @@ */ #define NFSMUTEX_T struct mtx -/* Just define the NFSD_VNETxxx() macros as VNETxxx() macros. 
*/ -#define NFSD_VNET_NAME(n) VNET_NAME(n) -#define NFSD_VNET_DECLARE(t, n) VNET_DECLARE(t, n) -#define NFSD_VNET_DEFINE(t, n) VNET_DEFINE(t, n) -#define NFSD_VNET_DEFINE_STATIC(t, n) VNET_DEFINE_STATIC(t, n) -#define NFSD_VNET(n) VNET(n) - -#define CTLFLAG_NFSD_VNET CTLFLAG_VNET - -#define NFSD_CURVNET_SET(n) CURVNET_SET(n) -#define NFSD_CURVNET_SET_QUIET(n) CURVNET_SET_QUIET(n) -#define NFSD_CURVNET_RESTORE() CURVNET_RESTORE() -#define NFSD_TD_TO_VNET(n) TD_TO_VNET(n) - #endif /* _KERNEL */ /* diff --git a/sys/fs/nfs/nfsrvstate.h b/sys/fs/nfs/nfsrvstate.h index 858c52ec6218..0e93f87234b2 100644 --- a/sys/fs/nfs/nfsrvstate.h +++ b/sys/fs/nfs/nfsrvstate.h @@ -56,18 +56,18 @@ LIST_HEAD(nfsdontlisthead, nfsdontlist); TAILQ_HEAD(nfsuserhashhead, nfsusrgrp); #define NFSCLIENTHASH(id) \ - (&NFSD_VNET(nfsclienthash)[(id).lval[1] % nfsrv_clienthashsize]) + (&VNET(nfsclienthash)[(id).lval[1] % nfsrv_clienthashsize]) #define NFSSTATEHASH(clp, id) \ (&((clp)->lc_stateid[(id).other[2] % nfsrv_statehashsize])) #define NFSUSERHASH(id) \ - (&NFSD_VNET(nfsuserhash)[(id) % nfsrv_lughashsize]) + (&VNET(nfsuserhash)[(id) % nfsrv_lughashsize]) #define NFSUSERNAMEHASH(p, l) \ - (&NFSD_VNET(nfsusernamehash)[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \ + (&VNET(nfsusernamehash)[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \ % nfsrv_lughashsize]) #define NFSGROUPHASH(id) \ - (&NFSD_VNET(nfsgrouphash)[(id) % nfsrv_lughashsize]) + (&VNET(nfsgrouphash)[(id) % nfsrv_lughashsize]) #define NFSGROUPNAMEHASH(p, l) \ - (&NFSD_VNET(nfsgroupnamehash)[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \ + (&VNET(nfsgroupnamehash)[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \ % nfsrv_lughashsize]) struct nfssessionhash { @@ -75,7 +75,7 @@ struct nfssessionhash { struct nfssessionhashhead list; }; #define NFSSESSIONHASH(f) \ - (&NFSD_VNET(nfssessionhash)[nfsrv_hashsessionid(f) % \ + (&VNET(nfssessionhash)[nfsrv_hashsessionid(f) % \ nfsrv_sessionhashsize]) struct nfslayouthash { diff --git 
a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index bc5f74b79749..6573144fc8d3 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -1254,7 +1254,7 @@ nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) struct mount *mp; struct nfsmount *nmp; - NFSD_CURVNET_SET(NFSD_TD_TO_VNET(td)); + CURVNET_SET(TD_TO_VNET(td)); if (uap->flag & NFSSVC_CBADDSOCK) { error = copyin(uap->argp, (caddr_t)&nfscbdarg, sizeof(nfscbdarg)); if (error) @@ -1374,7 +1374,7 @@ nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) error = EINVAL; } out: - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (error); } diff --git a/sys/fs/nfsclient/nfs_clsubs.c b/sys/fs/nfsclient/nfs_clsubs.c index ae9fa51947cc..6361ae5f2901 100644 --- a/sys/fs/nfsclient/nfs_clsubs.c +++ b/sys/fs/nfsclient/nfs_clsubs.c @@ -263,9 +263,17 @@ nfsuint64 * ncl_getcookie(struct nfsnode *np, off_t off, int add) { struct nfsdmap *dp, *dp2; - int pos; + u_int pos; nfsuint64 *retval = NULL; + /* + * Limiting "off" to 50Gbytes sets a limit of 100 million directory + * entries of maximum filename length. Much more with shorter + * file names. This limit ensures "pos" will not be truncated + * in the division below. 
+ */ + if (off > 53687091200ull) + goto out; pos = (uoff_t)off / NFS_DIRBLKSIZ; if (pos == 0 || off < 0) { KASSERT(!add, ("nfs getcookie add at <= 0")); diff --git a/sys/fs/nfsclient/nfsnode.h b/sys/fs/nfsclient/nfsnode.h index 07c7ccb0ff10..cd9ded943c2f 100644 --- a/sys/fs/nfsclient/nfsnode.h +++ b/sys/fs/nfsclient/nfsnode.h @@ -61,7 +61,7 @@ struct sillyrename { struct nfsdmap { LIST_ENTRY(nfsdmap) ndm_list; - int ndm_eocookie; + u_int ndm_eocookie; union { nfsuint64 ndmu3_cookies[NFSNUMCOOKIES]; uint64_t ndmu4_cookies[NFSNUMCOOKIES]; diff --git a/sys/fs/nfsserver/nfs_fha_new.c b/sys/fs/nfsserver/nfs_fha_new.c index e3f70490b818..dc22cfc418e5 100644 --- a/sys/fs/nfsserver/nfs_fha_new.c +++ b/sys/fs/nfsserver/nfs_fha_new.c @@ -53,42 +53,42 @@ static int fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS); static void fha_extract_info(struct svc_req *req, struct fha_info *i); -NFSD_VNET_DEFINE_STATIC(struct fha_params *, fhanew_softc); -NFSD_VNET_DEFINE_STATIC(struct fha_ctls, nfsfha_ctls); +VNET_DEFINE_STATIC(struct fha_params *, fhanew_softc); +VNET_DEFINE_STATIC(struct fha_ctls, nfsfha_ctls); SYSCTL_DECL(_vfs_nfsd); SYSCTL_NODE(_vfs_nfsd, OID_AUTO, fha, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "NFS File Handle Affinity (FHA)"); SYSCTL_UINT(_vfs_nfsd_fha, - OID_AUTO, enable, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, - &NFSD_VNET_NAME(nfsfha_ctls).enable, 0, + OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RWTUN, + &VNET_NAME(nfsfha_ctls).enable, 0, "Enable NFS File Handle Affinity (FHA)"); SYSCTL_UINT(_vfs_nfsd_fha, - OID_AUTO, read, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, - &NFSD_VNET_NAME(nfsfha_ctls).read, 0, + OID_AUTO, read, CTLFLAG_VNET | CTLFLAG_RWTUN, + &VNET_NAME(nfsfha_ctls).read, 0, "Enable NFS FHA read locality"); SYSCTL_UINT(_vfs_nfsd_fha, - OID_AUTO, write, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, - &NFSD_VNET_NAME(nfsfha_ctls).write, 0, + OID_AUTO, write, CTLFLAG_VNET | CTLFLAG_RWTUN, + &VNET_NAME(nfsfha_ctls).write, 0, "Enable NFS FHA write locality"); SYSCTL_UINT(_vfs_nfsd_fha, - OID_AUTO, 
bin_shift, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, - &NFSD_VNET_NAME(nfsfha_ctls).bin_shift, 0, + OID_AUTO, bin_shift, CTLFLAG_VNET | CTLFLAG_RWTUN, + &VNET_NAME(nfsfha_ctls).bin_shift, 0, "Maximum locality distance 2^(bin_shift) bytes"); SYSCTL_UINT(_vfs_nfsd_fha, - OID_AUTO, max_nfsds_per_fh, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, - &NFSD_VNET_NAME(nfsfha_ctls).max_nfsds_per_fh, 0, + OID_AUTO, max_nfsds_per_fh, CTLFLAG_VNET | CTLFLAG_RWTUN, + &VNET_NAME(nfsfha_ctls).max_nfsds_per_fh, 0, "Maximum nfsd threads that " "should be working on requests for the same file handle"); SYSCTL_UINT(_vfs_nfsd_fha, - OID_AUTO, max_reqs_per_nfsd, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, - &NFSD_VNET_NAME(nfsfha_ctls).max_reqs_per_nfsd, 0, "Maximum requests that " + OID_AUTO, max_reqs_per_nfsd, CTLFLAG_VNET | CTLFLAG_RWTUN, + &VNET_NAME(nfsfha_ctls).max_reqs_per_nfsd, 0, "Maximum requests that " "single nfsd thread should be working on at any time"); SYSCTL_PROC(_vfs_nfsd_fha, OID_AUTO, fhe_stats, @@ -106,9 +106,9 @@ fhanew_init(void *foo) struct fha_params *softc; int i; - NFSD_VNET(fhanew_softc) = malloc(sizeof(struct fha_params), M_TEMP, + VNET(fhanew_softc) = malloc(sizeof(struct fha_params), M_TEMP, M_WAITOK | M_ZERO); - softc = NFSD_VNET(fhanew_softc); + softc = VNET(fhanew_softc); snprintf(softc->server_name, sizeof(softc->server_name), FHANEW_SERVER_NAME); @@ -119,12 +119,12 @@ fhanew_init(void *foo) /* * Set the default tuning parameters. 
*/ - NFSD_VNET(nfsfha_ctls).enable = FHA_DEF_ENABLE; - NFSD_VNET(nfsfha_ctls).read = FHA_DEF_READ; - NFSD_VNET(nfsfha_ctls).write = FHA_DEF_WRITE; - NFSD_VNET(nfsfha_ctls).bin_shift = FHA_DEF_BIN_SHIFT; - NFSD_VNET(nfsfha_ctls).max_nfsds_per_fh = FHA_DEF_MAX_NFSDS_PER_FH; - NFSD_VNET(nfsfha_ctls).max_reqs_per_nfsd = FHA_DEF_MAX_REQS_PER_NFSD; + VNET(nfsfha_ctls).enable = FHA_DEF_ENABLE; + VNET(nfsfha_ctls).read = FHA_DEF_READ; + VNET(nfsfha_ctls).write = FHA_DEF_WRITE; + VNET(nfsfha_ctls).bin_shift = FHA_DEF_BIN_SHIFT; + VNET(nfsfha_ctls).max_nfsds_per_fh = FHA_DEF_MAX_NFSDS_PER_FH; + VNET(nfsfha_ctls).max_reqs_per_nfsd = FHA_DEF_MAX_REQS_PER_NFSD; } @@ -134,7 +134,7 @@ fhanew_uninit(void *foo) struct fha_params *softc; int i; - softc = NFSD_VNET(fhanew_softc); + softc = VNET(fhanew_softc); for (i = 0; i < FHA_HASH_SIZE; i++) mtx_destroy(&softc->fha_hash[i].mtx); @@ -474,8 +474,8 @@ fha_hash_entry_choose_thread(struct fha_params *softc, } /* Check whether we should consider locality. */ - if ((i->read && !NFSD_VNET(nfsfha_ctls).read) || - (i->write && !NFSD_VNET(nfsfha_ctls).write)) + if ((i->read && !VNET(nfsfha_ctls).read) || + (i->write && !VNET(nfsfha_ctls).write)) goto noloc; /* @@ -486,11 +486,11 @@ fha_hash_entry_choose_thread(struct fha_params *softc, offset2 = thread->st_p3; if (((offset1 >= offset2) - && ((offset1 - offset2) < (1 << NFSD_VNET(nfsfha_ctls).bin_shift))) + && ((offset1 - offset2) < (1 << VNET(nfsfha_ctls).bin_shift))) || ((offset2 > offset1) - && ((offset2 - offset1) < (1 << NFSD_VNET(nfsfha_ctls).bin_shift)))) { - if ((NFSD_VNET(nfsfha_ctls).max_reqs_per_nfsd == 0) || - (req_count < NFSD_VNET(nfsfha_ctls).max_reqs_per_nfsd)) { + && ((offset2 - offset1) < (1 << VNET(nfsfha_ctls).bin_shift)))) { + if ((VNET(nfsfha_ctls).max_reqs_per_nfsd == 0) || + (req_count < VNET(nfsfha_ctls).max_reqs_per_nfsd)) { #if 0 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO, "fha: %p(%d)r", thread, req_count); @@ -520,8 +520,8 @@ noloc: * We didn't find a good match yet. 
See if we can add * a new thread to this file handle entry's thread list. */ - if ((NFSD_VNET(nfsfha_ctls).max_nfsds_per_fh == 0) || - (fhe->num_threads < NFSD_VNET(nfsfha_ctls).max_nfsds_per_fh)) { + if ((VNET(nfsfha_ctls).max_nfsds_per_fh == 0) || + (fhe->num_threads < VNET(nfsfha_ctls).max_nfsds_per_fh)) { thread = this_thread; #if 0 ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO, @@ -551,10 +551,10 @@ fhanew_assign(SVCTHREAD *this_thread, struct svc_req *req) struct fha_info i; struct fha_hash_entry *fhe; - NFSD_CURVNET_SET(NFSD_TD_TO_VNET(curthread)); - softc = NFSD_VNET(fhanew_softc); + CURVNET_SET(TD_TO_VNET(curthread)); + softc = VNET(fhanew_softc); /* Check to see whether we're enabled. */ - if (NFSD_VNET(nfsfha_ctls).enable == 0) + if (VNET(nfsfha_ctls).enable == 0) goto thist; /* @@ -594,11 +594,11 @@ fhanew_assign(SVCTHREAD *this_thread, struct svc_req *req) mtx_lock(&thread->st_lock); mtx_unlock(fhe->mtx); - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (thread); thist: req->rq_p1 = NULL; - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); mtx_lock(&this_thread->st_lock); return (this_thread); } @@ -613,13 +613,13 @@ fhanew_nd_complete(SVCTHREAD *thread, struct svc_req *req) struct fha_hash_entry *fhe = req->rq_p1; struct mtx *mtx; - NFSD_CURVNET_SET(NFSD_TD_TO_VNET(curthread)); + CURVNET_SET(TD_TO_VNET(curthread)); /* * This may be called for reqs that didn't go through * fha_assign (e.g. extra NULL ops used for RPCSEC_GSS. 
*/ if (!fhe) { - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); return; } @@ -635,7 +635,7 @@ fhanew_nd_complete(SVCTHREAD *thread, struct svc_req *req) fha_hash_entry_remove(fhe); } mtx_unlock(mtx); - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); } static int @@ -650,8 +650,8 @@ fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS) sbuf_new(&sb, NULL, 65536, SBUF_FIXEDLEN); - NFSD_CURVNET_SET(NFSD_TD_TO_VNET(curthread)); - softc = NFSD_VNET(fhanew_softc); + CURVNET_SET(TD_TO_VNET(curthread)); + softc = VNET(fhanew_softc); for (i = 0; i < FHA_HASH_SIZE; i++) if (!LIST_EMPTY(&softc->fha_hash[i].list)) break; @@ -694,7 +694,7 @@ fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS) } out: - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); sbuf_trim(&sb); sbuf_finish(&sb); error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req); diff --git a/sys/fs/nfsserver/nfs_nfsdcache.c b/sys/fs/nfsserver/nfs_nfsdcache.c index de72187bbb91..9f2732fd005b 100644 --- a/sys/fs/nfsserver/nfs_nfsdcache.c +++ b/sys/fs/nfsserver/nfs_nfsdcache.c @@ -160,13 +160,13 @@ extern struct mtx nfsrc_udpmtx; -NFSD_VNET_DECLARE(struct nfsrvhashhead *, nfsrvudphashtbl); -NFSD_VNET_DECLARE(struct nfsrchash_bucket *, nfsrchash_table); -NFSD_VNET_DECLARE(struct nfsrchash_bucket *, nfsrcahash_table); -NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); +VNET_DECLARE(struct nfsrvhashhead *, nfsrvudphashtbl); +VNET_DECLARE(struct nfsrchash_bucket *, nfsrchash_table); +VNET_DECLARE(struct nfsrchash_bucket *, nfsrcahash_table); +VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); -NFSD_VNET_DEFINE(int, nfsrc_floodlevel) = NFSRVCACHE_FLOODLEVEL; -NFSD_VNET_DEFINE(int, nfsrc_tcpsavedreplies) = 0; +VNET_DEFINE(int, nfsrc_floodlevel) = NFSRVCACHE_FLOODLEVEL; +VNET_DEFINE(int, nfsrc_tcpsavedreplies) = 0; SYSCTL_DECL(_vfs_nfsd); @@ -182,8 +182,8 @@ sysctl_tcphighwater(SYSCTL_HANDLER_ARGS) return (error); if (newhighwater < 0) return (EINVAL); - if (newhighwater >= NFSD_VNET(nfsrc_floodlevel)) - NFSD_VNET(nfsrc_floodlevel) = 
newhighwater + newhighwater / 5; + if (newhighwater >= VNET(nfsrc_floodlevel)) + VNET(nfsrc_floodlevel) = newhighwater + newhighwater / 5; nfsrc_tcphighwater = newhighwater; return (0); } @@ -204,8 +204,8 @@ SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW, &nfsrc_tcpnonidempotent, 0, "Enable the DRC for NFS over TCP"); -NFSD_VNET_DEFINE_STATIC(int, nfsrc_udpcachesize) = 0; -NFSD_VNET_DEFINE_STATIC(TAILQ_HEAD(, nfsrvcache), nfsrvudplru); +VNET_DEFINE_STATIC(int, nfsrc_udpcachesize) = 0; +VNET_DEFINE_STATIC(TAILQ_HEAD(, nfsrvcache), nfsrvudplru); /* * and the reverse mapping from generic to Version 2 procedure numbers @@ -237,10 +237,10 @@ static int newnfsv2_procid[NFS_V3NPROCS] = { #define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE) #define NFSRCUDPHASH(xid) \ - (&NFSD_VNET(nfsrvudphashtbl)[nfsrc_hash(xid)]) + (&VNET(nfsrvudphashtbl)[nfsrc_hash(xid)]) #define NFSRCHASH(xid) \ - (&NFSD_VNET(nfsrchash_table)[nfsrc_hash(xid)].tbl) -#define NFSRCAHASH(xid) (&NFSD_VNET(nfsrcahash_table)[nfsrc_hash(xid)]) + (&VNET(nfsrchash_table)[nfsrc_hash(xid)].tbl) +#define NFSRCAHASH(xid) (&VNET(nfsrcahash_table)[nfsrc_hash(xid)]) #define TRUE 1 #define FALSE 0 #define NFSRVCACHE_CHECKLEN 100 @@ -296,7 +296,7 @@ nfsrc_cachemutex(struct nfsrvcache *rp) if ((rp->rc_flag & RC_UDP) != 0) return (&nfsrc_udpmtx); - return (&NFSD_VNET(nfsrchash_table)[nfsrc_hash(rp->rc_xid)].mtx); + return (&VNET(nfsrchash_table)[nfsrc_hash(rp->rc_xid)].mtx); } /* @@ -307,26 +307,26 @@ nfsrvd_initcache(void) { int i; - NFSD_VNET(nfsrvudphashtbl) = malloc(sizeof(struct nfsrvhashhead) * + VNET(nfsrvudphashtbl) = malloc(sizeof(struct nfsrvhashhead) * NFSRVCACHE_HASHSIZE, M_NFSRVCACHE, M_WAITOK | M_ZERO); - NFSD_VNET(nfsrchash_table) = malloc(sizeof(struct nfsrchash_bucket) * + VNET(nfsrchash_table) = malloc(sizeof(struct nfsrchash_bucket) * NFSRVCACHE_HASHSIZE, M_NFSRVCACHE, M_WAITOK | M_ZERO); - NFSD_VNET(nfsrcahash_table) = malloc(sizeof(struct nfsrchash_bucket) * + 
VNET(nfsrcahash_table) = malloc(sizeof(struct nfsrchash_bucket) * NFSRVCACHE_HASHSIZE, M_NFSRVCACHE, M_WAITOK | M_ZERO); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - mtx_init(&NFSD_VNET(nfsrchash_table)[i].mtx, "nfsrtc", NULL, + mtx_init(&VNET(nfsrchash_table)[i].mtx, "nfsrtc", NULL, MTX_DEF); - mtx_init(&NFSD_VNET(nfsrcahash_table)[i].mtx, "nfsrtca", NULL, + mtx_init(&VNET(nfsrcahash_table)[i].mtx, "nfsrtca", NULL, MTX_DEF); } for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - LIST_INIT(&NFSD_VNET(nfsrvudphashtbl)[i]); - LIST_INIT(&NFSD_VNET(nfsrchash_table)[i].tbl); - LIST_INIT(&NFSD_VNET(nfsrcahash_table)[i].tbl); + LIST_INIT(&VNET(nfsrvudphashtbl)[i]); + LIST_INIT(&VNET(nfsrchash_table)[i].tbl); + LIST_INIT(&VNET(nfsrcahash_table)[i].tbl); } - TAILQ_INIT(&NFSD_VNET(nfsrvudplru)); - NFSD_VNET(nfsrc_tcpsavedreplies) = 0; - NFSD_VNET(nfsrc_udpcachesize) = 0; + TAILQ_INIT(&VNET(nfsrvudplru)); + VNET(nfsrc_tcpsavedreplies) = 0; + VNET(nfsrc_udpcachesize) = 0; } /* @@ -399,17 +399,17 @@ loop: if (rp->rc_flag == 0) panic("nfs udp cache0"); rp->rc_flag |= RC_LOCKED; - TAILQ_REMOVE(&NFSD_VNET(nfsrvudplru), rp, rc_lru); - TAILQ_INSERT_TAIL(&NFSD_VNET(nfsrvudplru), rp, rc_lru); + TAILQ_REMOVE(&VNET(nfsrvudplru), rp, rc_lru); + TAILQ_INSERT_TAIL(&VNET(nfsrvudplru), rp, rc_lru); if (rp->rc_flag & RC_INPROG) { - NFSD_VNET(nfsstatsv1_p)->srvcache_inproghits++; + VNET(nfsstatsv1_p)->srvcache_inproghits++; mtx_unlock(mutex); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { /* * V2 only. 
*/ - NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; + VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; mtx_unlock(mutex); nfsrvd_rephead(nd); *(nd->nd_errp) = rp->rc_status; @@ -417,7 +417,7 @@ loop: rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_UDPTIMEOUT; } else if (rp->rc_flag & RC_REPMBUF) { - NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; + VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAITOK); @@ -432,9 +432,9 @@ loop: goto out; } } - NFSD_VNET(nfsstatsv1_p)->srvcache_misses++; - atomic_add_int(&NFSD_VNET(nfsstatsv1_p)->srvcache_size, 1); - NFSD_VNET(nfsrc_udpcachesize)++; + VNET(nfsstatsv1_p)->srvcache_misses++; + atomic_add_int(&VNET(nfsstatsv1_p)->srvcache_size, 1); + VNET(nfsrc_udpcachesize)++; newrp->rc_flag |= RC_INPROG; saddr = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); @@ -447,7 +447,7 @@ loop: newrp->rc_flag |= RC_INETIPV6; } LIST_INSERT_HEAD(hp, newrp, rc_hash); - TAILQ_INSERT_TAIL(&NFSD_VNET(nfsrvudplru), newrp, rc_lru); + TAILQ_INSERT_TAIL(&VNET(nfsrvudplru), newrp, rc_lru); mtx_unlock(mutex); nd->nd_rp = newrp; ret = RC_DOIT; @@ -479,15 +479,15 @@ nfsrvd_updatecache(struct nfsrv_descript *nd) panic("nfsrvd_updatecache not inprog"); rp->rc_flag &= ~RC_INPROG; if (rp->rc_flag & RC_UDP) { - TAILQ_REMOVE(&NFSD_VNET(nfsrvudplru), rp, rc_lru); - TAILQ_INSERT_TAIL(&NFSD_VNET(nfsrvudplru), rp, rc_lru); + TAILQ_REMOVE(&VNET(nfsrvudplru), rp, rc_lru); + TAILQ_INSERT_TAIL(&VNET(nfsrvudplru), rp, rc_lru); } /* * Reply from cache is a special case returned by nfsrv_checkseqid(). 
*/ if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { - NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; + VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; mtx_unlock(mutex); nd->nd_repstat = 0; if (nd->nd_mreq) @@ -510,7 +510,7 @@ nfsrvd_updatecache(struct nfsrv_descript *nd) (rp->rc_refcnt > 0 || ((nd->nd_flag & ND_SAVEREPLY) && (rp->rc_flag & RC_UDP)) || ((nd->nd_flag & ND_SAVEREPLY) && !(rp->rc_flag & RC_UDP) && - NFSD_VNET(nfsrc_tcpsavedreplies) <= NFSD_VNET(nfsrc_floodlevel) && + VNET(nfsrc_tcpsavedreplies) <= VNET(nfsrc_floodlevel) && nfsrc_tcpnonidempotent))) { if (rp->rc_refcnt > 0) { if (!(rp->rc_flag & RC_NFSV4)) @@ -524,12 +524,12 @@ nfsrvd_updatecache(struct nfsrv_descript *nd) mtx_unlock(mutex); } else { if (!(rp->rc_flag & RC_UDP)) { - atomic_add_int(&NFSD_VNET(nfsrc_tcpsavedreplies), + atomic_add_int(&VNET(nfsrc_tcpsavedreplies), 1); - if (NFSD_VNET(nfsrc_tcpsavedreplies) > - NFSD_VNET(nfsstatsv1_p)->srvcache_tcppeak) - NFSD_VNET(nfsstatsv1_p)->srvcache_tcppeak = - NFSD_VNET(nfsrc_tcpsavedreplies); + if (VNET(nfsrc_tcpsavedreplies) > + VNET(nfsstatsv1_p)->srvcache_tcppeak) + VNET(nfsstatsv1_p)->srvcache_tcppeak = + VNET(nfsrc_tcpsavedreplies); } mtx_unlock(mutex); m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK); @@ -686,7 +686,7 @@ tryagain: panic("nfs tcp cache0"); rp->rc_flag |= RC_LOCKED; if (rp->rc_flag & RC_INPROG) { - NFSD_VNET(nfsstatsv1_p)->srvcache_inproghits++; + VNET(nfsstatsv1_p)->srvcache_inproghits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -695,7 +695,7 @@ tryagain: /* * V2 only. 
*/ - NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; + VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -704,7 +704,7 @@ tryagain: *(nd->nd_errp) = rp->rc_status; rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; } else if (rp->rc_flag & RC_REPMBUF) { - NFSD_VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; + VNET(nfsstatsv1_p)->srvcache_nonidemdonehits++; mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); @@ -719,8 +719,8 @@ tryagain: free(newrp, M_NFSRVCACHE); goto out; } - NFSD_VNET(nfsstatsv1_p)->srvcache_misses++; - atomic_add_int(&NFSD_VNET(nfsstatsv1_p)->srvcache_size, 1); + VNET(nfsstatsv1_p)->srvcache_misses++; + atomic_add_int(&VNET(nfsstatsv1_p)->srvcache_size, 1); /* * For TCP, multiple entries for a key are allowed, so don't @@ -793,8 +793,8 @@ nfsrc_freecache(struct nfsrvcache *rp) LIST_REMOVE(rp, rc_hash); if (rp->rc_flag & RC_UDP) { - TAILQ_REMOVE(&NFSD_VNET(nfsrvudplru), rp, rc_lru); - NFSD_VNET(nfsrc_udpcachesize)--; + TAILQ_REMOVE(&VNET(nfsrvudplru), rp, rc_lru); + VNET(nfsrc_udpcachesize)--; } else if (rp->rc_acked != RC_NO_SEQ) { hbp = NFSRCAHASH(rp->rc_sockref); mtx_lock(&hbp->mtx); @@ -806,10 +806,10 @@ nfsrc_freecache(struct nfsrvcache *rp) if (rp->rc_flag & RC_REPMBUF) { m_freem(rp->rc_reply); if (!(rp->rc_flag & RC_UDP)) - atomic_add_int(&NFSD_VNET(nfsrc_tcpsavedreplies), -1); + atomic_add_int(&VNET(nfsrc_tcpsavedreplies), -1); } free(rp, M_NFSRVCACHE); - atomic_add_int(&NFSD_VNET(nfsstatsv1_p)->srvcache_size, -1); + atomic_add_int(&VNET(nfsstatsv1_p)->srvcache_size, -1); } /* @@ -822,18 +822,18 @@ nfsrvd_cleancache(void) int i; for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - LIST_FOREACH_SAFE(rp, &NFSD_VNET(nfsrchash_table)[i].tbl, + LIST_FOREACH_SAFE(rp, &VNET(nfsrchash_table)[i].tbl, rc_hash, nextrp) nfsrc_freecache(rp); } for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - 
LIST_FOREACH_SAFE(rp, &NFSD_VNET(nfsrvudphashtbl)[i], rc_hash, + LIST_FOREACH_SAFE(rp, &VNET(nfsrvudphashtbl)[i], rc_hash, nextrp) { nfsrc_freecache(rp); } } - NFSD_VNET(nfsstatsv1_p)->srvcache_size = 0; - NFSD_VNET(nfsrc_tcpsavedreplies) = 0; + VNET(nfsstatsv1_p)->srvcache_size = 0; + VNET(nfsrc_tcpsavedreplies) = 0; } #define HISTSIZE 16 @@ -870,27 +870,27 @@ nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final) if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0) return; if (NFSD_MONOSEC != udp_lasttrim || - NFSD_VNET(nfsrc_udpcachesize) >= (nfsrc_udphighwater + + VNET(nfsrc_udpcachesize) >= (nfsrc_udphighwater + nfsrc_udphighwater / 2)) { mtx_lock(&nfsrc_udpmtx); udp_lasttrim = NFSD_MONOSEC; - TAILQ_FOREACH_SAFE(rp, &NFSD_VNET(nfsrvudplru), rc_lru, + TAILQ_FOREACH_SAFE(rp, &VNET(nfsrvudplru), rc_lru, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) && rp->rc_refcnt == 0 && ((rp->rc_flag & RC_REFCNT) || udp_lasttrim > rp->rc_timestamp || - NFSD_VNET(nfsrc_udpcachesize) > + VNET(nfsrc_udpcachesize) > nfsrc_udphighwater)) nfsrc_freecache(rp); } mtx_unlock(&nfsrc_udpmtx); } if (NFSD_MONOSEC != tcp_lasttrim || - NFSD_VNET(nfsrc_tcpsavedreplies) >= nfsrc_tcphighwater) { + VNET(nfsrc_tcpsavedreplies) >= nfsrc_tcphighwater) { force = nfsrc_tcphighwater / 4; if (force > 0 && - NFSD_VNET(nfsrc_tcpsavedreplies) + force >= + VNET(nfsrc_tcpsavedreplies) + force >= nfsrc_tcphighwater) { for (i = 0; i < HISTSIZE; i++) time_histo[i] = 0; @@ -910,9 +910,9 @@ nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final) tto = nfsrc_tcptimeout; tcp_lasttrim = NFSD_MONOSEC; for (; i <= lastslot; i++) { - mtx_lock(&NFSD_VNET(nfsrchash_table)[i].mtx); + mtx_lock(&VNET(nfsrchash_table)[i].mtx); LIST_FOREACH_SAFE(rp, - &NFSD_VNET(nfsrchash_table)[i].tbl, rc_hash, + &VNET(nfsrchash_table)[i].tbl, rc_hash, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) @@ -942,7 +942,7 @@ nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final) 
time_histo[j]++; } } - mtx_unlock(&NFSD_VNET(nfsrchash_table)[i].mtx); + mtx_unlock(&VNET(nfsrchash_table)[i].mtx); } if (force) { /* @@ -961,9 +961,9 @@ nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final) k = 1; thisstamp = tcp_lasttrim + k; for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - mtx_lock(&NFSD_VNET(nfsrchash_table)[i].mtx); + mtx_lock(&VNET(nfsrchash_table)[i].mtx); LIST_FOREACH_SAFE(rp, - &NFSD_VNET(nfsrchash_table)[i].tbl, + &VNET(nfsrchash_table)[i].tbl, rc_hash, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) @@ -973,7 +973,7 @@ nfsrc_trimcache(u_int64_t sockref, uint32_t snd_una, int final) rp->rc_acked == RC_ACK)) nfsrc_freecache(rp); } - mtx_unlock(&NFSD_VNET(nfsrchash_table)[i].mtx); + mtx_unlock(&VNET(nfsrchash_table)[i].mtx); } } } diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c b/sys/fs/nfsserver/nfs_nfsdkrpc.c index ce1189d40425..8c557143efbd 100644 --- a/sys/fs/nfsserver/nfs_nfsdkrpc.c +++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c @@ -82,19 +82,19 @@ int newnfs_nfsv3_procid[NFS_V3NPROCS] = { SYSCTL_DECL(_vfs_nfsd); -NFSD_VNET_DEFINE_STATIC(int, nfs_privport) = 1; -SYSCTL_INT(_vfs_nfsd, OID_AUTO, nfs_privport, CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, - &NFSD_VNET_NAME(nfs_privport), 0, +VNET_DEFINE_STATIC(int, nfs_privport) = 1; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, nfs_privport, CTLFLAG_VNET | CTLFLAG_RWTUN, + &VNET_NAME(nfs_privport), 0, "Only allow clients using a privileged port for NFSv2, 3 and 4"); -NFSD_VNET_DEFINE_STATIC(int, nfs_minvers) = NFS_VER2; +VNET_DEFINE_STATIC(int, nfs_minvers) = NFS_VER2; SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_min_nfsvers, - CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, &NFSD_VNET_NAME(nfs_minvers), 0, + CTLFLAG_VNET | CTLFLAG_RWTUN, &VNET_NAME(nfs_minvers), 0, "The lowest version of NFS handled by the server"); -NFSD_VNET_DEFINE_STATIC(int, nfs_maxvers) = NFS_VER4; +VNET_DEFINE_STATIC(int, nfs_maxvers) = NFS_VER4; SYSCTL_INT(_vfs_nfsd, OID_AUTO, server_max_nfsvers, - CTLFLAG_NFSD_VNET | CTLFLAG_RWTUN, 
&NFSD_VNET_NAME(nfs_maxvers), 0, + CTLFLAG_VNET | CTLFLAG_RWTUN, &VNET_NAME(nfs_maxvers), 0, "The highest version of NFS handled by the server"); static int nfs_proc(struct nfsrv_descript *, u_int32_t, SVCXPRT *xprt, @@ -108,13 +108,13 @@ extern volatile int nfsrv_devidcnt; extern struct nfsv4_opflag nfsv4_opflag[NFSV42_NOPS]; extern int nfsd_debuglevel; -NFSD_VNET_DECLARE(struct proc *, nfsd_master_proc); +VNET_DECLARE(struct proc *, nfsd_master_proc); -NFSD_VNET_DEFINE(SVCPOOL *, nfsrvd_pool); -NFSD_VNET_DEFINE(int, nfsrv_numnfsd) = 0; -NFSD_VNET_DEFINE(struct nfsv4lock, nfsd_suspend_lock); +VNET_DEFINE(SVCPOOL *, nfsrvd_pool); +VNET_DEFINE(int, nfsrv_numnfsd) = 0; +VNET_DEFINE(struct nfsv4lock, nfsd_suspend_lock); -NFSD_VNET_DEFINE_STATIC(bool, nfsrvd_inited) = false; +VNET_DEFINE_STATIC(bool, nfsrvd_inited) = false; /* * NFS server system calls @@ -132,7 +132,7 @@ nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) u_int maxlen; #endif - NFSD_CURVNET_SET_QUIET(NFSD_TD_TO_VNET(curthread)); + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); memset(&nd, 0, sizeof(nd)); if (rqst->rq_vers == NFS_VER2) { if (rqst->rq_proc > NFSV2PROC_STATFS || @@ -177,7 +177,7 @@ nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) nd.nd_mreq = NULL; nd.nd_cred = NULL; - if (NFSD_VNET(nfs_privport) != 0) { + if (VNET(nfs_privport) != 0) { /* Check if source port is privileged */ u_short port; struct sockaddr *nam = nd.nd_nam; @@ -331,9 +331,9 @@ nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) * nfsv4root exports by nfsvno_v4rootexport(). 
*/ NFSLOCKV4ROOTMUTEX(); - nfsv4_lock(&NFSD_VNET(nfsd_suspend_lock), 0, NULL, + nfsv4_lock(&VNET(nfsd_suspend_lock), 0, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); - nfsv4_getref(&NFSD_VNET(nfsd_suspend_lock), NULL, + nfsv4_getref(&VNET(nfsd_suspend_lock), NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); NFSUNLOCKV4ROOTMUTEX(); @@ -341,7 +341,7 @@ nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) nd.nd_repstat = nfsvno_v4rootexport(&nd); if (nd.nd_repstat != 0) { NFSLOCKV4ROOTMUTEX(); - nfsv4_relref(&NFSD_VNET(nfsd_suspend_lock)); + nfsv4_relref(&VNET(nfsd_suspend_lock)); NFSUNLOCKV4ROOTMUTEX(); svcerr_weakauth(rqst); svc_freereq(rqst); @@ -357,7 +357,7 @@ nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) #endif cacherep = nfs_proc(&nd, rqst->rq_xid, xprt, &rp); NFSLOCKV4ROOTMUTEX(); - nfsv4_relref(&NFSD_VNET(nfsd_suspend_lock)); + nfsv4_relref(&VNET(nfsd_suspend_lock)); NFSUNLOCKV4ROOTMUTEX(); } else { NFSMGET(nd.nd_mreq); @@ -398,7 +398,7 @@ nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt) out: free(nd.nd_principal, M_TEMP); - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); ast_kclear(curthread); NFSEXITCODE(0); } @@ -512,9 +512,9 @@ nfssvc_loss(SVCXPRT *xprt) ack = 0; SVC_ACK(xprt, &ack); - NFSD_CURVNET_SET(NFSD_TD_TO_VNET(curthread)); + CURVNET_SET(TD_TO_VNET(curthread)); nfsrc_trimcache(xprt->xp_sockref, ack, 1); - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); } /* @@ -541,21 +541,21 @@ nfsrvd_addsock(struct file *fp) * unexpectedly. 
*/ if (so->so_type == SOCK_DGRAM) - xprt = svc_dg_create(NFSD_VNET(nfsrvd_pool), so, 0, 0); + xprt = svc_dg_create(VNET(nfsrvd_pool), so, 0, 0); else - xprt = svc_vc_create(NFSD_VNET(nfsrvd_pool), so, 0, 0); + xprt = svc_vc_create(VNET(nfsrvd_pool), so, 0, 0); if (xprt) { fp->f_ops = &badfileops; fp->f_data = NULL; xprt->xp_sockref = ++sockref; - if (NFSD_VNET(nfs_minvers) == NFS_VER2) + if (VNET(nfs_minvers) == NFS_VER2) svc_reg(xprt, NFS_PROG, NFS_VER2, nfssvc_program, NULL); - if (NFSD_VNET(nfs_minvers) <= NFS_VER3 && - NFSD_VNET(nfs_maxvers) >= NFS_VER3) + if (VNET(nfs_minvers) <= NFS_VER3 && + VNET(nfs_maxvers) >= NFS_VER3) svc_reg(xprt, NFS_PROG, NFS_VER3, nfssvc_program, NULL); - if (NFSD_VNET(nfs_maxvers) >= NFS_VER4) + if (VNET(nfs_maxvers) >= NFS_VER4) svc_reg(xprt, NFS_PROG, NFS_VER4, nfssvc_program, NULL); if (so->so_type == SOCK_STREAM) @@ -594,14 +594,14 @@ nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args) * use. */ NFSD_LOCK(); - if (NFSD_VNET(nfsrv_numnfsd) == 0) { + if (VNET(nfsrv_numnfsd) == 0) { nfsdev_time = time_second; p = td->td_proc; PROC_LOCK(p); p->p_flag2 |= P2_AST_SU; PROC_UNLOCK(p); newnfs_numnfsd++; /* Total num for all vnets. */ - NFSD_VNET(nfsrv_numnfsd)++; /* Num for this vnet. */ + VNET(nfsrv_numnfsd)++; /* Num for this vnet. */ NFSD_UNLOCK(); error = nfsrv_createdevids(args, td); @@ -624,9 +624,9 @@ nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args) td->td_ucred->cr_prison->pr_id); } - NFSD_VNET(nfsrvd_pool)->sp_minthreads = + VNET(nfsrvd_pool)->sp_minthreads = args->minthreads; - NFSD_VNET(nfsrvd_pool)->sp_maxthreads = + VNET(nfsrvd_pool)->sp_maxthreads = args->maxthreads; /* @@ -638,7 +638,7 @@ nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args) nfsv4_opflag[NFSV4OP_GETATTR].modifyfs = 1; } - svc_run(NFSD_VNET(nfsrvd_pool)); + svc_run(VNET(nfsrvd_pool)); /* Reset Getattr to not do a vn_start_write(). 
*/ nfsrv_writerpc[NFSPROC_GETATTR] = 0; @@ -652,7 +652,7 @@ nfsrvd_nfsd(struct thread *td, struct nfsd_nfsd_args *args) } NFSD_LOCK(); newnfs_numnfsd--; - NFSD_VNET(nfsrv_numnfsd)--; + VNET(nfsrv_numnfsd)--; nfsrvd_init(1); PROC_LOCK(p); p->p_flag2 &= ~P2_AST_SU; @@ -677,25 +677,25 @@ nfsrvd_init(int terminating) NFSD_LOCK_ASSERT(); if (terminating) { - NFSD_VNET(nfsd_master_proc) = NULL; + VNET(nfsd_master_proc) = NULL; NFSD_UNLOCK(); nfsrv_freealllayoutsanddevids(); nfsrv_freeallbackchannel_xprts(); - svcpool_close(NFSD_VNET(nfsrvd_pool)); + svcpool_close(VNET(nfsrvd_pool)); free(nfsrv_zeropnfsdat, M_TEMP); nfsrv_zeropnfsdat = NULL; NFSD_LOCK(); } else { /* Initialize per-vnet globals once per vnet. */ - if (NFSD_VNET(nfsrvd_inited)) + if (VNET(nfsrvd_inited)) return; - NFSD_VNET(nfsrvd_inited) = true; + VNET(nfsrvd_inited) = true; NFSD_UNLOCK(); - NFSD_VNET(nfsrvd_pool) = svcpool_create("nfsd", + VNET(nfsrvd_pool) = svcpool_create("nfsd", SYSCTL_STATIC_CHILDREN(_vfs_nfsd)); - NFSD_VNET(nfsrvd_pool)->sp_rcache = NULL; - NFSD_VNET(nfsrvd_pool)->sp_assign = fhanew_assign; - NFSD_VNET(nfsrvd_pool)->sp_done = fhanew_nd_complete; + VNET(nfsrvd_pool)->sp_rcache = NULL; + VNET(nfsrvd_pool)->sp_assign = fhanew_assign; + VNET(nfsrvd_pool)->sp_done = fhanew_nd_complete; NFSD_LOCK(); } } diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 13133e6fd165..e42d5ca1e0a3 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -71,14 +71,14 @@ extern int nfs_bufpackets; extern u_long sb_max_adj; extern struct nfsv4lock nfsv4rootfs_lock; -NFSD_VNET_DECLARE(int, nfsrv_numnfsd); -NFSD_VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst); -NFSD_VNET_DECLARE(SVCPOOL *, nfsrvd_pool); -NFSD_VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); -NFSD_VNET_DECLARE(struct nfslockhashhead *, nfslockhash); -NFSD_VNET_DECLARE(struct nfssessionhash *, nfssessionhash); -NFSD_VNET_DECLARE(struct nfsv4lock, nfsd_suspend_lock); 
-NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); +VNET_DECLARE(int, nfsrv_numnfsd); +VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst); +VNET_DECLARE(SVCPOOL *, nfsrvd_pool); +VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); +VNET_DECLARE(struct nfslockhashhead *, nfslockhash); +VNET_DECLARE(struct nfssessionhash *, nfssessionhash); +VNET_DECLARE(struct nfsv4lock, nfsd_suspend_lock); +VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); NFSDLOCKMUTEX; NFSSTATESPINLOCK; @@ -95,18 +95,18 @@ static struct timeval nfsd_master_start; static uint32_t nfsv4_sysid = 0; static fhandle_t zerofh; -NFSD_VNET_DEFINE(struct proc *, nfsd_master_proc) = NULL; -NFSD_VNET_DEFINE(struct nfsrvhashhead *, nfsrvudphashtbl); -NFSD_VNET_DEFINE(struct nfsrchash_bucket *, nfsrchash_table); -NFSD_VNET_DEFINE(struct nfsrchash_bucket *, nfsrcahash_table); -NFSD_VNET_DEFINE(struct nfsrvfh, nfs_rootfh); -NFSD_VNET_DEFINE(int, nfs_rootfhset) = 0; -NFSD_VNET_DEFINE(struct callout, nfsd_callout); -NFSD_VNET_DEFINE_STATIC(struct mount *, nfsv4root_mnt); -NFSD_VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_opt); -NFSD_VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_newopt); -NFSD_VNET_DEFINE_STATIC(bool, nfsrv_suspend_nfsd) = false; -NFSD_VNET_DEFINE_STATIC(bool, nfsrv_mntinited) = false; +VNET_DEFINE(struct proc *, nfsd_master_proc) = NULL; +VNET_DEFINE(struct nfsrvhashhead *, nfsrvudphashtbl); +VNET_DEFINE(struct nfsrchash_bucket *, nfsrchash_table); +VNET_DEFINE(struct nfsrchash_bucket *, nfsrcahash_table); +VNET_DEFINE(struct nfsrvfh, nfs_rootfh); +VNET_DEFINE(int, nfs_rootfhset) = 0; +VNET_DEFINE(struct callout, nfsd_callout); +VNET_DEFINE_STATIC(struct mount *, nfsv4root_mnt); +VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_opt); +VNET_DEFINE_STATIC(struct vfsoptlist, nfsv4root_newopt); +VNET_DEFINE_STATIC(bool, nfsrv_suspend_nfsd) = false; +VNET_DEFINE_STATIC(bool, nfsrv_mntinited) = false; static int nfssvc_srvcall(struct thread *, struct nfssvc_args *, struct ucred *); @@ 
-181,9 +181,9 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, issue_delegations, CTLFLAG_RW, &nfsrv_issuedelegs, 0, "Enable nfsd to issue delegations"); SYSCTL_INT(_vfs_nfsd, OID_AUTO, debuglevel, CTLFLAG_RW, &nfsd_debuglevel, 0, "Debug level for NFS server"); -NFSD_VNET_DECLARE(int, nfsd_enable_stringtouid); +VNET_DECLARE(int, nfsd_enable_stringtouid); SYSCTL_INT(_vfs_nfsd, OID_AUTO, enable_stringtouid, - CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_enable_stringtouid), + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nfsd_enable_stringtouid), 0, "Enable nfsd to accept numeric owner_names"); static int nfsrv_pnfsgetdsattr = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsgetdsattr, CTLFLAG_RW, @@ -1073,7 +1073,7 @@ nfsvno_read(struct vnode *vp, off_t off, int cnt, struct ucred *cred, nh = nfsrv_sequential_heuristic(uiop, vp); ioflag |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? */ - NFSD_VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_READ] += uiop->uio_resid; + VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_READ] += uiop->uio_resid; error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred); free(iv, M_TEMP); if (error) { @@ -1198,7 +1198,7 @@ nfsvno_write(struct vnode *vp, off_t off, int retlen, int *stable, nh = nfsrv_sequential_heuristic(uiop, vp); ioflags |= nh->nh_seqcount << IO_SEQSHIFT; /* XXX KDM make this more systematic? 
*/ - NFSD_VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; + VNET(nfsstatsv1_p)->srvbytes[NFSV4OP_WRITE] += uiop->uio_resid; error = VOP_WRITE(vp, uiop, ioflags, cred); if (error == 0) nh->nh_nextoff = uiop->uio_offset; @@ -3603,7 +3603,7 @@ nfsvno_checkexp(struct mount *mp, struct sockaddr *nam, struct nfsexstuff *exp, error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { - if (NFSD_VNET(nfs_rootfhset)) { + if (VNET(nfs_rootfhset)) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; @@ -3644,7 +3644,7 @@ nfsvno_fhtovp(struct mount *mp, fhandle_t *fhp, struct sockaddr *nam, error = VFS_CHECKEXP(mp, nam, &exp->nes_exflag, credp, &exp->nes_numsecflavor, exp->nes_secflavors); if (error) { - if (NFSD_VNET(nfs_rootfhset)) { + if (VNET(nfs_rootfhset)) { exp->nes_exflag = 0; exp->nes_numsecflavor = 0; error = 0; @@ -3821,9 +3821,9 @@ nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) struct nameidata nd; fhandle_t fh; - error = vfs_export(NFSD_VNET(nfsv4root_mnt), &nfsexargp->export, false); + error = vfs_export(VNET(nfsv4root_mnt), &nfsexargp->export, false); if ((nfsexargp->export.ex_flags & MNT_DELEXPORT) != 0) - NFSD_VNET(nfs_rootfhset) = 0; + VNET(nfs_rootfhset) = 0; else if (error == 0) { if (nfsexargp->fspec == NULL) { error = EPERM; @@ -3839,11 +3839,11 @@ nfsrv_v4rootexport(void *argp, struct ucred *cred, struct thread *p) error = nfsvno_getfh(nd.ni_vp, &fh, p); vrele(nd.ni_vp); if (!error) { - NFSD_VNET(nfs_rootfh).nfsrvfh_len = NFSX_MYFH; + VNET(nfs_rootfh).nfsrvfh_len = NFSX_MYFH; NFSBCOPY((caddr_t)&fh, - NFSD_VNET(nfs_rootfh).nfsrvfh_data, + VNET(nfs_rootfh).nfsrvfh_data, sizeof (fhandle_t)); - NFSD_VNET(nfs_rootfhset) = 1; + VNET(nfs_rootfhset) = 1; } } @@ -3881,29 +3881,29 @@ nfsd_mntinit(void) { NFSD_LOCK(); - if (NFSD_VNET(nfsrv_mntinited)) { + if (VNET(nfsrv_mntinited)) { NFSD_UNLOCK(); return; } - NFSD_VNET(nfsrv_mntinited) = true; + 
VNET(nfsrv_mntinited) = true; nfsrvd_init(0); NFSD_UNLOCK(); - NFSD_VNET(nfsv4root_mnt) = malloc(sizeof(struct mount), M_TEMP, + VNET(nfsv4root_mnt) = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); - NFSD_VNET(nfsv4root_mnt)->mnt_flag = (MNT_RDONLY | MNT_EXPORTED); - mtx_init(&NFSD_VNET(nfsv4root_mnt)->mnt_mtx, "nfs4mnt", NULL, MTX_DEF); - lockinit(&NFSD_VNET(nfsv4root_mnt)->mnt_explock, PVFS, "explock", 0, 0); - TAILQ_INIT(&NFSD_VNET(nfsv4root_mnt)->mnt_nvnodelist); - TAILQ_INIT(&NFSD_VNET(nfsv4root_mnt)->mnt_lazyvnodelist); - NFSD_VNET(nfsv4root_mnt)->mnt_export = NULL; - TAILQ_INIT(&NFSD_VNET(nfsv4root_opt)); - TAILQ_INIT(&NFSD_VNET(nfsv4root_newopt)); - NFSD_VNET(nfsv4root_mnt)->mnt_opt = &NFSD_VNET(nfsv4root_opt); - NFSD_VNET(nfsv4root_mnt)->mnt_optnew = &NFSD_VNET(nfsv4root_newopt); - NFSD_VNET(nfsv4root_mnt)->mnt_nvnodelistsize = 0; - NFSD_VNET(nfsv4root_mnt)->mnt_lazyvnodelistsize = 0; - callout_init(&NFSD_VNET(nfsd_callout), 1); + VNET(nfsv4root_mnt)->mnt_flag = (MNT_RDONLY | MNT_EXPORTED); + mtx_init(&VNET(nfsv4root_mnt)->mnt_mtx, "nfs4mnt", NULL, MTX_DEF); + lockinit(&VNET(nfsv4root_mnt)->mnt_explock, PVFS, "explock", 0, 0); + TAILQ_INIT(&VNET(nfsv4root_mnt)->mnt_nvnodelist); + TAILQ_INIT(&VNET(nfsv4root_mnt)->mnt_lazyvnodelist); + VNET(nfsv4root_mnt)->mnt_export = NULL; + TAILQ_INIT(&VNET(nfsv4root_opt)); + TAILQ_INIT(&VNET(nfsv4root_newopt)); + VNET(nfsv4root_mnt)->mnt_opt = &VNET(nfsv4root_opt); + VNET(nfsv4root_mnt)->mnt_optnew = &VNET(nfsv4root_newopt); + VNET(nfsv4root_mnt)->mnt_nvnodelistsize = 0; + VNET(nfsv4root_mnt)->mnt_lazyvnodelistsize = 0; + callout_init(&VNET(nfsd_callout), 1); nfsrvd_initcache(); nfsd_init(); @@ -3915,11 +3915,11 @@ nfsd_timer(void *arg) struct vnet *vnetp; vnetp = (struct vnet *)arg; - NFSD_CURVNET_SET_QUIET(vnetp); + CURVNET_SET_QUIET(vnetp); nfsrv_servertimer(vnetp); - callout_reset_sbt(&NFSD_VNET(nfsd_callout), SBT_1S, SBT_1S, nfsd_timer, + callout_reset_sbt(&VNET(nfsd_callout), SBT_1S, SBT_1S, nfsd_timer, 
arg, 0); - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); } /* @@ -4001,7 +4001,7 @@ nfsvno_v4rootexport(struct nfsrv_descript *nd) int error = 0, numsecflavor, secflavors[MAXSECFLAVORS], i; uint64_t exflags; - error = vfs_stdcheckexp(NFSD_VNET(nfsv4root_mnt), nd->nd_nam, &exflags, + error = vfs_stdcheckexp(VNET(nfsv4root_mnt), nd->nd_nam, &exflags, &credanon, &numsecflavor, secflavors); if (error) { error = NFSERR_PROGUNAVAIL; @@ -4056,7 +4056,7 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) char *buf, *cp, *cp2, *cp3; char fname[PNFS_FILENAME_LEN + 1]; - NFSD_CURVNET_SET(NFSD_TD_TO_VNET(td)); + CURVNET_SET(TD_TO_VNET(td)); if (uap->flag & NFSSVC_NFSDADDSOCK) { error = copyin(uap->argp, (caddr_t)&sockarg, sizeof (sockarg)); if (error) @@ -4164,9 +4164,9 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) nfsdarg.mdspathlen = 0; nfsdarg.mirrorcnt = 1; } - nfsd_timer(NFSD_TD_TO_VNET(td)); + nfsd_timer(TD_TO_VNET(td)); error = nfsrvd_nfsd(td, &nfsdarg); - callout_drain(&NFSD_VNET(nfsd_callout)); + callout_drain(&VNET(nfsd_callout)); free(nfsdarg.addr, M_TEMP); free(nfsdarg.dnshost, M_TEMP); free(nfsdarg.dspath, M_TEMP); @@ -4244,7 +4244,7 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) } out: - NFSD_CURVNET_RESTORE(); + CURVNET_RESTORE(); NFSEXITCODE(error); return (error); } @@ -4349,10 +4349,10 @@ nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) error = fp_getfvp(p, stablefd, &fp, &vp); if (!error && (NFSFPFLAG(fp) & (FREAD | FWRITE)) != (FREAD | FWRITE)) error = EBADF; - if (!error && NFSD_VNET(nfsrv_numnfsd) != 0) + if (!error && VNET(nfsrv_numnfsd) != 0) error = ENXIO; if (!error) { - NFSD_VNET(nfsrv_stablefirst).nsf_fp = fp; + VNET(nfsrv_stablefirst).nsf_fp = fp; nfsrv_setupstable(p); } } else if (uap->flag & NFSSVC_ADMINREVOKE) { @@ -4399,27 +4399,27 @@ nfssvc_srvcall(struct thread *p, struct nfssvc_args *uap, struct ucred *cred) nfsd_master_pid = procp->p_pid; bcopy(procp->p_comm, nfsd_master_comm, 
MAXCOMLEN + 1); nfsd_master_start = procp->p_stats->p_start; - NFSD_VNET(nfsd_master_proc) = procp; + VNET(nfsd_master_proc) = procp; PROC_UNLOCK(procp); } else if ((uap->flag & NFSSVC_SUSPENDNFSD) != 0) { NFSLOCKV4ROOTMUTEX(); - if (!NFSD_VNET(nfsrv_suspend_nfsd)) { + if (!VNET(nfsrv_suspend_nfsd)) { /* Lock out all nfsd threads */ do { igotlock = nfsv4_lock( - &NFSD_VNET(nfsd_suspend_lock), 1, NULL, + &VNET(nfsd_suspend_lock), 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); } while (igotlock == 0 && - !NFSD_VNET(nfsrv_suspend_nfsd)); - NFSD_VNET(nfsrv_suspend_nfsd) = true; + !VNET(nfsrv_suspend_nfsd)); + VNET(nfsrv_suspend_nfsd) = true; } NFSUNLOCKV4ROOTMUTEX(); error = 0; } else if ((uap->flag & NFSSVC_RESUMENFSD) != 0) { NFSLOCKV4ROOTMUTEX(); - if (NFSD_VNET(nfsrv_suspend_nfsd)) { - nfsv4_unlock(&NFSD_VNET(nfsd_suspend_lock), 0); - NFSD_VNET(nfsrv_suspend_nfsd) = false; + if (VNET(nfsrv_suspend_nfsd)) { + nfsv4_unlock(&VNET(nfsd_suspend_lock), 0); + VNET(nfsrv_suspend_nfsd) = false; } NFSUNLOCKV4ROOTMUTEX(); error = 0; @@ -4527,10 +4527,10 @@ nfsrv_backupstable(void) { struct proc *procp; - if (NFSD_VNET(nfsd_master_proc) != NULL) { + if (VNET(nfsd_master_proc) != NULL) { procp = pfind(nfsd_master_pid); /* Try to make sure it is the correct process. */ - if (procp == NFSD_VNET(nfsd_master_proc) && + if (procp == VNET(nfsd_master_proc) && procp->p_stats->p_start.tv_sec == nfsd_master_start.tv_sec && procp->p_stats->p_start.tv_usec == @@ -4538,7 +4538,7 @@ nfsrv_backupstable(void) strcmp(procp->p_comm, nfsd_master_comm) == 0) kern_psignal(procp, SIGUSR2); else - NFSD_VNET(nfsd_master_proc) = NULL; + VNET(nfsd_master_proc) = NULL; if (procp != NULL) PROC_UNLOCK(procp); @@ -7502,11 +7502,11 @@ nfsrv_cleanup(const void *unused __unused) int i; NFSD_LOCK(); - if (!NFSD_VNET(nfsrv_mntinited)) { + if (!VNET(nfsrv_mntinited)) { NFSD_UNLOCK(); return; } - NFSD_VNET(nfsrv_mntinited) = false; + VNET(nfsrv_mntinited) = false; NFSD_UNLOCK(); /* Clean out all NFSv4 state. 
*/ @@ -7516,33 +7516,33 @@ nfsrv_cleanup(const void *unused __unused) nfsrvd_cleancache(); /* Clean out v4root exports. */ - if (NFSD_VNET(nfsv4root_mnt)->mnt_export != NULL) { - vfs_free_addrlist(NFSD_VNET(nfsv4root_mnt)->mnt_export); - free(NFSD_VNET(nfsv4root_mnt)->mnt_export, M_MOUNT); - NFSD_VNET(nfsv4root_mnt)->mnt_export = NULL; + if (VNET(nfsv4root_mnt)->mnt_export != NULL) { + vfs_free_addrlist(VNET(nfsv4root_mnt)->mnt_export); + free(VNET(nfsv4root_mnt)->mnt_export, M_MOUNT); + VNET(nfsv4root_mnt)->mnt_export = NULL; } /* Free up the krpc server pool. */ - if (NFSD_VNET(nfsrvd_pool) != NULL) - svcpool_destroy(NFSD_VNET(nfsrvd_pool)); + if (VNET(nfsrvd_pool) != NULL) + svcpool_destroy(VNET(nfsrvd_pool)); /* and get rid of the locks */ for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - mtx_destroy(&NFSD_VNET(nfsrchash_table)[i].mtx); - mtx_destroy(&NFSD_VNET(nfsrcahash_table)[i].mtx); + mtx_destroy(&VNET(nfsrchash_table)[i].mtx); + mtx_destroy(&VNET(nfsrcahash_table)[i].mtx); } - mtx_destroy(&NFSD_VNET(nfsv4root_mnt)->mnt_mtx); + mtx_destroy(&VNET(nfsv4root_mnt)->mnt_mtx); for (i = 0; i < nfsrv_sessionhashsize; i++) - mtx_destroy(&NFSD_VNET(nfssessionhash)[i].mtx); - lockdestroy(&NFSD_VNET(nfsv4root_mnt)->mnt_explock); - free(NFSD_VNET(nfsrvudphashtbl), M_NFSRVCACHE); - free(NFSD_VNET(nfsrchash_table), M_NFSRVCACHE); - free(NFSD_VNET(nfsrcahash_table), M_NFSRVCACHE); - free(NFSD_VNET(nfsclienthash), M_NFSDCLIENT); - free(NFSD_VNET(nfslockhash), M_NFSDLOCKFILE); - free(NFSD_VNET(nfssessionhash), M_NFSDSESSION); - free(NFSD_VNET(nfsv4root_mnt), M_TEMP); - NFSD_VNET(nfsv4root_mnt) = NULL; + mtx_destroy(&VNET(nfssessionhash)[i].mtx); + lockdestroy(&VNET(nfsv4root_mnt)->mnt_explock); + free(VNET(nfsrvudphashtbl), M_NFSRVCACHE); + free(VNET(nfsrchash_table), M_NFSRVCACHE); + free(VNET(nfsrcahash_table), M_NFSRVCACHE); + free(VNET(nfsclienthash), M_NFSDCLIENT); + free(VNET(nfslockhash), M_NFSDLOCKFILE); + free(VNET(nfssessionhash), M_NFSDSESSION); + 
free(VNET(nfsv4root_mnt), M_TEMP); + VNET(nfsv4root_mnt) = NULL; } VNET_SYSUNINIT(nfsrv_cleanup, SI_SUB_VNET_DONE, SI_ORDER_ANY, nfsrv_cleanup, NULL); diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index 7f43654ae06d..ad0f495bbd69 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -4269,7 +4269,7 @@ nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram, /* Allocated large enough for an AF_INET or AF_INET6 socket. */ clp->lc_req.nr_nam = malloc(sizeof(struct sockaddr_in6), M_SONAME, M_WAITOK | M_ZERO); - clp->lc_req.nr_cred = NULL; + clp->lc_req.nr_cred = crhold(nd->nd_cred); NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); @@ -4359,6 +4359,7 @@ nfsrvd_setclientid(struct nfsrv_descript *nd, __unused int isdgram, if (clp) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); + crfree(clp->lc_req.nr_cred); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } @@ -4377,6 +4378,7 @@ nfsmout: if (clp) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); + crfree(clp->lc_req.nr_cred); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } @@ -4634,7 +4636,7 @@ nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, break; #endif } - clp->lc_req.nr_cred = NULL; + clp->lc_req.nr_cred = crhold(nd->nd_cred); NFSBCOPY(verf, clp->lc_verf, NFSX_VERF); clp->lc_idlen = idlen; error = nfsrv_mtostr(nd, clp->lc_id, idlen); @@ -4707,6 +4709,7 @@ nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, if (clp != NULL) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); + crfree(clp->lc_req.nr_cred); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); } @@ -4750,6 +4753,7 @@ nfsmout: if (clp != NULL) { free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); + crfree(clp->lc_req.nr_cred); free(clp->lc_stateid, M_NFSDCLIENT); 
free(clp, M_NFSDCLIENT); } @@ -4865,6 +4869,14 @@ nfsrvd_createsession(struct nfsrv_descript *nd, __unused int isdgram, *tl++ = txdr_unsigned(sep->sess_cbsess.nfsess_foreslots); *tl++ = txdr_unsigned(1); *tl = txdr_unsigned(0); /* No RDMA. */ + /* + * Although the client accepts slot#s up to + * sess_cbsess.nfsess_foreslots, the server can only use + * a maximum of NFSV4_SLOTS, so clip it to avoid ever using + * too high a slot. + */ + if (sep->sess_cbsess.nfsess_foreslots > NFSV4_SLOTS) + sep->sess_cbsess.nfsess_foreslots = NFSV4_SLOTS; } nfsmout: if (nd->nd_repstat != 0 && sep != NULL) diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c index efa7906dffc7..36996425ae95 100644 --- a/sys/fs/nfsserver/nfs_nfsdsocket.c +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -52,13 +52,13 @@ extern volatile int nfsrv_layoutcnt; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; -NFSD_VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst); -NFSD_VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); -NFSD_VNET_DECLARE(int, nfsrc_floodlevel); -NFSD_VNET_DECLARE(int, nfsrc_tcpsavedreplies); -NFSD_VNET_DECLARE(struct nfsrvfh, nfs_rootfh); -NFSD_VNET_DECLARE(int, nfs_rootfhset); -NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); +VNET_DECLARE(struct nfsrv_stablefirst, nfsrv_stablefirst); +VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); +VNET_DECLARE(int, nfsrc_floodlevel); +VNET_DECLARE(int, nfsrc_tcpsavedreplies); +VNET_DECLARE(struct nfsrvfh, nfs_rootfh); +VNET_DECLARE(int, nfs_rootfhset); +VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); int (*nfsrv3_procs0[NFS_V3NPROCS])(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *) = { @@ -477,16 +477,16 @@ nfsrvd_statstart(int op, struct bintime *now) } mtx_lock(&nfsrvd_statmtx); - if (NFSD_VNET(nfsstatsv1_p)->srvstartcnt == - NFSD_VNET(nfsstatsv1_p)->srvdonecnt) { + if (VNET(nfsstatsv1_p)->srvstartcnt == + VNET(nfsstatsv1_p)->srvdonecnt) { if (now != NULL) - NFSD_VNET(nfsstatsv1_p)->busyfrom = 
*now; + VNET(nfsstatsv1_p)->busyfrom = *now; else - binuptime(&NFSD_VNET(nfsstatsv1_p)->busyfrom); + binuptime(&VNET(nfsstatsv1_p)->busyfrom); } - NFSD_VNET(nfsstatsv1_p)->srvrpccnt[op]++; - NFSD_VNET(nfsstatsv1_p)->srvstartcnt++; + VNET(nfsstatsv1_p)->srvrpccnt[op]++; + VNET(nfsstatsv1_p)->srvstartcnt++; mtx_unlock(&nfsrvd_statmtx); } @@ -509,21 +509,21 @@ nfsrvd_statend(int op, uint64_t bytes, struct bintime *now, mtx_lock(&nfsrvd_statmtx); - NFSD_VNET(nfsstatsv1_p)->srvbytes[op] += bytes; - NFSD_VNET(nfsstatsv1_p)->srvops[op]++; + VNET(nfsstatsv1_p)->srvbytes[op] += bytes; + VNET(nfsstatsv1_p)->srvops[op]++; if (then != NULL) { dt = *now; bintime_sub(&dt, then); - bintime_add(&NFSD_VNET(nfsstatsv1_p)->srvduration[op], &dt); + bintime_add(&VNET(nfsstatsv1_p)->srvduration[op], &dt); } dt = *now; - bintime_sub(&dt, &NFSD_VNET(nfsstatsv1_p)->busyfrom); - bintime_add(&NFSD_VNET(nfsstatsv1_p)->busytime, &dt); - NFSD_VNET(nfsstatsv1_p)->busyfrom = *now; + bintime_sub(&dt, &VNET(nfsstatsv1_p)->busyfrom); + bintime_add(&VNET(nfsstatsv1_p)->busytime, &dt); + VNET(nfsstatsv1_p)->busyfrom = *now; - NFSD_VNET(nfsstatsv1_p)->srvdonecnt++; + VNET(nfsstatsv1_p)->srvdonecnt++; mtx_unlock(&nfsrvd_statmtx); } @@ -761,7 +761,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, */ igotlock = 0; NFSLOCKV4ROOTMUTEX(); - if (NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_NEEDLOCK) + if (VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_NEEDLOCK) igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL); else @@ -774,8 +774,8 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, * Done when the grace period is over or a client has long * since expired. 
*/ - NFSD_VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NEEDLOCK; - if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & + VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NEEDLOCK; + if ((VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_GRACEOVER | NFSNSF_UPDATEDONE)) == NFSNSF_GRACEOVER) nfsrv_updatestable(p); @@ -785,12 +785,12 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, * stable storage file and then remove them from the client * list. */ - if (NFSD_VNET(nfsrv_stablefirst).nsf_flags & + if (VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_EXPIREDCLIENT) { - NFSD_VNET(nfsrv_stablefirst).nsf_flags &= + VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_EXPIREDCLIENT; for (i = 0; i < nfsrv_clienthashsize; i++) { - LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], + LIST_FOREACH_SAFE(clp, &VNET(nfsclienthash)[i], lc_hash, nclp) { if (clp->lc_flags & LCL_EXPIREIT) { if (!LIST_EMPTY(&clp->lc_open) || @@ -824,7 +824,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, * If flagged, search for open owners that haven't had any opens * for a long time. 
*/ - if (NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_NOOPENS) { + if (VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_NOOPENS) { nfsrv_throwawayopens(p); } @@ -951,10 +951,10 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, if (i == 0 && (nd->nd_rp == NULL || nd->nd_rp->rc_refcnt == 0) && (nfsrv_mallocmget_limit() || - NFSD_VNET(nfsrc_tcpsavedreplies) > - NFSD_VNET(nfsrc_floodlevel))) { - if (NFSD_VNET(nfsrc_tcpsavedreplies) > - NFSD_VNET(nfsrc_floodlevel)) + VNET(nfsrc_tcpsavedreplies) > + VNET(nfsrc_floodlevel))) { + if (VNET(nfsrc_tcpsavedreplies) > + VNET(nfsrc_floodlevel)) printf("nfsd server cache flooded, try " "increasing vfs.nfsd.tcphighwater\n"); nd->nd_repstat = NFSERR_RESOURCE; @@ -1069,7 +1069,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, } break; case NFSV4OP_PUTROOTFH: - if (NFSD_VNET(nfs_rootfhset)) { + if (VNET(nfs_rootfhset)) { if ((nd->nd_flag & ND_LASTOP) == 0) { /* * Pre-parse the next op#. If it is @@ -1090,7 +1090,7 @@ nfsrvd_compound(struct nfsrv_descript *nd, int isdgram, u_char *tag, } while (nextop == NFSV4OP_SAVEFH && i < numops - 1); } - nfsd_fhtovp(nd, &NFSD_VNET(nfs_rootfh), + nfsd_fhtovp(nd, &VNET(nfs_rootfh), LK_SHARED, &nvp, &nes, NULL, 0, nextop); if (!nd->nd_repstat) { if (vp) diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index ccee9187bfec..22e702b001c0 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -40,10 +40,10 @@ time_t nfsdev_time = 0; int nfsrv_layouthashsize; volatile int nfsrv_layoutcnt = 0; -NFSD_VNET_DEFINE(struct nfsrv_stablefirst, nfsrv_stablefirst); +VNET_DEFINE(struct nfsrv_stablefirst, nfsrv_stablefirst); -NFSD_VNET_DECLARE(int, nfsrv_numnfsd); -NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); +VNET_DECLARE(int, nfsrv_numnfsd); +VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p); extern uint32_t nfs_srvmaxio; extern int nfsrv_lease; @@ -115,17 +115,17 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, 
flexlinuxhack, CTLFLAG_RW, &nfsrv_flexlinuxhack, 0, "For Linux clients, hack around Flex File Layout bug"); -NFSD_VNET_DEFINE_STATIC(bool, nfsd_disable_grace) = false; +VNET_DEFINE_STATIC(bool, nfsd_disable_grace) = false; SYSCTL_BOOL(_vfs_nfsd, OID_AUTO, testing_disable_grace, - CTLFLAG_NFSD_VNET | CTLFLAG_RW, &NFSD_VNET_NAME(nfsd_disable_grace), + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nfsd_disable_grace), 0, "Disable grace for testing"); /* * Hash lists for nfs V4. */ -NFSD_VNET_DEFINE(struct nfsclienthashhead *, nfsclienthash); -NFSD_VNET_DEFINE(struct nfslockhashhead *, nfslockhash); -NFSD_VNET_DEFINE(struct nfssessionhash *, nfssessionhash); +VNET_DEFINE(struct nfsclienthashhead *, nfsclienthash); +VNET_DEFINE(struct nfslockhashhead *, nfslockhash); +VNET_DEFINE(struct nfssessionhash *, nfssessionhash); struct nfslayouthash *nfslayouthash; volatile int nfsrv_dontlistlen = 0; @@ -137,7 +137,7 @@ static int nfsrv_nogsscallback = 0; static volatile int nfsrv_writedelegcnt = 0; static int nfsrv_faildscnt; -NFSD_VNET_DEFINE_STATIC(time_t, nfsrvboottime); +VNET_DEFINE_STATIC(time_t, nfsrvboottime); /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, @@ -165,7 +165,6 @@ static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp); static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp); -static int nfsrv_getipnumber(u_char *cp); static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid); static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, @@ -352,7 +351,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { - LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) { + LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) { if (new_clp->lc_idlen == 
clp->lc_idlen && !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; @@ -399,7 +398,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = - NFSD_VNET(nfsrvboottime); + VNET(nfsrvboottime); clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; @@ -415,7 +414,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, LIST_INIT(&new_clp->lc_stateid[i]); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - NFSD_VNET(nfsstatsv1_p)->srvclients++; + VNET(nfsstatsv1_p)->srvclients++; nfsrv_openpluslock++; nfsrv_clients++; nfsrv_clientunlock(mlocked); @@ -512,7 +511,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index; clientidp->lval[0] = new_clp->lc_clientid.lval[0] = - NFSD_VNET(nfsrvboottime); + VNET(nfsrvboottime); clientidp->lval[1] = new_clp->lc_clientid.lval[1] = nfsrv_nextclientindex(); new_clp->lc_stateindex = 0; @@ -543,7 +542,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - NFSD_VNET(nfsstatsv1_p)->srvclients++; + VNET(nfsstatsv1_p)->srvclients++; nfsrv_openpluslock++; nfsrv_clients++; if (!mlocked) { @@ -618,7 +617,7 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, LIST_INIT(&new_clp->lc_session); LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp, lc_hash); - NFSD_VNET(nfsstatsv1_p)->srvclients++; + VNET(nfsstatsv1_p)->srvclients++; nfsrv_openpluslock++; nfsrv_clients++; } @@ -678,7 +677,7 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, if (clpp) *clpp = NULL; if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 || - opflags != CLOPS_RENEW) && 
NFSD_VNET(nfsrvboottime) != + opflags != CLOPS_RENEW) && VNET(nfsrvboottime) != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; @@ -920,7 +919,7 @@ nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p) int error = 0, i; bool mlocked; - if (NFSD_VNET(nfsrvboottime) != clientid.lval[0]) { + if (VNET(nfsrvboottime) != clientid.lval[0]) { error = NFSERR_STALECLIENTID; goto out; } @@ -1014,7 +1013,7 @@ nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p) */ gotit = i = 0; while (i < nfsrv_clienthashsize && !gotit) { - LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) { + LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) { if (revokep->nclid_idlen == clp->lc_idlen && !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) { gotit = 1; @@ -1076,8 +1075,8 @@ nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt) * Rattle through the client lists until done. */ while (i < nfsrv_clienthashsize && cnt < maxcnt) { - clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]); - while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i]) && cnt < + clp = LIST_FIRST(&VNET(nfsclienthash)[i]); + while (clp != LIST_END(&VNET(nfsclienthash)[i]) && cnt < maxcnt) { nfsrv_dumpaclient(clp, &dumpp[cnt]); cnt++; @@ -1354,15 +1353,15 @@ nfsrv_servertimer(void *arg __unused) * If server hasn't started yet, just return. 
*/ NFSLOCKSTATE(); - if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce == 0) { + if (VNET(nfsrv_stablefirst).nsf_eograce == 0) { NFSUNLOCKSTATE(); return; } - if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) { - if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags & + if (!(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) { + if (!(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) && - NFSD_MONOSEC > NFSD_VNET(nfsrv_stablefirst).nsf_eograce) - NFSD_VNET(nfsrv_stablefirst).nsf_flags |= + NFSD_MONOSEC > VNET(nfsrv_stablefirst).nsf_eograce) + VNET(nfsrv_stablefirst).nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); NFSUNLOCKSTATE(); return; @@ -1385,8 +1384,8 @@ nfsrv_servertimer(void *arg __unused) * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { - clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]); - while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i])) { + clp = LIST_FIRST(&VNET(nfsclienthash)[i]); + while (clp != LIST_END(&VNET(nfsclienthash)[i])) { nclp = LIST_NEXT(clp, lc_hash); if (!(clp->lc_flags & LCL_EXPIREIT)) { if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC @@ -1417,7 +1416,7 @@ nfsrv_servertimer(void *arg __unused) * by an nfsd sometime soon. 
*/ clp->lc_flags |= LCL_EXPIREIT; - NFSD_VNET(nfsrv_stablefirst).nsf_flags |= + VNET(nfsrv_stablefirst).nsf_flags |= (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT); } else { /* @@ -1435,7 +1434,7 @@ nfsrv_servertimer(void *arg __unused) if (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit) - NFSD_VNET(nfsrv_stablefirst).nsf_flags |= + VNET(nfsrv_stablefirst).nsf_flags |= NFSNSF_NOOPENS; } else { stp->ls_noopens = 0; @@ -1504,10 +1503,11 @@ nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p) newnfs_disconnect(NULL, &clp->lc_req); free(clp->lc_req.nr_nam, M_SONAME); NFSFREEMUTEX(&clp->lc_req.nr_mtx); + crfree(clp->lc_req.nr_cred); free(clp->lc_stateid, M_NFSDCLIENT); free(clp, M_NFSDCLIENT); NFSLOCKSTATE(); - NFSD_VNET(nfsstatsv1_p)->srvclients--; + VNET(nfsstatsv1_p)->srvclients--; nfsrv_openpluslock--; nfsrv_clients--; NFSUNLOCKSTATE(); @@ -1550,7 +1550,7 @@ nfsrv_freedeleg(struct nfsstate *stp) nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); free(stp, M_NFSDSTATE); - NFSD_VNET(nfsstatsv1_p)->srvdelegates--; + VNET(nfsstatsv1_p)->srvdelegates--; nfsrv_openpluslock--; nfsrv_delegatecnt--; } @@ -1576,7 +1576,7 @@ nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p) if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); - NFSD_VNET(nfsstatsv1_p)->srvopenowners--; + VNET(nfsstatsv1_p)->srvopenowners--; nfsrv_openpluslock--; } @@ -1637,7 +1637,7 @@ nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p) nfsv4_testlock(&lfp->lf_locallock_lck) == 0) nfsrv_freenfslockfile(lfp); free(stp, M_NFSDSTATE); - NFSD_VNET(nfsstatsv1_p)->srvopens--; + VNET(nfsstatsv1_p)->srvopens--; nfsrv_openpluslock--; } @@ -1656,7 +1656,7 @@ nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); free(stp, M_NFSDSTATE); - NFSD_VNET(nfsstatsv1_p)->srvlockowners--; + VNET(nfsstatsv1_p)->srvlockowners--; nfsrv_openpluslock--; } @@ -1732,7 
+1732,7 @@ nfsrv_freenfslock(struct nfslock *lop) if (lop->lo_lckfile.le_prev != NULL) { LIST_REMOVE(lop, lo_lckfile); - NFSD_VNET(nfsstatsv1_p)->srvlocks--; + VNET(nfsstatsv1_p)->srvlocks--; nfsrv_openpluslock--; } LIST_REMOVE(lop, lo_lckowner); @@ -2523,7 +2523,7 @@ tryagain: LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list); *new_lopp = NULL; *new_stpp = NULL; - NFSD_VNET(nfsstatsv1_p)->srvlockowners++; + VNET(nfsstatsv1_p)->srvlockowners++; nfsrv_openpluslock++; } if (filestruct_locked != 0) { @@ -3183,12 +3183,12 @@ tryagain: LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; - NFSD_VNET(nfsstatsv1_p)->srvopenowners++; + VNET(nfsstatsv1_p)->srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; - NFSD_VNET(nfsstatsv1_p)->srvopens++; + VNET(nfsstatsv1_p)->srvopens++; nfsrv_openpluslock++; break; } @@ -3249,7 +3249,7 @@ tryagain: NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; - NFSD_VNET(nfsstatsv1_p)->srvdelegates++; + VNET(nfsstatsv1_p)->srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; @@ -3289,12 +3289,12 @@ tryagain: LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; - NFSD_VNET(nfsstatsv1_p)->srvopenowners++; + VNET(nfsstatsv1_p)->srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; - NFSD_VNET(nfsstatsv1_p)->srvopens++; + VNET(nfsstatsv1_p)->srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; @@ -3350,7 +3350,7 @@ tryagain: new_open, ls_hash); openstp = new_open; new_open = NULL; - NFSD_VNET(nfsstatsv1_p)->srvopens++; + VNET(nfsstatsv1_p)->srvopens++; nfsrv_openpluslock++; /* @@ -3414,9 +3414,9 @@ tryagain: openstp = new_open; new_open = NULL; *new_stpp = NULL; - NFSD_VNET(nfsstatsv1_p)->srvopens++; + VNET(nfsstatsv1_p)->srvopens++; nfsrv_openpluslock++; - 
NFSD_VNET(nfsstatsv1_p)->srvopenowners++; + VNET(nfsstatsv1_p)->srvopenowners++; nfsrv_openpluslock++; } if (!error) { @@ -3879,7 +3879,7 @@ nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { - NFSD_VNET(nfsstatsv1_p)->srvlocks++; + VNET(nfsstatsv1_p)->srvlocks++; nfsrv_openpluslock++; } } @@ -4078,280 +4078,74 @@ out: } /* - * Get the client ip address for callbacks. If the strings can't be parsed, - * just set lc_program to 0 to indicate no callbacks are possible. - * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set - * the address to the client's transport address. This won't be used - * for callbacks, but can be printed out by nfsstats for info.) + * Just set lc_program to 0 to indicate no callbacks are possible. + * Set the address to the client's transport address. This won't be used + * for callbacks, but can be printed out by nfsstats for info. * Return error if the xdr can't be parsed, 0 otherwise. */ int nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp) { - u_int32_t *tl; - u_char *cp, *cp2; - int i, j, maxalen = 0, minalen = 0; - sa_family_t af; + uint32_t *tl; + int error = 0, i; #ifdef INET - struct sockaddr_in *rin = NULL, *sin; + struct sockaddr_in *rin, *sin; #endif #ifdef INET6 - struct sockaddr_in6 *rin6 = NULL, *sin6; -#endif - u_char *addr; - int error = 0, cantparse = 0; -#ifdef INET - union { - in_addr_t ival; - u_char cval[4]; - } ip; -#endif -#if defined(INET6) || defined(INET) - union { - in_port_t sval; - u_char cval[2]; - } port; + struct sockaddr_in6 *rin6, *sin6; #endif - /* 8 is the maximum length of the port# string. 
*/ - addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK); clp->lc_req.nr_client = NULL; clp->lc_req.nr_lock = 0; - af = AF_UNSPEC; - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); - if (i >= 3 && i <= 4) { - error = nfsrv_mtostr(nd, addr, i); + if (i < 0) { + error = NFSERR_BADXDR; + goto nfsmout; + } else if (i > 0) { + error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; -#ifdef INET - if (!strcmp(addr, "tcp")) { - clp->lc_flags |= LCL_TCPCALLBACK; - clp->lc_req.nr_sotype = SOCK_STREAM; - clp->lc_req.nr_soproto = IPPROTO_TCP; - af = AF_INET; - } else if (!strcmp(addr, "udp")) { - clp->lc_req.nr_sotype = SOCK_DGRAM; - clp->lc_req.nr_soproto = IPPROTO_UDP; - af = AF_INET; - } -#endif -#ifdef INET6 - if (af == AF_UNSPEC) { - if (!strcmp(addr, "tcp6")) { - clp->lc_flags |= LCL_TCPCALLBACK; - clp->lc_req.nr_sotype = SOCK_STREAM; - clp->lc_req.nr_soproto = IPPROTO_TCP; - af = AF_INET6; - } else if (!strcmp(addr, "udp6")) { - clp->lc_req.nr_sotype = SOCK_DGRAM; - clp->lc_req.nr_soproto = IPPROTO_UDP; - af = AF_INET6; - } - } -#endif - if (af == AF_UNSPEC) { - cantparse = 1; - } - } else { - cantparse = 1; - if (i > 0) { - error = nfsm_advance(nd, NFSM_RNDUP(i), -1); - if (error) - goto nfsmout; - } } - /* - * The caller has allocated clp->lc_req.nr_nam to be large enough - * for either AF_INET or AF_INET6 and zeroed out the contents. - * maxalen is set to the maximum length of the host IP address string - * plus 8 for the maximum length of the port#. - * minalen is set to the minimum length of the host IP address string - * plus 4 for the minimum length of the port#. - * These lengths do not include NULL termination, - * so INET[6]_ADDRSTRLEN - 1 is used in the calculations. 
- */ - switch (af) { + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + i = fxdr_unsigned(int, *tl); + if (i < 0) { + error = NFSERR_BADXDR; + goto nfsmout; + } else if (i > 0) { + error = nfsm_advance(nd, NFSM_RNDUP(i), -1); + if (error) + goto nfsmout; + } + switch (nd->nd_nam->sa_family) { #ifdef INET case AF_INET: + sin = (struct sockaddr_in *)nd->nd_nam; rin = (struct sockaddr_in *)clp->lc_req.nr_nam; rin->sin_family = AF_INET; rin->sin_len = sizeof(struct sockaddr_in); - maxalen = INET_ADDRSTRLEN - 1 + 8; - minalen = 7 + 4; + rin->sin_addr.s_addr = sin->sin_addr.s_addr; + rin->sin_port = 0x0; break; #endif #ifdef INET6 case AF_INET6: + sin6 = (struct sockaddr_in6 *)nd->nd_nam; rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; rin6->sin6_family = AF_INET6; rin6->sin6_len = sizeof(struct sockaddr_in6); - maxalen = INET6_ADDRSTRLEN - 1 + 8; - minalen = 3 + 4; + rin6->sin6_addr = sin6->sin6_addr; + rin6->sin6_port = 0x0; break; #endif } - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - i = fxdr_unsigned(int, *tl); - if (i < 0) { - error = NFSERR_BADXDR; - goto nfsmout; - } else if (i == 0) { - cantparse = 1; - } else if (!cantparse && i <= maxalen && i >= minalen) { - error = nfsrv_mtostr(nd, addr, i); - if (error) - goto nfsmout; - - /* - * Parse out the address fields. We expect 6 decimal numbers - * separated by '.'s for AF_INET and two decimal numbers - * preceded by '.'s for AF_INET6. - */ - cp = NULL; - switch (af) { -#ifdef INET6 - /* - * For AF_INET6, first parse the host address. 
- */ - case AF_INET6: - cp = strchr(addr, '.'); - if (cp != NULL) { - *cp++ = '\0'; - if (inet_pton(af, addr, &rin6->sin6_addr) == 1) - i = 4; - else { - cp = NULL; - cantparse = 1; - } - } - break; -#endif -#ifdef INET - case AF_INET: - cp = addr; - i = 0; - break; -#endif - } - while (cp != NULL && *cp && i < 6) { - cp2 = cp; - while (*cp2 && *cp2 != '.') - cp2++; - if (*cp2) - *cp2++ = '\0'; - else if (i != 5) { - cantparse = 1; - break; - } - j = nfsrv_getipnumber(cp); - if (j >= 0) { - if (i < 4) -#ifdef INET - ip.cval[3 - i] = j; -#else - ; -#endif -#if defined(INET6) || defined(INET) - else - port.cval[5 - i] = j; -#endif - } else { - cantparse = 1; - break; - } - cp = cp2; - i++; - } - if (!cantparse) { - /* - * The host address INADDR_ANY is (mis)used to indicate - * "there is no valid callback address". - */ - switch (af) { -#ifdef INET6 - case AF_INET6: - if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr, - &in6addr_any)) - rin6->sin6_port = htons(port.sval); - else - cantparse = 1; - break; -#endif -#ifdef INET - case AF_INET: - if (ip.ival != INADDR_ANY) { - rin->sin_addr.s_addr = htonl(ip.ival); - rin->sin_port = htons(port.sval); - } else { - cantparse = 1; - } - break; -#endif - } - } - } else { - cantparse = 1; - if (i > 0) { - error = nfsm_advance(nd, NFSM_RNDUP(i), -1); - if (error) - goto nfsmout; - } - } - if (cantparse) { - switch (nd->nd_nam->sa_family) { -#ifdef INET - case AF_INET: - sin = (struct sockaddr_in *)nd->nd_nam; - rin = (struct sockaddr_in *)clp->lc_req.nr_nam; - rin->sin_family = AF_INET; - rin->sin_len = sizeof(struct sockaddr_in); - rin->sin_addr.s_addr = sin->sin_addr.s_addr; - rin->sin_port = 0x0; - break; -#endif -#ifdef INET6 - case AF_INET6: - sin6 = (struct sockaddr_in6 *)nd->nd_nam; - rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam; - rin6->sin6_family = AF_INET6; - rin6->sin6_len = sizeof(struct sockaddr_in6); - rin6->sin6_addr = sin6->sin6_addr; - rin6->sin6_port = 0x0; - break; -#endif - } - clp->lc_program = 0; - } + 
clp->lc_program = 0; nfsmout: - free(addr, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* - * Turn a string of up to three decimal digits into a number. Return -1 upon - * error. - */ -static int -nfsrv_getipnumber(u_char *cp) -{ - int i = 0, j = 0; - - while (*cp) { - if (j > 2 || *cp < '0' || *cp > '9') - return (-1); - i *= 10; - i += (*cp - '0'); - cp++; - j++; - } - if (i < 256) - return (i); - return (-1); -} - -/* * This function checks for restart conditions. */ static int @@ -4366,11 +4160,11 @@ nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { - if (clientid.lval[0] != NFSD_VNET(nfsrvboottime)) { + if (clientid.lval[0] != VNET(nfsrvboottime)) { ret = NFSERR_STALECLIENTID; goto out; } - } else if (stateidp->other[0] != NFSD_VNET(nfsrvboottime) && + } else if (stateidp->other[0] != VNET(nfsrvboottime) && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; @@ -4403,15 +4197,15 @@ nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, int error = 0, notreclaimed; struct nfsrv_stable *sp; - if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_UPDATEDONE | + if ((VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_UPDATEDONE | NFSNSF_GRACEOVER)) == 0) { /* * First, check to see if all of the clients have done a * ReclaimComplete. If so, grace can end now. 
*/ notreclaimed = 0; - if (!NFSD_VNET(nfsd_disable_grace)) { - LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, + if (!VNET(nfsd_disable_grace)) { + LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) { if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) { notreclaimed = 1; @@ -4420,11 +4214,11 @@ nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, } } if (notreclaimed == 0) - NFSD_VNET(nfsrv_stablefirst).nsf_flags |= + VNET(nfsrv_stablefirst).nsf_flags |= (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); } - if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) { + if ((VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; @@ -4446,8 +4240,8 @@ nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, * extend grace a bit. */ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > - NFSD_VNET(nfsrv_stablefirst).nsf_eograce) - NFSD_VNET(nfsrv_stablefirst).nsf_eograce = + VNET(nfsrv_stablefirst).nsf_eograce) + VNET(nfsrv_stablefirst).nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } @@ -4882,7 +4676,7 @@ tryagain: void nfsrv_setupstable(NFSPROC_T *p) { - struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst); + struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst); struct nfsrv_stable *sp, *nsp; struct nfst_rec *tsp; int error, i, tryagain; @@ -4898,7 +4692,7 @@ nfsrv_setupstable(NFSPROC_T *p) /* * Set Grace over just until the file reads successfully. 
*/ - NFSD_VNET(nfsrvboottime) = time_second; + VNET(nfsrvboottime) = time_second; LIST_INIT(&sf->nsf_head); sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK); sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; @@ -4933,8 +4727,8 @@ nfsrv_setupstable(NFSPROC_T *p) do { tryagain = 0; for (i = 0; i < sf->nsf_numboots; i++) { - if (NFSD_VNET(nfsrvboottime) == sf->nsf_bootvals[i]) { - NFSD_VNET(nfsrvboottime)++; + if (VNET(nfsrvboottime) == sf->nsf_bootvals[i]) { + VNET(nfsrvboottime)++; tryagain = 1; break; } @@ -5017,7 +4811,7 @@ nfsrv_setupstable(NFSPROC_T *p) void nfsrv_updatestable(NFSPROC_T *p) { - struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst); + struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst); struct nfsrv_stable *sp, *nsp; int i; struct nfsvattr nva; @@ -5046,7 +4840,7 @@ nfsrv_updatestable(NFSPROC_T *p) sf->nsf_bootvals = (time_t *)malloc(sizeof(time_t), M_TEMP, M_WAITOK); } - sf->nsf_bootvals[0] = NFSD_VNET(nfsrvboottime); + sf->nsf_bootvals[0] = VNET(nfsrvboottime); sf->nsf_lease = nfsrv_lease; NFSVNO_ATTRINIT(&nva); NFSVNO_SETATTRVAL(&nva, size, 0); @@ -5100,7 +4894,7 @@ nfsrv_updatestable(NFSPROC_T *p) void nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p) { - struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst); + struct nfsrv_stablefirst *sf = &VNET(nfsrv_stablefirst); struct nfst_rec *sp; int error; @@ -5133,12 +4927,12 @@ nfsrv_markstable(struct nfsclient *clp) /* * First find the client structure. */ - LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) { + LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } - if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head)) + if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head)) return; /* @@ -5160,12 +4954,12 @@ nfsrv_markreclaim(struct nfsclient *clp) /* * First find the client structure. 
*/ - LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) { + LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; } - if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head)) + if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head)) return; /* @@ -5191,7 +4985,7 @@ nfsrv_checkstable(struct nfsclient *clp) /* * First, find the entry for the client. */ - LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) { + LIST_FOREACH(sp, &VNET(nfsrv_stablefirst).nsf_head, nst_list) { if (sp->nst_len == clp->lc_idlen && !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len)) break; @@ -5201,9 +4995,9 @@ nfsrv_checkstable(struct nfsclient *clp) * If not in the list, state was revoked or no state was issued * since the previous reboot, a reclaim is denied. */ - if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head) || + if (sp == LIST_END(&VNET(nfsrv_stablefirst).nsf_head) || (sp->nst_flag & NFSNST_REVOKE) || - !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_OK)) + !(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_OK)) return (1); return (0); } @@ -5231,7 +5025,7 @@ nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp, * If lease hasn't expired, we can't fix it. */ if (clp->lc_expiry >= NFSD_MONOSEC || - !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) + !(VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) return (0); if (*haslockp == 0) { NFSUNLOCKSTATE(); @@ -5630,7 +5424,7 @@ nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p) * First, check to see if the server is currently running and it has * been called for a regular file when issuing delegations. 
*/ - if (NFSD_VNET(nfsrv_numnfsd) == 0 || vp->v_type != VREG || + if (VNET(nfsrv_numnfsd) == 0 || vp->v_type != VREG || nfsrv_issuedelegs == 0) return; @@ -5864,12 +5658,12 @@ nfsrv_throwawayopens(NFSPROC_T *p) int i; NFSLOCKSTATE(); - NFSD_VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NOOPENS; + VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { - LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash, + LIST_FOREACH_SAFE(clp, &VNET(nfsclienthash)[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && @@ -5937,7 +5731,7 @@ static time_t nfsrv_leaseexpiry(void) { - if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce > NFSD_MONOSEC) + if (VNET(nfsrv_stablefirst).nsf_eograce > NFSD_MONOSEC) return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA)); return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA); } @@ -6258,7 +6052,7 @@ nfsrv_throwawayallstate(NFSPROC_T *p) * For each client, clean out the state and then free the structure. */ for (i = 0; i < nfsrv_clienthashsize; i++) { - LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash, + LIST_FOREACH_SAFE(clp, &VNET(nfsclienthash)[i], lc_hash, nclp) { nfsrv_cleanclient(clp, p, false, NULL); nfsrv_freedeleglist(&clp->lc_deleg); @@ -6272,7 +6066,7 @@ nfsrv_throwawayallstate(NFSPROC_T *p) * Also, free up any remaining lock file structures. 
*/ for (i = 0; i < nfsrv_lockhashsize; i++) { - LIST_FOREACH_SAFE(lfp, &NFSD_VNET(nfslockhash)[i], lf_hash, + LIST_FOREACH_SAFE(lfp, &VNET(nfslockhash)[i], lf_hash, nlfp) { printf("nfsd unload: fnd a lock file struct\n"); nfsrv_freenfslockfile(lfp); @@ -6401,7 +6195,7 @@ nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m) sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); - if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & + if ((VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) { buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK); switch (nd->nd_nam->sa_family) { @@ -6786,7 +6580,7 @@ nfsrv_freeallbackchannel_xprts(void) int i; for (i = 0; i < nfsrv_clienthashsize; i++) { - LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) { + LIST_FOREACH(clp, &VNET(nfsclienthash)[i], lc_hash) { LIST_FOREACH(sep, &clp->lc_session, sess_list) { xprt = sep->sess_cbsess.nfsess_xprt; sep->sess_cbsess.nfsess_xprt = NULL; @@ -7502,7 +7296,7 @@ nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp, /* Insert the new layout in the lists. 
*/ *lypp = NULL; atomic_add_int(&nfsrv_layoutcnt, 1); - NFSD_VNET(nfsstatsv1_p)->srvlayouts++; + VNET(nfsstatsv1_p)->srvlayouts++; NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen); *layoutlenp = lyp->lay_layoutlen; TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list); @@ -7595,7 +7389,7 @@ nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp) NFSD_DEBUG(4, "Freelayout=%p\n", lyp); atomic_add_int(&nfsrv_layoutcnt, -1); - NFSD_VNET(nfsstatsv1_p)->srvlayouts--; + VNET(nfsstatsv1_p)->srvlayouts--; TAILQ_REMOVE(lhp, lyp, lay_list); free(lyp, M_NFSDSTATE); } @@ -9034,7 +8828,7 @@ nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp, new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); *new_delegp = NULL; - NFSD_VNET(nfsstatsv1_p)->srvdelegates++; + VNET(nfsstatsv1_p)->srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } diff --git a/sys/fs/nfsserver/nfs_nfsdsubs.c b/sys/fs/nfsserver/nfs_nfsdsubs.c index f4a934edede3..f7fd81ef4e1e 100644 --- a/sys/fs/nfsserver/nfs_nfsdsubs.c +++ b/sys/fs/nfsserver/nfs_nfsdsubs.c @@ -50,12 +50,12 @@ extern int nfsrv_useacl; extern uid_t nfsrv_defaultuid; extern gid_t nfsrv_defaultgid; -NFSD_VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); -NFSD_VNET_DECLARE(struct nfslockhashhead *, nfslockhash); -NFSD_VNET_DECLARE(struct nfssessionhash *, nfssessionhash); -NFSD_VNET_DECLARE(int, nfs_rootfhset); -NFSD_VNET_DECLARE(uid_t, nfsrv_defaultuid); -NFSD_VNET_DECLARE(gid_t, nfsrv_defaultgid); +VNET_DECLARE(struct nfsclienthashhead *, nfsclienthash); +VNET_DECLARE(struct nfslockhashhead *, nfslockhash); +VNET_DECLARE(struct nfssessionhash *, nfssessionhash); +VNET_DECLARE(int, nfs_rootfhset); +VNET_DECLARE(uid_t, nfsrv_defaultuid); +VNET_DECLARE(gid_t, nfsrv_defaultgid); char nfs_v2pubfh[NFSX_V2FH]; struct nfsdontlisthead nfsrv_dontlisthead; @@ -1618,10 +1618,10 @@ nfsrv_checkuidgid(struct nfsrv_descript *nd, struct nfsvattr *nvap) if (NFSVNO_NOTSETUID(nvap) && NFSVNO_NOTSETGID(nvap)) goto out; if 
((NFSVNO_ISSETUID(nvap) && - nvap->na_uid == NFSD_VNET(nfsrv_defaultuid) && + nvap->na_uid == VNET(nfsrv_defaultuid) && enable_nobodycheck == 1) || (NFSVNO_ISSETGID(nvap) && - nvap->na_gid == NFSD_VNET(nfsrv_defaultgid) && + nvap->na_gid == VNET(nfsrv_defaultgid) && enable_nogroupcheck == 1)) { error = NFSERR_BADOWNER; goto out; @@ -2153,20 +2153,20 @@ nfsd_init(void) * Initialize client queues. Don't free/reinitialize * them when nfsds are restarted. */ - NFSD_VNET(nfsclienthash) = malloc(sizeof(struct nfsclienthashhead) * + VNET(nfsclienthash) = malloc(sizeof(struct nfsclienthashhead) * nfsrv_clienthashsize, M_NFSDCLIENT, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_clienthashsize; i++) - LIST_INIT(&NFSD_VNET(nfsclienthash)[i]); - NFSD_VNET(nfslockhash) = malloc(sizeof(struct nfslockhashhead) * + LIST_INIT(&VNET(nfsclienthash)[i]); + VNET(nfslockhash) = malloc(sizeof(struct nfslockhashhead) * nfsrv_lockhashsize, M_NFSDLOCKFILE, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_lockhashsize; i++) - LIST_INIT(&NFSD_VNET(nfslockhash)[i]); - NFSD_VNET(nfssessionhash) = malloc(sizeof(struct nfssessionhash) * + LIST_INIT(&VNET(nfslockhash)[i]); + VNET(nfssessionhash) = malloc(sizeof(struct nfssessionhash) * nfsrv_sessionhashsize, M_NFSDSESSION, M_WAITOK | M_ZERO); for (i = 0; i < nfsrv_sessionhashsize; i++) { - mtx_init(&NFSD_VNET(nfssessionhash)[i].mtx, "nfssm", NULL, + mtx_init(&VNET(nfssessionhash)[i].mtx, "nfssm", NULL, MTX_DEF); - LIST_INIT(&NFSD_VNET(nfssessionhash)[i].list); + LIST_INIT(&VNET(nfssessionhash)[i].list); } LIST_INIT(&nfsrv_dontlisthead); TAILQ_INIT(&nfsrv_recalllisthead); @@ -2183,7 +2183,7 @@ int nfsd_checkrootexp(struct nfsrv_descript *nd) { - if (NFSD_VNET(nfs_rootfhset) == 0) + if (VNET(nfs_rootfhset) == 0) return (NFSERR_AUTHERR | AUTH_FAILED); /* * For NFSv4.1/4.2, if the client specifies SP4_NONE, then these diff --git a/sys/fs/p9fs/p9fs.h b/sys/fs/p9fs/p9fs.h index 2470734fef4d..b469495ef69e 100644 --- a/sys/fs/p9fs/p9fs.h +++ b/sys/fs/p9fs/p9fs.h @@ 
-154,6 +154,7 @@ struct p9fs_session { struct mtx p9fs_mtx; /* mutex used for guarding the chain.*/ STAILQ_HEAD( ,p9fs_node) virt_node_list; /* list of p9fs nodes in this session*/ struct p9_fid *mnt_fid; /* to save nobody 's fid for unmounting as root user */ + unsigned int name_max; /* cached max filename length */ }; struct p9fs_mount { diff --git a/sys/fs/p9fs/p9fs_subr.c b/sys/fs/p9fs/p9fs_subr.c index d0f04f6c5e97..f66e7a171029 100644 --- a/sys/fs/p9fs/p9fs_subr.c +++ b/sys/fs/p9fs/p9fs_subr.c @@ -275,16 +275,20 @@ p9fs_compatible_mode(struct p9_fid *fid, int mode) { /* * Return TRUE for an exact match. For OREAD and OWRITE, allow - * existing ORDWR fids to match. Only check the low two bits - * of mode. + * existing ORDWR fids to match. * - * TODO: figure out if this is correct for O_APPEND + * We mask both the requested mode and the existing fid's mode + * with 3 (0b11) to isolate the base access intent (O_RDONLY, + * O_WRONLY, or O_RDWR). This prevents extended open flags like + * O_EXCL or O_APPEND from causing a mismatch when we are merely + * looking for an appropriately privileged open descriptor. 
*/ int fid_mode = fid->mode & 3; - if (fid_mode == mode) + int req_mode = mode & 3; + if (fid_mode == req_mode) return (TRUE); if (fid_mode == P9PROTO_ORDWR) - return (mode == P9PROTO_OREAD || mode == P9PROTO_OWRITE); + return (req_mode == P9PROTO_OREAD || req_mode == P9PROTO_OWRITE); return (FALSE); } diff --git a/sys/fs/p9fs/p9fs_vfsops.c b/sys/fs/p9fs/p9fs_vfsops.c index 0e09c58e57b6..a0f0a5a4e494 100644 --- a/sys/fs/p9fs/p9fs_vfsops.c +++ b/sys/fs/p9fs/p9fs_vfsops.c @@ -119,6 +119,8 @@ p9fs_init(struct vfsconf *vfsp) p9fs_io_buffer_zone = uma_zcreate("p9fs io_buffer zone", P9FS_MTU, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + p9_init_zones(); + return (0); } @@ -127,6 +129,8 @@ static int p9fs_uninit(struct vfsconf *vfsp) { + p9_destroy_zones(); + uma_zdestroy(p9fs_node_zone); uma_zdestroy(p9fs_io_buffer_zone); uma_zdestroy(p9fs_getattr_zone); diff --git a/sys/fs/p9fs/p9fs_vnops.c b/sys/fs/p9fs/p9fs_vnops.c index 7141e9700602..2519e5cd050a 100644 --- a/sys/fs/p9fs/p9fs_vnops.c +++ b/sys/fs/p9fs/p9fs_vnops.c @@ -37,10 +37,12 @@ #include <sys/fcntl.h> #include <sys/namei.h> #include <sys/priv.h> -#include <sys/stat.h> -#include <sys/vnode.h> #include <sys/rwlock.h> +#include <sys/stat.h> +#include <sys/syslimits.h> +#include <sys/unistd.h> #include <sys/vmmeter.h> +#include <sys/vnode.h> #include <vm/vm.h> #include <vm/vm_extern.h> @@ -417,7 +419,7 @@ out: * the name and perm specified under the parent dir. If this succeeds (an entry * is created for the new file on the server), we create our metadata for this * file (vnode, p9fs node calling vget). Once we are done, we clunk the open - * fid of the parent directory. + * fid of the parent directory if it was not retained. 
*/ static int create_common(struct p9fs_node *dnp, struct componentname *cnp, @@ -471,6 +473,28 @@ create_common(struct p9fs_node *dnp, struct componentname *cnp, dnp, newfid, vpp, cnp->cn_nameptr); if (error != 0) goto out; + + if (ofid != NULL) { + struct p9fs_node *np = P9FS_VTON(*vpp); + ofid->v_opens = 0; + /* + * The 9P file creation request natively opens + * the file as part of the create operation and + * gives us a writable file handle (ofid). + * We retain this open descriptor by adding it + * to the VOFID list of the new vnode. This + * guarantees that a subsequent VOP_OPEN call + * does not need to send a redundant TOPEN + * request. This is particularly important + * because if a file was requested to be created + * with 000 permissions, the host will reject + * subsequent TOPEN requests due to insufficient + * permissions, which would cause an overall + * open() failure. + */ + p9fs_fid_add(np, ofid, VOFID); + ofid = NULL; /* prevent closing handle below */ + } } else { /* Not found return NOENTRY.*/ goto out; @@ -2248,6 +2272,72 @@ p9fs_delayed_setsize(struct vop_delayed_setsize_args *ap) return (0); } +static unsigned int +p9fs_get_name_max(struct p9fs_node *np) +{ + struct p9fs_session *vses = np->p9fs_ses; + struct p9_statfs statfs; + struct p9_fid *vfid; + unsigned int name_max; + int error = 0; + + name_max = atomic_load_int(&vses->name_max); + if (name_max != 0) + return (name_max); + + P9_DEBUG(VOPS, "%s: querying _PC_NAME_MAX\n", __func__); + vfid = p9fs_get_fid(vses->clnt, np, NULL, VFID, -1, &error); + if (vfid != NULL) { + error = p9_client_statfs(vfid, &statfs); + if (error == 0) { + /* + * Note that this is not strictly correct if you have + * nested mounts on the host (e.g. when using qemu with + * multidevs=remap), but is a better estimate than just + * returning 255. 
+ */ + name_max = statfs.namelen; + } + } + P9_DEBUG(VOPS, "%s: max_name=%u error=%d\n", __func__, name_max, error); + if (error != 0 || name_max == 0) { + printf("p9fs: warning: failed to query name_max (error %d), " + "using fallback %d\n", error, NAME_MAX); + name_max = NAME_MAX; /* fallback and prevent retrying */ + } + atomic_store_int(&vses->name_max, name_max); + return (name_max); +} + +/* + * Return POSIX pathconf information applicable to p9fs filesystems. + */ +static int +p9fs_pathconf(struct vop_pathconf_args *ap) +{ + int error = 0; + struct vnode *vp = ap->a_vp; + struct p9fs_node *np = P9FS_VTON(vp); + + switch (ap->a_name) { + case _PC_NAME_MAX: + *ap->a_retval = p9fs_get_name_max(np); + break; + case _PC_SYMLINK_MAX: + case _PC_PATH_MAX: + /* + * These are conservative estimates, the real value depends on + * the host file system. + */ + *ap->a_retval = MAXPATHLEN; + break; + default: + error = vop_stdpathconf(ap); + break; + } + return (error); +} + struct vop_vector p9fs_vnops = { .vop_default = &default_vnodeops, .vop_lookup = p9fs_lookup, @@ -2257,6 +2347,7 @@ struct vop_vector p9fs_vnops = { .vop_delayed_setsize = p9fs_delayed_setsize, .vop_getattr = p9fs_getattr_dotl, .vop_setattr = p9fs_setattr_dotl, + .vop_pathconf = p9fs_pathconf, .vop_reclaim = p9fs_reclaim, .vop_inactive = p9fs_inactive, .vop_readdir = p9fs_readdir, diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 8e3b41170cab..958ec559fd8d 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -1435,7 +1435,8 @@ exec_prealloc_args_kva(void *arg __unused) mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF); for (i = 0; i < exec_map_entries; i++) { argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK); - argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size); + argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size, + ptoa(exec_map_guard_pages)); argkva->gen = exec_args_gen; SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); } diff --git 
a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 4adbd71fae24..07c137b1e5ab 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -3084,7 +3084,7 @@ db_sysctl_cmd_usage(void) " \n" "For example: \n" "sysctl vm.v_free_min \n" - "vn.v_free_min: 12669 \n" + "vm.v_free_min: 12669 \n" ); } diff --git a/sys/kern/subr_eventhandler.c b/sys/kern/subr_eventhandler.c index 7568101dabd2..ccc62253d81e 100644 --- a/sys/kern/subr_eventhandler.c +++ b/sys/kern/subr_eventhandler.c @@ -198,7 +198,10 @@ _eventhandler_deregister(struct eventhandler_list *list, eventhandler_tag tag, } else { CTR3(KTR_EVH, "%s: marking item %p from \"%s\" as dead", __func__, ep, list->el_name); + KASSERT(ep->ee_priority != EHE_DEAD_PRIORITY, + ("%s: handler for %s is dead", __func__, list->el_name)); ep->ee_priority = EHE_DEAD_PRIORITY; + list->el_deadcount++; } } else { /* remove entire list */ @@ -213,11 +216,15 @@ _eventhandler_deregister(struct eventhandler_list *list, eventhandler_tag tag, } else { CTR2(KTR_EVH, "%s: marking all items from \"%s\" as dead", __func__, list->el_name); - TAILQ_FOREACH(ep, &list->el_entries, ee_link) + TAILQ_FOREACH(ep, &list->el_entries, ee_link) { + KASSERT(ep->ee_priority != EHE_DEAD_PRIORITY, + ("%s: handler for %s is dead", __func__, list->el_name)); ep->ee_priority = EHE_DEAD_PRIORITY; + list->el_deadcount++; + } } } - while (wait && list->el_runcount > 0) + while (wait && list->el_deadcount > 0) mtx_sleep(list, &list->el_lock, 0, "evhrm", 0); EHL_UNLOCK(list); } @@ -281,10 +288,12 @@ void eventhandler_prune_list(struct eventhandler_list *list) { struct eventhandler_entry *ep, *en; - int pruned = 0; + int pruned __diagused; CTR2(KTR_EVH, "%s: pruning list \"%s\"", __func__, list->el_name); EHL_LOCK_ASSERT(list, MA_OWNED); + + pruned = 0; TAILQ_FOREACH_SAFE(ep, &list->el_entries, ee_link, en) { if (ep->ee_priority == EHE_DEAD_PRIORITY) { TAILQ_REMOVE(&list->el_entries, ep, ee_link); @@ -292,8 +301,11 @@ eventhandler_prune_list(struct 
eventhandler_list *list) pruned++; } } - if (pruned > 0) - wakeup(list); + KASSERT(pruned == list->el_deadcount, + ("%s: pruned %u entries from \"%s\" but expected %u", + __func__, pruned, list->el_name, list->el_deadcount)); + list->el_deadcount = 0; + wakeup(list); } /* diff --git a/sys/kern/subr_kobj.c b/sys/kern/subr_kobj.c index d97834b3fe58..a88639289954 100644 --- a/sys/kern/subr_kobj.c +++ b/sys/kern/subr_kobj.c @@ -308,7 +308,7 @@ kobj_init(kobj_t obj, kobj_class_t cls) { int error; - error = kobj_init1(obj, cls, M_NOWAIT); + error = kobj_init1(obj, cls, M_WAITOK); if (error != 0) panic("kobj_init1 failed: error %d", error); } @@ -317,6 +317,8 @@ void kobj_init_static(kobj_t obj, kobj_class_t cls) { + KASSERT(cls->ops != NULL, + ("%s: class %p is not compiled", __func__, cls)); KASSERT(kobj_mutex_inited == 0, ("%s: only supported during early cycles", __func__)); diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index c67996ad7df1..dece6457a4bf 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -108,7 +108,12 @@ proc_read_regs(struct thread *td, struct reg *regs) int proc_write_regs(struct thread *td, struct reg *regs) { + int error; + PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) + return (error); return (set_regs(td, regs)); } @@ -122,7 +127,12 @@ proc_read_dbregs(struct thread *td, struct dbreg *dbregs) int proc_write_dbregs(struct thread *td, struct dbreg *dbregs) { + int error; + PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) + return (error); return (set_dbregs(td, dbregs)); } @@ -140,7 +150,12 @@ proc_read_fpregs(struct thread *td, struct fpreg *fpregs) int proc_write_fpregs(struct thread *td, struct fpreg *fpregs) { + int error; + PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) + return (error); return (set_fpregs(td, 
fpregs)); } @@ -261,6 +276,10 @@ proc_write_regset(struct thread *td, int note, struct iovec *iov) if (regset->set == NULL) return (EINVAL); + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) + return (error); + p = td->td_proc; /* Drop the proc lock while allocating the temp buffer */ @@ -293,7 +312,12 @@ proc_read_regs32(struct thread *td, struct reg32 *regs32) int proc_write_regs32(struct thread *td, struct reg32 *regs32) { + int error; + PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) + return (error); return (set_regs32(td, regs32)); } @@ -307,7 +331,12 @@ proc_read_dbregs32(struct thread *td, struct dbreg32 *dbregs32) int proc_write_dbregs32(struct thread *td, struct dbreg32 *dbregs32) { + int error; + PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) + return (error); return (set_dbregs32(td, dbregs32)); } @@ -321,7 +350,12 @@ proc_read_fpregs32(struct thread *td, struct fpreg32 *fpregs32) int proc_write_fpregs32(struct thread *td, struct fpreg32 *fpregs32) { + int error; + PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) + return (error); return (set_fpregs32(td, fpregs32)); } #endif @@ -363,8 +397,8 @@ proc_rwmem(struct proc *p, struct uio *uio) fault_flags = writing ? 
VM_FAULT_DIRTY : VM_FAULT_NORMAL; if (writing) { - error = priv_check_cred(p->p_ucred, PRIV_PROC_MEM_WRITE); - if (error) + error = priv_check(curthread, PRIV_PROC_MEM_WRITE); + if (error != 0) return (error); } @@ -697,11 +731,11 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap) addr = uap->addr; break; } - if (error) + if (error != 0) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, uap->data); - if (error) + if (error != 0) return (error); switch (uap->req) { @@ -1237,7 +1271,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) CTR3(KTR_PTRACE, "PT_STEP: tid %d (pid %d), sig = %d", td2->td_tid, p->p_pid, data); error = ptrace_single_step(td2); - if (error) + if (error != 0) goto out; break; case PT_CONTINUE: @@ -1247,7 +1281,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) if (addr != (void *)1) { error = ptrace_set_pc(td2, (u_long)(uintfptr_t)addr); - if (error) + if (error != 0) goto out; td2->td_dbgflags |= TDB_USERWR; } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index d56aac883d9c..2508f7edd1c5 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -83,6 +83,7 @@ #include <sys/socketvar.h> #include <sys/signalvar.h> #include <sys/stat.h> +#include <sys/sysent.h> #include <sys/sx.h> #include <sys/sysctl.h> #include <sys/systm.h> @@ -2752,8 +2753,24 @@ uipc_ctloutput(struct socket *so, struct sockopt *sopt) error = EINVAL; } UNP_PCB_UNLOCK(unp); - if (error == 0) - error = sooptcopyout(sopt, &xu, sizeof(xu)); + if (error != 0) + break; +#ifdef COMPAT_FREEBSD32 + if (SV_PROC_FLAG(sopt->sopt_td->td_proc, SV_ILP32)) { + struct xucred32 xu32 = {}; + int i; + + xu32.cr_version = xu.cr_version; + xu32.cr_uid = xu.cr_uid; + xu32.cr_ngroups = xu.cr_ngroups; + for (i = 0; i < XU_NGROUPS; i++) + xu32.cr_groups[i] = xu.cr_groups[i]; + xu32.cr_pid = xu.cr_pid; + error = sooptcopyout(sopt, &xu32, sizeof(xu32)); + break; + } +#endif + error = sooptcopyout(sopt, &xu, 
sizeof(xu)); break; case LOCAL_CREDS: @@ -3159,6 +3176,8 @@ unp_soisdisconnected(struct socket *so) so->so_state |= SS_ISDISCONNECTED; so->so_state &= ~SS_ISCONNECTED; so->so_rcv.uxst_peer = NULL; + selwakeuppri(&so->so_wrsel, PSOCK); + KNOTE_LOCKED(&so->so_snd.sb_sel->si_note, 0); socantrcvmore_locked(so); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 8f47a7abbc5e..675f6fb4e526 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -727,7 +727,8 @@ bufspace_wait(struct bufdomain *bd, struct vnode *vp, int gbflags, BD_LOCK(bd); while (bd->bd_wanted) { if (vp != NULL && vp->v_type != VCHR && - (td->td_pflags & TDP_BUFNEED) == 0) { + (td->td_pflags & TDP_BUFNEED) == 0 && + vp->v_bufobj.bo_dirty.bv_cnt > 0) { BD_UNLOCK(bd); /* * getblk() is called with a vnode locked, and diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index bd7caa01e153..566dd3d8770d 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -62,6 +62,9 @@ #include <rpc/types.h> #include <rpc/auth.h> +/* Publicly exported FS */ +static struct nfs_public nfs_pub; + static MALLOC_DEFINE(M_NETADDR, "export_host", "Export host address structure"); #if defined(INET) || defined(INET6) diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 58975f7ac932..823ccc6cab71 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -237,9 +237,6 @@ static struct mtx mntid_mtx; */ static struct mtx __exclusive_cache_line vnode_list_mtx; -/* Publicly exported FS */ -struct nfs_public nfs_pub; - static uma_zone_t buf_trie_zone; static smr_t buf_trie_smr; diff --git a/sys/modules/ath10k/Makefile b/sys/modules/ath10k/Makefile index b28df768c3d5..becf42a6a79a 100644 --- a/sys/modules/ath10k/Makefile +++ b/sys/modules/ath10k/Makefile @@ -1,3 +1,5 @@ +.include <kmod.opts.mk> + DEVATH10KDIR= ${SRCTOP}/sys/contrib/dev/athk/ath10k .PATH: ${DEVATH10KDIR} diff --git a/sys/modules/ath11k/Makefile b/sys/modules/ath11k/Makefile index a2f0064c2ae9..eaf196af5b55 100644 --- 
a/sys/modules/ath11k/Makefile +++ b/sys/modules/ath11k/Makefile @@ -1,3 +1,5 @@ +.include <kmod.opts.mk> + DEVATH11KDIR= ${SRCTOP}/sys/contrib/dev/athk/ath11k .PATH: ${DEVATH11KDIR} diff --git a/sys/modules/ath12k/Makefile b/sys/modules/ath12k/Makefile index ecca0bc3b409..071bb88a8b37 100644 --- a/sys/modules/ath12k/Makefile +++ b/sys/modules/ath12k/Makefile @@ -1,3 +1,5 @@ +.include <kmod.opts.mk> + DEVATH12KDIR= ${SRCTOP}/sys/contrib/dev/athk/ath12k .PATH: ${DEVATH12KDIR} diff --git a/sys/modules/i2c/Makefile b/sys/modules/i2c/Makefile index 6561327abfca..ff4536694dfc 100644 --- a/sys/modules/i2c/Makefile +++ b/sys/modules/i2c/Makefile @@ -28,6 +28,7 @@ SUBDIR = \ .if !empty(OPT_FDT) SUBDIR += hym8563 \ + rs5c372a \ rv3032 \ rx8803 \ tca64xx \ diff --git a/sys/modules/i2c/rs5c372a/Makefile b/sys/modules/i2c/rs5c372a/Makefile new file mode 100644 index 000000000000..3bd387115a8a --- /dev/null +++ b/sys/modules/i2c/rs5c372a/Makefile @@ -0,0 +1,14 @@ +.PATH: ${SRCTOP}/sys/dev/iicbus/rtc/ +KMOD = rs5c372a +SRCS = rs5c372a.c + +# Generated files... +SRCS+= \ + bus_if.h \ + clock_if.h \ + device_if.h \ + iicbus_if.h \ + opt_platform.h \ + ofw_bus_if.h \ + +.include <bsd.kmod.mk> diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 1cf55ead9587..b4e4c1e4d29b 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -204,6 +204,9 @@ /* DECLARE_EVENT_CLASS() is available */ /* #undef HAVE_DECLARE_EVENT_CLASS */ +/* dentry aliases are in d_u member */ +/* #undef HAVE_DENTRY_D_U_ALIASES */ + /* 3-arg dequeue_signal() takes a type argument */ /* #undef HAVE_DEQUEUE_SIGNAL_3ARG_TYPE */ @@ -911,7 +914,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.4.99-522-FreeBSD_g1644e2ffd" +#define ZFS_META_ALIAS "zfs-2.4.99-566-FreeBSD_ga12c6ed62" /* Define the project author. 
*/ #define ZFS_META_AUTHOR "OpenZFS" @@ -920,7 +923,7 @@ /* #undef ZFS_META_DATA */ /* Define the maximum compatible kernel version. */ -#define ZFS_META_KVER_MAX "6.19" +#define ZFS_META_KVER_MAX "7.0" /* Define the minimum compatible kernel version. */ #define ZFS_META_KVER_MIN "4.18" @@ -941,7 +944,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "522-FreeBSD_g1644e2ffd" +#define ZFS_META_RELEASE "566-FreeBSD_ga12c6ed62" /* Define the project version. */ #define ZFS_META_VERSION "2.4.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 9c5cec1628b7..113c3d7a5bbb 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.4.99-522-g1644e2ffd" +#define ZFS_META_GITREV "zfs-2.4.99-566-ga12c6ed62-dirty" diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index c254e2aa3107..ab05b4d075c7 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -236,25 +236,15 @@ static eventhandler_tag ifevent_tag; * must be sleepable and also have safe concurrent access to a vlan interface. * Since the sx(9) exists, it is used by default in most paths unless sleeping * is not permitted, or if it is not clear whether sleeping is permitted. 
- * */ -#define _VLAN_SX_ID ifv_sx - -static struct sx _VLAN_SX_ID; - -#define VLAN_LOCKING_INIT() \ - sx_init_flags(&_VLAN_SX_ID, "vlan_sx", SX_RECURSE) - -#define VLAN_LOCKING_DESTROY() \ - sx_destroy(&_VLAN_SX_ID) +static struct sx vlan_sx; -#define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID) -#define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID) -#define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID) -#define VLAN_XUNLOCK() sx_xunlock(&_VLAN_SX_ID) -#define VLAN_SLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_SLOCKED) -#define VLAN_XLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_XLOCKED) -#define VLAN_SXLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_LOCKED) +#define VLAN_LOCK_INIT() \ + sx_init_flags(&vlan_sx, "vlan_sx", SX_RECURSE) +#define VLAN_LOCK_DESTROY() sx_destroy(&vlan_sx) +#define VLAN_LOCK() sx_xlock(&vlan_sx) +#define VLAN_UNLOCK() sx_xunlock(&vlan_sx) +#define VLAN_LOCK_ASSERT() sx_assert(&vlan_sx, SA_XLOCKED) /* * We also have a per-trunk mutex that should be acquired when changing @@ -262,9 +252,9 @@ static struct sx _VLAN_SX_ID; */ #define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF) #define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock) -#define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock) -#define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock) -#define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED); +#define TRUNK_LOCK(trunk) mtx_lock(&(trunk)->lock) +#define TRUNK_UNLOCK(trunk) mtx_unlock(&(trunk)->lock) +#define TRUNK_LOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED) /* * The VLAN_ARRAY substitutes the dynamic hash with a static array @@ -434,7 +424,7 @@ vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) int i, b; struct ifvlan *ifv2; - VLAN_XLOCK_ASSERT(); + VLAN_LOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; @@ -464,7 +454,7 @@ vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) int i, b; struct ifvlan *ifv2; - VLAN_XLOCK_ASSERT(); + 
VLAN_LOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << (trunk->hwidth - 1); @@ -492,7 +482,7 @@ vlan_growhash(struct ifvlantrunk *trunk, int howmuch) struct ifvlanhead *hash2; int hwidth2, i, j, n, n2; - VLAN_XLOCK_ASSERT(); + VLAN_LOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); if (howmuch == 0) { @@ -603,7 +593,7 @@ vlan_inithash(struct ifvlantrunk *trunk) static void trunk_destroy(struct ifvlantrunk *trunk) { - VLAN_XLOCK_ASSERT(); + VLAN_LOCK_ASSERT(); vlan_freehash(trunk); trunk->parent->if_vlantrunk = NULL; @@ -629,7 +619,7 @@ vlan_setmulti(struct ifnet *ifp) struct vlan_mc_entry *mc; int error; - VLAN_XLOCK_ASSERT(); + VLAN_LOCK_ASSERT(); /* Find the parent. */ sc = ifp->if_softc; @@ -693,11 +683,11 @@ vlan_ifevent(void *arg __unused, struct ifnet *ifp, int event) return; } - TRUNK_WLOCK(trunk); + TRUNK_LOCK(trunk); VLAN_FOREACH(ifv, trunk) { ifv->ifv_ifp->if_baudrate = ifp->if_baudrate; } - TRUNK_WUNLOCK(trunk); + TRUNK_UNLOCK(trunk); NET_EPOCH_EXIT(et); } @@ -728,7 +718,7 @@ vlan_iflladdr(void *arg __unused, struct ifnet *ifp) * We need an exclusive lock here to prevent concurrent SIOCSIFLLADDR * ioctl calls on the parent garbling the lladdr of the child vlan. 
*/ - TRUNK_WLOCK(trunk); + TRUNK_LOCK(trunk); VLAN_FOREACH(ifv, trunk) { /* * Copy new new lladdr into the ifv_ifp, enqueue a task @@ -744,7 +734,7 @@ vlan_iflladdr(void *arg __unused, struct ifnet *ifp) sdl->sdl_alen = ifp->if_addrlen; taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); } - TRUNK_WUNLOCK(trunk); + TRUNK_UNLOCK(trunk); NET_EPOCH_EXIT(et); } @@ -761,10 +751,10 @@ vlan_ifdetach(void *arg __unused, struct ifnet *ifp) struct ifvlan *ifv; struct ifvlantrunk *trunk; - VLAN_XLOCK(); + VLAN_LOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { - VLAN_XUNLOCK(); + VLAN_UNLOCK(); return; } @@ -779,7 +769,7 @@ vlan_ifdetach(void *arg __unused, struct ifnet *ifp) /* Trunk should have been destroyed in vlan_unconfig(). */ KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__)); - VLAN_XUNLOCK(); + VLAN_UNLOCK(); } /* @@ -916,7 +906,7 @@ vlan_modevent(module_t mod, int type, void *data) vlan_ifevent, NULL, EVENTHANDLER_PRI_ANY); if (ifevent_tag == NULL) return (ENOMEM); - VLAN_LOCKING_INIT(); + VLAN_LOCK_INIT(); vlan_input_p = vlan_input; vlan_link_state_p = vlan_link_state; vlan_trunk_cap_p = vlan_trunk_capabilities; @@ -954,7 +944,7 @@ vlan_modevent(module_t mod, int type, void *data) vlan_cookie_p = NULL; vlan_setcookie_p = NULL; vlan_devat_p = NULL; - VLAN_LOCKING_DESTROY(); + VLAN_LOCK_DESTROY(); if (bootverbose) printf("vlan: unloaded\n"); break; @@ -1343,9 +1333,9 @@ vlan_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw) uint16_t vlan_id = 0; uint16_t vlan_proto = 0; - VLAN_SLOCK(); + VLAN_LOCK(); if (__predict_false((ifv = ifp->if_softc) == NULL)) { - VLAN_SUNLOCK(); + VLAN_UNLOCK(); /* * XXXGL: the interface already went through if_dead(). This * check to be removed when we got better interface removal. 
@@ -1356,7 +1346,7 @@ vlan_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw) parent_index = PARENT(ifv)->if_index; vlan_id = ifv->ifv_vid; vlan_proto = ifv->ifv_proto; - VLAN_SUNLOCK(); + VLAN_UNLOCK(); if (parent_index != 0) nlattr_add_u32(nw, IFLA_LINK, parent_index); @@ -1696,7 +1686,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, if (trunk->parent != p) return (EBUSY); - VLAN_XLOCK(); + VLAN_LOCK(); ifv->ifv_proto = proto; @@ -1720,17 +1710,17 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, goto done; } - VLAN_XLOCK(); + VLAN_LOCK(); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); vlan_inithash(trunk); TRUNK_LOCK_INIT(trunk); - TRUNK_WLOCK(trunk); + TRUNK_LOCK(trunk); p->if_vlantrunk = trunk; trunk->parent = p; if_ref(trunk->parent); - TRUNK_WUNLOCK(trunk); + TRUNK_UNLOCK(trunk); } else { trunk = p->if_vlantrunk; } @@ -1838,7 +1828,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, done: if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); - VLAN_XUNLOCK(); + VLAN_UNLOCK(); return (error); } @@ -1847,9 +1837,9 @@ static void vlan_unconfig(struct ifnet *ifp) { - VLAN_XLOCK(); + VLAN_LOCK(); vlan_unconfig_locked(ifp, 0); - VLAN_XUNLOCK(); + VLAN_UNLOCK(); } static void @@ -1861,7 +1851,7 @@ vlan_unconfig_locked(struct ifnet *ifp, int departing) struct ifnet *parent; int error; - VLAN_XLOCK_ASSERT(); + VLAN_LOCK_ASSERT(); ifv = ifp->if_softc; trunk = ifv->ifv_trunk; @@ -1935,7 +1925,7 @@ vlan_setflag(struct ifnet *ifp, int flag, int status, struct ifvlan *ifv; int error; - VLAN_SXLOCK_ASSERT(); + VLAN_LOCK_ASSERT(); ifv = ifp->if_softc; status = status ? 
(ifp->if_flags & flag) : 0; @@ -1994,13 +1984,13 @@ vlan_link_state(struct ifnet *ifp) return; } - TRUNK_WLOCK(trunk); + TRUNK_LOCK(trunk); VLAN_FOREACH(ifv, trunk) { ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; if_link_state_change(ifv->ifv_ifp, trunk->parent->if_link_state); } - TRUNK_WUNLOCK(trunk); + TRUNK_UNLOCK(trunk); NET_EPOCH_EXIT(et); } @@ -2011,7 +2001,7 @@ vlan_link_state(struct ifnet *ifp) int error; \ \ ifv = ifp->if_softc; \ - VLAN_SLOCK(); \ + VLAN_LOCK(); \ if (TRUNK(ifv) != NULL) { \ p = PARENT(ifv); \ if_ref(p); \ @@ -2020,7 +2010,7 @@ vlan_link_state(struct ifnet *ifp) } else { \ error = ENXIO; \ } \ - VLAN_SUNLOCK(); \ + VLAN_UNLOCK(); \ return (error); @@ -2091,7 +2081,7 @@ vlan_capabilities(struct ifvlan *ifv) u_long hwa = 0; NET_EPOCH_ASSERT(); - VLAN_SXLOCK_ASSERT(); + VLAN_LOCK_ASSERT(); p = PARENT(ifv); ifp = ifv->ifv_ifp; @@ -2222,17 +2212,17 @@ vlan_trunk_capabilities(struct ifnet *ifp) struct ifvlantrunk *trunk; struct ifvlan *ifv; - VLAN_SLOCK(); + VLAN_LOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { - VLAN_SUNLOCK(); + VLAN_UNLOCK(); return; } NET_EPOCH_ENTER(et); VLAN_FOREACH(ifv, trunk) vlan_capabilities(ifv); NET_EPOCH_EXIT(et); - VLAN_SUNLOCK(); + VLAN_UNLOCK(); } static int @@ -2267,7 +2257,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifp->if_addrlen); break; case SIOCGIFMEDIA: - VLAN_SLOCK(); + VLAN_LOCK(); if (TRUNK(ifv) != NULL) { p = PARENT(ifv); if_ref(p); @@ -2288,7 +2278,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } else { error = EINVAL; } - VLAN_SUNLOCK(); + VLAN_UNLOCK(); break; case SIOCSIFMEDIA: @@ -2299,10 +2289,10 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) /* * Set the interface MTU. 
*/ - VLAN_SLOCK(); + VLAN_LOCK(); trunk = TRUNK(ifv); if (trunk != NULL) { - TRUNK_WLOCK(trunk); + TRUNK_LOCK(trunk); if (ifr->ifr_mtu > (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || ifr->ifr_mtu < @@ -2310,10 +2300,10 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; - TRUNK_WUNLOCK(trunk); + TRUNK_UNLOCK(trunk); } else error = EINVAL; - VLAN_SUNLOCK(); + VLAN_UNLOCK(); break; case SIOCSETVLAN: @@ -2376,14 +2366,14 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) } #endif bzero(&vlr, sizeof(vlr)); - VLAN_SLOCK(); + VLAN_LOCK(); if (TRUNK(ifv) != NULL) { strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); vlr.vlr_tag = ifv->ifv_vid; vlr.vlr_proto = ifv->ifv_proto; } - VLAN_SUNLOCK(); + VLAN_UNLOCK(); error = copyout(&vlr, ifr_data_get_ptr(ifr), sizeof(vlr)); break; @@ -2392,10 +2382,10 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) * We should propagate selected flags to the parent, * e.g., promiscuous mode. */ - VLAN_SLOCK(); + VLAN_LOCK(); if (TRUNK(ifv) != NULL) error = vlan_setflags(ifp, 1); - VLAN_SUNLOCK(); + VLAN_UNLOCK(); break; case SIOCADDMULTI: @@ -2407,11 +2397,11 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) * XXX We need the rmlock here to avoid sleeping while * holding in6_multi_mtx. 
*/ - VLAN_XLOCK(); + VLAN_LOCK(); trunk = TRUNK(ifv); if (trunk != NULL) error = vlan_setmulti(ifp); - VLAN_XUNLOCK(); + VLAN_UNLOCK(); break; case SIOCGVLANPCP: @@ -2445,7 +2435,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; case SIOCSIFCAP: - VLAN_SLOCK(); + VLAN_LOCK(); ifv->ifv_capenable = ifr->ifr_reqcap; trunk = TRUNK(ifv); if (trunk != NULL) { @@ -2455,7 +2445,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) vlan_capabilities(ifv); NET_EPOCH_EXIT(et); } - VLAN_SUNLOCK(); + VLAN_UNLOCK(); break; default: diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 3edf08aefeb5..a5f377e84307 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -873,6 +873,13 @@ pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) goto cleanup; + if (sks->proto == IPPROTO_SCTP && st->src.scrub == NULL) { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: invalid SCTP state from creator id: %08x\n", __func__, + ntohl(sp->pfs_1301.creatorid)); + goto cleanup; + } + st->id = sp->pfs_1301.id; st->creatorid = sp->pfs_1301.creatorid; pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index d3e60b137c1a..ab2140a60ce7 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -3226,14 +3226,12 @@ pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, PF_RULES_WUNLOCK(); pf_hash_rule(rule); - if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].inactive.tree, rule) != NULL) { - PF_RULES_WLOCK(); - TAILQ_REMOVE(ruleset->rules[rs_num].inactive.ptr, rule, entries); - ruleset->rules[rs_num].inactive.rcount--; - pf_free_rule(rule); - rule = NULL; - ERROUT(EEXIST); - } + /** + * Note: rule hashes may collide. Accept this, because the worst that can + * happen is that we get counter preservation wrong. + * Failing to insert here would be worse. 
+ **/ + RB_INSERT(pf_krule_global, ruleset->rules[rs_num].inactive.tree, rule); PF_CONFIG_UNLOCK(); return (0); @@ -4895,14 +4893,8 @@ DIOCGETRULENV_error: ruleset->rules[rs_num].active.rcount--; } else { pf_hash_rule(newrule); - if (RB_INSERT(pf_krule_global, - ruleset->rules[rs_num].active.tree, newrule) != NULL) { - pf_free_rule(newrule); - PF_RULES_WUNLOCK(); - PF_CONFIG_UNLOCK(); - error = EEXIST; - goto fail; - } + RB_INSERT(pf_krule_global, + ruleset->rules[rs_num].active.tree, newrule); if (oldrule == NULL) TAILQ_INSERT_TAIL( diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index e4ce9e64f637..d1beb7681c21 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -51,8 +51,11 @@ #include <netlink/netlink_debug.h> _DECLARE_DEBUG(LOG_DEBUG); +static bool nlattr_add_labels(struct nl_writer *nw, int attrtype, + const struct pf_krule *r); +static bool nlattr_add_rule(struct nl_writer *nw, const struct pf_krule *rule); static bool nlattr_add_pf_threshold(struct nl_writer *, int, - struct pf_kthreshold *); + const struct pf_kthreshold *); struct nl_parsed_state { uint8_t version; @@ -63,6 +66,7 @@ struct nl_parsed_state { sa_family_t af; struct pf_addr addr; struct pf_addr mask; + bool include_rule; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) @@ -75,6 +79,7 @@ static const struct nlattr_parser nla_p_state[] = { { .type = PF_ST_PROTO, .off = _OUT(proto), .cb = nlattr_get_uint16 }, { .type = PF_ST_FILTER_ADDR, .off = _OUT(addr), .cb = nlattr_get_in6_addr }, { .type = PF_ST_FILTER_MASK, .off = _OUT(mask), .cb = nlattr_get_in6_addr }, + { .type = PF_ST_INCLUDE_RULE, .off = _OUT(include_rule), .cb = nlattr_get_bool }, }; static const struct nlfield_parser nlf_p_generic[] = { { .off_in = _IN(version), .off_out = _OUT(version), .cb = nlf_get_u8 }, @@ -146,8 +151,26 @@ dump_state_key(struct nl_writer *nw, int attr, const struct pf_state_key *key) return (true); } +static bool +nlattr_add_rule_nested(struct nl_writer *nw, int attr, 
const struct pf_krule *r) +{ + int off; + bool ret; + + off = nlattr_add_nested(nw, attr); + if (off == 0) + return (false); + + ret = nlattr_add_rule(nw, r); + + nlattr_set_len(nw, off); + + return (ret); +} + static int -dump_state(struct nlpcb *nlp, const struct nlmsghdr *hdr, struct pf_kstate *s, +dump_state(struct nlpcb *nlp, const struct nlmsghdr *hdr, + struct nl_parsed_state *attrs, struct pf_kstate *s, struct nl_pstate *npt) { struct nl_writer *nw = npt->nw; @@ -231,6 +254,9 @@ dump_state(struct nlpcb *nlp, const struct nlmsghdr *hdr, struct pf_kstate *s, if (!dump_state_peer(nw, PF_ST_PEER_DST, &s->dst)) goto enomem; + if (attrs->include_rule && s->rule != NULL) + nlattr_add_rule_nested(nw, PF_ST_CREATED_BY_RULE, s->rule); + if (nlmsg_end(nw)) return (0); @@ -282,7 +308,7 @@ handle_dumpstates(struct nlpcb *nlp, struct nl_parsed_state *attrs, &attrs->mask, &attrs->addr, af)) continue; - error = dump_state(nlp, hdr, s, npt); + error = dump_state(nlp, hdr, attrs, s, npt); if (error != 0) break; } @@ -307,7 +333,7 @@ handle_getstate(struct nlpcb *nlp, struct nl_parsed_state *attrs, s = pf_find_state_byid(attrs->id, attrs->creatorid); if (s == NULL) return (ENOENT); - ret = dump_state(nlp, hdr, s, npt); + ret = dump_state(nlp, hdr, attrs, s, npt); PF_STATE_UNLOCK(s); return (ret); @@ -465,7 +491,8 @@ NL_DECLARE_ATTR_PARSER(rule_addr_parser, nla_p_ruleaddr); #undef _OUT static bool -nlattr_add_rule_addr(struct nl_writer *nw, int attrtype, struct pf_rule_addr *r) +nlattr_add_rule_addr(struct nl_writer *nw, int attrtype, + const struct pf_rule_addr *r) { struct pf_addr_wrap aw = {0}; int off = nlattr_add_nested(nw, attrtype); @@ -687,7 +714,8 @@ nlattr_get_nested_timeouts(struct nlattr *nla, struct nl_pstate *npt, const void } static bool -nlattr_add_timeout(struct nl_writer *nw, int attrtype, uint32_t *timeout) +nlattr_add_timeout(struct nl_writer *nw, int attrtype, + const uint32_t *timeout) { int off = nlattr_add_nested(nw, attrtype); @@ -875,76 +903,10 @@ 
out: return (error); } -struct nl_parsed_get_rule { - char anchor[MAXPATHLEN]; - uint8_t action; - uint32_t nr; - uint32_t ticket; - uint8_t clear; -}; -#define _OUT(_field) offsetof(struct nl_parsed_get_rule, _field) -static const struct nlattr_parser nla_p_getrule[] = { - { .type = PF_GR_ANCHOR, .off = _OUT(anchor), .arg = (void *)MAXPATHLEN, .cb = nlattr_get_chara }, - { .type = PF_GR_ACTION, .off = _OUT(action), .cb = nlattr_get_uint8 }, - { .type = PF_GR_NR, .off = _OUT(nr), .cb = nlattr_get_uint32 }, - { .type = PF_GR_TICKET, .off = _OUT(ticket), .cb = nlattr_get_uint32 }, - { .type = PF_GR_CLEAR, .off = _OUT(clear), .cb = nlattr_get_uint8 }, -}; -#undef _OUT -NL_DECLARE_PARSER(getrule_parser, struct genlmsghdr, nlf_p_empty, nla_p_getrule); - -static int -pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) +static bool +nlattr_add_rule(struct nl_writer *nw, const struct pf_krule *rule) { - char anchor_call[MAXPATHLEN]; - struct nl_parsed_get_rule attrs = {}; - struct nl_writer *nw = npt->nw; - struct genlmsghdr *ghdr_new; - struct pf_kruleset *ruleset; - struct pf_krule *rule; - u_int64_t src_nodes_total = 0; - int rs_num; - int error; - - error = nl_parse_nlmsg(hdr, &getrule_parser, npt, &attrs); - if (error != 0) - return (error); - - if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) - return (ENOMEM); - - ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); - ghdr_new->cmd = PFNL_CMD_GETRULE; - - PF_RULES_WLOCK(); - ruleset = pf_find_kruleset(attrs.anchor); - if (ruleset == NULL) { - PF_RULES_WUNLOCK(); - error = ENOENT; - goto out; - } - - rs_num = pf_get_ruleset_number(attrs.action); - if (rs_num >= PF_RULESET_MAX) { - PF_RULES_WUNLOCK(); - error = EINVAL; - goto out; - } - - if (attrs.ticket != ruleset->rules[rs_num].active.ticket) { - PF_RULES_WUNLOCK(); - error = EBUSY; - goto out; - } - - rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); - while ((rule != NULL) && (rule->nr != attrs.nr)) - rule = TAILQ_NEXT(rule, entries); - if 
(rule == NULL) { - PF_RULES_WUNLOCK(); - error = EBUSY; - goto out; - } + u_int64_t src_nodes_total = 0; nlattr_add_rule_addr(nw, PF_RT_SRC, &rule->src); nlattr_add_rule_addr(nw, PF_RT_DST, &rule->dst); @@ -1050,6 +1012,81 @@ pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) nlattr_add_u8(nw, PF_RT_SOURCE_LIMIT, rule->sourcelim.id); nlattr_add_u32(nw, PF_RT_SOURCE_LIMIT_ACTION, rule->sourcelim.limiter_action); + return (true); +} + +struct nl_parsed_get_rule { + char anchor[MAXPATHLEN]; + uint8_t action; + uint32_t nr; + uint32_t ticket; + uint8_t clear; +}; +#define _OUT(_field) offsetof(struct nl_parsed_get_rule, _field) +static const struct nlattr_parser nla_p_getrule[] = { + { .type = PF_GR_ANCHOR, .off = _OUT(anchor), .arg = (void *)MAXPATHLEN, .cb = nlattr_get_chara }, + { .type = PF_GR_ACTION, .off = _OUT(action), .cb = nlattr_get_uint8 }, + { .type = PF_GR_NR, .off = _OUT(nr), .cb = nlattr_get_uint32 }, + { .type = PF_GR_TICKET, .off = _OUT(ticket), .cb = nlattr_get_uint32 }, + { .type = PF_GR_CLEAR, .off = _OUT(clear), .cb = nlattr_get_uint8 }, +}; +#undef _OUT +NL_DECLARE_PARSER(getrule_parser, struct genlmsghdr, nlf_p_empty, nla_p_getrule); + +static int +pf_handle_getrule(struct nlmsghdr *hdr, struct nl_pstate *npt) +{ + char anchor_call[MAXPATHLEN]; + struct nl_parsed_get_rule attrs = {}; + struct nl_writer *nw = npt->nw; + struct genlmsghdr *ghdr_new; + struct pf_kruleset *ruleset; + struct pf_krule *rule; + int rs_num; + int error; + + error = nl_parse_nlmsg(hdr, &getrule_parser, npt, &attrs); + if (error != 0) + return (error); + + if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) + return (ENOMEM); + + ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); + ghdr_new->cmd = PFNL_CMD_GETRULE; + + PF_RULES_WLOCK(); + ruleset = pf_find_kruleset(attrs.anchor); + if (ruleset == NULL) { + PF_RULES_WUNLOCK(); + error = ENOENT; + goto out; + } + + rs_num = pf_get_ruleset_number(attrs.action); + if (rs_num >= PF_RULESET_MAX) { + 
PF_RULES_WUNLOCK(); + error = EINVAL; + goto out; + } + + if (attrs.ticket != ruleset->rules[rs_num].active.ticket) { + PF_RULES_WUNLOCK(); + error = EBUSY; + goto out; + } + + rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); + while ((rule != NULL) && (rule->nr != attrs.nr)) + rule = TAILQ_NEXT(rule, entries); + if (rule == NULL) { + PF_RULES_WUNLOCK(); + error = EBUSY; + goto out; + } + + nlattr_add_rule(nw, rule); + error = pf_kanchor_copyout(ruleset, rule, anchor_call, sizeof(anchor_call)); MPASS(error == 0); @@ -1729,7 +1766,7 @@ pf_handle_get_ruleset(struct nlmsghdr *hdr, struct nl_pstate *npt) static bool nlattr_add_pf_threshold(struct nl_writer *nw, int attrtype, - struct pf_kthreshold *t) + const struct pf_kthreshold *t) { int off = nlattr_add_nested(nw, attrtype); int conn_rate_count = 0; diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h index 6591c707d9a4..4d0186ea86a5 100644 --- a/sys/netpfil/pf/pf_nl.h +++ b/sys/netpfil/pf/pf_nl.h @@ -152,6 +152,8 @@ enum pfstate_type_t { PF_ST_RT_IFNAME = 37, /* string */ PF_ST_SRC_NODE_FLAGS = 38, /* u8 */ PF_ST_RT_AF = 39, /* u8 */ + PF_ST_INCLUDE_RULE = 40, /* bool */ + PF_ST_CREATED_BY_RULE = 41, /* nested, pf_rule_type_t */ }; enum pf_addr_type_t { diff --git a/sys/powerpc/conf/QORIQ64 b/sys/powerpc/conf/QORIQ64 index c64d015c37d1..d1f4f3401d23 100644 --- a/sys/powerpc/conf/QORIQ64 +++ b/sys/powerpc/conf/QORIQ64 @@ -10,7 +10,6 @@ ident QORIQ64 machine powerpc powerpc64 -include "dpaa/config.dpaa" makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols makeoptions WITH_CTF=1 #makeoptions WERROR="-Werror -Wno-format" @@ -22,6 +21,7 @@ options _KPOSIX_PRIORITY_SCHEDULING options ALT_BREAK_TO_DEBUGGER options BREAK_TO_DEBUGGER options BOOTP +#options BOOTP_WIRED_TO=memac0 options BOOTP_NFSROOT #options BOOTP_NFSV3 options CD9660 @@ -134,3 +134,5 @@ device uinput # install /dev/uinput cdev options HID_DEBUG # enable debug msgs device hid # Generic HID support device hidbus # Generic HID bus + 
+options BOOTVERBOSE diff --git a/sys/powerpc/conf/dpaa/DPAA b/sys/powerpc/conf/dpaa/DPAA deleted file mode 100644 index 323c71048c12..000000000000 --- a/sys/powerpc/conf/dpaa/DPAA +++ /dev/null @@ -1,99 +0,0 @@ -# -# Common kernel config for Freescale QorIQ DPAA development boards like the -# P2041RDB, P3041DS and P5020DS. -# -# This is not standalone kernel config. Use it only for including -# purposes. -# - -cpu BOOKE -cpu BOOKE_E500 - -machine powerpc powerpc -#makeoptions DEBUG=-g #Build kernel with gdb(1) debug symbols -#makeoptions WERROR="-Werror -Wno-format" -makeoptions NO_MODULES=yes - -# Platform support -options MPC85XX #Freescale SoC family - -options SMP #Symmetric Multi Processing - -#options SCHED_4BSD #4BSD scheduler -options INET #InterNETworking -options INET6 #IPv6 communications protocols -options FFS #Berkeley Fast Filesystem -options NFSCL #New Network Filesystem Client -options SOFTUPDATES #Enable FFS soft updates support -options PROCFS #Process filesystem (requires PSEUDOFS) -options PSEUDOFS #Pseudo-filesystem framework -options GEOM_PART_GPT #GUID Partition Tables. -options GEOM_LABEL -options COMPAT_43 #Compatible with BSD 4.3 [KEEP THIS!] 
-options SYSVSHM #SYSV-style shared memory -options SYSVMSG #SYSV-style message queues -options SYSVSEM #SYSV-style semaphores -options _KPOSIX_PRIORITY_SCHEDULING #Posix P1003_1B real-time extensions - -options KDB #Enable the kernel debugger -options DDB #Support DDB -options GDB - -options ALT_BREAK_TO_DEBUGGER -options BREAK_TO_DEBUGGER -options DIAGNOSTIC -options INVARIANTS #Enable calls of extra sanity checking -options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS - -options KTR -options KTR_COMPILE=0xffffffff -options KTR_MASK=KTR_PMAP -options KTR_ENTRIES=8192 - -options WITNESS #Enable checks to detect deadlocks and cycles -#options WITNESS_KDB - -# RamFS Root -#options GEOM_UZIP -#options MD_ROOT -#options MD_ROOT_SIZE=10240 - -# Netbooting -options BOOTP -options BOOTP_NFSROOT -options BOOTP_NFSV3 -options BOOTP_WIRED_TO=dtsec3 -options NFS_ROOT - -# Block devices -device mmc -device mmcsd -device sdhci - -# Network devices -device miibus # MII bus support -device iflib -device em - - -# I2C support -device iicbus -device iic - -device uart -device ehci -device usb -device scbus -device da -device umass -device pty -device cfi - -device pci - -# Pseudo devices -device ether # Ethernet support -device loop # Network loopback -device bpf # Berkeley packet filter -device md # Memory "disks" - diff --git a/sys/powerpc/conf/dpaa/config.dpaa b/sys/powerpc/conf/dpaa/config.dpaa deleted file mode 100644 index 0b7c8fcf3b29..000000000000 --- a/sys/powerpc/conf/dpaa/config.dpaa +++ /dev/null @@ -1,24 +0,0 @@ - -files "dpaa/files.dpaa" - -makeoptions DPAA_COMPILE_CMD="${LINUXKPI_C} ${DPAAWARNFLAGS} \ - -Wno-cast-qual -Wno-unused-function -Wno-init-self \ - -include $S/contrib/ncsw/build/dflags.h \ - -Wno-error=missing-prototypes \ - -I$S/contrib/ncsw/build/ \ - -I$S/contrib/ncsw/inc \ - -I$S/contrib/ncsw/inc/cores \ - -I$S/contrib/ncsw/inc/etc \ - -I$S/contrib/ncsw/inc/Peripherals \ - -I$S/contrib/ncsw/etc \ - 
-I$S/contrib/ncsw/Peripherals/BM \ - -I$S/contrib/ncsw/Peripherals/FM \ - -I$S/contrib/ncsw/Peripherals/FM/HC \ - -I$S/contrib/ncsw/Peripherals/FM/inc \ - -I$S/contrib/ncsw/Peripherals/FM/MAC \ - -I$S/contrib/ncsw/Peripherals/FM/Pcd \ - -I$S/contrib/ncsw/Peripherals/FM/Port \ - -I$S/contrib/ncsw/Peripherals/FM/Rtc \ - -I$S/contrib/ncsw/Peripherals/QM \ - -I$S/contrib/ncsw/inc/flib \ - -I$S/contrib/ncsw/inc/integrations" diff --git a/sys/powerpc/conf/dpaa/files.dpaa b/sys/powerpc/conf/dpaa/files.dpaa deleted file mode 100644 index 4af0f896d883..000000000000 --- a/sys/powerpc/conf/dpaa/files.dpaa +++ /dev/null @@ -1,122 +0,0 @@ - -# NetCommSw drivers -contrib/ncsw/etc/error.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/etc/ncsw_list.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/etc/memcpy.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/etc/mm.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/etc/ncsw_mem.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/etc/sprint.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/BM/bm.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/BM/bman_low.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/BM/bm_pool.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/BM/bm_portal.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/SP/fm_sp.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Rtc/fm_rtc.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Rtc/fman_rtc.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Port/fman_port.c optional dpaa \ - no-depend 
compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Port/fm_port.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Port/fm_port_im.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Pcd/fm_cc.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Pcd/fm_kg.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Pcd/fman_kg.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Pcd/fm_manip.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Pcd/fm_pcd.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Pcd/fm_plcr.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Pcd/fm_prs.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/Pcd/fman_prs.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/MAC/dtsec.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/MAC/dtsec_mii_acc.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/MAC/fman_dtsec.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/MAC/fman_dtsec_mii_acc.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/MAC/fm_mac.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/MAC/fman_dtsec.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/MAC/tgec.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/MAC/tgec_mii_acc.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" 
-contrib/ncsw/Peripherals/FM/MAC/fman_tgec.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/HC/hc.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/fm_muram.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/fm_ncsw.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/FM/fman_ncsw.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD} ${NO_WMISLEADING_INDENTATION}" -contrib/ncsw/Peripherals/QM/qm.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/Peripherals/QM/qm_portal_fqr.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/user/env/stdlib.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/user/env/xx.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -contrib/ncsw/user/env/core.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" - -# FreeBSD Wrappers -dev/dpaa/portals_common.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/bman_portals.c optional dpaa fdt \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/bman.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/bman_fdt.c optional dpaa fdt \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/qman_portals.c optional dpaa fdt \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/qman.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/qman_fdt.c optional dpaa fdt \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/fman.c optional dpaa fdt \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/fman_mdio.c optional dpaa fdt \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/fman_fdt.c optional dpaa fdt \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/if_dtsec.c optional dpaa \ - no-depend compile-with 
"${DPAA_COMPILE_CMD}" -dev/dpaa/if_dtsec_im.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/if_dtsec_rm.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/if_dtsec_fdt.c optional dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" - -# Examples -dev/dpaa/bman-example.c optional bman_example dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" -dev/dpaa/qman-example.c optional qman_example dpaa \ - no-depend compile-with "${DPAA_COMPILE_CMD}" diff --git a/sys/rpc/clnt_bck.c b/sys/rpc/clnt_bck.c index c5cbbf045bdc..9ff85b1fa2c0 100644 --- a/sys/rpc/clnt_bck.c +++ b/sys/rpc/clnt_bck.c @@ -58,6 +58,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/jail.h> #include <sys/ktls.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -440,15 +441,19 @@ got_reply: * If unsuccessful AND error is an authentication error * then refresh credentials and try again, else break */ - else if (stat == RPC_AUTHERROR) + else if (stat == RPC_AUTHERROR) { /* maybe our credentials need to be refreshed ... */ + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); if (nrefreshes > 0 && AUTH_REFRESH(auth, &reply_msg)) { + CURVNET_RESTORE(); nrefreshes--; XDR_DESTROY(&xdrs); mtx_lock(&ct->ct_lock); goto call_again; } + CURVNET_RESTORE(); /* end of unsuccessful completion */ + } /* end of valid reply message */ } else errp->re_status = stat = RPC_CANTDECODERES; diff --git a/sys/rpc/clnt_dg.c b/sys/rpc/clnt_dg.c index b6a8cdce0d76..d7870aef5674 100644 --- a/sys/rpc/clnt_dg.c +++ b/sys/rpc/clnt_dg.c @@ -39,6 +39,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/jail.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -738,15 +739,19 @@ got_reply: * If unsuccessful AND error is an authentication error * then refresh credentials and try again, else break */ - else if (stat == RPC_AUTHERROR) + else if (stat == RPC_AUTHERROR) { /* maybe our credentials need to be refreshed ... 
*/ + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); if (nrefreshes > 0 && AUTH_REFRESH(auth, &reply_msg)) { + CURVNET_RESTORE(); nrefreshes--; XDR_DESTROY(&xdrs); mtx_lock(&cs->cs_lock); goto call_again; } + CURVNET_RESTORE(); + } /* end of unsuccessful completion */ } /* end of valid reply message */ else { diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c index ecd5fdd04f34..e395cd27ccaa 100644 --- a/sys/rpc/clnt_vc.c +++ b/sys/rpc/clnt_vc.c @@ -54,6 +54,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/jail.h> #include <sys/kernel.h> #include <sys/kthread.h> #include <sys/ktls.h> @@ -559,15 +560,19 @@ got_reply: * If unsuccessful AND error is an authentication error * then refresh credentials and try again, else break */ - else if (stat == RPC_AUTHERROR) + else if (stat == RPC_AUTHERROR) { /* maybe our credentials need to be refreshed ... */ + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); if (nrefreshes > 0 && AUTH_REFRESH(auth, &reply_msg)) { + CURVNET_RESTORE(); nrefreshes--; XDR_DESTROY(&xdrs); mtx_lock(&ct->ct_lock); goto call_again; } + CURVNET_RESTORE(); + } /* end of unsuccessful completion */ } /* end of valid reply message */ else { diff --git a/sys/rpc/rpcsec_gss/rpcsec_gss.c b/sys/rpc/rpcsec_gss/rpcsec_gss.c index d99e3a3090df..de690bbbd1d1 100644 --- a/sys/rpc/rpcsec_gss/rpcsec_gss.c +++ b/sys/rpc/rpcsec_gss/rpcsec_gss.c @@ -150,26 +150,42 @@ static struct timeval AUTH_TIMEOUT = { 25, 0 }; #define RPC_GSS_HASH_SIZE 11 #define RPC_GSS_MAX 256 -static struct rpc_gss_data_list rpc_gss_cache[RPC_GSS_HASH_SIZE]; -static struct rpc_gss_data_list rpc_gss_all; -static struct sx rpc_gss_lock; -static int rpc_gss_count; + +VNET_DEFINE_STATIC(struct rpc_gss_data_list *, rpc_gss_cache); +VNET_DEFINE_STATIC(struct rpc_gss_data_list, rpc_gss_all); +VNET_DEFINE_STATIC(struct sx, rpc_gss_lock); +VNET_DEFINE_STATIC(int, rpc_gss_count); static AUTH *rpc_gss_seccreate_int(CLIENT *, struct ucred *, const char *, const char *, gss_OID, rpc_gss_service_t, u_int, 
rpc_gss_options_req_t *, rpc_gss_options_ret_t *); static void -rpc_gss_hashinit(void *dummy) +rpc_gss_hashinit(void *dummy __unused) { int i; + VNET(rpc_gss_cache) = mem_alloc(sizeof(struct rpc_gss_data_list) * + RPC_GSS_HASH_SIZE); for (i = 0; i < RPC_GSS_HASH_SIZE; i++) - TAILQ_INIT(&rpc_gss_cache[i]); - TAILQ_INIT(&rpc_gss_all); - sx_init(&rpc_gss_lock, "rpc_gss_lock"); + TAILQ_INIT(&VNET(rpc_gss_cache)[i]); + TAILQ_INIT(&VNET(rpc_gss_all)); + sx_init(&VNET(rpc_gss_lock), "rpc_gss_lock"); } -SYSINIT(rpc_gss_hashinit, SI_SUB_KMEM, SI_ORDER_ANY, rpc_gss_hashinit, NULL); +VNET_SYSINIT(rpc_gss_hashinit, SI_SUB_VNET_DONE, SI_ORDER_ANY, + rpc_gss_hashinit, NULL); + +static void +rpc_gss_hashinit_cleanup(void *dummy __unused) +{ + + rpc_gss_secpurge(NULL); + mem_free(VNET(rpc_gss_cache), sizeof(struct rpc_gss_data_list) * + RPC_GSS_HASH_SIZE); + sx_destroy(&VNET(rpc_gss_lock)); +} +VNET_SYSUNINIT(rpc_gss_hashinit_cleanup, SI_SUB_VNET_DONE, SI_ORDER_ANY, + rpc_gss_hashinit_cleanup, NULL); static uint32_t rpc_gss_hash(const char *principal, gss_OID mech, @@ -198,15 +214,16 @@ rpc_gss_secfind(CLIENT *clnt, struct ucred *cred, const char *principal, struct rpc_gss_data *gd, *tgd; rpc_gss_options_ret_t options; - if (rpc_gss_count > RPC_GSS_MAX) { - while (rpc_gss_count > RPC_GSS_MAX) { - sx_xlock(&rpc_gss_lock); - tgd = TAILQ_FIRST(&rpc_gss_all); + CURVNET_ASSERT_SET(); + if (VNET(rpc_gss_count) > RPC_GSS_MAX) { + while (VNET(rpc_gss_count) > RPC_GSS_MAX) { + sx_xlock(&VNET(rpc_gss_lock)); + tgd = TAILQ_FIRST(&VNET(rpc_gss_all)); th = tgd->gd_hash; - TAILQ_REMOVE(&rpc_gss_cache[th], tgd, gd_link); - TAILQ_REMOVE(&rpc_gss_all, tgd, gd_alllink); - rpc_gss_count--; - sx_xunlock(&rpc_gss_lock); + TAILQ_REMOVE(&VNET(rpc_gss_cache)[th], tgd, gd_link); + TAILQ_REMOVE(&VNET(rpc_gss_all), tgd, gd_alllink); + VNET(rpc_gss_count)--; + sx_xunlock(&VNET(rpc_gss_lock)); AUTH_DESTROY(tgd->gd_auth); } } @@ -217,23 +234,24 @@ rpc_gss_secfind(CLIENT *clnt, struct ucred *cred, const char 
*principal, h = rpc_gss_hash(principal, mech_oid, cred, service); again: - sx_slock(&rpc_gss_lock); - TAILQ_FOREACH(gd, &rpc_gss_cache[h], gd_link) { + sx_slock(&VNET(rpc_gss_lock)); + TAILQ_FOREACH(gd, &VNET(rpc_gss_cache)[h], gd_link) { if (gd->gd_ucred->cr_uid == cred->cr_uid && !strcmp(gd->gd_principal, principal) && gd->gd_mech == mech_oid && gd->gd_cred.gc_svc == service) { refcount_acquire(&gd->gd_refs); - if (sx_try_upgrade(&rpc_gss_lock)) { + if (sx_try_upgrade(&VNET(rpc_gss_lock))) { /* * Keep rpc_gss_all LRU sorted. */ - TAILQ_REMOVE(&rpc_gss_all, gd, gd_alllink); - TAILQ_INSERT_TAIL(&rpc_gss_all, gd, + TAILQ_REMOVE(&VNET(rpc_gss_all), gd, + gd_alllink); + TAILQ_INSERT_TAIL(&VNET(rpc_gss_all), gd, gd_alllink); - sx_xunlock(&rpc_gss_lock); + sx_xunlock(&VNET(rpc_gss_lock)); } else { - sx_sunlock(&rpc_gss_lock); + sx_sunlock(&VNET(rpc_gss_lock)); } /* @@ -249,7 +267,7 @@ again: return (gd->gd_auth); } } - sx_sunlock(&rpc_gss_lock); + sx_sunlock(&VNET(rpc_gss_lock)); /* * We missed in the cache - create a new association. @@ -262,8 +280,8 @@ again: gd = AUTH_PRIVATE(auth); gd->gd_hash = h; - sx_xlock(&rpc_gss_lock); - TAILQ_FOREACH(tgd, &rpc_gss_cache[h], gd_link) { + sx_xlock(&VNET(rpc_gss_lock)); + TAILQ_FOREACH(tgd, &VNET(rpc_gss_cache)[h], gd_link) { if (tgd->gd_ucred->cr_uid == cred->cr_uid && !strcmp(tgd->gd_principal, principal) && tgd->gd_mech == mech_oid @@ -272,17 +290,17 @@ again: * We lost a race to create the AUTH that * matches this cred. 
*/ - sx_xunlock(&rpc_gss_lock); + sx_xunlock(&VNET(rpc_gss_lock)); AUTH_DESTROY(auth); goto again; } } - rpc_gss_count++; - TAILQ_INSERT_TAIL(&rpc_gss_cache[h], gd, gd_link); - TAILQ_INSERT_TAIL(&rpc_gss_all, gd, gd_alllink); + VNET(rpc_gss_count)++; + TAILQ_INSERT_TAIL(&VNET(rpc_gss_cache)[h], gd, gd_link); + TAILQ_INSERT_TAIL(&VNET(rpc_gss_all), gd, gd_alllink); refcount_acquire(&gd->gd_refs); /* one for the cache, one for user */ - sx_xunlock(&rpc_gss_lock); + sx_xunlock(&VNET(rpc_gss_lock)); return (auth); } @@ -293,14 +311,15 @@ rpc_gss_secpurge(CLIENT *clnt) uint32_t h; struct rpc_gss_data *gd, *tgd; - TAILQ_FOREACH_SAFE(gd, &rpc_gss_all, gd_alllink, tgd) { - if (gd->gd_clnt == clnt) { - sx_xlock(&rpc_gss_lock); + CURVNET_ASSERT_SET(); + TAILQ_FOREACH_SAFE(gd, &VNET(rpc_gss_all), gd_alllink, tgd) { + if (clnt == NULL || gd->gd_clnt == clnt) { + sx_xlock(&VNET(rpc_gss_lock)); h = gd->gd_hash; - TAILQ_REMOVE(&rpc_gss_cache[h], gd, gd_link); - TAILQ_REMOVE(&rpc_gss_all, gd, gd_alllink); - rpc_gss_count--; - sx_xunlock(&rpc_gss_lock); + TAILQ_REMOVE(&VNET(rpc_gss_cache)[h], gd, gd_link); + TAILQ_REMOVE(&VNET(rpc_gss_all), gd, gd_alllink); + VNET(rpc_gss_count)--; + sx_xunlock(&VNET(rpc_gss_lock)); AUTH_DESTROY(gd->gd_auth); } } @@ -748,6 +767,7 @@ rpc_gss_init(AUTH *auth, rpc_gss_options_ret_t *options_ret) gss_OID_set mechlist; static enum krb_imp my_krb_imp = KRBIMP_UNKNOWN; + CURVNET_ASSERT_SET(); rpc_gss_log_debug("in rpc_gss_refresh()"); gd = AUTH_PRIVATE(auth); @@ -774,17 +794,6 @@ rpc_gss_init(AUTH *auth, rpc_gss_options_ret_t *options_ret) gd->gd_cred.gc_seq = 0; /* - * XXX Threads from inside jails can get here via calls - * to clnt_vc_call()->AUTH_REFRESH()->rpc_gss_refresh() - * but the NFS mount is always done outside of the - * jails in vnet0. Since the thread credentials won't - * necessarily have cr_prison == vnet0 and this function - * has no access to the socket, using vnet0 seems the - * only option. 
This is broken if NFS mounts are enabled - * within vnet prisons. - */ - CURVNET_SET_QUIET(vnet0); - /* * For KerberosV, if there is a client principal name, that implies * that this is a host based initiator credential in the default * keytab file. For this case, it is necessary to do a @@ -1030,14 +1039,12 @@ out: gss_delete_sec_context(&min_stat, &gd->gd_ctx, GSS_C_NO_BUFFER); } - CURVNET_RESTORE(); mtx_lock(&gd->gd_lock); gd->gd_state = RPCSEC_GSS_START; wakeup(gd); mtx_unlock(&gd->gd_lock); return (FALSE); } - CURVNET_RESTORE(); mtx_lock(&gd->gd_lock); gd->gd_state = RPCSEC_GSS_ESTABLISHED; diff --git a/sys/rpc/rpcsec_tls.h b/sys/rpc/rpcsec_tls.h index 97c49dc32245..d796ca88f31a 100644 --- a/sys/rpc/rpcsec_tls.h +++ b/sys/rpc/rpcsec_tls.h @@ -65,21 +65,6 @@ bool rpctls_getinfo(u_int *maxlen, bool rpctlscd_run, /* String for AUTH_TLS reply verifier. */ #define RPCTLS_START_STRING "STARTTLS" -/* Macros for VIMAGE. */ -/* Just define the KRPC_VNETxxx() macros as VNETxxx() macros. */ -#define KRPC_VNET_NAME(n) VNET_NAME(n) -#define KRPC_VNET_DECLARE(t, n) VNET_DECLARE(t, n) -#define KRPC_VNET_DEFINE(t, n) VNET_DEFINE(t, n) -#define KRPC_VNET_DEFINE_STATIC(t, n) VNET_DEFINE_STATIC(t, n) -#define KRPC_VNET(n) VNET(n) - -#define CTLFLAG_KRPC_VNET CTLFLAG_VNET - -#define KRPC_CURVNET_SET(n) CURVNET_SET(n) -#define KRPC_CURVNET_SET_QUIET(n) CURVNET_SET_QUIET(n) -#define KRPC_CURVNET_RESTORE() CURVNET_RESTORE() -#define KRPC_TD_TO_VNET(n) TD_TO_VNET(n) - #endif /* _KERNEL */ #endif /* _RPC_RPCSEC_TLS_H_ */ diff --git a/sys/rpc/rpcsec_tls/rpctls_impl.c b/sys/rpc/rpcsec_tls/rpctls_impl.c index 22ba699a6fab..e1b80217263f 100644 --- a/sys/rpc/rpcsec_tls/rpctls_impl.c +++ b/sys/rpc/rpcsec_tls/rpctls_impl.c @@ -75,8 +75,8 @@ static struct syscall_helper_data rpctls_syscalls[] = { static struct opaque_auth rpctls_null_verf; -KRPC_VNET_DECLARE(uint64_t, svc_vc_tls_handshake_success); -KRPC_VNET_DECLARE(uint64_t, svc_vc_tls_handshake_failed); +VNET_DECLARE(uint64_t, 
svc_vc_tls_handshake_success); +VNET_DECLARE(uint64_t, svc_vc_tls_handshake_failed); static CLIENT *rpctls_connect_handle; static CLIENT *rpctls_server_handle; @@ -159,7 +159,7 @@ sys_rpctls_syscall(struct thread *td, struct rpctls_syscall_args *uap) if (error != 0) return (error); - KRPC_CURVNET_SET(KRPC_TD_TO_VNET(td)); + CURVNET_SET(TD_TO_VNET(td)); mtx_lock(&rpctls_lock); upsp = RB_FIND(upsock_t, &upcall_sockets, &(struct upsock){ @@ -176,7 +176,7 @@ sys_rpctls_syscall(struct thread *td, struct rpctls_syscall_args *uap) } mtx_unlock(&rpctls_lock); if (upsp == NULL) { - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); printf("%s: socket lookup failed\n", __func__); return (EPERM); } @@ -187,7 +187,7 @@ sys_rpctls_syscall(struct thread *td, struct rpctls_syscall_args *uap) * As such, it needs to be closed here. */ soclose(ups.so); - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (error); } soref(ups.so); @@ -211,7 +211,7 @@ sys_rpctls_syscall(struct thread *td, struct rpctls_syscall_args *uap) finit(fp, FREAD | FWRITE, DTYPE_SOCKET, ups.so, &socketops); fdrop(fp, td); /* Drop fp reference. */ td->td_retval[0] = fd; - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (error); } @@ -441,8 +441,8 @@ _svcauth_rpcsec_tls(struct svc_req *rqst, struct rpc_msg *msg) u_int maxlen; #endif - KRPC_CURVNET_SET_QUIET(KRPC_TD_TO_VNET(curthread)); - KRPC_VNET(svc_vc_tls_handshake_failed)++; + CURVNET_SET_QUIET(TD_TO_VNET(curthread)); + VNET(svc_vc_tls_handshake_failed)++; /* Initialize reply. 
*/ rqst->rq_verf = rpctls_null_verf; @@ -450,12 +450,12 @@ _svcauth_rpcsec_tls(struct svc_req *rqst, struct rpc_msg *msg) if (rqst->rq_cred.oa_length != 0 || msg->rm_call.cb_verf.oa_length != 0 || msg->rm_call.cb_verf.oa_flavor != AUTH_NULL) { - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (AUTH_BADCRED); } if (rqst->rq_proc != NULLPROC) { - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (AUTH_REJECTEDCRED); } @@ -465,7 +465,7 @@ _svcauth_rpcsec_tls(struct svc_req *rqst, struct rpc_msg *msg) call_stat = TRUE; #endif if (!call_stat) { - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (AUTH_REJECTEDCRED); } @@ -488,7 +488,7 @@ _svcauth_rpcsec_tls(struct svc_req *rqst, struct rpc_msg *msg) xprt->xp_dontrcv = FALSE; sx_xunlock(&xprt->xp_lock); xprt_active(xprt); /* Harmless if already active. */ - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (AUTH_REJECTEDCRED); } @@ -506,12 +506,12 @@ _svcauth_rpcsec_tls(struct svc_req *rqst, struct rpc_msg *msg) xprt->xp_uid = uid; xprt->xp_gidp = gidp; } - KRPC_VNET(svc_vc_tls_handshake_failed)--; - KRPC_VNET(svc_vc_tls_handshake_success)++; + VNET(svc_vc_tls_handshake_failed)--; + VNET(svc_vc_tls_handshake_success)++; } sx_xunlock(&xprt->xp_lock); xprt_active(xprt); /* Harmless if already active. 
*/ - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); return (RPCSEC_GSS_NODISPATCH); } diff --git a/sys/rpc/svc_vc.c b/sys/rpc/svc_vc.c index 848109704ad0..24c09f182174 100644 --- a/sys/rpc/svc_vc.c +++ b/sys/rpc/svc_vc.c @@ -77,57 +77,57 @@ SYSCTL_NODE(_kern_rpc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, SYSCTL_NODE(_kern_rpc, OID_AUTO, unenc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "unencrypted"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_rx_msgbytes) = 0; -SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, rx_msgbytes, CTLFLAG_KRPC_VNET | CTLFLAG_RW, - &KRPC_VNET_NAME(svc_vc_rx_msgbytes), 0, "Count of non-TLS rx bytes"); +VNET_DEFINE_STATIC(uint64_t, svc_vc_rx_msgbytes) = 0; +SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, rx_msgbytes, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(svc_vc_rx_msgbytes), 0, "Count of non-TLS rx bytes"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_rx_msgcnt) = 0; -SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, rx_msgcnt, CTLFLAG_KRPC_VNET | CTLFLAG_RW, - &KRPC_VNET_NAME(svc_vc_rx_msgcnt), 0, "Count of non-TLS rx messages"); +VNET_DEFINE_STATIC(uint64_t, svc_vc_rx_msgcnt) = 0; +SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, rx_msgcnt, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(svc_vc_rx_msgcnt), 0, "Count of non-TLS rx messages"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tx_msgbytes) = 0; -SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, tx_msgbytes, CTLFLAG_KRPC_VNET | CTLFLAG_RW, - &KRPC_VNET_NAME(svc_vc_tx_msgbytes), 0, "Count of non-TLS tx bytes"); +VNET_DEFINE_STATIC(uint64_t, svc_vc_tx_msgbytes) = 0; +SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, tx_msgbytes, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(svc_vc_tx_msgbytes), 0, "Count of non-TLS tx bytes"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tx_msgcnt) = 0; -SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, tx_msgcnt, CTLFLAG_KRPC_VNET | CTLFLAG_RW, - &KRPC_VNET_NAME(svc_vc_tx_msgcnt), 0, "Count of non-TLS tx messages"); +VNET_DEFINE_STATIC(uint64_t, svc_vc_tx_msgcnt) = 0; +SYSCTL_U64(_kern_rpc_unenc, OID_AUTO, tx_msgcnt, CTLFLAG_VNET | CTLFLAG_RW, + 
&VNET_NAME(svc_vc_tx_msgcnt), 0, "Count of non-TLS tx messages"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_alerts) = 0; +VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_alerts) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, alerts, - CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_alerts), 0, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(svc_vc_tls_alerts), 0, "Count of TLS alert messages"); -KRPC_VNET_DEFINE(uint64_t, svc_vc_tls_handshake_failed) = 0; +VNET_DEFINE(uint64_t, svc_vc_tls_handshake_failed) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, handshake_failed, - CTLFLAG_KRPC_VNET | CTLFLAG_RW, - &KRPC_VNET_NAME(svc_vc_tls_handshake_failed), 0, + CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(svc_vc_tls_handshake_failed), 0, "Count of TLS failed handshakes"); -KRPC_VNET_DEFINE(uint64_t, svc_vc_tls_handshake_success) = 0; +VNET_DEFINE(uint64_t, svc_vc_tls_handshake_success) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, handshake_success, - CTLFLAG_KRPC_VNET | CTLFLAG_RW, - &KRPC_VNET_NAME(svc_vc_tls_handshake_success), 0, + CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(svc_vc_tls_handshake_success), 0, "Count of TLS successful handshakes"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_rx_msgbytes) = 0; +VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_rx_msgbytes) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, rx_msgbytes, - CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_rx_msgbytes), 0, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(svc_vc_tls_rx_msgbytes), 0, "Count of TLS rx bytes"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_rx_msgcnt) = 0; +VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_rx_msgcnt) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, rx_msgcnt, - CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_rx_msgcnt), 0, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(svc_vc_tls_rx_msgcnt), 0, "Count of TLS rx messages"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_tx_msgbytes) = 0; +VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_tx_msgbytes) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, tx_msgbytes, - 
CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_tx_msgbytes), 0, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(svc_vc_tls_tx_msgbytes), 0, "Count of TLS tx bytes"); -KRPC_VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_tx_msgcnt) = 0; +VNET_DEFINE_STATIC(uint64_t, svc_vc_tls_tx_msgcnt) = 0; SYSCTL_U64(_kern_rpc_tls, OID_AUTO, tx_msgcnt, - CTLFLAG_KRPC_VNET | CTLFLAG_RW, &KRPC_VNET_NAME(svc_vc_tls_tx_msgcnt), 0, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(svc_vc_tls_tx_msgcnt), 0, "Count of TLS tx messages"); static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *, @@ -850,15 +850,15 @@ tryagain: * This record needs to be handled in userland * via an SSL_read() call, so do an upcall to the daemon. */ - KRPC_CURVNET_SET(so->so_vnet); + CURVNET_SET(so->so_vnet); if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0 && error == ENXIO) { - KRPC_VNET(svc_vc_tls_alerts)++; + VNET(svc_vc_tls_alerts)++; /* Disable reception. */ xprt->xp_dontrcv = TRUE; sx_xunlock(&xprt->xp_lock); ret = rpctls_srv_handlerecord(so, &reterr); - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); sx_xlock(&xprt->xp_lock); xprt->xp_dontrcv = FALSE; if (ret != RPC_SUCCESS || reterr != RPCTLSERR_OK) { @@ -875,7 +875,7 @@ tryagain: } if (error) { - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); SOCK_RECVBUF_LOCK(so); if (xprt->xp_upcallset) { xprt->xp_upcallset = 0; @@ -889,7 +889,7 @@ tryagain: } if (!m) { - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); /* * EOF - the other end has closed the socket. 
*/ @@ -915,20 +915,20 @@ tryagain: m_freem(m); m_free(ctrl); rcvflag = MSG_DONTWAIT | MSG_TLSAPPDATA; - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); goto tryagain; } - KRPC_VNET(svc_vc_tls_rx_msgcnt)++; - KRPC_VNET(svc_vc_tls_rx_msgbytes) += + VNET(svc_vc_tls_rx_msgcnt)++; + VNET(svc_vc_tls_rx_msgbytes) += 1000000000 - uio.uio_resid; } m_free(ctrl); } else { - KRPC_VNET(svc_vc_rx_msgcnt)++; - KRPC_VNET(svc_vc_rx_msgbytes) += 1000000000 - + VNET(svc_vc_rx_msgcnt)++; + VNET(svc_vc_rx_msgbytes) += 1000000000 - uio.uio_resid; } - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); if (cd->mpending) m_last(cd->mpending)->m_next = m; @@ -1017,7 +1017,7 @@ svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg, htonl(0x80000000 | (len - sizeof(uint32_t))); /* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */ - KRPC_CURVNET_SET(xprt->xp_socket->so_vnet); + CURVNET_SET(xprt->xp_socket->so_vnet); if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) { /* * Copy the mbuf chain to a chain of @@ -1029,13 +1029,13 @@ svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg, maxextsiz = min(maxextsiz, maxlen); #endif mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz); - KRPC_VNET(svc_vc_tls_tx_msgcnt)++; - KRPC_VNET(svc_vc_tls_tx_msgbytes) += len; + VNET(svc_vc_tls_tx_msgcnt)++; + VNET(svc_vc_tls_tx_msgbytes) += len; } else { - KRPC_VNET(svc_vc_tx_msgcnt)++; - KRPC_VNET(svc_vc_tx_msgbytes) += len; + VNET(svc_vc_tx_msgcnt)++; + VNET(svc_vc_tx_msgbytes) += len; } - KRPC_CURVNET_RESTORE(); + CURVNET_RESTORE(); atomic_add_32(&xprt->xp_snd_cnt, len); /* * sosend consumes mreq. 
diff --git a/sys/sys/cdefs.h b/sys/sys/cdefs.h index 132d4809b0ed..a08c50c8439a 100644 --- a/sys/sys/cdefs.h +++ b/sys/sys/cdefs.h @@ -152,6 +152,7 @@ #define __weak_symbol __attribute__((__weak__)) #define __dead2 __attribute__((__noreturn__)) #define __pure2 __attribute__((__const__)) +#define __maybe_unused __attribute__((__unused__)) #define __unused __attribute__((__unused__)) #define __used __attribute__((__used__)) #define __deprecated __attribute__((__deprecated__)) diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h index 29a16b393b52..f37b1150a7e8 100644 --- a/sys/sys/eventhandler.h +++ b/sys/sys/eventhandler.h @@ -46,7 +46,7 @@ struct eventhandler_entry_vimage { struct eventhandler_list { char *el_name; - int el_flags; /* Unused. */ + u_int el_deadcount; u_int el_runcount; struct mtx el_lock; TAILQ_ENTRY(eventhandler_list) el_link; @@ -82,7 +82,7 @@ struct eventhandler_list { KASSERT((list)->el_runcount > 0, \ ("eventhandler_invoke: runcount underflow")); \ (list)->el_runcount--; \ - if ((list)->el_runcount == 0) \ + if ((list)->el_runcount == 0 && (list)->el_deadcount != 0) \ eventhandler_prune_list(list); \ EHL_UNLOCK((list)); \ } while (0) diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 18f85192f6c3..1ca492929813 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1071,7 +1071,6 @@ int vfs_report_lockf(struct mount *mp, struct sbuf *sb); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx_padalign mountlist_mtx; -extern struct nfs_public nfs_pub; extern struct sx vfsconf_sx; #define vfsconf_lock() sx_xlock(&vfsconf_sx) #define vfsconf_unlock() sx_xunlock(&vfsconf_sx) diff --git a/sys/sys/priority.h b/sys/sys/priority.h index 93dd5aa90d95..3a3537bee1b2 100644 --- a/sys/sys/priority.h +++ b/sys/sys/priority.h @@ -136,10 +136,10 @@ #endif struct priority { - u_char pri_class; /* Scheduling class. */ - u_char pri_level; /* Normal priority level. 
*/ - u_char pri_native; /* Priority before propagation. */ - u_char pri_user; /* User priority based on p_cpu and p_nice. */ + unsigned char pri_class; /* Scheduling class. */ + unsigned char pri_level; /* Normal priority level. */ + unsigned char pri_native; /* Priority before propagation. */ + unsigned char pri_user; /* User priority based on p_cpu and p_nice. */ }; #endif /* !_SYS_PRIORITY_H_ */ diff --git a/sys/sys/rtprio.h b/sys/sys/rtprio.h index 6e0d8c7b0fe7..d6968a9e58a4 100644 --- a/sys/sys/rtprio.h +++ b/sys/sys/rtprio.h @@ -34,6 +34,7 @@ #ifndef _SYS_RTPRIO_H_ #define _SYS_RTPRIO_H_ +#include <sys/_types.h> #include <sys/priority.h> /* @@ -71,8 +72,8 @@ * Scheduling class information. */ struct rtprio { - u_short type; /* scheduling class */ - u_short prio; + unsigned short type; /* scheduling class */ + unsigned short prio; }; #ifdef _KERNEL @@ -86,8 +87,8 @@ void pri_to_rtp(struct thread *, struct rtprio *); #include <sys/cdefs.h> __BEGIN_DECLS -int rtprio(int, pid_t, struct rtprio *); -int rtprio_thread(int, lwpid_t, struct rtprio *); +int rtprio(int, __pid_t, struct rtprio *); +int rtprio_thread(int, __lwpid_t, struct rtprio *); __END_DECLS #endif /* !_KERNEL */ #endif /* !_SYS_RTPRIO_H_ */ diff --git a/sys/sys/ucred.h b/sys/sys/ucred.h index ba241cf9ff3a..4bf48a5e4b87 100644 --- a/sys/sys/ucred.h +++ b/sys/sys/ucred.h @@ -193,6 +193,17 @@ struct setcred32 { uint32_t sc_label; /* struct mac32 [*] */ }; +#ifdef COMPAT_FREEBSD32 +/* 32-bit compatible version of xucred */ +struct xucred32 { + u_int cr_version; /* structure layout version */ + uid_t cr_uid; /* effective user id */ + short cr_ngroups; /* number of groups (incl. cr_gid). */ + gid_t cr_groups[XU_NGROUPS]; /* groups */ + pid_t cr_pid; +}; +#endif + struct thread; /* Common native and 32-bit compatibility entry point. 
*/ diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 207eb721d129..799b89ce2fc8 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -51,7 +51,7 @@ void *kva_alloc_aligned(vm_size_t, vm_size_t); void kva_free(void *, vm_size_t); /* These operate on pageable virtual addresses. */ -void *kmap_alloc_wait(vm_map_t, vm_size_t); +void *kmap_alloc_wait(vm_map_t, vm_size_t, vm_size_t); void kmap_free_wakeup(vm_map_t, void *, vm_size_t); /* These operate on virtual addresses backed by memory. */ diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index 40a001afb300..316b43c1c240 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -274,8 +274,12 @@ again: exec_map_entries = 2 * mp_ncpus + 4; #endif exec_map_entry_size = round_page(PATH_MAX + ARG_MAX); - kmem_subinit(exec_map, kernel_map, &minaddr, &maxaddr, - exec_map_entries * exec_map_entry_size + 64 * PAGE_SIZE, false); + exec_map_guard_pages = 1; + TUNABLE_INT_FETCH("vm.exec_map_guard_pages", &exec_map_guard_pages); + size = exec_map_entries * + (exec_map_entry_size + 2 * ptoa(exec_map_guard_pages)) + + 64 * PAGE_SIZE; + kmem_subinit(exec_map, kernel_map, &minaddr, &maxaddr, size, false); kmem_subinit(pipe_map, kernel_map, &minaddr, &maxaddr, maxpipekva, false); TSEXIT(); diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 3a6fed6cb3e1..fc5d0de424bd 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -107,6 +107,7 @@ CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0); const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS; u_int exec_map_entry_size; +u_int exec_map_guard_pages; u_int exec_map_entries; SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, @@ -706,34 +707,52 @@ kmem_free(void *addr, vm_size_t size) vmem_free(arena, (uintptr_t)addr, size); } +static void +kmap_alloc_map(vm_map_t map, vm_offset_t addr, vm_size_t size, + vm_prot_t prot, int flags) +{ + int error __diagused; + + error = vm_map_insert(map, NULL, 0, + addr, addr + size, prot, prot, flags); + KASSERT(error == KERN_SUCCESS, 
+ ("%s: unexpected error %d", __func__, error)); +} + /* * kmap_alloc_wait: * * Allocates pageable memory from a sub-map of the kernel. If the submap * has no room, the caller sleeps waiting for more memory in the submap. + * If "guard_size" is non-zero, then unmapped KVA is left at the beginning + * and end of the allocated range. * * This routine may block. */ void * -kmap_alloc_wait(vm_map_t map, vm_size_t size) +kmap_alloc_wait(vm_map_t map, vm_size_t size, vm_size_t guard_size) { vm_offset_t addr; + vm_size_t total_size; + + KASSERT(size % PAGE_SIZE == 0 && guard_size % PAGE_SIZE == 0, + ("%s: size %zu guard_size %zu", __func__, size, guard_size)); - size = round_page(size); if (!swap_reserve(size)) return (NULL); + total_size = size + 2 * guard_size; for (;;) { /* * To make this work for more than one map, use the map's lock * to lock out sleepers/wakers. */ vm_map_lock(map); - addr = vm_map_findspace(map, vm_map_min(map), size); - if (addr + size <= vm_map_max(map)) + addr = vm_map_findspace(map, vm_map_min(map), total_size); + if (addr + total_size <= vm_map_max(map)) break; /* no space now; see if we can ever get space */ - if (vm_map_max(map) - vm_map_min(map) < size) { + if (vm_map_max(map) - vm_map_min(map) < total_size) { vm_map_unlock(map); swap_release(size); return (0); @@ -741,10 +760,16 @@ kmap_alloc_wait(vm_map_t map, vm_size_t size) vm_map_modflags(map, MAP_NEEDS_WAKEUP, 0); vm_map_unlock_and_wait(map, 0); } - vm_map_insert(map, NULL, 0, addr, addr + size, VM_PROT_RW, VM_PROT_RW, + if (guard_size != 0) { + kmap_alloc_map(map, addr, guard_size, + VM_PROT_NONE, MAP_CREATE_GUARD); + kmap_alloc_map(map, addr + guard_size + size, guard_size, + VM_PROT_NONE, MAP_CREATE_GUARD); + } + kmap_alloc_map(map, addr + guard_size, size, VM_PROT_RW, MAP_ACC_CHARGED); vm_map_unlock(map); - return ((void *)addr); + return ((void *)(addr + guard_size)); } /* diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h index 942c03480364..296a50ae0058 100644 --- 
a/sys/vm/vm_kern.h +++ b/sys/vm/vm_kern.h @@ -75,4 +75,5 @@ extern struct vmem *memguard_arena; extern u_long vm_kmem_size; extern u_int exec_map_entries; extern u_int exec_map_entry_size; +extern u_int exec_map_guard_pages; #endif /* _VM_VM_KERN_H_ */ diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 63bdce9d60f8..94dd7d3a19bc 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -4723,6 +4723,11 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, return (rv); } +static bool report_stackoverflow = true; +SYSCTL_BOOL(_vm, OID_AUTO, report_stackoverflow, CTLFLAG_RWTUN, + &report_stackoverflow, 0, + "uprintf() on stack overflow"); + /* * Attempts to grow a vm stack entry. Returns KERN_SUCCESS if we * successfully grow the stack. @@ -4731,6 +4736,7 @@ static int vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry) { vm_map_entry_t stack_entry; + struct thread *td; struct proc *p; struct vmspace *vm; vm_offset_t gap_end, gap_start, grow_start; @@ -4746,7 +4752,8 @@ vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry) int error __diagused; #endif - p = curproc; + td = curthread; + p = td->td_proc; vm = p->p_vmspace; /* @@ -4754,15 +4761,14 @@ vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry) * debugger or AIO daemon. The reason is that the wrong * resource limits are applied. */ - if (p != initproc && (map != &p->p_vmspace->vm_map || - p->p_textvp == NULL)) + if (p != initproc && (map != &vm->vm_map || p->p_textvp == NULL)) return (KERN_FAILURE); MPASS(!vm_map_is_system(map)); - lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK); - stacklim = lim_cur(curthread, RLIMIT_STACK); - vmemlim = lim_cur(curthread, RLIMIT_VMEM); + lmemlim = lim_cur(td, RLIMIT_MEMLOCK); + stacklim = lim_cur(td, RLIMIT_STACK); + vmemlim = lim_cur(td, RLIMIT_VMEM); retry: /* If addr is not in a hole for a stack grow area, no need to grow. 
*/ if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry)) @@ -4778,15 +4784,19 @@ retry: } else { return (KERN_FAILURE); } - guard = ((curproc->p_flag2 & P2_STKGAP_DISABLE) != 0 || - (curproc->p_fctl0 & NT_FREEBSD_FCTL_STKGAP_DISABLE) != 0) ? 0 : + guard = ((p->p_flag2 & P2_STKGAP_DISABLE) != 0 || + (p->p_fctl0 & NT_FREEBSD_FCTL_STKGAP_DISABLE) != 0) ? 0 : gap_entry->next_read; max_grow = gap_entry->end - gap_entry->start; if (guard > max_grow) return (KERN_NO_SPACE); max_grow -= guard; - if (grow_amount > max_grow) + if (grow_amount > max_grow) { + if (report_stackoverflow) + uprintf("pid %d comm %s tid %d stack overflow\n", + p->p_pid, p->p_comm, td->td_tid); return (KERN_NO_SPACE); + } /* * If this is the main process stack, see if we're over the stack @@ -4794,8 +4804,12 @@ retry: */ is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr && addr < (vm_offset_t)vm->vm_stacktop; - if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) + if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) { + if (report_stackoverflow) + uprintf("pid %d comm %s tid %d stack overflow\n", + p->p_pid, p->p_comm, td->td_tid); return (KERN_NO_SPACE); + } #ifdef RACCT if (racct_enable) { |
