diff options
45 files changed, 695 insertions, 357 deletions
diff --git a/lib/libc/tests/net/inet_net_test.cc b/lib/libc/tests/net/inet_net_test.cc index 4ecf5a3de492..60b60b152eca 100644 --- a/lib/libc/tests/net/inet_net_test.cc +++ b/lib/libc/tests/net/inet_net_test.cc @@ -50,7 +50,7 @@ ATF_TEST_CASE_BODY(inet_net_inet4) struct test_addr { std::string input; - unsigned bits; + int bits; std::string output; }; @@ -134,7 +134,7 @@ ATF_TEST_CASE_BODY(inet_net_inet6) struct test_addr { std::string input; - unsigned bits; + int bits; std::string output; }; @@ -303,25 +303,25 @@ ATF_TEST_CASE_BODY(inet_net_ntop_invalid) std::ranges::fill(strbuf, 'Z'); auto ret = inet_net_ntop(AF_INET6, &addr6, 128, strbuf.data(), 1); - ATF_REQUIRE_EQ(ret, NULL); + ATF_REQUIRE_EQ(ret, nullptr); ATF_REQUIRE_EQ(strbuf[1], 'Z'); std::ranges::fill(strbuf, 'Z'); ret = inet_net_ntop(AF_INET, &addr4, 32, strbuf.data(), 1); - ATF_REQUIRE_EQ(ret, NULL); + ATF_REQUIRE_EQ(ret, nullptr); ATF_REQUIRE_EQ(strbuf[1], 'Z'); /* Check that invalid prefix lengths return an error */ ret = inet_net_ntop(AF_INET6, &addr6, 129, strbuf.data(), strbuf.size()); - ATF_REQUIRE_EQ(ret, NULL); + ATF_REQUIRE_EQ(ret, nullptr); ret = inet_net_ntop(AF_INET6, &addr6, -1, strbuf.data(), strbuf.size()); - ATF_REQUIRE_EQ(ret, NULL); + ATF_REQUIRE_EQ(ret, nullptr); ret = inet_net_ntop(AF_INET, &addr4, 33, strbuf.data(), strbuf.size()); - ATF_REQUIRE_EQ(ret, NULL); + ATF_REQUIRE_EQ(ret, nullptr); ret = inet_net_ntop(AF_INET, &addr4, -1, strbuf.data(), strbuf.size()); - ATF_REQUIRE_EQ(ret, NULL); + ATF_REQUIRE_EQ(ret, nullptr); } ATF_INIT_TEST_CASES(tcs) diff --git a/release/Makefile.oracle b/release/Makefile.oracle index 3573f29b96c1..6d792cc9fd30 100644 --- a/release/Makefile.oracle +++ b/release/Makefile.oracle @@ -24,21 +24,28 @@ # Syncing to all sites should take 2-3 hours after this final step. ORACLE_BASENAME= ${OSRELEASE}-${BUILDDATE}${GITREV:C/^(.+)/-\1/} -ORACLE_PORTS_LIST= ftp/curl emulators/qemu@tools CLEANFILES+= cw-oracle-portinstall cw-oracle-portinstall: .PHONY -.if !exists(/usr/local/bin/curl) || !exists(/usr/local/bin/qemu-img) -. if !exists(${PORTSDIR}/Makefile) -. if !exists(/usr/local/sbin/pkg-static) +.if (!exists(/usr/local/bin/curl) || !exists(/usr/local/bin/qemu-img)) && !exists(${PORTSDIR}/Makefile) +. if !exists(/usr/local/sbin/pkg-static) env ASSUME_ALWAYS_YES=yes pkg bootstrap -yf -. endif - env ASSUME_ALWAYS_YES=yes pkg install -y ${ORACLE_PORTS_LIST} +. endif +.endif +.if !exists(/usr/local/bin/curl) +. if !exists(${PORTSDIR}/Makefile) + env ASSUME_ALWAYS_YES=yes pkg install -y ftp/curl . else env UNAME_r=${UNAME_r} make -C \ ${PORTSDIR}/ftp/curl \ BATCH=1 WRKDIRPREFIX=/tmp/ports DISTDIR=/tmp/distfiles \ all install clean +. endif +.endif +.if !exists(/usr/local/bin/qemu-img) +. if !exists(${PORTSDIR}/Makefile) + env ASSUME_ALWAYS_YES=yes pkg install -y emulators/qemu@tools +. else env UNAME_r=${UNAME_r} FLAVOR=tools make -C \ ${PORTSDIR}/emulators/qemu \ BATCH=1 WRKDIRPREFIX=/tmp/ports DISTDIR=/tmp/distfiles \ diff --git a/release/tools/vmimage.subr b/release/tools/vmimage.subr index 842a808c623e..99e1936296e1 100644 --- a/release/tools/vmimage.subr +++ b/release/tools/vmimage.subr @@ -213,16 +213,6 @@ vm_extra_install_packages() { install -y -r ${PKG_REPO_NAME} $pkg done metalog_add_data ./var/db/pkg/local.sqlite - - # Add some database files which are created by pkg triggers; - # at some point in the future the tools which create these - # files should probably learn how to record them in METALOG - # (which would simplify no-root installworld as well). - metalog_add_data ./etc/login.conf.db - metalog_add_data ./etc/passwd - metalog_add_data ./etc/pwd.db - metalog_add_data ./etc/spwd.db 600 - metalog_add_data ./var/db/services.db else if [ -n "${WITHOUT_QEMU}" ]; then return 0 @@ -290,28 +280,42 @@ buildfs() { cat ${DESTDIR}/METALOG.pkg >> ${DESTDIR}/METALOG fi - # Check for any directories in the staging tree which weren't - # recorded in METALOG, and record them now. This is a quick hack - # to avoid creating unusable VM images and should go away once - # the bugs which produce such unlogged directories are gone. - grep type=dir ${DESTDIR}/METALOG | - cut -f 1 -d ' ' | - sort -u > ${DESTDIR}/METALOG.dirs - ( cd ${DESTDIR} && find . -type d ) | - sort | - comm -23 - ${DESTDIR}/METALOG.dirs > ${DESTDIR}/METALOG.missingdirs - if [ -s ${DESTDIR}/METALOG.missingdirs ]; then - echo "WARNING: Directories exist but were not in METALOG" - cat ${DESTDIR}/METALOG.missingdirs + if [ -n "${NO_ROOT}" ]; then + # Check for any directories in the staging tree which weren't + # recorded in METALOG, and record them now. This is a quick hack + # to avoid creating unusable VM images and should go away once + # the bugs which produce such unlogged directories are gone. + grep type=dir ${DESTDIR}/METALOG | + cut -f 1 -d ' ' | + sort -u > ${DESTDIR}/METALOG.dirs + ( cd ${DESTDIR} && find . -type d ) | + sort | + comm -23 - ${DESTDIR}/METALOG.dirs > ${DESTDIR}/METALOG.missingdirs + if [ -s ${DESTDIR}/METALOG.missingdirs ]; then + echo "WARNING: Directories exist but were not in METALOG" + cat ${DESTDIR}/METALOG.missingdirs + fi + while read DIR; do + metalog_add_data ${DIR} + done < ${DESTDIR}/METALOG.missingdirs + + if [ -z "${NOPKGBASE}" ]; then + # Add some database files which are created by pkg triggers; + # at some point in the future the tools which create these + # files should probably learn how to record them in METALOG + # (which would simplify no-root installworld as well). + metalog_add_data ./etc/login.conf.db + metalog_add_data ./etc/passwd + metalog_add_data ./etc/pwd.db + metalog_add_data ./etc/spwd.db 600 + metalog_add_data ./var/db/services.db + fi + + # Sort METALOG file; makefs produces directories with 000 permissions + # if their contents are seen before the directories themselves. + env -i LC_COLLATE=C sort -u ${DESTDIR}/METALOG > ${DESTDIR}/METALOG.sorted + mv ${DESTDIR}/METALOG.sorted ${DESTDIR}/METALOG fi - while read DIR; do - metalog_add_data ${DIR} - done < ${DESTDIR}/METALOG.missingdirs - - # Sort METALOG file; makefs produces directories with 000 permissions - # if their contents are seen before the directories themselves. - env -i LC_COLLATE=C sort -u ${DESTDIR}/METALOG > ${DESTDIR}/METALOG.sorted - mv ${DESTDIR}/METALOG.sorted ${DESTDIR}/METALOG case "${VMFS}" in ufs) diff --git a/sbin/geom/core/geom.c b/sbin/geom/core/geom.c index 1a4f7e1f061a..2de696ce6a43 100644 --- a/sbin/geom/core/geom.c +++ b/sbin/geom/core/geom.c @@ -1189,10 +1189,11 @@ status_one_geom(struct ggeom *gp, int script, int name_len, int status_len) gotone = len = 0; xo_open_instance("status"); LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) { + if (cp->lg_provider == NULL) + continue; + cstate = status_one_consumer(cp, "state"); csyncr = status_one_consumer(cp, "synchronized"); - if (cstate == NULL && csyncr == NULL) - continue; if (!gotone || script) { if (!gotone) { xo_emit("{:name/%*s} {:status/%*s} ", @@ -1206,14 +1207,17 @@ status_one_geom(struct ggeom *gp, int script, int name_len, int status_len) xo_open_instance("components"); if (cstate != NULL && csyncr != NULL) { - xo_emit("{P:/%*s}{:compontent} ({:state}, {:synchronized})\n", + xo_emit("{P:/%*s}{:component} ({:state}, {:synchronized})\n", len, "", cp->lg_provider->lg_name, cstate, csyncr); } else if (cstate != NULL) { - xo_emit("{P:/%*s}{:compontent} ({:state})\n", + xo_emit("{P:/%*s}{:component} ({:state})\n", len, "", cp->lg_provider->lg_name, cstate); - } else { - xo_emit("{P:/%*s}{:compontent} ({:synchronized})\n", + } else if (csyncr != NULL) { + xo_emit("{P:/%*s}{:component} ({:synchronized})\n", len, "", cp->lg_provider->lg_name, csyncr); + } else { + xo_emit("{P:/%*s}{:component}\n", + len, "", cp->lg_provider->lg_name); } xo_close_instance("components"); gotone = 1; @@ -1224,7 +1228,7 @@ status_one_geom(struct ggeom *gp, int script, int name_len, int status_len) xo_emit("{:name/%*s} {:status/%*s} ", name_len, name, status_len, status); xo_open_list("components"); xo_open_instance("components"); - xo_emit("{P:/%*s}{d:compontent}\n", len, "", "N/A"); + xo_emit("{P:/%*s}{d:component}\n", len, "", "N/A"); xo_close_instance("components"); } xo_close_list("components"); @@ -1258,11 +1262,11 @@ status_one_geom_prs(struct ggeom *gp, int script, int name_len, int status_len) } gotone = len = 0; LIST_FOREACH(cp, &gp->lg_consumer, lg_consumer) { - cstate = status_one_consumer(cp, "state"); - csyncr = status_one_consumer(cp, "synchronized"); - if (cstate == NULL && csyncr == NULL) + if (cp->lg_provider == NULL) continue; + cstate = status_one_consumer(cp, "state"); + csyncr = status_one_consumer(cp, "synchronized"); if (!gotone || script) { if (!gotone) { xo_emit("{:name/%*s} {:status/%*s} ", @@ -1276,14 +1280,17 @@ status_one_geom_prs(struct ggeom *gp, int script, int name_len, int status_len) xo_open_instance("component"); if (cstate != NULL && csyncr != NULL) { - xo_emit("{P:/%*s}{:compontent} ({:state}, {:synchronized})\n", + xo_emit("{P:/%*s}{:component} ({:state}, {:synchronized})\n", len, "", cp->lg_provider->lg_name, cstate, csyncr); } else if (cstate != NULL) { - xo_emit("{P:/%*s}{:compontent} ({:state})\n", + xo_emit("{P:/%*s}{:component} ({:state})\n", len, "", cp->lg_provider->lg_name, cstate); - } else { - xo_emit("{P:/%*s}{:compontent} ({:synchronized})\n", + } else if (csyncr != NULL) { + xo_emit("{P:/%*s}{:component} ({:synchronized})\n", len, "", cp->lg_provider->lg_name, csyncr); + } else { + xo_emit("{P:/%*s}{:component}\n", + len, "", cp->lg_provider->lg_name); } xo_close_instance("component"); gotone = 1; @@ -1294,7 +1301,7 @@ status_one_geom_prs(struct ggeom *gp, int script, int name_len, int status_len) xo_emit("{:name/%*s} {:status/%*s} ", name_len, name, status_len, status); xo_open_list("components"); xo_open_instance("components"); - xo_emit("{P:/%*s}{d:compontent}\n", len, "", "N/A"); + xo_emit("{P:/%*s}{d:component}\n", len, "", "N/A"); xo_close_instance("components"); } xo_close_list("components"); diff --git a/sys/amd64/sgx/sgx_linux.c b/sys/amd64/sgx/sgx_linux.c index 6ecef9207a38..d389edc1b2b0 100644 --- a/sys/amd64/sgx/sgx_linux.c +++ b/sys/amd64/sgx/sgx_linux.c @@ -92,16 +92,7 @@ out: return (error); } -static struct linux_ioctl_handler sgx_linux_handler = { - sgx_linux_ioctl, - SGX_LINUX_IOCTL_MIN, - SGX_LINUX_IOCTL_MAX, -}; - -SYSINIT(sgx_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &sgx_linux_handler); -SYSUNINIT(sgx_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &sgx_linux_handler); +LINUX_IOCTL_SET(sgx, SGX_LINUX_IOCTL_MIN, SGX_LINUX_IOCTL_MAX); static int sgx_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/compat/linux/linux_ioctl.h b/sys/compat/linux/linux_ioctl.h index ccc25bc919ab..8345b7e4b719 100644 --- a/sys/compat/linux/linux_ioctl.h +++ b/sys/compat/linux/linux_ioctl.h @@ -823,4 +823,16 @@ int linux32_ioctl_register_handler(struct linux_ioctl_handler *h); int linux32_ioctl_unregister_handler(struct linux_ioctl_handler *h); #endif +#define LINUX_IOCTL_SET(n, low, high) \ +static linux_ioctl_function_t n##_linux_ioctl; \ +static struct linux_ioctl_handler n##_linux_handler = { \ + n##_linux_ioctl, \ + low, \ + high \ +}; \ +SYSINIT(n##_ioctl_register, SI_SUB_KLD, SI_ORDER_MIDDLE, \ + linux_ioctl_register_handler, &n##_linux_handler); \ +SYSUNINIT(n##_ioctl_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, \ + linux_ioctl_unregister_handler, &n##_linux_handler) + #endif /* !_LINUX_IOCTL_H_ */ diff --git a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh index 1c608348ffcd..422b3e9df388 100755 --- a/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh +++ b/sys/contrib/openzfs/.github/workflows/scripts/qemu-2-start.sh @@ -121,7 +121,14 @@ case "$OS" in KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" ;; freebsd15-0c) - FreeBSD="15.0-ALPHA3" + FreeBSD="15.0-ALPHA4" + OSNAME="FreeBSD $FreeBSD" + OSv="freebsd14.0" + URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" + KSRC="$FREEBSD_SNAP/../amd64/$FreeBSD/src.txz" + ;; + freebsd16-0c) + FreeBSD="16.0-CURRENT" OSNAME="FreeBSD $FreeBSD" OSv="freebsd14.0" URLxz="$FREEBSD_SNAP/$FreeBSD/amd64/Latest/FreeBSD-$FreeBSD-amd64-BASIC-CI-ufs.raw.xz" @@ -287,7 +294,7 @@ else while pidof /usr/bin/qemu-system-x86_64 >/dev/null; do ssh 2>/dev/null root@vm0 "uname -a" && break done - ssh root@vm0 "pkg install -y bash ca_root_nss git qemu-guest-agent python3 py311-cloud-init" + ssh root@vm0 "env IGNORE_OSVERSION=yes pkg install -y bash ca_root_nss git qemu-guest-agent python3 py311-cloud-init" ssh root@vm0 "chsh -s $BASH root" ssh root@vm0 'sysrc qemu_guest_agent_enable="YES"' ssh root@vm0 'sysrc cloudinit_enable="YES"' diff --git a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml index 69349678d84c..3b164548f9be 100644 --- a/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml +++ b/sys/contrib/openzfs/.github/workflows/zfs-qemu.yml @@ -68,7 +68,7 @@ jobs: # FreeBSD variants of 2025-06: # FreeBSD Release: freebsd13-5r, freebsd14-2r, freebsd14-3r # FreeBSD Stable: freebsd13-5s, freebsd14-3s - # FreeBSD Current: freebsd15-0c + # FreeBSD Current: freebsd15-0c, freebsd16-0c os: ${{ fromJson(needs.test-config.outputs.test_os) }} runs-on: ubuntu-24.04 steps: diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index 70a4ed46f263..1ffb8ebc70f2 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -106,11 +106,15 @@ extern boolean_t spa_mode_readable_spacemaps; extern uint_t zfs_reconstruct_indirect_combinations_max; extern uint_t zfs_btree_verify_intensity; +enum { + ARG_ALLOCATED = 256, + ARG_BLOCK_BIN_MODE, + ARG_BLOCK_CLASSES, +}; + static const char cmdname[] = "zdb"; uint8_t dump_opt[512]; -#define ALLOCATED_OPT 256 - typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size); static uint64_t *zopt_metaslab = NULL; @@ -131,6 +135,20 @@ static objset_t *os; static boolean_t kernel_init_done; static boolean_t corruption_found = B_FALSE; +static enum { + BIN_AUTO = 0, + BIN_PSIZE, + BIN_LSIZE, + BIN_ASIZE, +} block_bin_mode = BIN_AUTO; + +static enum { + CLASS_NORMAL = 1 << 1, + CLASS_SPECIAL = 1 << 2, + CLASS_DEDUP = 1 << 3, + CLASS_OTHER = 1 << 4, +} block_classes = 0; + static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *, boolean_t); static void mos_obj_refd(uint64_t); @@ -749,6 +767,12 @@ usage(void) (void) fprintf(stderr, " Options to control amount of output:\n"); (void) fprintf(stderr, " -b --block-stats " "block statistics\n"); + (void) fprintf(stderr, " --bin=(lsize|psize|asize) " + "bin blocks based on this size in all three columns\n"); + (void) fprintf(stderr, + " --class=(normal|special|dedup|other)[,...]\n" + " only consider blocks from " + "these allocation classes\n"); (void) fprintf(stderr, " -B --backup " "backup stream\n"); (void) fprintf(stderr, " -c --checksum " @@ -1694,7 +1718,7 @@ dump_metaslab(metaslab_t *msp) (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start, (u_longlong_t)space_map_object(sm), freebuf); - if (dump_opt[ALLOCATED_OPT] || + if (dump_opt[ARG_ALLOCATED] || (dump_opt['m'] > 2 && !dump_opt['L'])) { mutex_enter(&msp->ms_lock); VERIFY0(metaslab_load(msp)); @@ -1705,7 +1729,7 @@ dump_metaslab(metaslab_t *msp) dump_metaslab_stats(msp); } - if (dump_opt[ALLOCATED_OPT]) { + if (dump_opt[ARG_ALLOCATED]) { uint64_t off = msp->ms_start; zfs_range_tree_walk(msp->ms_allocatable, dump_allocated, &off); @@ -1726,7 +1750,7 @@ dump_metaslab(metaslab_t *msp) SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift); } - if (dump_opt[ALLOCATED_OPT] || + if (dump_opt[ARG_ALLOCATED] || (dump_opt['m'] > 2 && !dump_opt['L'])) { metaslab_unload(msp); mutex_exit(&msp->ms_lock); @@ -5814,6 +5838,34 @@ dump_size_histograms(zdb_cb_t *zcb) (void) printf("\nBlock Size Histogram\n"); + switch (block_bin_mode) { + case BIN_PSIZE: + printf("(note: all categories are binned by %s)\n", "psize"); + break; + case BIN_LSIZE: + printf("(note: all categories are binned by %s)\n", "lsize"); + break; + case BIN_ASIZE: + printf("(note: all categories are binned by %s)\n", "asize"); + break; + default: + printf("(note: all categories are binned separately)\n"); + break; + } + if (block_classes != 0) { + char buf[256] = ""; + if (block_classes & CLASS_NORMAL) + strlcat(buf, "\"normal\", ", sizeof (buf)); + if (block_classes & CLASS_SPECIAL) + strlcat(buf, "\"special\", ", sizeof (buf)); + if (block_classes & CLASS_DEDUP) + strlcat(buf, "\"dedup\", ", sizeof (buf)); + if (block_classes & CLASS_OTHER) + strlcat(buf, "\"other\", ", sizeof (buf)); + buf[strlen(buf)-2] = '\0'; + printf("(note: only blocks in these classes are counted: %s)\n", + buf); + } /* * Print the first line titles */ @@ -6162,29 +6214,85 @@ skipped: [BPE_GET_PSIZE(bp)]++; return; } + + if (block_classes != 0) { + spa_config_enter(zcb->zcb_spa, SCL_CONFIG, FTAG, RW_READER); + + uint64_t vdev = DVA_GET_VDEV(&bp->blk_dva[0]); + uint64_t offset = DVA_GET_OFFSET(&bp->blk_dva[0]); + vdev_t *vd = vdev_lookup_top(zcb->zcb_spa, vdev); + ASSERT(vd != NULL); + metaslab_t *ms = vd->vdev_ms[offset >> vd->vdev_ms_shift]; + ASSERT(ms != NULL); + metaslab_group_t *mg = ms->ms_group; + ASSERT(mg != NULL); + metaslab_class_t *mc = mg->mg_class; + ASSERT(mc != NULL); + + spa_config_exit(zcb->zcb_spa, SCL_CONFIG, FTAG); + + int class; + if (mc == spa_normal_class(zcb->zcb_spa)) { + class = CLASS_NORMAL; + } else if (mc == spa_special_class(zcb->zcb_spa)) { + class = CLASS_SPECIAL; + } else if (mc == spa_dedup_class(zcb->zcb_spa)) { + class = CLASS_DEDUP; + } else { + class = CLASS_OTHER; + } + + if (!(block_classes & class)) { + goto hist_skipped; + } + } + /* * The binning histogram bins by powers of two up to * SPA_MAXBLOCKSIZE rather than creating bins for * every possible blocksize found in the pool. */ - int bin = highbit64(BP_GET_PSIZE(bp)) - 1; + int bin; + + /* + * Binning strategy: each bin includes blocks up to and including + * the given size (excluding blocks that fit into the previous bin). + * This way, the "4K" bin includes blocks within the (2K; 4K] range. + */ +#define BIN(size) (highbit64((size) - 1)) + + switch (block_bin_mode) { + case BIN_PSIZE: bin = BIN(BP_GET_PSIZE(bp)); break; + case BIN_LSIZE: bin = BIN(BP_GET_LSIZE(bp)); break; + case BIN_ASIZE: bin = BIN(BP_GET_ASIZE(bp)); break; + case BIN_AUTO: break; + default: PANIC("bad block_bin_mode"); abort(); + } + + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_PSIZE(bp)); zcb->zcb_psize_count[bin]++; zcb->zcb_psize_len[bin] += BP_GET_PSIZE(bp); zcb->zcb_psize_total += BP_GET_PSIZE(bp); - bin = highbit64(BP_GET_LSIZE(bp)) - 1; + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_LSIZE(bp)); zcb->zcb_lsize_count[bin]++; zcb->zcb_lsize_len[bin] += BP_GET_LSIZE(bp); zcb->zcb_lsize_total += BP_GET_LSIZE(bp); - bin = highbit64(BP_GET_ASIZE(bp)) - 1; + if (block_bin_mode == BIN_AUTO) + bin = BIN(BP_GET_ASIZE(bp)); zcb->zcb_asize_count[bin]++; zcb->zcb_asize_len[bin] += BP_GET_ASIZE(bp); zcb->zcb_asize_total += BP_GET_ASIZE(bp); +#undef BIN + +hist_skipped: if (!do_claim) return; @@ -9426,7 +9534,11 @@ main(int argc, char **argv) {"livelist", no_argument, NULL, 'y'}, {"zstd-headers", no_argument, NULL, 'Z'}, {"allocated-map", no_argument, NULL, - ALLOCATED_OPT}, + ARG_ALLOCATED}, + {"bin", required_argument, NULL, + ARG_BLOCK_BIN_MODE}, + {"class", required_argument, NULL, + ARG_BLOCK_CLASSES}, {0, 0, 0, 0} }; @@ -9457,7 +9569,7 @@ main(int argc, char **argv) case 'u': case 'y': case 'Z': - case ALLOCATED_OPT: + case ARG_ALLOCATED: dump_opt[c]++; dump_all = 0; break; @@ -9540,6 +9652,59 @@ main(int argc, char **argv) case 'x': vn_dumpdir = optarg; break; + case ARG_BLOCK_BIN_MODE: + if (strcmp(optarg, "lsize") == 0) { + block_bin_mode = BIN_LSIZE; + } else if (strcmp(optarg, "psize") == 0) { + block_bin_mode = BIN_PSIZE; + } else if (strcmp(optarg, "asize") == 0) { + block_bin_mode = BIN_ASIZE; + } else { + (void) fprintf(stderr, + "--bin=\"%s\" must be one of \"lsize\", " + "\"psize\" or \"asize\"\n", optarg); + usage(); + } + break; + + case ARG_BLOCK_CLASSES: { + char *buf = strdup(optarg), *tok = buf, *next, + *save = NULL; + + while ((next = strtok_r(tok, ",", &save)) != NULL) { + tok = NULL; + + if (strcmp(next, "normal") == 0) { + block_classes |= CLASS_NORMAL; + } else if (strcmp(next, "special") == 0) { + block_classes |= CLASS_SPECIAL; + } else if (strcmp(next, "dedup") == 0) { + block_classes |= CLASS_DEDUP; + } else if (strcmp(next, "other") == 0) { + block_classes |= CLASS_OTHER; + } else { + (void) fprintf(stderr, + "--class=\"%s\" must be a " + "comma-separated list of either " + "\"normal\", \"special\", " + "\"asize\" or \"other\"; " + "got \"%s\"\n", + optarg, next); + usage(); + } + } + + if (block_classes == 0) { + (void) fprintf(stderr, + "--class= must be a comma-separated " + "list of either \"normal\", \"special\", " + "\"asize\" or \"other\"; got empty\n"); + usage(); + } + + free(buf); + break; + } default: usage(); break; diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c index 088c0108e911..222b5524669e 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_vdev.c @@ -270,14 +270,13 @@ is_spare(nvlist_t *config, const char *path) * draid* Virtual dRAID spare */ static nvlist_t * -make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) +make_leaf_vdev(const char *arg, boolean_t is_primary, uint64_t ashift) { char path[MAXPATHLEN]; struct stat64 statbuf; nvlist_t *vdev = NULL; const char *type = NULL; boolean_t wholedisk = B_FALSE; - uint64_t ashift = 0; int err; /* @@ -382,31 +381,6 @@ make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary) (uint64_t)wholedisk) == 0); /* - * Override defaults if custom properties are provided. - */ - if (props != NULL) { - const char *value = NULL; - - if (nvlist_lookup_string(props, - zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { - if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { - (void) fprintf(stderr, - gettext("ashift must be a number.\n")); - return (NULL); - } - if (ashift != 0 && - (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { - (void) fprintf(stderr, - gettext("invalid 'ashift=%" PRIu64 "' " - "property: only values between %" PRId32 " " - "and %" PRId32 " are allowed.\n"), - ashift, ASHIFT_MIN, ASHIFT_MAX); - return (NULL); - } - } - } - - /* * If the device is known to incorrectly report its physical sector * size explicitly provide the known correct value. */ @@ -1513,6 +1487,29 @@ construct_spec(nvlist_t *props, int argc, char **argv) const char *type, *fulltype; boolean_t is_log, is_special, is_dedup, is_spare; boolean_t seen_logs; + uint64_t ashift = 0; + + if (props != NULL) { + const char *value = NULL; + + if (nvlist_lookup_string(props, + zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) { + if (zfs_nicestrtonum(NULL, value, &ashift) != 0) { + (void) fprintf(stderr, + gettext("ashift must be a number.\n")); + return (NULL); + } + if (ashift != 0 && + (ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) { + (void) fprintf(stderr, + gettext("invalid 'ashift=%" PRIu64 "' " + "property: only values between %" PRId32 " " + "and %" PRId32 " are allowed.\n"), + ashift, ASHIFT_MIN, ASHIFT_MAX); + return (NULL); + } + } + } top = NULL; toplevels = 0; @@ -1618,9 +1615,9 @@ construct_spec(nvlist_t *props, int argc, char **argv) children * sizeof (nvlist_t *)); if (child == NULL) zpool_no_memory(); - if ((nv = make_leaf_vdev(props, argv[c], + if ((nv = make_leaf_vdev(argv[c], !(is_log || is_special || is_dedup || - is_spare))) == NULL) { + is_spare), ashift)) == NULL) { for (c = 0; c < children - 1; c++) nvlist_free(child[c]); free(child); @@ -1684,6 +1681,10 @@ construct_spec(nvlist_t *props, int argc, char **argv) ZPOOL_CONFIG_ALLOCATION_BIAS, VDEV_ALLOC_BIAS_DEDUP) == 0); } + if (ashift > 0) { + fnvlist_add_uint64(nv, + ZPOOL_CONFIG_ASHIFT, ashift); + } if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) { verify(nvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, @@ -1711,8 +1712,9 @@ construct_spec(nvlist_t *props, int argc, char **argv) * We have a device. Pass off to make_leaf_vdev() to * construct the appropriate nvlist describing the vdev. */ - if ((nv = make_leaf_vdev(props, argv[0], !(is_log || - is_special || is_dedup || is_spare))) == NULL) + if ((nv = make_leaf_vdev(argv[0], !(is_log || + is_special || is_dedup || is_spare), + ashift)) == NULL) goto spec_out; verify(nvlist_add_uint64(nv, diff --git a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h index 93aa4984d734..9ada482be000 100644 --- a/sys/contrib/openzfs/lib/libspl/include/sys/uio.h +++ b/sys/contrib/openzfs/lib/libspl/include/sys/uio.h @@ -41,6 +41,7 @@ #ifndef _LIBSPL_SYS_UIO_H #define _LIBSPL_SYS_UIO_H +#include <sys/sysmacros.h> #include <sys/types.h> #include_next <sys/uio.h> diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c index bdddefb92165..a589ca6896f0 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_status.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_status.c @@ -98,57 +98,57 @@ static const char *const zfs_msgid_table[] = { #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) static int -vdev_missing(vdev_stat_t *vs, uint_t vsc) +vdev_missing(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_CANT_OPEN && vs->vs_aux == VDEV_AUX_OPEN_FAILED); } static int -vdev_faulted(vdev_stat_t *vs, uint_t vsc) +vdev_faulted(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_FAULTED); } static int -vdev_errors(vdev_stat_t *vs, uint_t vsc) +vdev_errors(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_DEGRADED || vs->vs_read_errors != 0 || vs->vs_write_errors != 0 || vs->vs_checksum_errors != 0); } static int -vdev_broken(vdev_stat_t *vs, uint_t vsc) +vdev_broken(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_CANT_OPEN); } static int -vdev_offlined(vdev_stat_t *vs, uint_t vsc) +vdev_offlined(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_OFFLINE); } static int -vdev_removed(vdev_stat_t *vs, uint_t vsc) +vdev_removed(vdev_stat_t *vs, uint_t vsc, void *arg) { - (void) vsc; + (void) vsc, (void) arg; return (vs->vs_state == VDEV_STATE_REMOVED); } static int -vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) +vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc, void *arg) { - if (getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") != NULL) - return (0); + uint64_t ashift = *(uint64_t *)arg; return (VDEV_STAT_VALID(vs_physical_ashift, vsc) && + (ashift == 0 || vs->vs_configured_ashift < ashift) && vs->vs_configured_ashift < vs->vs_physical_ashift); } @@ -156,8 +156,8 @@ vdev_non_native_ashift(vdev_stat_t *vs, uint_t vsc) * Detect if any leaf devices that have seen errors or could not be opened. */ static boolean_t -find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), - boolean_t ignore_replacing) +find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t, void *), + void *arg, boolean_t ignore_replacing) { nvlist_t **child; uint_t c, children; @@ -177,14 +177,16 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func, ignore_replacing)) + for (c = 0; c < children; c++) { + if (find_vdev_problem(child[c], func, arg, + ignore_replacing)) return (B_TRUE); + } } else { uint_t vsc; vdev_stat_t *vs = (vdev_stat_t *)fnvlist_lookup_uint64_array( vdev, ZPOOL_CONFIG_VDEV_STATS, &vsc); - if (func(vs, vsc) != 0) + if (func(vs, vsc, arg) != 0) return (B_TRUE); } @@ -193,9 +195,11 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), */ if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_L2CACHE, &child, &children) == 0) { - for (c = 0; c < children; c++) - if (find_vdev_problem(child[c], func, ignore_replacing)) + for (c = 0; c < children; c++) { + if (find_vdev_problem(child[c], func, arg, + ignore_replacing)) return (B_TRUE); + } } return (B_FALSE); @@ -220,7 +224,7 @@ find_vdev_problem(nvlist_t *vdev, int (*func)(vdev_stat_t *, uint_t), */ static zpool_status_t check_status(nvlist_t *config, boolean_t isimport, - zpool_errata_t *erratap, const char *compat) + zpool_errata_t *erratap, const char *compat, uint64_t ashift) { pool_scan_stat_t *ps = NULL; uint_t vsc, psc; @@ -371,15 +375,15 @@ check_status(nvlist_t *config, boolean_t isimport, * Bad devices in non-replicated config. */ if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) + find_vdev_problem(nvroot, vdev_faulted, NULL, B_TRUE)) return (ZPOOL_STATUS_FAULTED_DEV_NR); if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_missing, B_TRUE)) + find_vdev_problem(nvroot, vdev_missing, NULL, B_TRUE)) return (ZPOOL_STATUS_MISSING_DEV_NR); if (vs->vs_state == VDEV_STATE_CANT_OPEN && - find_vdev_problem(nvroot, vdev_broken, B_TRUE)) + find_vdev_problem(nvroot, vdev_broken, NULL, B_TRUE)) return (ZPOOL_STATUS_CORRUPT_LABEL_NR); /* @@ -402,35 +406,37 @@ check_status(nvlist_t *config, boolean_t isimport, /* * Missing devices in a replicated config. */ - if (find_vdev_problem(nvroot, vdev_faulted, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_faulted, NULL, B_TRUE)) return (ZPOOL_STATUS_FAULTED_DEV_R); - if (find_vdev_problem(nvroot, vdev_missing, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_missing, NULL, B_TRUE)) return (ZPOOL_STATUS_MISSING_DEV_R); - if (find_vdev_problem(nvroot, vdev_broken, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_broken, NULL, B_TRUE)) return (ZPOOL_STATUS_CORRUPT_LABEL_R); /* * Devices with errors */ - if (!isimport && find_vdev_problem(nvroot, vdev_errors, B_TRUE)) + if (!isimport && find_vdev_problem(nvroot, vdev_errors, NULL, B_TRUE)) return (ZPOOL_STATUS_FAILING_DEV); /* * Offlined devices */ - if (find_vdev_problem(nvroot, vdev_offlined, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_offlined, NULL, B_TRUE)) return (ZPOOL_STATUS_OFFLINE_DEV); /* * Removed device */ - if (find_vdev_problem(nvroot, vdev_removed, B_TRUE)) + if (find_vdev_problem(nvroot, vdev_removed, NULL, B_TRUE)) return (ZPOOL_STATUS_REMOVED_DEV); /* * Suboptimal, but usable, ashift configuration. */ - if (find_vdev_problem(nvroot, vdev_non_native_ashift, B_FALSE)) + if (!isimport && + getenv("ZPOOL_STATUS_NON_NATIVE_ASHIFT_IGNORE") == NULL && + find_vdev_problem(nvroot, vdev_non_native_ashift, &ashift, B_FALSE)) return (ZPOOL_STATUS_NON_NATIVE_ASHIFT); /* @@ -510,8 +516,10 @@ zpool_get_status(zpool_handle_t *zhp, const char **msgid, ZFS_MAXPROPLEN, NULL, B_FALSE) != 0) compatibility[0] = '\0'; + uint64_t ashift = zpool_get_prop_int(zhp, ZPOOL_PROP_ASHIFT, NULL); + zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata, - compatibility); + compatibility, ashift); if (msgid != NULL) { if (ret >= NMSGID) @@ -526,7 +534,7 @@ zpool_status_t zpool_import_status(nvlist_t *config, const char **msgid, zpool_errata_t *errata) { - zpool_status_t ret = check_status(config, B_TRUE, errata, NULL); + zpool_status_t ret = check_status(config, B_TRUE, errata, NULL, 0); if (ret >= NMSGID) *msgid = NULL; diff --git a/sys/contrib/openzfs/lib/libzpool/kernel.c b/sys/contrib/openzfs/lib/libzpool/kernel.c index 8ed374627264..70eba5099119 100644 --- a/sys/contrib/openzfs/lib/libzpool/kernel.c +++ b/sys/contrib/openzfs/lib/libzpool/kernel.c @@ -23,6 +23,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2016 Actifio, Inc. All rights reserved. + * Copyright (c) 2025, Klara, Inc. */ #include <assert.h> @@ -645,39 +646,60 @@ __dprintf(boolean_t dprint, const char *file, const char *func, * cmn_err() and panic() * ========================================================================= */ -static char ce_prefix[CE_IGNORE][10] = { "", "NOTICE: ", "WARNING: ", "" }; -static char ce_suffix[CE_IGNORE][2] = { "", "\n", "\n", "" }; -__attribute__((noreturn)) void -vpanic(const char *fmt, va_list adx) +static __attribute__((noreturn)) void +panic_stop_or_abort(void) { - (void) fprintf(stderr, "error: "); - (void) vfprintf(stderr, fmt, adx); - (void) fprintf(stderr, "\n"); + const char *stopenv = getenv("LIBZPOOL_PANIC_STOP"); + if (stopenv != NULL && atoi(stopenv)) { + fputs("libzpool: LIBZPOOL_PANIC_STOP is set, sending " + "SIGSTOP to process group\n", stderr); + fflush(stderr); + + kill(0, SIGSTOP); + + fputs("libzpool: continued after panic stop, " + "aborting\n", stderr); + } abort(); /* think of it as a "user-level crash dump" */ } -__attribute__((noreturn)) void -panic(const char *fmt, ...) +static void +vcmn_msg(int ce, const char *fmt, va_list adx) { - va_list adx; + switch (ce) { + case CE_IGNORE: + return; + case CE_CONT: + break; + case CE_NOTE: + fputs("libzpool: NOTICE: ", stderr); + break; + case CE_WARN: + fputs("libzpool: WARNING: ", stderr); + break; + case CE_PANIC: + fputs("libzpool: PANIC: ", stderr); + break; + default: + fputs("libzpool: [unknown severity %d]: ", stderr); + break; + } - va_start(adx, fmt); - vpanic(fmt, adx); - va_end(adx); + vfprintf(stderr, fmt, adx); + if (ce != CE_CONT) + fputc('\n', stderr); + fflush(stderr); } void vcmn_err(int ce, const char *fmt, va_list adx) { + vcmn_msg(ce, fmt, adx); + if (ce == CE_PANIC) - vpanic(fmt, adx); - if (ce != CE_NOTE) { /* suppress noise in userland stress testing */ - (void) fprintf(stderr, "%s", ce_prefix[ce]); - (void) vfprintf(stderr, fmt, adx); - (void) fprintf(stderr, "%s", ce_suffix[ce]); - } + panic_stop_or_abort(); } void @@ -690,6 +712,25 @@ cmn_err(int ce, const char *fmt, ...) va_end(adx); } +__attribute__((noreturn)) void +panic(const char *fmt, ...) +{ + va_list adx; + + va_start(adx, fmt); + vcmn_msg(CE_PANIC, fmt, adx); + va_end(adx); + + panic_stop_or_abort(); +} + +__attribute__((noreturn)) void +vpanic(const char *fmt, va_list adx) +{ + vcmn_msg(CE_PANIC, fmt, adx); + panic_stop_or_abort(); +} + /* * ========================================================================= * misc routines diff --git a/sys/contrib/openzfs/man/man8/zdb.8 b/sys/contrib/openzfs/man/man8/zdb.8 index c3290ea14769..f51e24fa849c 100644 --- a/sys/contrib/openzfs/man/man8/zdb.8 +++ b/sys/contrib/openzfs/man/man8/zdb.8 @@ -144,6 +144,20 @@ subcommand. Display statistics regarding the number, size .Pq logical, physical and allocated and deduplication of blocks. +.It Fl -bin Ns = Ns ( Li lsize Ns | Ns Li psize Ns | Ns Li asize ) +When used with +.Fl bb , +sort blocks into all three bins according to the given size (instead of binning +a block for each size separately). +.Pp +For instance, with +.Fl -bin Ns = Ns Li lsize , +a block with lsize of 16K and psize of 4K will be added to the 16K bin +in all three columns. +.It Fl -class Ns = Ns ( Li normal Ns | Ns Li special Ns | Ns Li dedup Ns | Ns Li other ) Ns Op , Ns … +When used with +.Fl bb , +only consider blocks from these allocation classes. .It Fl B , -backup Generate a backup stream, similar to .Nm zfs Cm send , diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c index 4de48e013ec4..d0a9c662e6f0 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_ctldir.c @@ -762,8 +762,7 @@ zfsctl_common_pathconf(struct vop_pathconf_args *ap) return (0); case _PC_MIN_HOLE_SIZE: - *ap->a_retval = (int)SPA_MINBLOCKSIZE; - return (0); + return (EINVAL); case _PC_ACL_EXTENDED: *ap->a_retval = 0; diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c index 411225786089..f34a2fd37a77 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vnops_os.c @@ -4116,6 +4116,7 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, { znode_t *zp; zfsvfs_t *zfsvfs; + uint_t blksize, iosize; int error; switch (cmd) { @@ -4127,8 +4128,20 @@ zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, *valp = 64; return (0); case _PC_MIN_HOLE_SIZE: - *valp = (int)SPA_MINBLOCKSIZE; - return (0); + iosize = vp->v_mount->mnt_stat.f_iosize; + if (vp->v_type == VREG) { + zp = VTOZ(vp); + blksize = zp->z_blksz; + if (zp->z_size <= blksize) + blksize = MAX(blksize, iosize); + *valp = (int)blksize; + return (0); + } + if (vp->v_type == VDIR) { + *valp = (int)iosize; + return (0); + } + return (EINVAL); case _PC_ACL_EXTENDED: #if 0 /* POSIX ACLs are not implemented for ZFS on FreeBSD yet. */ zp = VTOZ(vp); @@ -4210,8 +4223,20 @@ zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, zfs_vmobject_wlock(object); (void) vm_page_grab_pages(object, OFF_TO_IDX(start), - VM_ALLOC_NORMAL | VM_ALLOC_WAITOK | VM_ALLOC_ZERO, + VM_ALLOC_NORMAL | VM_ALLOC_WAITOK, ma, count); + if (!vm_page_all_valid(ma[count - 1])) { + /* + * Later in this function, we copy DMU data to + * invalid pages only. The last page may not be + * entirely filled though, if the file does not + * end on a page boundary. Therefore, we zero + * that last page here to make sure it does not + * contain garbage after the end of file. + */ + ASSERT(vm_page_none_valid(ma[count - 1])); + vm_page_zero_invalid(ma[count - 1], FALSE); + } zfs_vmobject_wunlock(object); } if (blksz == zp->z_blksz) diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c index 967a018640e1..4e66bee7744d 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zvol_os.c @@ -337,16 +337,14 @@ zvol_discard(zv_request_t *zvr) } /* - * Align the request to volume block boundaries when a secure erase is - * not required. This will prevent dnode_free_range() from zeroing out - * the unaligned parts which is slow (read-modify-write) and useless - * since we are not freeing any space by doing so. + * Align the request to volume block boundaries. This will prevent + * dnode_free_range() from zeroing out the unaligned parts which is + * slow (read-modify-write) and useless since we are not freeing any + * space by doing so. */ - if (!io_is_secure_erase(bio, rq)) { - start = P2ROUNDUP(start, zv->zv_volblocksize); - end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); - size = end - start; - } + start = P2ROUNDUP(start, zv->zv_volblocksize); + end = P2ALIGN_TYPED(end, zv->zv_volblocksize, uint64_t); + size = end - start; if (start >= end) goto unlock; @@ -467,6 +465,24 @@ zvol_read_task(void *arg) zv_request_task_free(task); } +/* + * Note: + * + * The kernel uses different enum names for the IO opcode, depending on the + * kernel version ('req_opf', 'req_op'). To sidestep this, use macros rather + * than inline functions for these checks. + */ +/* Should this IO go down the zvol write path? */ +#define ZVOL_OP_IS_WRITE(op) \ + (op == REQ_OP_WRITE || \ + op == REQ_OP_FLUSH || \ + op == REQ_OP_DISCARD) + +/* Is this IO type supported by zvols? */ +#define ZVOL_OP_IS_SUPPORTED(op) (op == REQ_OP_READ || ZVOL_OP_IS_WRITE(op)) + +/* Get the IO opcode */ +#define ZVOL_OP(bio, rq) (bio != NULL ? bio_op(bio) : req_op(rq)) /* * Process a BIO or request @@ -486,27 +502,32 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint64_t size = io_size(bio, rq); int rw; - if (rq != NULL) { - /* - * Flush & trim requests go down the zvol_write codepath. Or - * more specifically: - * - * If request is a write, or if it's op_is_sync() and not a - * read, or if it's a flush, or if it's a discard, then send the - * request down the write path. - */ - if (op_is_write(rq->cmd_flags) || - (op_is_sync(rq->cmd_flags) && req_op(rq) != REQ_OP_READ) || - req_op(rq) == REQ_OP_FLUSH || - op_is_discard(rq->cmd_flags)) { - rw = WRITE; - } else { - rw = READ; - } + if (unlikely(!ZVOL_OP_IS_SUPPORTED(ZVOL_OP(bio, rq)))) { + zfs_dbgmsg("Unsupported zvol %s, op=%d, flags=0x%x", + rq != NULL ? "request" : "BIO", + ZVOL_OP(bio, rq), + rq != NULL ? rq->cmd_flags : bio->bi_opf); + ASSERT(ZVOL_OP_IS_SUPPORTED(ZVOL_OP(bio, rq))); + zvol_end_io(bio, rq, SET_ERROR(ENOTSUPP)); + goto out; + } + + if (ZVOL_OP_IS_WRITE(ZVOL_OP(bio, rq))) { + rw = WRITE; } else { - rw = bio_data_dir(bio); + rw = READ; } + /* + * Sanity check + * + * If we're a BIO, check our rw matches the kernel's + * bio_data_dir(bio) rw. We need to check because we support fewer + * IO operations, and want to verify that what we think are reads and + * writes from those operations match what the kernel thinks. + */ + ASSERT(rq != NULL || rw == bio_data_dir(bio)); + if (unlikely(zv->zv_flags & ZVOL_REMOVING)) { zvol_end_io(bio, rq, SET_ERROR(ENXIO)); goto out; @@ -610,7 +631,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, * interfaces lack this functionality (they block waiting for * the i/o to complete). */ - if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) { + if (io_is_discard(bio, rq)) { if (force_sync) { zvol_discard(&zvr); } else { diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index b677f90280d7..dbb5e942e2e6 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -1157,7 +1157,7 @@ buf_fini(void) #if defined(_KERNEL) /* * Large allocations which do not require contiguous pages - * should be using vmem_free() in the linux kernel\ + * should be using vmem_free() in the linux kernel. */ vmem_free(buf_hash_table.ht_table, (buf_hash_table.ht_mask + 1) * sizeof (void *)); @@ -4651,10 +4651,10 @@ arc_flush_task(void *arg) arc_flush_impl(spa_guid, B_FALSE); arc_async_flush_remove(spa_guid, af->af_cache_level); - uint64_t elaspsed = NSEC2MSEC(gethrtime() - start_time); - if (elaspsed > 0) { + uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time); + if (elapsed > 0) { zfs_dbgmsg("spa %llu arc flushed in %llu ms", - (u_longlong_t)spa_guid, (u_longlong_t)elaspsed); + (u_longlong_t)spa_guid, (u_longlong_t)elapsed); } } @@ -9152,7 +9152,7 @@ top: if (dev->l2ad_first) { /* * This is the first sweep through the device. There is - * nothing to evict. We have already trimmmed the + * nothing to evict. We have already trimmed the * whole device. */ goto out; @@ -10086,12 +10086,12 @@ l2arc_device_teardown(void *arg) kmem_free(remdev->l2ad_dev_hdr, remdev->l2ad_dev_hdr_asize); vmem_free(remdev, sizeof (l2arc_dev_t)); - uint64_t elaspsed = NSEC2MSEC(gethrtime() - start_time); - if (elaspsed > 0) { + uint64_t elapsed = NSEC2MSEC(gethrtime() - start_time); + if (elapsed > 0) { zfs_dbgmsg("spa %llu, vdev %llu removed in %llu ms", (u_longlong_t)rva->rva_spa_gid, (u_longlong_t)rva->rva_vdev_gid, - (u_longlong_t)elaspsed); + (u_longlong_t)elapsed); } if (rva->rva_async) diff --git a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c index 45ba17e36c35..f46980cad111 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c +++ b/sys/contrib/openzfs/tests/zfs-tests/cmd/mmap_seek.c @@ -47,25 +47,34 @@ #endif static void +seek_expect(int fd, off_t offset, int whence, off_t expect_offset) +{ + errno = 0; + off_t seek_offset = lseek(fd, offset, whence); + if (seek_offset == expect_offset) + return; + + int err = errno; + fprintf(stderr, "lseek(fd, %ld, SEEK_%s) = %ld (expected %ld)", + offset, (whence == SEEK_DATA ? "DATA" : "HOLE"), + seek_offset, expect_offset); + if (err != 0) + fprintf(stderr, " (errno %d [%s])\n", err, strerror(err)); + else + fputc('\n', stderr); + exit(2); +} + +static inline void seek_data(int fd, off_t offset, off_t expected) { - off_t data_offset = lseek(fd, offset, SEEK_DATA); - if (data_offset != expected) { - fprintf(stderr, "lseek(fd, %d, SEEK_DATA) = %d (expected %d)\n", - (int)offset, (int)data_offset, (int)expected); - exit(2); - } + seek_expect(fd, offset, SEEK_DATA, expected); } -static void +static inline void seek_hole(int fd, off_t offset, off_t expected) { - off_t hole_offset = lseek(fd, offset, SEEK_HOLE); - if (hole_offset != expected) { - fprintf(stderr, "lseek(fd, %d, SEEK_HOLE) = %d (expected %d)\n", - (int)offset, (int)hole_offset, (int)expected); - exit(2); - } + seek_expect(fd, offset, SEEK_HOLE, expected); } int diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh index 2bd9f616170b..1a9774331ba1 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh @@ -47,6 +47,8 @@ function cleanup # bring back removed disk online for further tests insert_disk $REMOVED_DISK $scsi_host poolexists $TESTPOOL && destroy_pool $TESTPOOL + # Since the disk was offline during destroy, remove the label + zpool labelclear $DISK2 -f } log_assert "Testing zpool reopen with pool name as argument" diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh index bb697b76a9f3..9de308c4a11a 100755 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/zvol/zvol_misc/zvol_misc_trim.ksh @@ -41,6 +41,7 @@ # 5. TRIM the first 1MB and last 2MB of the 5MB block of data. # 6. Observe 2MB of used space on the zvol # 7. Verify the trimmed regions are zero'd on the zvol +# 8. Verify Secure Erase does not work on zvols (Linux only) verify_runnable "global" @@ -56,6 +57,7 @@ if is_linux ; then else trimcmd='blkdiscard' fi + secure_trimcmd="$trimcmd --secure" else # By default, FreeBSD 'trim' always does a dry-run. '-f' makes # it perform the actual operation. @@ -114,6 +116,11 @@ function do_test { log_must diff $datafile1 $datafile2 log_must rm $datafile1 $datafile2 + + # Secure erase should not work (Linux check only). + if [ -n "$secure_trimcmd" ] ; then + log_mustnot $secure_trimcmd $zvolpath + fi } log_assert "Verify that a ZFS volume can be TRIMed" diff --git a/sys/dev/aac/aac_linux.c b/sys/dev/aac/aac_linux.c index 609315f50939..65008c562342 100644 --- a/sys/dev/aac/aac_linux.c +++ b/sys/dev/aac/aac_linux.c @@ -52,15 +52,7 @@ #define AAC_LINUX_IOCTL_MIN 0x0000 #define AAC_LINUX_IOCTL_MAX 0x21ff -static linux_ioctl_function_t aac_linux_ioctl; -static struct linux_ioctl_handler aac_linux_handler = {aac_linux_ioctl, - AAC_LINUX_IOCTL_MIN, - AAC_LINUX_IOCTL_MAX}; - -SYSINIT (aac_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &aac_linux_handler); -SYSUNINIT(aac_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &aac_linux_handler); +LINUX_IOCTL_SET(aac, AAC_LINUX_IOCTL_MIN, AAC_LINUX_IOCTL_MAX); static int aac_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/aacraid/aacraid_linux.c b/sys/dev/aacraid/aacraid_linux.c index 267dd84c65d5..6b6259ed7059 100644 --- a/sys/dev/aacraid/aacraid_linux.c +++ b/sys/dev/aacraid/aacraid_linux.c @@ -54,15 +54,7 @@ #define AAC_LINUX_IOCTL_MIN 0x0000 #define AAC_LINUX_IOCTL_MAX 0x21ff -static linux_ioctl_function_t aacraid_linux_ioctl; -static struct linux_ioctl_handler aacraid_linux_handler = {aacraid_linux_ioctl, - AAC_LINUX_IOCTL_MIN, - AAC_LINUX_IOCTL_MAX}; - -SYSINIT (aacraid_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &aacraid_linux_handler); -SYSUNINIT(aacraid_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &aacraid_linux_handler); +LINUX_IOCTL_SET(aacraid, AAC_LINUX_IOCTL_MIN, AAC_LINUX_IOCTL_MAX); static int aacraid_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/ipmi/ipmi_linux.c b/sys/dev/ipmi/ipmi_linux.c index 05eb30a0aa77..58872de12003 100644 --- a/sys/dev/ipmi/ipmi_linux.c +++ b/sys/dev/ipmi/ipmi_linux.c @@ -66,15 +66,7 @@ #define L_IPMICTL_SET_MY_LUN_CMD _IOW(IPMI_IOC_MAGIC, 19, unsigned int) #define L_IPMICTL_GET_MY_LUN_CMD _IOW(IPMI_IOC_MAGIC, 20, unsigned int) -static linux_ioctl_function_t ipmi_linux_ioctl; -static struct linux_ioctl_handler ipmi_linux_handler = {ipmi_linux_ioctl, - IPMI_LINUX_IOCTL_MIN, - IPMI_LINUX_IOCTL_MAX}; - -SYSINIT (ipmi_linux_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &ipmi_linux_handler); -SYSUNINIT(ipmi_linux_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &ipmi_linux_handler); +LINUX_IOCTL_SET(ipmi, IPMI_LINUX_IOCTL_MIN, IPMI_LINUX_IOCTL_MAX); static int ipmi_linux_modevent(module_t mod, int type, void *data) diff --git a/sys/dev/mfi/mfi_linux.c b/sys/dev/mfi/mfi_linux.c index 8ed8baa3858a..9541ff37336a 100644 --- a/sys/dev/mfi/mfi_linux.c +++ b/sys/dev/mfi/mfi_linux.c @@ -53,15 +53,7 @@ #define MFI_LINUX_IOCTL_MIN 0x4d00 #define MFI_LINUX_IOCTL_MAX 0x4d04 -static linux_ioctl_function_t mfi_linux_ioctl; -static struct linux_ioctl_handler mfi_linux_handler = {mfi_linux_ioctl, - MFI_LINUX_IOCTL_MIN, - MFI_LINUX_IOCTL_MAX}; - -SYSINIT (mfi_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &mfi_linux_handler); -SYSUNINIT(mfi_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &mfi_linux_handler); +LINUX_IOCTL_SET(mfi, MFI_LINUX_IOCTL_MIN, MFI_LINUX_IOCTL_MAX); static struct linux_device_handler mfi_device_handler = { "mfi", "megaraid_sas", "mfi0", "megaraid_sas_ioctl_node", -1, 0, 1}; diff --git a/sys/dev/mrsas/mrsas_linux.c b/sys/dev/mrsas/mrsas_linux.c index d7d48740a204..b06788fffc82 100644 --- a/sys/dev/mrsas/mrsas_linux.c +++ b/sys/dev/mrsas/mrsas_linux.c @@ -67,15 +67,7 @@ #define MRSAS_LINUX_IOCTL_MIN 0x4d00 #define MRSAS_LINUX_IOCTL_MAX 0x4d01 -static linux_ioctl_function_t mrsas_linux_ioctl; -static struct linux_ioctl_handler mrsas_linux_handler = {mrsas_linux_ioctl, - MRSAS_LINUX_IOCTL_MIN, -MRSAS_LINUX_IOCTL_MAX}; - -SYSINIT(mrsas_register, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_register_handler, &mrsas_linux_handler); -SYSUNINIT(mrsas_unregister, SI_SUB_KLD, SI_ORDER_MIDDLE, - linux_ioctl_unregister_handler, &mrsas_linux_handler); +LINUX_IOCTL_SET(mrsas, MRSAS_LINUX_IOCTL_MIN, MRSAS_LINUX_IOCTL_MAX); static struct linux_device_handler mrsas_device_handler = {"mrsas", "megaraid_sas", "mrsas0", "megaraid_sas_ioctl_node", -1, 0, 1}; diff --git a/sys/dev/sound/pci/hda/hdac.c b/sys/dev/sound/pci/hda/hdac.c index 80028063bb0d..8a325c538b9b 100644 --- a/sys/dev/sound/pci/hda/hdac.c +++ b/sys/dev/sound/pci/hda/hdac.c @@ -170,6 +170,7 @@ static const struct { { HDA_NVIDIA_GF119, "NVIDIA GF119", 0, 0 }, { HDA_NVIDIA_GF110_1, "NVIDIA GF110", 0, HDAC_QUIRK_MSI }, { HDA_NVIDIA_GF110_2, "NVIDIA GF110", 0, HDAC_QUIRK_MSI }, + { HDA_ATI_RAVEN, "ATI Raven", 0, 0 }, { HDA_ATI_SB450, "ATI SB450", 0, 0 }, { HDA_ATI_SB600, "ATI SB600", 0, 0 }, { HDA_ATI_RS600, "ATI RS600", 0, 0 }, diff --git a/sys/dev/sound/pci/hda/hdac.h b/sys/dev/sound/pci/hda/hdac.h index c11e6b2d6810..8fb54108a833 100644 --- a/sys/dev/sound/pci/hda/hdac.h +++ b/sys/dev/sound/pci/hda/hdac.h @@ -154,6 +154,7 @@ /* ATI */ #define ATI_VENDORID 0x1002 +#define HDA_ATI_RAVEN HDA_MODEL_CONSTRUCT(ATI, 0x15de) #define HDA_ATI_SB450 HDA_MODEL_CONSTRUCT(ATI, 0x437b) #define HDA_ATI_SB600 HDA_MODEL_CONSTRUCT(ATI, 0x4383) #define HDA_ATI_RS600 HDA_MODEL_CONSTRUCT(ATI, 0x793b) diff --git a/sys/dev/tdfx/tdfx_linux.c b/sys/dev/tdfx/tdfx_linux.c index f3410106bad2..777144d21bb6 100644 --- a/sys/dev/tdfx/tdfx_linux.c +++ b/sys/dev/tdfx/tdfx_linux.c @@ -42,7 +42,7 @@ LINUX_IOCTL_SET(tdfx, LINUX_IOCTL_TDFX_MIN, LINUX_IOCTL_TDFX_MAX); * Linux emulation IOCTL for /dev/tdfx */ static int -linux_ioctl_tdfx(struct thread *td, struct linux_ioctl_args* args) +tdfx_linux_ioctl(struct thread *td, struct linux_ioctl_args* args) { cap_rights_t rights; int error = 0; diff --git a/sys/dev/tdfx/tdfx_linux.h b/sys/dev/tdfx/tdfx_linux.h index b87cb41f38fe..9d012c12274b 100644 --- a/sys/dev/tdfx/tdfx_linux.h +++ b/sys/dev/tdfx/tdfx_linux.h @@ -35,18 +35,6 @@ #include <machine/../linux/linux_proto.h> #include <compat/linux/linux_ioctl.h> -/* - * This code was donated by Vladimir N. Silynaev to allow for defining - * ioctls within modules - */ -#define LINUX_IOCTL_SET(n,low,high) \ -static linux_ioctl_function_t linux_ioctl_##n; \ -static struct linux_ioctl_handler n##_handler = {linux_ioctl_##n, low, high}; \ -SYSINIT(n##register, SI_SUB_KLD, SI_ORDER_MIDDLE,\ -linux_ioctl_register_handler, &n##_handler); \ -SYSUNINIT(n##unregister, SI_SUB_KLD, SI_ORDER_MIDDLE,\ -linux_ioctl_unregister_handler, &n##_handler); - /* Values for /dev/3dfx */ /* Query IOCTLs */ #define LINUX_IOCTL_TDFX_QUERY_BOARDS 0x3302 diff --git a/sys/dev/usb/controller/ehci_pci.c b/sys/dev/usb/controller/ehci_pci.c index d7298ab89df7..9550002e3b70 100644 --- a/sys/dev/usb/controller/ehci_pci.c +++ b/sys/dev/usb/controller/ehci_pci.c @@ -88,6 +88,7 @@ #define PCI_EHCI_VENDORID_NEC 0x1033 #define PCI_EHCI_VENDORID_OPTI 0x1045 #define PCI_EHCI_VENDORID_PHILIPS 0x1131 +#define PCI_EHCI_VENDORID_REALTEK 0x10ec #define PCI_EHCI_VENDORID_SIS 0x1039 #define PCI_EHCI_VENDORID_NVIDIA 0x12D2 #define PCI_EHCI_VENDORID_NVIDIA2 0x10DE @@ -218,6 +219,9 @@ ehci_pci_match(device_t self) case 0x15621131: return "Philips ISP156x USB 2.0 controller"; + case 0x816d10ec: + return ("Realtek RTL811x USB 2.0 controller"); + case 0x70021039: return "SiS 968 USB 2.0 controller"; @@ -402,6 +406,9 @@ ehci_pci_attach(device_t self) case PCI_EHCI_VENDORID_PHILIPS: sprintf(sc->sc_vendor, "Philips"); break; + case PCI_EHCI_VENDORID_REALTEK: + sprintf(sc->sc_vendor, "Realtek"); + break; case PCI_EHCI_VENDORID_SIS: sprintf(sc->sc_vendor, "SiS"); break; diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index 7f5b29ca2085..ec95716ea485 100644 --- a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -216,10 +216,17 @@ NFSD_VNET_DEFINE_STATIC(u_char *, nfsrv_dnsname) = NULL; * marked 0 in this array, the code will still work, just not quite as * efficiently.) */ -static int nfs_bigreply[NFSV42_NPROCS] = { 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }; +static bool nfs_bigreply[NFSV42_NPROCS] = { + [NFSPROC_GETACL] = true, + [NFSPROC_GETEXTATTR] = true, + [NFSPROC_LISTEXTATTR] = true, + [NFSPROC_LOOKUP] = true, + [NFSPROC_READ] = true, + [NFSPROC_READDIR] = true, + [NFSPROC_READDIRPLUS] = true, + [NFSPROC_READDS] = true, + [NFSPROC_READLINK] = true, +}; /* local functions */ static int nfsrv_skipace(struct nfsrv_descript *nd, int *acesizep); @@ -232,6 +239,8 @@ static int nfsrv_getrefstr(struct nfsrv_descript *, u_char **, u_char **, static void nfsrv_refstrbigenough(int, u_char **, u_char **, int *); static uint32_t vtonfsv4_type(struct vattr *); static __enum_uint8(vtype) nfsv4tov_type(uint32_t, uint16_t *); +static void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *, + struct nfsclsession *, bool, struct ucred *); static struct { int op; @@ -5021,9 +5030,9 @@ nfsv4_seqsess_cacherep(uint32_t slotid, struct nfsslot *slots, int repstat, /* * Generate the xdr for an NFSv4.1 Sequence Operation. */ -void +static void nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, - struct nfsclsession *sep, int dont_replycache, struct ucred *cred) + struct nfsclsession *sep, bool dont_replycache, struct ucred *cred) { uint32_t *tl, slotseq = 0; int error, maxslot, slotpos; @@ -5054,7 +5063,7 @@ nfsv4_setsequence(struct nfsmount *nmp, struct nfsrv_descript *nd, *tl++ = txdr_unsigned(slotseq); *tl++ = txdr_unsigned(slotpos); *tl++ = txdr_unsigned(maxslot); - if (dont_replycache == 0) + if (!dont_replycache) *tl = newnfs_true; else *tl = newnfs_false; diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 61083ecf2d66..16a76c060e78 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -361,8 +361,6 @@ int nfsv4_getipaddr(struct nfsrv_descript *, struct sockaddr_in *, int nfsv4_seqsession(uint32_t, uint32_t, uint32_t, struct nfsslot *, struct mbuf **, uint16_t); void nfsv4_seqsess_cacherep(uint32_t, struct nfsslot *, int, struct mbuf **); -void nfsv4_setsequence(struct nfsmount *, struct nfsrv_descript *, - struct nfsclsession *, int, struct ucred *); int nfsv4_sequencelookup(struct nfsmount *, struct nfsclsession *, int *, int *, uint32_t *, uint8_t *, bool); void nfsv4_freeslot(struct nfsclsession *, int, bool); diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 3697d95fe0e5..00732d55cd46 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -2909,12 +2909,6 @@ prison_remove(struct prison *pr) { sx_assert(&allprison_lock, SA_XLOCKED); mtx_assert(&pr->pr_mtx, MA_OWNED); - if (!prison_isalive(pr)) { - /* Silently ignore already-dying prisons. */ - mtx_unlock(&pr->pr_mtx); - sx_xunlock(&allprison_lock); - return; - } prison_deref(pr, PD_KILL | PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED); } @@ -3461,12 +3455,17 @@ prison_deref(struct prison *pr, int flags) /* Kill the prison and its descendents. */ KASSERT(pr != &prison0, ("prison_deref trying to kill prison0")); - if (!(flags & PD_DEREF)) { - prison_hold(pr); - flags |= PD_DEREF; + if (!prison_isalive(pr)) { + /* Silently ignore already-dying prisons. */ + flags &= ~PD_KILL; + } else { + if (!(flags & PD_DEREF)) { + prison_hold(pr); + flags |= PD_DEREF; + } + flags = prison_lock_xlock(pr, flags); + prison_deref_kill(pr, &freeprison); } - flags = prison_lock_xlock(pr, flags); - prison_deref_kill(pr, &freeprison); } if (flags & PD_DEUREF) { /* Drop a user reference. */ diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index db1b6f33a8ef..3a17ed289235 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -843,7 +843,7 @@ /* #undef ZFS_DEVICE_MINOR */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.4.99-95-FreeBSD_g5605a6d79" +#define ZFS_META_ALIAS "zfs-2.4.99-113-FreeBSD_g6ae99d269" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -873,7 +873,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "95-FreeBSD_g5605a6d79" +#define ZFS_META_RELEASE "113-FreeBSD_g6ae99d269" /* Define the project version. */ #define ZFS_META_VERSION "2.4.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 8a1802f5480b..6f568754f61d 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.4.99-95-g5605a6d79" +#define ZFS_META_GITREV "zfs-2.4.99-113-g6ae99d269" diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 66983edcdd73..10383bc0801e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -477,7 +477,7 @@ bbr_log_rtt_shrinks(struct tcp_bbr *bbr, uint32_t cts, uint32_t applied, uint16_t set); static struct bbr_sendmap * bbr_find_lowest_rsm(struct tcp_bbr *bbr); -static __inline uint32_t +static inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type); static void bbr_log_to_start(struct tcp_bbr *bbr, uint32_t cts, uint32_t to, int32_t pacing_delay, @@ -1841,7 +1841,7 @@ bbr_counter_destroy(void) } -static __inline void +static inline void bbr_fill_in_logging_data(struct tcp_bbr *bbr, struct tcp_log_bbr *l, uint32_t cts) { memset(l, 0, sizeof(union tcp_log_stackspecific)); @@ -4206,7 +4206,7 @@ bbr_calc_thresh_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, /* * Return one of three RTTs to use (in microseconds). */ -static __inline uint32_t +static inline uint32_t bbr_get_rtt(struct tcp_bbr *bbr, int32_t rtt_type) { uint32_t f_rtt; @@ -4370,7 +4370,7 @@ bbr_timeout_rack(struct tcpcb *tp, struct tcp_bbr *bbr, uint32_t cts) return (0); } -static __inline void +static inline void bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap *rsm, uint32_t start) { int idx; diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index c7962b57a69e..50077abdfd86 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -4730,7 +4730,7 @@ rack_make_timely_judgement(struct tcp_rack *rack, uint32_t rtt, int32_t rtt_diff return (timely_says); } -static __inline int +static inline int rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm) { if (SEQ_GEQ(rsm->r_start, tp->gput_seq) && @@ -4767,7 +4767,7 @@ rack_in_gp_window(struct tcpcb *tp, struct rack_sendmap *rsm) return (0); } -static __inline void +static inline void rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm) { @@ -4784,7 +4784,7 @@ rack_mark_in_gp_win(struct tcpcb *tp, struct rack_sendmap *rsm) rsm->r_flags &= ~RACK_IN_GP_WIN; } -static __inline void +static inline void rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) { /* A GP measurement is ending, clear all marks on the send map*/ @@ -4802,7 +4802,7 @@ rack_clear_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) } -static __inline void +static inline void rack_tend_gp_marks(struct tcpcb *tp, struct tcp_rack *rack) { struct rack_sendmap *rsm = NULL; @@ -6864,6 +6864,18 @@ rack_mark_lost(struct tcpcb *tp, } } +static inline void +rack_mark_nolonger_lost(struct tcp_rack *rack, struct rack_sendmap *rsm) +{ + KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), + ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); + rsm->r_flags &= ~RACK_WAS_LOST; + if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) + rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; + else + rack->r_ctl.rc_considered_lost = 0; +} + /* * RACK Timer, here we simply do logging and house keeping. * the normal rack_output() function will call the @@ -7005,7 +7017,7 @@ rack_setup_offset_for_rsm(struct tcp_rack *rack, struct rack_sendmap *src_rsm, s rsm->orig_t_space = M_TRAILINGROOM(rsm->m); } -static __inline void +static inline void rack_clone_rsm(struct tcp_rack *rack, struct rack_sendmap *nrsm, struct rack_sendmap *rsm, uint32_t start) { @@ -8130,13 +8142,7 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, * remove the lost desgination and reduce the * bytes considered lost. */ - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } idx = rsm->r_rtr_cnt - 1; rsm->r_tim_lastsent[idx] = ts; @@ -9492,6 +9498,11 @@ do_rest_ofb: if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; + /* + * Note here we do not use our rack_mark_nolonger_lost() function + * since we are moving our data pointer around and the + * ack'ed side is already not considered lost. + */ my_chg = (nrsm->r_end - nrsm->r_start); KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); @@ -9659,16 +9670,11 @@ do_rest_ofb: changed += (rsm->r_end - rsm->r_start); /* You get a count for acking a whole segment or more */ if (rsm->r_flags & RACK_WAS_LOST) { - int my_chg; - - my_chg = (rsm->r_end - rsm->r_start); - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (my_chg <= rack->r_ctl.rc_considered_lost) - rack->r_ctl.rc_considered_lost -= my_chg; - else - rack->r_ctl.rc_considered_lost = 0; + /* + * Here we can use the inline function since + * the rsm is truly marked lost and now no longer lost. + */ + rack_mark_nolonger_lost(rack, rsm); } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); if (rsm->r_in_tmap) /* should be true */ @@ -9851,6 +9857,10 @@ do_rest_ofb: if (rsm->r_flags & RACK_WAS_LOST) { int my_chg; + /* + * Note here we are using hookery again so we can't + * use our rack_mark_nolonger_lost() function. + */ my_chg = (nrsm->r_end - nrsm->r_start); KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); @@ -9952,16 +9962,10 @@ do_rest_ofb: rack_update_rtt(tp, rack, rsm, to, cts, SACKED, 0); changed += (rsm->r_end - rsm->r_start); if (rsm->r_flags & RACK_WAS_LOST) { - int my_chg; - - my_chg = (rsm->r_end - rsm->r_start); - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= my_chg), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (my_chg <= rack->r_ctl.rc_considered_lost) - rack->r_ctl.rc_considered_lost -= my_chg; - else - rack->r_ctl.rc_considered_lost = 0; + /* + * Here it is safe to use our function. + */ + rack_mark_nolonger_lost(rack, rsm); } rack->r_ctl.rc_sacked += (rsm->r_end - rsm->r_start); @@ -10362,13 +10366,7 @@ more: * and yet before retransmitting we get an ack * which can happen due to reordering. */ - rsm->r_flags &= ~RACK_WAS_LOST; - KASSERT((rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative", rsm, rack)); - if (rack->r_ctl.rc_considered_lost >= (rsm->r_end - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= rsm->r_end - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } rack_log_map_chg(tp, rack, NULL, rsm, NULL, MAP_FREE, rsm->r_end, __LINE__); rack->r_ctl.rc_holes_rxt -= rsm->r_rtr_bytes; @@ -10476,12 +10474,7 @@ more: * which can happen due to reordering. In this * case its only a partial ack of the send. */ - KASSERT((rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)), - ("rsm:%p rack:%p rc_considered_lost goes negative th_ack:%u", rsm, rack, th_ack)); - if (rack->r_ctl.rc_considered_lost >= (th_ack - rsm->r_start)) - rack->r_ctl.rc_considered_lost -= th_ack - rsm->r_start; - else - rack->r_ctl.rc_considered_lost = 0; + rack_mark_nolonger_lost(rack, rsm); } /* * Clear the dup ack count for diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index d6fc24a23fe9..fd70fb1c8a36 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -5965,6 +5965,7 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, ctx.nat_pool = &(ctx.nr->rdr); } + *ctx.rm = &V_pf_default_rule; if (ctx.nr && ctx.nr->natpass) { r = ctx.nr; ruleset = *ctx.rsm; diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index fb1b121d0bc0..5d85e16f18e3 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -216,6 +216,7 @@ pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_krulese */ ctx->arsm = ctx->aruleset; } + break; } else { ctx->a = r; /* remember anchor */ ctx->aruleset = ruleset; /* and its ruleset */ diff --git a/tests/sys/netpfil/pf/rdr.sh b/tests/sys/netpfil/pf/rdr.sh index 24b95b2047f4..b0f0e6d13d0f 100644 --- a/tests/sys/netpfil/pf/rdr.sh +++ b/tests/sys/netpfil/pf/rdr.sh @@ -338,6 +338,56 @@ natpass_cleanup() pft_cleanup } +atf_test_case "pr290177" "cleanup" +pr290177_head() +{ + atf_set descr 'Test PR290177' + atf_set require.user root +} + +pr290177_body() +{ + pft_init + + epair=$(vnet_mkepair) + + ifconfig ${epair}a 192.0.2.2/24 up + ifconfig ${epair}a inet alias 192.0.2.3/24 up + + vnet_mkjail alcatraz ${epair}b + jexec alcatraz ifconfig ${epair}b 192.0.2.1/24 up + jexec alcatraz ifconfig lo0 127.0.0.1/8 up + + # Sanity check + atf_check -s exit:0 -o ignore \ + ping -c 1 192.0.2.1 + + jexec alcatraz pfctl -e + pft_set_rules alcatraz \ + "table <white> { 192.0.2.2 }" \ + "no rdr inet proto tcp from <white> to any port 25" \ + "rdr pass inet proto tcp from any to any port 25 -> 127.0.0.1 port 2500" + + echo foo | jexec alcatraz nc -N -l 2500 & + sleep 1 + + reply=$(nc -w 3 -s 192.0.2.2 192.0.2.1 25) + if [ "${reply}" == "foo" ] + then + atf_fail "no rdr rule failed" + fi + reply=$(nc -w 3 -s 192.0.2.3 192.0.2.1 25) + if [ "${reply}" != "foo" ] + then + atf_fail "rdr rule failed" + fi +} + +pr290177_cleanup() +{ + pft_cleanup +} + atf_init_test_cases() { atf_add_test_case "natpass" @@ -345,4 +395,5 @@ atf_init_test_cases() atf_add_test_case "tcp_v6_pass" atf_add_test_case "srcport_compat" atf_add_test_case "srcport_pass" + atf_add_test_case "pr290177" } diff --git a/tools/tools/git/mfc-candidates.lua b/tools/tools/git/mfc-candidates.lua index a1420dc726da..cbf7dcb3a257 100755 --- a/tools/tools/git/mfc-candidates.lua +++ b/tools/tools/git/mfc-candidates.lua @@ -117,7 +117,7 @@ end local function usage(from_branch, to_branch, author) local script_name = arg[0]:match("([^/]+)$") - print(script_name .. " [-ah] [-f from_branch] [-t to_branch] [-u user] [-X exclude_file] [path ...]") + print(script_name .. " [-ah] [-F git-show-fmt] [-f from_branch] [-t to_branch] [-u user] [-X exclude_file] [path ...]") print() params(from_branch, to_branch, author) end @@ -162,6 +162,7 @@ local function main() local do_help = false local exclude_file = nil + local gitshowfmt = '%h %s' local i = 1 while i <= #arg and arg[i] do local opt = arg[i] @@ -181,6 +182,9 @@ local function main() i = i + 1 elseif opt == "-v" then verbose = verbose + 1 + elseif opt == "-F" then + gitshowfmt = arg[i + 1] + i = i + 1 elseif opt == "-X" then exclude_file = arg[i + 1] i = i + 1 @@ -217,7 +221,7 @@ local function main() -- Print the result for _, hash in ipairs(result_hashes) do - print(exec_command("git show --pretty='%h %s' --no-patch " .. hash)) + print(exec_command("git show --pretty='" .. gitshowfmt .. "' --no-patch " .. hash)) end end diff --git a/usr.bin/find/find.c b/usr.bin/find/find.c index 8b24ecd6a306..2247ae86a94b 100644 --- a/usr.bin/find/find.c +++ b/usr.bin/find/find.c @@ -211,7 +211,7 @@ find_execute(PLAN *plan, char *paths[]) } if (showinfo) { - fprintf(stderr, "Scanning: %s/%s\n", entry->fts_path, entry->fts_name); + fprintf(stderr, "Scanning: %s\n", entry->fts_path); fprintf(stderr, "Scanned: %zu\n\n", counter); showinfo = 0; } diff --git a/usr.bin/sockstat/main.c b/usr.bin/sockstat/main.c index d1ea6b1bc958..abb73acafc2f 100644 --- a/usr.bin/sockstat/main.c +++ b/usr.bin/sockstat/main.c @@ -1196,7 +1196,9 @@ calculate_sock_column_widths(struct col_widths *cw, struct sock *s) first = true; len = strlen(s->protoname); - if (s->vflag & (INP_IPV4 | INP_IPV6)) + if (s->vflag & INP_IPV4) + len += 1; + if (s->vflag & INP_IPV6) len += 1; cw->proto = MAX(cw->proto, len); diff --git a/usr.sbin/bsdinstall/Makefile b/usr.sbin/bsdinstall/Makefile index e5bb3197fa05..5d7be97ed7cf 100644 --- a/usr.sbin/bsdinstall/Makefile +++ b/usr.sbin/bsdinstall/Makefile @@ -22,7 +22,8 @@ REVISION?= ${_REVISION} .if ${BRANCH} == CURRENT || ${BRANCH} == STABLE SUBURL= base_latest -.elif ${BRANCH} == RELEASE +.elif ${BRANCH} == RELEASE || ${BRANCH:C/[0-9]+$//} == BETA || \ + ${BRANCH:C/[0-9]+$//} == RC SUBURL= base_release_${REVISION:C/[0-9]+\.//} .else .warning Invalid branch "${BRANCH}" |