13 files changed, 649 insertions, 13 deletions
diff --git a/lib/geom/part/gpart.8 b/lib/geom/part/gpart.8
index f76c1d9d5d6c..2e11417f8494 100644
--- a/lib/geom/part/gpart.8
+++ b/lib/geom/part/gpart.8
@@ -22,7 +22,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd February 11, 2025
+.Dd October 24, 2025
 .Dt GPART 8
 .Os
 .Sh NAME
@@ -1497,6 +1497,26 @@ and
 .Bd -literal -offset indent
 /sbin/gpart backup ada0 | /sbin/gpart restore -F ada1 ada2
 .Ed
+.Sh DIAGNOSTICS
+.Bl -diag
+.It gpart: arg0 '%s': Invalid argument
+The provided
+.Ar geom
+argument
+is not a GEOM provider.
+Not every device in
+.Xr devfs 4
+is a GEOM provider.
+For example, a
+.Xr zfs 4
+zvol will show up as a GEOM provider only if its
+.Sy volmode
+is set properly
+.Po refer to
+.Xr zfsprops 8
+for details
+.Pc .
+.El
 .Sh SEE ALSO
 .Xr geom 4 ,
 .Xr boot0cfg 8 ,
diff --git a/lib/libmd/Makefile b/lib/libmd/Makefile
index 547a134fc440..c4ab767c8b2f 100644
--- a/lib/libmd/Makefile
+++ b/lib/libmd/Makefile
@@ -108,7 +108,7 @@ CFLAGS+= -DWEAK_REFS
 CFLAGS.skein_block.c+= -DSKEIN_LOOP=995
 .PATH: ${.CURDIR}/${MACHINE_ARCH} ${SRCTOP}/sys/crypto/sha2
 .PATH: ${SRCTOP}/sys/crypto/skein ${SRCTOP}/sys/crypto/skein/${MACHINE_ARCH}
-.PATH: ${SRCTOP}/sys/kern
+.PATH: ${SRCTOP}/sys/crypto
 
 USE_ASM_SOURCES?=1
 .if defined(BOOTSTRAPPING) || ${MK_MACHDEP_OPTIMIZATIONS} == no
@@ -117,6 +117,13 @@ USE_ASM_SOURCES:=0
 .endif
 
 .if ${USE_ASM_SOURCES} != 0
+.if exists(${MACHINE_ARCH}/md5block.S)
+SRCS+=	md5block.S
+CFLAGS+= -DMD5_ASM
+.if exists(${MACHINE_ARCH}/md5dispatch.c)
+SRCS+=	md5dispatch.c
+.endif	# md5dispatch.c is optional; only some architectures provide one
+.endif	# md5block.S
 .if exists(${MACHINE_ARCH}/sha1block.S)
 SRCS+=	sha1block.S
 CFLAGS+= -DSHA1_ASM
diff --git a/lib/libmd/aarch64/md5block.S b/lib/libmd/aarch64/md5block.S
new file mode 100644
index 000000000000..b928c8dd795a
--- /dev/null
+++ b/lib/libmd/aarch64/md5block.S
@@ -0,0 +1,206 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/elf_common.h>
+#include <machine/asm.h>
+
+# optimal instruction sequence for k = \key + \m
+.macro	addkm	key, m
+.if 0x100000000 - \key > 0x00ffffff
+	movz	k, #\key & 0xffff		// general case: low half of the key
+	movk	k, #\key >> 16, lsl #16		// high half of the key
+	add	k, k, \m			// k = key + m
+.elseif 0x100000000 - \key > 0x0000ffff
+	sub	k, \m, #(0x100000000 - \key) & 0xfff000	// 2^32 - key fits in 24 bits:
+	sub	k, k, #(0x100000000 - \key) & 0xfff	// subtract it 12 bits at a time
+.else
+	movz	k, #0x100000000 - \key		// 2^32 - key fits in 16 bits
+	sub	k, \m, k			// k = m - (2^32 - key)
+.endif
+.endm
+
+.macro	round	a, b, c, d, f, key, m, s
+	\f	f, \b, \c, \d		// f = round function (macro named by the f argument) of b, c, d
+	addkm	\key, \m		// k[i] + m[g]
+	add	\a, \a, k		// k[i] + m[g] + a
+	add	\a, \a, f		// k[i] + m[g] + a + f
+	ror	\a, \a, #32-\s		// rotate left by s, written as ROR by 32 - s
+	add	\a, \a, \b		// a = b + rotated sum
+.endm
+
+	/* f0: f = b ? c : d per bit, computed as ((c ^ d) & b) ^ d */
+.macro	f0	f, b, c, d
+	eor	\f, \c, \d		// c ^ d
+	and	\f, \f, \b		// (c ^ d) & b
+	eor	\f, \f, \d		// ((c ^ d) & b) ^ d
+.endm
+
+	/*
+	 * round1: special-cased step using f1 = d ? b : c.  The terms (d & b)
+	 * and (~d & c) never share a set bit, so f1 = (d & b) + (~d & c).
+	 */
+.macro	round1	a, b, c, d, key, m, s
+	bic	tmp, \c, \d		// ~d & c
+	addkm	\key, \m		// k[i] + m[g]
+	add	\a, \a, k		// k[i] + m[g] + a
+	and	f, \b, \d		// d & b
+	add	\a, \a, tmp		// k[i] + m[g] + a + (~d & c)
+	add	\a, \a, f		// k[i] + m[g] + a + (~d & c) + (d & b)
+	ror	\a, \a, #32-\s		// rotate left by s, written as ROR by 32 - s
+	add	\a, \a, \b		// a = b + rotated sum
+.endm
+
+	/* f2: f = b ^ c ^ d */
+.macro	f2	f, b, c, d
+	eor	\f, \c, \d		// c ^ d
+	eor	\f, \f, \b		// b ^ c ^ d
+.endm
+
+	/* f3: f = c ^ (b | ~d) */
+.macro	f3	f, b, c, d
+	orn	\f, \b, \d		// b | ~d
+	eor	\f, \f, \c		// c ^ (b | ~d)
+.endm
+
+	/* rounds: do 4 rounds with round function f; the roles of a, b, c, d rotate each round */
+.macro	rounds	f, m0, m1, m2, m3, s0, s1, s2, s3, k0, k1, k2, k3
+	round	a, b, c, d, \f, \k0, \m0, \s0	// updates a
+	round	d, a, b, c, \f, \k1, \m1, \s1	// updates d
+	round	c, d, a, b, \f, \k2, \m2, \s2	// updates c
+	round	b, c, d, a, \f, \k3, \m3, \s3	// updates b
+.endm
+
+	/* rounds0..rounds3: do 4 rounds with f0, f1, f2, f3 and that group's shift amounts */
+.macro	rounds0	m0, m1, m2, m3, k0, k1, k2, k3
+	rounds	f0, \m0, \m1, \m2, \m3, 7, 12, 17, 22, \k0, \k1, \k2, \k3
+.endm
+
+.macro	rounds1	m0, m1, m2, m3, k0, k1, k2, k3
+	round1	a, b, c, d, \k0, \m0,  5	// f1 goes through the dedicated round1 macro
+	round1	d, a, b, c, \k1, \m1,  9
+	round1	c, d, a, b, \k2, \m2, 14
+	round1	b, c, d, a, \k3, \m3, 20
+.endm
+
+.macro	rounds2	m0, m1, m2, m3, k0, k1, k2, k3
+	rounds	f2, \m0, \m1, \m2, \m3, 4, 11, 16, 23, \k0, \k1, \k2, \k3
+.endm
+
+.macro	rounds3	m0, m1, m2, m3, k0, k1, k2, k3
+	rounds	f3, \m0, \m1, \m2, \m3, 6, 10, 15, 21, \k0, \k1, \k2, \k3
+.endm
+
+	/* md5block(MD5_CTX, buf, len): hash the whole 64 byte blocks of buf into the context */
+ENTRY(_libmd_md5block)
+ctx	.req	x0			// context; state words a, b, c, d at offsets 0, 4, 8, 12
+buf	.req	x1			// input cursor, advanced by 64 per block
+len	.req	x2			// input length in bytes
+end	.req	x2			// aliases len
+a	.req	w3			// working state a, b, c, d
+b	.req	w4
+c	.req	w5
+d	.req	w6
+f	.req	w7			// round function result
+tmp	.req	w8			// scratch used by round1
+k	.req	w9			// key + message word, set by addkm
+m0	.req	w10			// m0-m15: the 16 message words of the current block
+m1	.req	w11
+m2	.req	w12
+m3	.req	w13
+m4	.req	w14
+m5	.req	w15
+m6	.req	w16
+m7	.req	w17
+					// x18 is the platform register
+m8	.req	w19			// x19-x26 are saved and restored below
+m9	.req	w20
+m10	.req	w21
+m11	.req	w22
+m12	.req	w23
+m13	.req	w24
+m14	.req	w25
+m15	.req	w26
+
+a_	.req	m0			// previous state for the final addition; these
+b_	.req	m7			// alias message registers, which are reused
+c_	.req	m14			// once the last round is done
+d_	.req	m5
+
+	stp	x19, x20, [sp, #-0x40]!	// allocate 64 bytes and save x19-x26
+	stp	x21, x22, [sp, #0x10]
+	stp	x23, x24, [sp, #0x20]
+	stp	x25, x26, [sp, #0x30]
+
+	bics	len, len, #63		// round len down to whole 64 byte blocks, setting flags
+	add	end, buf, len		// end pointer
+
+	beq	.Lend			// was len == 0 after BICS?
+
+	ldp	a, b, [ctx, #0]		// load the current state
+	ldp	c, d, [ctx, #8]
+
+	/* first eight rounds interleaved with data loads */
+.Lloop:	ldp	m0, m1, [buf, #0]
+	round	a, b, c, d, f0, 0xd76aa478, m0,  7
+	ldp	m2, m3, [buf, #8]
+	round	d, a, b, c, f0, 0xe8c7b756, m1, 12
+	ldp	m4, m5, [buf, #16]
+	round	c, d, a, b, f0, 0x242070db, m2, 17
+	ldp	m6, m7, [buf, #24]
+	round	b, c, d, a, f0, 0xc1bdceee, m3, 22
+
+	ldp	m8, m9, [buf, #32]
+	round	a, b, c, d, f0, 0xf57c0faf, m4,  7
+	ldp	m10, m11, [buf, #40]
+	round	d, a, b, c, f0, 0x4787c62a, m5, 12
+	ldp	m12, m13, [buf, #48]
+	round	c, d, a, b, f0, 0xa8304613, m6, 17
+	ldp	m14, m15, [buf, #56]
+	round	b, c, d, a, f0, 0xfd469501, m7, 22
+
+	/* remaining rounds use the roundsX macros */
+	rounds0	 m8,  m9, m10, m11, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
+	rounds0	m12, m13, m14, m15, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821
+
+	rounds1	 m1,  m6, m11,  m0, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
+	rounds1	 m5, m10, m15,  m4, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
+	rounds1	 m9, m14,  m3,  m8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
+	rounds1	m13,  m2,  m7, m12, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a
+
+	rounds2	 m5,  m8, m11, m14, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
+	rounds2	 m1,  m4,  m7, m10, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
+	rounds2	m13,  m0,  m3,  m6, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
+	rounds2	 m9, m12, m15,  m2, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665
+
+	rounds3	 m0,  m7, m14,  m5, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
+	rounds3	m12,  m3, m10,  m1, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
+	rounds3	 m8, m15,  m6, m13, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
+	rounds3	 m4, m11,  m2,  m9, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+
+	ldp	a_, b_, [ctx, #0]		// reload the state from before this block
+	ldp	c_, d_, [ctx, #8]
+	add	a, a, a_			// and add it to the new state
+	add	b, b, b_
+	add	c, c, c_
+	add	d, d, d_
+	stp	a, b, [ctx, #0]		// the context is updated after every block
+	stp	c, d, [ctx, #8]
+
+	add	buf, buf, #64		// advance to the next block
+	cmp	buf, end
+	bne	.Lloop			// loop until the end pointer is reached
+
+.Lend:	ldp	x25, x26, [sp, #0x30]	// restore x19-x26 and release the stack space
+	ldp	x23, x24, [sp, #0x20]
+	ldp	x21, x22, [sp, #0x10]
+	ldp	x19, x20, [sp], #0x40
+
+	ret
+END(_libmd_md5block)
+
+GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
+
+	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libmd/aarch64/sha1block.S b/lib/libmd/aarch64/sha1block.S
index 56a0297efadd..e16fb36342fd 100644
--- a/lib/libmd/aarch64/sha1block.S
+++ b/lib/libmd/aarch64/sha1block.S
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  *
diff --git a/lib/libmd/aarch64/sha1dispatch.c b/lib/libmd/aarch64/sha1dispatch.c
index e34bf0a1a344..045527044320 100644
--- a/lib/libmd/aarch64/sha1dispatch.c
+++ b/lib/libmd/aarch64/sha1dispatch.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
diff --git a/lib/libmd/amd64/md5block.S b/lib/libmd/amd64/md5block.S
new file mode 100644
index 000000000000..0dd594dd5dc2
--- /dev/null
+++ b/lib/libmd/amd64/md5block.S
@@ -0,0 +1,363 @@
+/*-
+ * Copyright (c) 2024, 2025 Robert Clausecker <fuz@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <machine/asm.h>
+
+/* allrounds: invoke rfn0..rfn3 four times each, passing the number of the first of four rounds and their four round keys */
+.macro	allrounds	rfn0, rfn1, rfn2, rfn3
+	\rfn0	 0, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee
+	\rfn0	 4, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501
+	\rfn0	 8, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be
+	\rfn0	12, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821
+
+	\rfn1	16, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa
+	\rfn1	20, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8
+	\rfn1	24, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed
+	\rfn1	28, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a
+
+	\rfn2	32, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c
+	\rfn2	36, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70
+	\rfn2	40, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05
+	\rfn2	44, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665
+
+	\rfn3	48, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039
+	\rfn3	52, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1
+	\rfn3	56, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1
+	\rfn3	60, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
+.endm
+
+	// md5block(MD5_CTX, buf, len): rdi = context, rsi = input, rdx = length in bytes
+ENTRY(_libmd_md5block_baseline)
+.macro	round	a, b, c, d, f, k, m, s
+	\f	%ebp, \b, \c, \d		// ebp = round function of b, c, d
+	add	$\k, \a			// a + k[i]
+	add	((\m)%16*4)(%rsi), \a	// a + k[i] + m[g]
+	add	%ebp, \a		// a + k[i] + m[g] + f
+	rol	$\s, \a			// rotate the sum left by s
+	add	\b, \a			// a = b + rotated sum
+.endm
+
+	// f = b ? c : d
+.macro	f0	f, b, c, d
+	mov	\c, \f
+	xor	\d, \f			// c ^ d
+	and	\b, \f			// (c ^ d) & b
+	xor	\d, \f			// ((c ^ d) & b) ^ d
+.endm
+
+	// f = d ? b : c
+.macro	f1	f, b, c, d
+	mov	\c, \f
+	xor	\b, \f			// c ^ b
+	and	\d, \f			// (c ^ b) & d
+	xor	\c, \f			// ((c ^ b) & d) ^ c
+.endm
+
+	// f = b ^ c ^ d
+.macro	f2	f, b, c, d
+	mov	\c, \f
+	xor	\d, \f			// c ^ d
+	xor	\b, \f			// b ^ c ^ d
+.endm
+
+	// f = c ^ (b | ~d)
+.macro	f3	f, b, c, d
+	mov	$-1, \f
+	xor	\d, \f			// ~d
+	or	\b, \f			// b | ~d
+	xor	\c, \f			// c ^ (b | ~d)
+.endm
+
+	// do 4 rounds; round j (0 to 3) reads message word (p*j + q) % 16
+.macro	rounds	f, p, q, s0, s1, s2, s3, k0, k1, k2, k3
+	round	%eax, %ebx, %ecx, %edx, \f, \k0, \p*0+\q, \s0
+	round	%edx, %eax, %ebx, %ecx, \f, \k1, \p*1+\q, \s1
+	round	%ecx, %edx, %eax, %ebx, \f, \k2, \p*2+\q, \s2
+	round	%ebx, %ecx, %edx, %eax, \f, \k3, \p*3+\q, \s3
+.endm
+
+	// do 4 rounds with f0, f1, f2, f3; i is the number of the first of the four rounds
+.macro	rounds0	i, k0, k1, k2, k3
+	rounds	f0, 1, \i, 7, 12, 17, 22, \k0, \k1, \k2, \k3
+.endm
+
+.macro	rounds1	i, k0, k1, k2, k3
+	rounds	f1, 5, 5*\i+1, 5, 9, 14, 20, \k0, \k1, \k2, \k3
+.endm
+
+.macro	rounds2	i, k0, k1, k2, k3
+	rounds	f2, 3, 3*\i+5, 4, 11, 16, 23, \k0, \k1, \k2, \k3
+.endm
+
+.macro	rounds3	i, k0, k1, k2, k3
+	rounds	f3, 7, 7*\i, 6, 10, 15, 21, \k0, \k1, \k2, \k3
+.endm
+
+	push	%rbx			// callee-saved: ebx holds b
+	push	%rbp			// callee-saved: ebp holds the round function result
+	push	%r12			// callee-saved: r12 holds the end pointer
+
+	and	$~63, %rdx		// length rounded down to whole 64 byte blocks
+	lea	(%rsi, %rdx, 1), %r12	// end pointer
+
+	mov	(%rdi), %eax		// a
+	mov	4(%rdi), %ebx		// b
+	mov	8(%rdi), %ecx		// c
+	mov	12(%rdi), %edx		// d
+
+	cmp	%rsi, %r12		// any data to process?
+	je	.Lend
+
+	.balign	16
+.Lloop:	mov	%eax, %r8d		// stash the state from before this block
+	mov	%ebx, %r9d
+	mov	%ecx, %r10d
+	mov	%edx, %r11d
+
+	allrounds	rounds0, rounds1, rounds2, rounds3
+
+	add	%r8d, %eax		// add the stashed state to the new state
+	add	%r9d, %ebx
+	add	%r10d, %ecx
+	add	%r11d, %edx
+
+	add	$64, %rsi		// advance to the next block
+	cmp	%rsi, %r12
+	jne	.Lloop
+
+	mov	%eax, (%rdi)		// write the state back once, after the last block
+	mov	%ebx, 4(%rdi)
+	mov	%ecx, 8(%rdi)
+	mov	%edx, 12(%rdi)
+
+.Lend:	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	ret
+END(_libmd_md5block_baseline)
+
+	/*
+	 * md5block(MD5_CTX, buf, len) leveraging the ANDN instruction
+	 * from BMI1 to shorten some dependency chains in rounds 1 and 3.
+	 */
+ENTRY(_libmd_md5block_bmi1)
+	// special-cased round 1
+	// f1 = d ? b : c = (d & b) + (~d & c)
+.macro	round1	a, b, c, d, k, m, s
+	andn	\c, \d, %edi		// ~d & c
+	add	$\k, \a			// a + k[i]
+	mov	\d, %ebp
+	add	((\m)%16*4)(%rsi), \a	// a + k[i] + m[g]
+	and	\b, %ebp		// d & b
+	add	%edi, \a		// a + k[i] + m[g] + (~d & c)
+	add	%ebp, \a		// a + k[i] + m[g] + (~d & c) + (d & b)
+	rol	$\s, \a			// rotate the sum left by s
+	add	\b, \a			// a = b + rotated sum
+.endm
+
+	// special-cased round 3
+	// f3 = c ^ (b | ~d) = ~(c ^ ~b & d) = -1 - (c ^ ~b & d)
+.macro	round3	a, b, c, d, k, m, s
+	andn	\d, \b, %ebp		// ~b & d
+	add	$\k - 1, \a		// a + k[i] - 1
+	add	((\m)%16*4)(%rsi), \a	// a + k[i] - 1 + m[g]
+	xor	\c, %ebp		// c ^ (~b & d), which is -1 - f
+	sub	%ebp, \a		// a + k[i] + m[g] + f
+	rol	$\s, \a			// rotate the sum left by s
+	add	\b, \a			// a = b + rotated sum
+.endm
+
+	.purgem	rounds1			// replace the definition made for the baseline kernel
+.macro	rounds1	i, k0, k1, k2, k3
+	round1	%eax, %ebx, %ecx, %edx, \k0, 5*\i+ 1,  5
+	round1	%edx, %eax, %ebx, %ecx, \k1, 5*\i+ 6,  9
+	round1	%ecx, %edx, %eax, %ebx, \k2, 5*\i+11, 14
+	round1	%ebx, %ecx, %edx, %eax, \k3, 5*\i+16, 20
+.endm
+
+	.purgem	rounds3			// replace the definition made for the baseline kernel
+.macro	rounds3	i, k0, k1, k2, k3
+	round3	%eax, %ebx, %ecx, %edx, \k0, 7*\i+ 0,  6
+	round3	%edx, %eax, %ebx, %ecx, \k1, 7*\i+ 7, 10
+	round3	%ecx, %edx, %eax, %ebx, \k2, 7*\i+14, 15
+	round3	%ebx, %ecx, %edx, %eax, \k3, 7*\i+21, 21
+.endm
+
+	push	%rbx			// callee-saved registers used below
+	push	%rbp
+	push	%r12
+
+	and	$~63, %rdx		// length rounded down to whole 64 byte blocks
+	lea	(%rsi, %rdx, 1), %r12	// end pointer
+
+	mov	(%rdi), %eax		// a
+	mov	4(%rdi), %ebx		// b
+	mov	8(%rdi), %ecx		// c
+	mov	12(%rdi), %edx		// d
+
+	cmp	%rsi, %r12		// any data to process?
+	je	0f
+
+	push	%rdi			// round1 uses edi as scratch; keep the context pointer
+
+	.balign	16
+1:	mov	%eax, %r8d		// stash the state from before this block
+	mov	%ebx, %r9d
+	mov	%ecx, %r10d
+	mov	%edx, %r11d
+
+	allrounds	rounds0, rounds1, rounds2, rounds3
+
+	add	%r8d, %eax		// add the stashed state to the new state
+	add	%r9d, %ebx
+	add	%r10d, %ecx
+	add	%r11d, %edx
+
+	add	$64, %rsi		// advance to the next block
+	cmp	%rsi, %r12
+	jne	1b
+
+	pop	%rdi			// recover the context pointer
+	mov	%eax, (%rdi)		// write the state back once, after the last block
+	mov	%ebx, 4(%rdi)
+	mov	%ecx, 8(%rdi)
+	mov	%edx, 12(%rdi)
+
+0:	pop	%r12
+	pop	%rbp
+	pop	%rbx
+	ret
+END(_libmd_md5block_bmi1)
+
+#ifndef _KERNEL
+	/*
+	 * md5block(MD5_CTX, buf, len) leveraging AVX-512 for its VPTERNLOGD
+	 * instruction.  We are using only XMM registers here, avoiding costly
+	 * thermal licensing.  The state lives in dword 0 of xmm0 to xmm3.
+	 */
+ENTRY(_libmd_md5block_avx512)
+.macro	vround		a, b, c, d, f, i, m, mi, s
+	vmovdqa		\b, %xmm4
+	vpternlogd	$\f, \d, \c, %xmm4		// xmm4 = round function of b, c, d; f is its truth table
+	vpaddd		4*(\i)(%rax){1to4}, \m, %xmm5 // m[g] + k[i]
+.if	\mi != 0
+	vpshufd		$0x55 * \mi, %xmm5, %xmm5	// broadcast to each dword
+.endif
+	vpaddd		%xmm5, \a, \a		// a + k[i] + m[g]
+	vpaddd		%xmm4, \a, \a		// a + k[i] + m[g] + f
+	vprold		$\s, \a, \a		// rotate the sum left by s
+	vpaddd		\b, \a, \a		// a = b + rotated sum
+.endm
+
+.macro	vrounds		f, i, m0, i0, m1, i1, m2, i2, m3, i3, s0, s1, s2, s3
+	vround		%xmm0, %xmm1, %xmm2, %xmm3, \f, \i+0, \m0, \i0, \s0
+	vround		%xmm3, %xmm0, %xmm1, %xmm2, \f, \i+1, \m1, \i1, \s1
+	vround		%xmm2, %xmm3, %xmm0, %xmm1, \f, \i+2, \m2, \i2, \s2
+	vround		%xmm1, %xmm2, %xmm3, %xmm0, \f, \i+3, \m3, \i3, \s3
+.endm
+
+/*
+ * d c b f0 f1 f2 f3
+ * 0 0 0  0  0  0  1
+ * 1 0 0  1  0  1  0
+ * 0 1 0  0  1  1  0
+ * 1 1 0  1  0  0  1
+ * 0 0 1  0  0  1  1
+ * 1 0 1  0  1  0  1
+ * 0 1 1  1  1  0  0
+ * 1 1 1  1  1  1  0
+ */
+
+.macro	vrounds0	i, m
+	vrounds		0xca, \i, \m, 0, \m, 1, \m, 2, \m, 3, 7, 12, 17, 22	// 0xca: f0 column, top row is bit 0
+.endm
+
+.macro	vrounds1	i, m0, i0, m1, i1, m2, i2, m3, i3
+	vrounds		0xe4, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 5, 9, 14, 20	// 0xe4: f1 column
+.endm
+
+.macro	vrounds2	i, m0, i0, m1, i1, m2, i2, m3, i3
+	vrounds		0x96, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 4, 11, 16, 23	// 0x96: f2 column
+.endm
+
+.macro	vrounds3	i, m0, i0, m1, i1, m2, i2, m3, i3
+	vrounds		0x39, \i, \m0, \i0, \m1, \i1, \m2, \i2, \m3, \i3, 6, 10, 15, 21	// 0x39: f3 column
+.endm
+
+	and		$~63, %rdx		// length rounded down to whole 64 byte blocks
+	add		%rsi, %rdx		// end pointer
+
+	vmovd		(%rdi), %xmm0		// a
+	vmovd		4(%rdi), %xmm1		// b
+	vmovd		8(%rdi), %xmm2		// c
+	vmovd		12(%rdi), %xmm3		// d
+
+	lea		keys(%rip), %rax	// base of the round key table
+
+	cmp		%rsi, %rdx		// any data to process?
+	je		0f
+
+	.balign		16
+1:	vmovdqu		0*4(%rsi), %xmm8	// message words
+	vmovdqu		4*4(%rsi), %xmm9
+	vmovdqu		8*4(%rsi), %xmm10
+	vmovdqu		12*4(%rsi), %xmm11
+
+	vmovdqa		%xmm0, %xmm12		// stash old state variables
+	vmovdqa		%xmm1, %xmm13
+	vmovdqa		%xmm2, %xmm14
+	vmovdqa		%xmm3, %xmm15
+
+	vrounds0	 0, %xmm8
+	vrounds0	 4, %xmm9
+	vrounds0	 8, %xmm10
+	vrounds0	12, %xmm11
+
+	vrounds1	16,  %xmm8, 1,  %xmm9, 2, %xmm10, 3,  %xmm8, 0
+	vrounds1	20,  %xmm9, 1, %xmm10, 2, %xmm11, 3,  %xmm9, 0
+	vrounds1	24, %xmm10, 1, %xmm11, 2,  %xmm8, 3, %xmm10, 0
+	vrounds1	28, %xmm11, 1,  %xmm8, 2,  %xmm9, 3, %xmm11, 0
+
+	vrounds2	32,  %xmm9, 1, %xmm10, 0, %xmm10, 3, %xmm11, 2
+	vrounds2	36,  %xmm8, 1,  %xmm9, 0,  %xmm9, 3, %xmm10, 2
+	vrounds2	40, %xmm11, 1,  %xmm8, 0,  %xmm8, 3,  %xmm9, 2
+	vrounds2	44, %xmm10, 1, %xmm11, 0, %xmm11, 3,  %xmm8, 2
+
+	vrounds3	48,  %xmm8, 0,  %xmm9, 3, %xmm11, 2,  %xmm9, 1
+	vrounds3	52, %xmm11, 0,  %xmm8, 3, %xmm10, 2,  %xmm8, 1
+	vrounds3	56, %xmm10, 0, %xmm11, 3,  %xmm9, 2, %xmm11, 1
+	vrounds3	60,  %xmm9, 0, %xmm10, 3,  %xmm8, 2, %xmm10, 1
+
+	vpaddd		%xmm12, %xmm0, %xmm0	// add the stashed state to the new state
+	vpaddd		%xmm13, %xmm1, %xmm1
+	vpaddd		%xmm14, %xmm2, %xmm2
+	vpaddd		%xmm15, %xmm3, %xmm3
+
+	add		$64, %rsi		// advance to the next block
+	cmp		%rsi, %rdx
+	jne		1b
+
+	vmovd		%xmm0, (%rdi)		// write the state back once, after the last block
+	vmovd		%xmm1, 4(%rdi)
+	vmovd		%xmm2, 8(%rdi)
+	vmovd		%xmm3, 12(%rdi)
+
+0:	ret
+END(_libmd_md5block_avx512)
+
+	// round keys in round order, for use in md5block_avx512
+	.section	.rodata
+	.balign		16
+
+.macro	putkeys		i, a, b, c, d
+	.4byte		\a, \b, \c, \d		// emit the four keys; the round number i is not used
+.endm
+
+keys:	allrounds	putkeys, putkeys, putkeys, putkeys	// 16 invocations of 4 keys each
+	.size		keys, .-keys
+#endif /* !defined(_KERNEL) */
+
+	.section .note.GNU-stack,"",%progbits
diff --git a/lib/libmd/amd64/md5dispatch.c b/lib/libmd/amd64/md5dispatch.c
new file mode 100644
index 000000000000..dd2131c5a57c
--- /dev/null
+++ b/lib/libmd/amd64/md5dispatch.c
@@ -0,0 +1,41 @@
+/*-
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/types.h>
+#include <sys/md5.h>
+
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+#include <stdint.h>
+#include <string.h>
+#include <x86/ifunc.h>
+
+extern void _libmd_md5block_baseline(MD5_CTX *, const void *, size_t);
+extern void _libmd_md5block_bmi1(MD5_CTX *, const void *, size_t);
+extern void _libmd_md5block_avx512(MD5_CTX *, const void *, size_t);
+
+DEFINE_UIFUNC(, void, _libmd_md5block, (MD5_CTX *, const void *, size_t))
+{
+	if ((cpu_stdext_feature & (CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512VL))
+	    == (CPUID_STDEXT_AVX512F | CPUID_STDEXT_AVX512VL)) {
+		u_int regs[4];
+		char cpu_vendor[12];	/* 12 byte vendor string, not NUL terminated */
+
+		do_cpuid(0, regs);	/* leaf 0: vendor string in EBX, EDX, ECX */
+		memcpy(&cpu_vendor[0], &regs[1], sizeof(regs[1]));	/* EBX */
+		memcpy(&cpu_vendor[4], &regs[3], sizeof(regs[3]));	/* EDX */
+		memcpy(&cpu_vendor[8], &regs[2], sizeof(regs[2]));	/* ECX */
+
+		/* the AVX-512 kernel performs poorly on AMD */
+		if (memcmp(cpu_vendor, AMD_VENDOR_ID, sizeof(cpu_vendor)) != 0)
+			return (_libmd_md5block_avx512);
+	}
+
+	if (cpu_stdext_feature & CPUID_STDEXT_BMI1)
+		return (_libmd_md5block_bmi1);		/* ANDN is available */
+	else
+		return (_libmd_md5block_baseline);	/* plain amd64 kernel */
+}
diff --git a/lib/libmd/amd64/sha1block.S b/lib/libmd/amd64/sha1block.S
index f1291ef2647a..6ef083178abc 100644
--- a/lib/libmd/amd64/sha1block.S
+++ b/lib/libmd/amd64/sha1block.S
@@ -1,6 +1,6 @@
 /*-
  * Copyright (c) 2013 The Go Authors. All rights reserved.
- * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
  *
  * Adapted from Go's crypto/sha1/sha1block_amd64.s.
  *
diff --git a/lib/libmd/amd64/sha1dispatch.c b/lib/libmd/amd64/sha1dispatch.c
index 86509195d56e..c82a60334739 100644
--- a/lib/libmd/amd64/sha1dispatch.c
+++ b/lib/libmd/amd64/sha1dispatch.c
@@ -1,6 +1,6 @@
 /*-
  * Copyright (c) 2016 The Go Authors. All rights reserved.
- * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
  *
  * Adapted from Go's crypto/sha1/sha1block_amd64.go.
  *
diff --git a/lib/libmd/sha1c.c b/lib/libmd/sha1c.c
index 128e0b991742..02132d720dac 100644
--- a/lib/libmd/sha1c.c
+++ b/lib/libmd/sha1c.c
@@ -1,6 +1,6 @@
 /*-
  * Copyright (c) 2009 The Go Authors. All rights reserved.
- * Copyright (c) 2024 Robert Clausecker <fuz@freebsd.org>
+ * Copyright (c) 2024 Robert Clausecker <fuz@FreeBSD.org>
  *
  * Adapted from Go's crypto/sha1/sha1.go.
  *
diff --git a/lib/libsys/clock_gettime.2 b/lib/libsys/clock_gettime.2
index 89551d0f720b..841673648dfc 100644
--- a/lib/libsys/clock_gettime.2
+++ b/lib/libsys/clock_gettime.2
@@ -27,7 +27,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd August 10, 2024
+.Dd October 15, 2025
 .Dt CLOCK_GETTIME 2
 .Os
 .Sh NAME
@@ -80,7 +80,6 @@ Behavior during a leap second is not defined by and POSIX standard.
 Increments in SI seconds, even while the system is suspended.
 Its epoch is unspecified.
 The count is not adjusted by leap seconds.
-.Fx implements
 .It Dv CLOCK_UPTIME
 .It Dv CLOCK_UPTIME_PRECISE
 .It Dv CLOCK_UPTIME_FAST
diff --git a/lib/libsys/getrlimitusage.2 b/lib/libsys/getrlimitusage.2
index e2114def56c2..d0e92d7f88b4 100644
--- a/lib/libsys/getrlimitusage.2
+++ b/lib/libsys/getrlimitusage.2
@@ -25,7 +25,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd September 27, 2024
+.Dd October 24, 2025
 .Dt GETRLIMITUSAGE 2
 .Os
 .Sh NAME
@@ -97,4 +97,4 @@ and
 The
 .Fn getrlimitusage
 system call appeared in
-.Bx 15.0 .
+.Fx 14.2 .
diff --git a/lib/libunbound/config.h b/lib/libunbound/config.h
index 1cedf5b4de36..1b939b7f1074 100644
--- a/lib/libunbound/config.h
+++ b/lib/libunbound/config.h
@@ -884,7 +884,7 @@
 #define PACKAGE_NAME "unbound"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "unbound 1.24.0"
+#define PACKAGE_STRING "unbound 1.24.1"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "unbound"
@@ -893,7 +893,7 @@
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.24.0"
+#define PACKAGE_VERSION "1.24.1"
 
 /* default pidfile location */
 #define PIDFILE "/var/unbound/unbound.pid"