summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--UPDATING2
-rw-r--r--bin/sh/miscbltin.c2
-rw-r--r--lib/libc/sys/truncate.29
-rw-r--r--lib/libfetch/common.c6
-rw-r--r--sys/cam/scsi/scsi_da.c12
-rw-r--r--sys/compat/linuxkpi/common/include/linux/fs.h21
-rw-r--r--sys/compat/linuxkpi/common/include/linux/shmem_fs.h55
-rw-r--r--sys/compat/linuxkpi/common/src/linux_page.c84
-rw-r--r--sys/compat/linuxkpi/common/src/linux_shmemfs.c128
-rw-r--r--sys/conf/files2
-rw-r--r--sys/dev/acpica/acpi_lid.c12
-rw-r--r--sys/dev/ath/ah_osdep.c11
-rw-r--r--sys/dev/ath/ath_rate/sample/sample.c12
-rw-r--r--sys/dev/ath/if_ath_sysctl.c110
-rw-r--r--sys/dev/mlx5/mlx5_ib/mlx5_ib.h10
-rw-r--r--sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c6
-rw-r--r--sys/dev/otus/if_otus.c3
-rw-r--r--sys/kern/subr_compressor.c7
-rw-r--r--sys/kern/subr_smr.c472
-rw-r--r--sys/kern/subr_trap.c12
-rw-r--r--sys/kern/vfs_lookup.c15
-rw-r--r--sys/modules/linuxkpi/Makefile1
-rw-r--r--sys/net80211/ieee80211_alq.c6
-rw-r--r--sys/net80211/ieee80211_amrr.c4
-rw-r--r--sys/net80211/ieee80211_freebsd.c43
-rw-r--r--sys/net80211/ieee80211_ht.c21
-rw-r--r--sys/net80211/ieee80211_hwmp.c30
-rw-r--r--sys/net80211/ieee80211_mesh.c17
-rw-r--r--sys/net80211/ieee80211_rssadapt.c5
-rw-r--r--sys/net80211/ieee80211_superg.c7
-rw-r--r--sys/netgraph/ng_socket.c8
-rw-r--r--sys/netinet/ip_carp.c11
-rw-r--r--sys/netpfil/pf/if_pfsync.c3
-rw-r--r--sys/netpfil/pf/pf.c3
-rw-r--r--sys/powerpc/booke/pmap.c13
-rw-r--r--sys/security/audit/audit.h16
-rw-r--r--sys/security/audit/audit_arg.c38
-rw-r--r--sys/security/audit/audit_bsm_klib.c130
-rw-r--r--sys/security/audit/audit_private.h2
-rw-r--r--sys/sys/_smr.h1
-rw-r--r--sys/sys/param.h2
-rw-r--r--sys/sys/smr.h93
-rw-r--r--sys/vm/uma_core.c24
-rw-r--r--sys/x86/x86/identcpu.c58
-rw-r--r--tools/bsdbox/Makefile.base1
-rw-r--r--usr.bin/dtc/dtc.cc5
-rw-r--r--usr.sbin/bhyve/iov.c17
-rw-r--r--usr.sbin/bhyve/iov.h4
-rw-r--r--usr.sbin/bhyve/net_backends.c98
-rw-r--r--usr.sbin/bhyve/net_backends.h1
-rw-r--r--usr.sbin/bhyve/pci_virtio_net.c54
-rw-r--r--usr.sbin/iostat/iostat.c2
-rw-r--r--usr.sbin/pstat/pstat.c22
53 files changed, 1158 insertions, 573 deletions
diff --git a/UPDATING b/UPDATING
index bbf3e7718d7e..04745ac4f009 100644
--- a/UPDATING
+++ b/UPDATING
@@ -33,7 +33,7 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW:
using clang 3.5.0 or higher.
20200220:
- ncurses has been updated to a newer version (6.1-20200118). Given the ABI
+ ncurses has been updated to a newer version (6.2-20200215). Given the ABI
has changed, users will have to rebuild all the ports that are linked to
ncurses.
diff --git a/bin/sh/miscbltin.c b/bin/sh/miscbltin.c
index ad3d862fb6ef..a49dd05d109f 100644
--- a/bin/sh/miscbltin.c
+++ b/bin/sh/miscbltin.c
@@ -117,7 +117,7 @@ fdgetc(struct fdctx *fdc, char *c)
static void
fdctx_destroy(struct fdctx *fdc)
{
- size_t residue;
+ off_t residue;
if (fdc->buflen > 1) {
/*
diff --git a/lib/libc/sys/truncate.2 b/lib/libc/sys/truncate.2
index dfbe006965b2..a6ec8f44f44e 100644
--- a/lib/libc/sys/truncate.2
+++ b/lib/libc/sys/truncate.2
@@ -28,7 +28,7 @@
.\" @(#)truncate.2 8.1 (Berkeley) 6/4/93
.\" $FreeBSD$
.\"
-.Dd May 4, 2015
+.Dd January 24, 2020
.Dt TRUNCATE 2
.Os
.Sh NAME
@@ -160,6 +160,9 @@ system calls appeared in
These calls should be generalized to allow ranges
of bytes in a file to be discarded.
.Pp
-Use of
+Historically, the use of
.Fn truncate
-to extend a file is not portable.
+or
+.Fn ftruncate
+to extend a file was not portable, but this behavior became required in
+.St -p1003.1-2008 .
diff --git a/lib/libfetch/common.c b/lib/libfetch/common.c
index f6c026049f5c..ae119de32736 100644
--- a/lib/libfetch/common.c
+++ b/lib/libfetch/common.c
@@ -677,6 +677,7 @@ fetch_connect(const char *host, int port, int af, int verbose)
if (sockshost)
if (!fetch_socks5_init(conn, host, port, verbose))
goto fail;
+ free(sockshost);
if (cais != NULL)
freeaddrinfo(cais);
if (sais != NULL)
@@ -686,7 +687,10 @@ syserr:
fetch_syserr();
fail:
free(sockshost);
- if (sd >= 0)
+ /* Fully close if it was opened; otherwise just don't leak the fd. */
+ if (conn != NULL)
+ fetch_close(conn);
+ else if (sd >= 0)
close(sd);
if (cais != NULL)
freeaddrinfo(cais);
diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
index 9c889f61cd45..25164bea55d4 100644
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@@ -342,7 +342,7 @@ struct da_softc {
LIST_HEAD(, ccb_hdr) pending_ccbs;
int refcount; /* Active xpt_action() calls */
da_state state;
- da_flags flags;
+ u_int flags;
da_quirks quirks;
int minimum_cmd_size;
int error_inject;
@@ -2335,11 +2335,11 @@ dasysctlinit(void *context, int pending)
"Flags for drive");
SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
OID_AUTO, "rotating", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &softc->flags, DA_FLAG_ROTATING, dabitsysctl, "I",
+ &softc->flags, (u_int)DA_FLAG_ROTATING, dabitsysctl, "I",
"Rotating media *DEPRECATED* gone in FreeBSD 14");
SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
OID_AUTO, "unmapped_io", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
- &softc->flags, DA_FLAG_UNMAPPEDIO, dabitsysctl, "I",
+ &softc->flags, (u_int)DA_FLAG_UNMAPPEDIO, dabitsysctl, "I",
"Unmapped I/O support *DEPRECATED* gone in FreeBSD 14");
#ifdef CAM_TEST_FAILURE
@@ -2619,11 +2619,11 @@ dadeletemethodchoose(struct da_softc *softc, da_delete_methods default_method)
static int
dabitsysctl(SYSCTL_HANDLER_ARGS)
{
- int flags = (intptr_t)arg1;
- int test = arg2;
+ u_int *flags = arg1;
+ u_int test = arg2;
int tmpout, error;
- tmpout = !!(flags & test);
+ tmpout = !!(*flags & test);
error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
if (error || !req->newptr)
return (error);
diff --git a/sys/compat/linuxkpi/common/include/linux/fs.h b/sys/compat/linuxkpi/common/include/linux/fs.h
index f68febf36fd4..7f5993bd7754 100644
--- a/sys/compat/linuxkpi/common/include/linux/fs.h
+++ b/sys/compat/linuxkpi/common/include/linux/fs.h
@@ -302,25 +302,4 @@ call_mmap(struct linux_file *file, struct vm_area_struct *vma)
return (file->f_op->mmap(file, vma));
}
-/* Shared memory support */
-unsigned long linux_invalidate_mapping_pages(vm_object_t, pgoff_t, pgoff_t);
-struct page *linux_shmem_read_mapping_page_gfp(vm_object_t, int, gfp_t);
-struct linux_file *linux_shmem_file_setup(const char *, loff_t, unsigned long);
-void linux_shmem_truncate_range(vm_object_t, loff_t, loff_t);
-
-#define invalidate_mapping_pages(...) \
- linux_invalidate_mapping_pages(__VA_ARGS__)
-
-#define shmem_read_mapping_page(...) \
- linux_shmem_read_mapping_page_gfp(__VA_ARGS__, 0)
-
-#define shmem_read_mapping_page_gfp(...) \
- linux_shmem_read_mapping_page_gfp(__VA_ARGS__)
-
-#define shmem_file_setup(...) \
- linux_shmem_file_setup(__VA_ARGS__)
-
-#define shmem_truncate_range(...) \
- linux_shmem_truncate_range(__VA_ARGS__)
-
#endif /* _LINUX_FS_H_ */
diff --git a/sys/compat/linuxkpi/common/include/linux/shmem_fs.h b/sys/compat/linuxkpi/common/include/linux/shmem_fs.h
new file mode 100644
index 000000000000..63aff012c6bb
--- /dev/null
+++ b/sys/compat/linuxkpi/common/include/linux/shmem_fs.h
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2010 iX Systems, Inc.
+ * Copyright (c) 2010 Panasas, Inc.
+ * Copyright (c) 2013-2018 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _LINUX_SHMEM_FS_H_
+#define _LINUX_SHMEM_FS_H_
+
+/* Shared memory support */
+unsigned long linux_invalidate_mapping_pages(vm_object_t, pgoff_t, pgoff_t);
+struct page *linux_shmem_read_mapping_page_gfp(vm_object_t, int, gfp_t);
+struct linux_file *linux_shmem_file_setup(const char *, loff_t, unsigned long);
+void linux_shmem_truncate_range(vm_object_t, loff_t, loff_t);
+
+#define invalidate_mapping_pages(...) \
+ linux_invalidate_mapping_pages(__VA_ARGS__)
+
+#define shmem_read_mapping_page(...) \
+ linux_shmem_read_mapping_page_gfp(__VA_ARGS__, 0)
+
+#define shmem_read_mapping_page_gfp(...) \
+ linux_shmem_read_mapping_page_gfp(__VA_ARGS__)
+
+#define shmem_file_setup(...) \
+ linux_shmem_file_setup(__VA_ARGS__)
+
+#define shmem_truncate_range(...) \
+ linux_shmem_truncate_range(__VA_ARGS__)
+
+#endif /* _LINUX_SHMEM_FS_H_ */
diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c
index ac8b1a2781aa..a6133a93b543 100644
--- a/sys/compat/linuxkpi/common/src/linux_page.c
+++ b/sys/compat/linuxkpi/common/src/linux_page.c
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <linux/mm.h>
#include <linux/preempt.h>
#include <linux/fs.h>
+#include <linux/shmem_fs.h>
void
si_meminfo(struct sysinfo *si)
@@ -275,86 +276,3 @@ is_vmalloc_addr(const void *addr)
{
return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL);
}
-
-struct page *
-linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp)
-{
- vm_page_t page;
- int rv;
-
- if ((gfp & GFP_NOWAIT) != 0)
- panic("GFP_NOWAIT is unimplemented");
-
- VM_OBJECT_WLOCK(obj);
- rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL |
- VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
- VM_OBJECT_WUNLOCK(obj);
- if (rv != VM_PAGER_OK)
- return (ERR_PTR(-EINVAL));
- return (page);
-}
-
-struct linux_file *
-linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags)
-{
- struct fileobj {
- struct linux_file file __aligned(sizeof(void *));
- struct vnode vnode __aligned(sizeof(void *));
- };
- struct fileobj *fileobj;
- struct linux_file *filp;
- struct vnode *vp;
- int error;
-
- fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL);
- if (fileobj == NULL) {
- error = -ENOMEM;
- goto err_0;
- }
- filp = &fileobj->file;
- vp = &fileobj->vnode;
-
- filp->f_count = 1;
- filp->f_vnode = vp;
- filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size,
- VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred);
- if (filp->f_shmem == NULL) {
- error = -ENOMEM;
- goto err_1;
- }
- return (filp);
-err_1:
- kfree(filp);
-err_0:
- return (ERR_PTR(error));
-}
-
-static vm_ooffset_t
-linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start,
- vm_pindex_t end, int flags)
-{
- int start_count, end_count;
-
- VM_OBJECT_WLOCK(obj);
- start_count = obj->resident_page_count;
- vm_object_page_remove(obj, start, end, flags);
- end_count = obj->resident_page_count;
- VM_OBJECT_WUNLOCK(obj);
- return (start_count - end_count);
-}
-
-unsigned long
-linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end)
-{
-
- return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY));
-}
-
-void
-linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend)
-{
- vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1);
- vm_pindex_t end = OFF_TO_IDX(lend + 1);
-
- (void) linux_invalidate_mapping_pages_sub(obj, start, end, 0);
-}
diff --git a/sys/compat/linuxkpi/common/src/linux_shmemfs.c b/sys/compat/linuxkpi/common/src/linux_shmemfs.c
new file mode 100644
index 000000000000..ead9cc9d9f40
--- /dev/null
+++ b/sys/compat/linuxkpi/common/src/linux_shmemfs.c
@@ -0,0 +1,128 @@
+/*-
+ * Copyright (c) 2010 Isilon Systems, Inc.
+ * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io)
+ * Copyright (c) 2017 Mellanox Technologies, Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/rwlock.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_object.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/shmem_fs.h>
+
+struct page *
+linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp)
+{
+ vm_page_t page;
+ int rv;
+
+ if ((gfp & GFP_NOWAIT) != 0)
+ panic("GFP_NOWAIT is unimplemented");
+
+ VM_OBJECT_WLOCK(obj);
+ rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
+ VM_OBJECT_WUNLOCK(obj);
+ if (rv != VM_PAGER_OK)
+ return (ERR_PTR(-EINVAL));
+ return (page);
+}
+
+struct linux_file *
+linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags)
+{
+ struct fileobj {
+ struct linux_file file __aligned(sizeof(void *));
+ struct vnode vnode __aligned(sizeof(void *));
+ };
+ struct fileobj *fileobj;
+ struct linux_file *filp;
+ struct vnode *vp;
+ int error;
+
+ fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL);
+ if (fileobj == NULL) {
+ error = -ENOMEM;
+ goto err_0;
+ }
+ filp = &fileobj->file;
+ vp = &fileobj->vnode;
+
+ filp->f_count = 1;
+ filp->f_vnode = vp;
+ filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size,
+ VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred);
+ if (filp->f_shmem == NULL) {
+ error = -ENOMEM;
+ goto err_1;
+ }
+ return (filp);
+err_1:
+ kfree(filp);
+err_0:
+ return (ERR_PTR(error));
+}
+
+static vm_ooffset_t
+linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start,
+ vm_pindex_t end, int flags)
+{
+ int start_count, end_count;
+
+ VM_OBJECT_WLOCK(obj);
+ start_count = obj->resident_page_count;
+ vm_object_page_remove(obj, start, end, flags);
+ end_count = obj->resident_page_count;
+ VM_OBJECT_WUNLOCK(obj);
+ return (start_count - end_count);
+}
+
+unsigned long
+linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end)
+{
+
+ return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY));
+}
+
+void
+linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend)
+{
+ vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1);
+ vm_pindex_t end = OFF_TO_IDX(lend + 1);
+
+ (void) linux_invalidate_mapping_pages_sub(obj, start, end, 0);
+}
diff --git a/sys/conf/files b/sys/conf/files
index 9bb3b2a62565..b99b5838891f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4476,6 +4476,8 @@ compat/linuxkpi/common/src/linux_rcu.c optional compat_linuxkpi \
compile-with "${LINUXKPI_C} -I$S/contrib/ck/include"
compat/linuxkpi/common/src/linux_schedule.c optional compat_linuxkpi \
compile-with "${LINUXKPI_C}"
+compat/linuxkpi/common/src/linux_shmemfs.c optional compat_linuxkpi \
+ compile-with "${LINUXKPI_C}"
compat/linuxkpi/common/src/linux_slab.c optional compat_linuxkpi \
compile-with "${LINUXKPI_C}"
compat/linuxkpi/common/src/linux_usb.c optional compat_linuxkpi usb \
diff --git a/sys/dev/acpica/acpi_lid.c b/sys/dev/acpica/acpi_lid.c
index 80bc344d606d..5558b0f437e3 100644
--- a/sys/dev/acpica/acpi_lid.c
+++ b/sys/dev/acpica/acpi_lid.c
@@ -124,13 +124,16 @@ acpi_lid_attach(device_t dev)
if (acpi_parse_prw(sc->lid_handle, &prw) == 0)
AcpiEnableGpe(prw.gpe_handle, prw.gpe_bit);
+ /* Get the initial lid status, ignore failures */
+ (void) acpi_GetInteger(sc->lid_handle, "_LID", &sc->lid_status);
+
/*
* Export the lid status
*/
SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
"state", CTLFLAG_RD, &sc->lid_status, 0,
- "Device set to wake the system");
+ "Device state (0 = closed, 1 = open)");
return (0);
}
@@ -144,6 +147,13 @@ acpi_lid_suspend(device_t dev)
static int
acpi_lid_resume(device_t dev)
{
+ struct acpi_lid_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ /* Get lid status after resume, ignore failures */
+ (void) acpi_GetInteger(sc->lid_handle, "_LID", &sc->lid_status);
+
return (0);
}
diff --git a/sys/dev/ath/ah_osdep.c b/sys/dev/ath/ah_osdep.c
index 23d967ec75e3..b141d7d66006 100644
--- a/sys/dev/ath/ah_osdep.c
+++ b/sys/dev/ath/ah_osdep.c
@@ -93,8 +93,9 @@ extern void DO_HALDEBUG(struct ath_hal *ah, u_int mask, const char* fmt, ...);
#endif /* AH_DEBUG */
/* NB: put this here instead of the driver to avoid circular references */
-SYSCTL_NODE(_hw, OID_AUTO, ath, CTLFLAG_RD, 0, "Atheros driver parameters");
-static SYSCTL_NODE(_hw_ath, OID_AUTO, hal, CTLFLAG_RD, 0,
+SYSCTL_NODE(_hw, OID_AUTO, ath, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "Atheros driver parameters");
+static SYSCTL_NODE(_hw_ath, OID_AUTO, hal, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"Atheros HAL parameters");
#ifdef AH_DEBUG
@@ -236,8 +237,10 @@ sysctl_hw_ath_hal_log(SYSCTL_HANDLER_ARGS)
else
return (ath_hal_setlogging(enable));
}
-SYSCTL_PROC(_hw_ath_hal, OID_AUTO, alq, CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_hw_ath_hal_log, "I", "Enable HAL register logging");
+SYSCTL_PROC(_hw_ath_hal, OID_AUTO, alq,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_hw_ath_hal_log, "I",
+ "Enable HAL register logging");
SYSCTL_INT(_hw_ath_hal, OID_AUTO, alq_size, CTLFLAG_RW,
&ath_hal_alq_qsize, 0, "In-memory log size (#records)");
SYSCTL_INT(_hw_ath_hal, OID_AUTO, alq_lost, CTLFLAG_RW,
diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c
index e0ab13eab719..ce22b36c539e 100644
--- a/sys/dev/ath/ath_rate/sample/sample.c
+++ b/sys/dev/ath/ath_rate/sample/sample.c
@@ -1364,17 +1364,17 @@ ath_rate_sysctlattach(struct ath_softc *sc, struct sample_softc *ssc)
struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev);
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "smoothing_rate", CTLTYPE_INT | CTLFLAG_RW, ssc, 0,
- ath_rate_sysctl_smoothing_rate, "I",
+ "smoothing_rate", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ ssc, 0, ath_rate_sysctl_smoothing_rate, "I",
"sample: smoothing rate for avg tx time (%%)");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "sample_rate", CTLTYPE_INT | CTLFLAG_RW, ssc, 0,
- ath_rate_sysctl_sample_rate, "I",
+ "sample_rate", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ ssc, 0, ath_rate_sysctl_sample_rate, "I",
"sample: percent air time devoted to sampling new rates (%%)");
/* XXX max_successive_failures, stale_failure_timeout, min_switch */
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "sample_stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_rate_sysctl_stats, "I", "sample: print statistics");
+ "sample_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ sc, 0, ath_rate_sysctl_stats, "I", "sample: print statistics");
}
struct ath_ratectrl *
diff --git a/sys/dev/ath/if_ath_sysctl.c b/sys/dev/ath/if_ath_sysctl.c
index 3e4e47246357..662aa77008a3 100644
--- a/sys/dev/ath/if_ath_sysctl.c
+++ b/sys/dev/ath/if_ath_sysctl.c
@@ -786,16 +786,17 @@ ath_sysctl_alq_attach(struct ath_softc *sc)
struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev);
struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
- tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "alq", CTLFLAG_RD,
- NULL, "Atheros ALQ logging parameters");
+ tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "alq",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+ "Atheros ALQ logging parameters");
child = SYSCTL_CHILDREN(tree);
SYSCTL_ADD_STRING(ctx, child, OID_AUTO, "filename",
CTLFLAG_RW, sc->sc_alq.sc_alq_filename, 0, "ALQ filename");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "enable", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_alq_log, "I", "");
+ "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_alq_log, "I", "");
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"debugmask", CTLFLAG_RW, &sc->sc_alq.sc_alq_debug, 0,
@@ -831,21 +832,21 @@ ath_sysctlattach(struct ath_softc *sc)
"control debugging KTR");
#endif /* ATH_DEBUG_ALQ */
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "slottime", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_slottime, "I", "802.11 slot time (us)");
+ "slottime", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_slottime, "I", "802.11 slot time (us)");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "acktimeout", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_acktimeout, "I", "802.11 ACK timeout (us)");
+ "acktimeout", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_acktimeout, "I", "802.11 ACK timeout (us)");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "ctstimeout", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_ctstimeout, "I", "802.11 CTS timeout (us)");
+ "ctstimeout", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_ctstimeout, "I", "802.11 CTS timeout (us)");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "softled", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_softled, "I", "enable/disable software LED support");
+ "softled", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_softled, "I", "enable/disable software LED support");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "ledpin", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_ledpin, "I", "GPIO pin connected to LED");
+ "ledpin", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_ledpin, "I", "GPIO pin connected to LED");
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"ledon", CTLFLAG_RW, &sc->sc_ledon, 0,
"setting to turn LED on");
@@ -854,8 +855,8 @@ ath_sysctlattach(struct ath_softc *sc)
"idle time for inactivity LED (ticks)");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "hardled", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_hardled, "I", "enable/disable hardware LED support");
+ "hardled", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_hardled, "I", "enable/disable hardware LED support");
/* XXX Laziness - configure pins, then flip hardled off/on */
SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"led_net_pin", CTLFLAG_RW, &sc->sc_led_net_pin, 0,
@@ -865,61 +866,61 @@ ath_sysctlattach(struct ath_softc *sc)
"MAC Power LED pin, or -1 to disable");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "txantenna", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_txantenna, "I", "antenna switch");
+ "txantenna", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_txantenna, "I", "antenna switch");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "rxantenna", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_rxantenna, "I", "default/rx antenna");
+ "rxantenna", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_rxantenna, "I", "default/rx antenna");
if (ath_hal_hasdiversity(ah))
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "diversity", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_diversity, "I", "antenna diversity");
+ "diversity", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ sc, 0, ath_sysctl_diversity, "I", "antenna diversity");
sc->sc_txintrperiod = ATH_TXINTR_PERIOD;
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"txintrperiod", CTLFLAG_RW, &sc->sc_txintrperiod, 0,
"tx descriptor batching");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "diag", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_diag, "I", "h/w diagnostic control");
+ "diag", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_diag, "I", "h/w diagnostic control");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "tpscale", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_tpscale, "I", "tx power scaling");
+ "tpscale", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_tpscale, "I", "tx power scaling");
if (ath_hal_hastpc(ah)) {
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "tpc", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_tpc, "I", "enable/disable per-packet TPC");
+ "tpc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_tpc, "I", "enable/disable per-packet TPC");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "tpack", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_tpack, "I", "tx power for ack frames");
+ "tpack", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+ 0, ath_sysctl_tpack, "I", "tx power for ack frames");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "tpcts", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_tpcts, "I", "tx power for cts frames");
+ "tpcts", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+ 0, ath_sysctl_tpcts, "I", "tx power for cts frames");
}
if (ath_hal_hasrfsilent(ah)) {
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "rfsilent", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_rfsilent, "I", "h/w RF silent config");
+ "rfsilent", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ sc, 0, ath_sysctl_rfsilent, "I", "h/w RF silent config");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "rfkill", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_rfkill, "I", "enable/disable RF kill switch");
+ "rfkill", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+ 0, ath_sysctl_rfkill, "I", "enable/disable RF kill switch");
}
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "txagg", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_txagg, "I", "");
+ "txagg", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_txagg, "I", "");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "forcebstuck", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_forcebstuck, "I", "");
+ "forcebstuck", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+ 0, ath_sysctl_forcebstuck, "I", "");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "hangcheck", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_hangcheck, "I", "");
+ "hangcheck", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0,
+ ath_sysctl_hangcheck, "I", "");
if (ath_hal_hasintmit(ah)) {
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "intmit", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_intmit, "I", "interference mitigation");
+ "intmit", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+ 0, ath_sysctl_intmit, "I", "interference mitigation");
}
sc->sc_monpass = HAL_RXERR_DECRYPT | HAL_RXERR_MIC;
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
@@ -989,8 +990,8 @@ ath_sysctlattach(struct ath_softc *sc)
"superframe", CTLFLAG_RD, &sc->sc_tdmabintval, 0,
"TDMA calculated super frame");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "setcca", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_setcca, "I", "enable CCA control");
+ "setcca", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ sc, 0, ath_sysctl_setcca, "I", "enable CCA control");
}
#endif
@@ -1028,7 +1029,8 @@ ath_sysctl_stats_attach_rxphyerr(struct ath_softc *sc, struct sysctl_oid_list *p
int i;
char sn[8];
- tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx_phy_err", CTLFLAG_RD, NULL, "Per-code RX PHY Errors");
+ tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx_phy_err",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Per-code RX PHY Errors");
child = SYSCTL_CHILDREN(tree);
for (i = 0; i < 64; i++) {
snprintf(sn, sizeof(sn), "%d", i);
@@ -1047,7 +1049,7 @@ ath_sysctl_stats_attach_intr(struct ath_softc *sc,
char sn[8];
tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "sync_intr",
- CTLFLAG_RD, NULL, "Sync interrupt statistics");
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Sync interrupt statistics");
child = SYSCTL_CHILDREN(tree);
for (i = 0; i < 32; i++) {
snprintf(sn, sizeof(sn), "%d", i);
@@ -1065,12 +1067,12 @@ ath_sysctl_stats_attach(struct ath_softc *sc)
/* Create "clear" node */
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "clear_stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
- ath_sysctl_clearstats, "I", "clear stats");
+ "clear_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
+ 0, ath_sysctl_clearstats, "I", "clear stats");
/* Create stats node */
- tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD,
- NULL, "Statistics");
+ tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics");
child = SYSCTL_CHILDREN(tree);
/* This was generated from if_athioctl.h */
@@ -1315,8 +1317,8 @@ ath_sysctl_hal_attach(struct ath_softc *sc)
struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev);
struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
- tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hal", CTLFLAG_RD,
- NULL, "Atheros HAL parameters");
+ tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hal",
+ CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Atheros HAL parameters");
child = SYSCTL_CHILDREN(tree);
sc->sc_ah->ah_config.ah_debug = 0;
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
index 553b8b76f761..e24b28e41fc8 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h
@@ -650,9 +650,13 @@ struct mlx5_ib_congestion {
struct sysctl_ctx_list ctx;
struct sx lock;
struct delayed_work dwork;
- u64 arg [0];
- MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR)
- MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR)
+ union {
+ u64 arg[1];
+ struct {
+ MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR)
+ MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR)
+ };
+ };
};
struct mlx5_ib_dev {
diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c
index b11cd0b53403..14cac913779e 100644
--- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c
+++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -37,7 +37,9 @@ static const char *mlx5_ib_cong_stats_desc[] = {
MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC)
};
-#define MLX5_IB_INDEX(field) (__offsetof(struct mlx5_ib_congestion, field) / sizeof(u64))
+#define MLX5_IB_INDEX(field) ( \
+ (__offsetof(struct mlx5_ib_congestion, field) - \
+ __offsetof(struct mlx5_ib_congestion, arg[0])) / sizeof(u64))
#define MLX5_IB_FLD_MAX(type, field) ((1ULL << __mlx5_bit_sz(type, field)) - 1ULL)
#define MLX5_IB_SET_CLIPPED(type, ptr, field, var) do { \
/* rangecheck */ \
diff --git a/sys/dev/otus/if_otus.c b/sys/dev/otus/if_otus.c
index 0489b943820e..9e3903770714 100644
--- a/sys/dev/otus/if_otus.c
+++ b/sys/dev/otus/if_otus.c
@@ -75,7 +75,8 @@ __FBSDID("$FreeBSD$");
#include "if_otusreg.h"
static int otus_debug = 0;
-static SYSCTL_NODE(_hw_usb, OID_AUTO, otus, CTLFLAG_RW, 0, "USB otus");
+static SYSCTL_NODE(_hw_usb, OID_AUTO, otus, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "USB otus");
SYSCTL_INT(_hw_usb_otus, OID_AUTO, debug, CTLFLAG_RWTUN, &otus_debug, 0,
"Debug level");
#define OTUS_DEBUG_XMIT 0x00000001
diff --git a/sys/kern/subr_compressor.c b/sys/kern/subr_compressor.c
index 5950ade1d3ca..b202d271cfa3 100644
--- a/sys/kern/subr_compressor.c
+++ b/sys/kern/subr_compressor.c
@@ -117,6 +117,13 @@ gz_init(size_t maxiosize, int level)
s->gz_stream.next_in = Z_NULL;
s->gz_stream.avail_in = 0;
+ if (level != Z_DEFAULT_COMPRESSION) {
+ if (level < Z_BEST_SPEED)
+ level = Z_BEST_SPEED;
+ else if (level > Z_BEST_COMPRESSION)
+ level = Z_BEST_COMPRESSION;
+ }
+
error = deflateInit2(&s->gz_stream, level, Z_DEFLATED, -MAX_WBITS,
DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
if (error != 0)
diff --git a/sys/kern/subr_smr.c b/sys/kern/subr_smr.c
index 816e68966029..530cf5118dac 100644
--- a/sys/kern/subr_smr.c
+++ b/sys/kern/subr_smr.c
@@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
/*
+ * Global Unbounded Sequences (GUS)
+ *
* This is a novel safe memory reclamation technique inspired by
* epoch based reclamation from Samy Al Bahra's concurrency kit which
* in turn was based on work described in:
@@ -53,7 +55,8 @@ __FBSDID("$FreeBSD$");
* This is not an implementation of hazard pointers or related
* techniques. The term safe memory reclamation is used as a
* generic descriptor for algorithms that defer frees to avoid
- * use-after-free errors with lockless datastructures.
+ * use-after-free errors with lockless datastructures or as
+ * a mechanism to detect quiescence for writer synchronization.
*
* The basic approach is to maintain a monotonic write sequence
* number that is updated on some application defined granularity.
@@ -67,7 +70,7 @@ __FBSDID("$FreeBSD$");
* a global write clock that is used to mark memory on free.
*
* The write and read sequence numbers can be thought of as a two
- * handed clock with readers always advancing towards writers. SMR
+ * handed clock with readers always advancing towards writers. GUS
* maintains the invariant that all readers can safely access memory
* that was visible at the time they loaded their copy of the sequence
* number. Periodically the read sequence or hand is polled and
@@ -80,9 +83,12 @@ __FBSDID("$FreeBSD$");
* A stored sequence number that falls outside of this range has expired
* and needs no scan to reclaim.
*
- * A notable distinction between this SMR and Epoch, qsbr, rcu, etc. is
+ * A notable distinction between GUS and Epoch, qsbr, rcu, etc. is
* that advancing the sequence number is decoupled from detecting its
- * observation. This results in a more granular assignment of sequence
+ * observation. That is to say, the delta between read and write
+ * sequence numbers is not bounded. This can be thought of as a more
+ * generalized form of epoch, which requires them to be at most one
+ * step apart. This results in a more granular assignment of sequence
* numbers even as read latencies prohibit all or some expiration.
* It also allows writers to advance the sequence number and save the
* poll for expiration until a later time when it is likely to
@@ -164,58 +170,143 @@ static uma_zone_t smr_zone;
#define SMR_SEQ_MAX_ADVANCE SMR_SEQ_MAX_DELTA / 2
#endif
+/*
+ * The grace period for lazy (tick based) SMR.
+ *
+ * Hardclock is responsible for advancing ticks on a single CPU while every
+ * CPU receives a regular clock interrupt. The clock interrupts are flushing
+ * the store buffers and any speculative loads that may violate our invariants.
+ * Because these interrupts are not synchronized we must wait one additional
+ * tick in the future to be certain that all processors have had their state
+ * synchronized by an interrupt.
+ *
+ * This assumes that the clock interrupt will only be delayed by other causes
+ * that will flush the store buffer or prevent access to the section protected
+ * data. For example, an idle processor, or a system management interrupt,
+ * or a vm exit.
+ *
+ * We must wait one additional tick if we are around the wrap condition
+ * because the write seq will move forward by two with one interrupt.
+ */
+#define SMR_LAZY_GRACE 2
+#define SMR_LAZY_GRACE_MAX (SMR_LAZY_GRACE + 1)
+
+/*
+ * The maximum sequence number ahead of wr_seq that may still be valid. The
+ * sequence may not be advanced on write for lazy or deferred SMRs. In this
+ * case poll needs to attempt to forward the sequence number if the goal is
+ * within wr_seq + SMR_SEQ_ADVANCE.
+ */
+#define SMR_SEQ_ADVANCE MAX(SMR_SEQ_INCR, SMR_LAZY_GRACE_MAX)
+
static SYSCTL_NODE(_debug, OID_AUTO, smr, CTLFLAG_RW, NULL, "SMR Stats");
static counter_u64_t advance = EARLY_COUNTER;
-SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance, CTLFLAG_RD, &advance, "");
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance, CTLFLAG_RW, &advance, "");
static counter_u64_t advance_wait = EARLY_COUNTER;
-SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance_wait, CTLFLAG_RD, &advance_wait, "");
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance_wait, CTLFLAG_RW, &advance_wait, "");
static counter_u64_t poll = EARLY_COUNTER;
-SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll, CTLFLAG_RD, &poll, "");
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll, CTLFLAG_RW, &poll, "");
static counter_u64_t poll_scan = EARLY_COUNTER;
-SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_scan, CTLFLAG_RD, &poll_scan, "");
-
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_scan, CTLFLAG_RW, &poll_scan, "");
+static counter_u64_t poll_fail = EARLY_COUNTER;
+SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_fail, CTLFLAG_RW, &poll_fail, "");
/*
- * Advance the write sequence and return the new value for use as the
- * wait goal. This guarantees that any changes made by the calling
- * thread prior to this call will be visible to all threads after
- * rd_seq meets or exceeds the return value.
+ * Advance a lazy write sequence number. These move forward at the rate of
+ * ticks. Grace is two ticks in the future. Lazy write sequence numbers can
+ * be even but not SMR_SEQ_INVALID so we pause time for a tick when we wrap.
*
- * This function may busy loop if the readers are roughly 1 billion
- * sequence numbers behind the writers.
+ * This returns the _current_ write sequence number. The lazy goal sequence
+ * number is SMR_LAZY_GRACE ticks ahead.
*/
-smr_seq_t
-smr_advance(smr_t smr)
+static smr_seq_t
+smr_lazy_advance(smr_t smr, smr_shared_t s)
{
- smr_shared_t s;
- smr_seq_t goal, s_rd_seq;
+ smr_seq_t s_rd_seq, s_wr_seq, goal;
+ int t;
+
+ CRITICAL_ASSERT(curthread);
/*
- * It is illegal to enter while in an smr section.
+ * Load s_wr_seq prior to ticks to ensure that the thread that
+ * observes the largest value wins.
*/
- SMR_ASSERT_NOT_ENTERED(smr);
+ s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
/*
- * Modifications not done in a smr section need to be visible
- * before advancing the seq.
+ * We must not allow a zero tick value. We go back in time one tick
+ * and advance the grace period forward one tick around zero.
*/
- atomic_thread_fence_rel();
+ t = ticks;
+ if (t == SMR_SEQ_INVALID)
+ t--;
/*
- * Load the current read seq before incrementing the goal so
- * we are guaranteed it is always < goal.
+ * The most probable condition is that the update already took place.
*/
- s = zpcpu_get(smr)->c_shared;
- s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
+ if (__predict_true(t == s_wr_seq))
+ goto out;
/*
- * Increment the shared write sequence by 2. Since it is
- * initialized to 1 this means the only valid values are
- * odd and an observed value of 0 in a particular CPU means
- * it is not currently in a read section.
+ * After long idle periods the read sequence may fall too far
+ * behind write. Prevent poll from ever seeing this condition
+ * by updating the stale rd_seq. This assumes that there can
+ * be no valid section 2bn ticks old. The rd_seq update must
+ * be visible before wr_seq to avoid races with other advance
+ * callers.
*/
- goal = atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR;
+ s_rd_seq = atomic_load_int(&s->s_rd_seq);
+ if (SMR_SEQ_GT(s_rd_seq, t))
+ atomic_cmpset_rel_int(&s->s_rd_seq, s_rd_seq, t);
+
+ /*
+ * Release to synchronize with the wr_seq load above. Ignore
+ * cmpset failures from simultaneous updates.
+ */
+ atomic_cmpset_rel_int(&s->s_wr_seq, s_wr_seq, t);
counter_u64_add(advance, 1);
+ /* If we lost either update race another thread did it. */
+ s_wr_seq = t;
+out:
+ goal = s_wr_seq + SMR_LAZY_GRACE;
+ /* Skip over the SMR_SEQ_INVALID tick. */
+ if (goal < SMR_LAZY_GRACE)
+ goal++;
+ return (goal);
+}
+
+/*
+ * Increment the shared write sequence by 2. Since it is initialized
+ * to 1 this means the only valid values are odd and an observed value
+ * of 0 in a particular CPU means it is not currently in a read section.
+ */
+static smr_seq_t
+smr_shared_advance(smr_shared_t s)
+{
+
+ return (atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR);
+}
+
+/*
+ * Advance the write sequence number for a normal smr section. If the
+ * write sequence is too far behind the read sequence we have to poll
+ * to advance rd_seq and prevent undetectable wraps.
+ */
+static smr_seq_t
+smr_default_advance(smr_t smr, smr_shared_t s)
+{
+ smr_seq_t goal, s_rd_seq;
+
+ CRITICAL_ASSERT(curthread);
+ KASSERT((zpcpu_get(smr)->c_flags & SMR_LAZY) == 0,
+ ("smr_default_advance: called with lazy smr."));
+
+ /*
+ * Load the current read seq before incrementing the goal so
+ * we are guaranteed it is always < goal.
+ */
+ s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
+ goal = smr_shared_advance(s);
/*
* Force a synchronization here if the goal is getting too
@@ -226,30 +317,172 @@ smr_advance(smr_t smr)
counter_u64_add(advance_wait, 1);
smr_wait(smr, goal - SMR_SEQ_MAX_ADVANCE);
}
+ counter_u64_add(advance, 1);
return (goal);
}
+/*
+ * Deferred SMRs conditionally update s_wr_seq based on a
+ * cpu local interval count.
+ */
+static smr_seq_t
+smr_deferred_advance(smr_t smr, smr_shared_t s, smr_t self)
+{
+
+ if (++self->c_deferred < self->c_limit)
+ return (smr_shared_current(s) + SMR_SEQ_INCR);
+ self->c_deferred = 0;
+ return (smr_default_advance(smr, s));
+}
+
+/*
+ * Advance the write sequence and return the value for use as the
+ * wait goal. This guarantees that any changes made by the calling
+ * thread prior to this call will be visible to all threads after
+ * rd_seq meets or exceeds the return value.
+ *
+ * This function may busy loop if the readers are roughly 1 billion
+ * sequence numbers behind the writers.
+ *
+ * Lazy SMRs will not busy loop and the wrap happens every 49.7 days
+ * at 1kHz and 119 hours at 10kHz. Readers can block for no longer
+ * than half of this for SMR_SEQ_ macros to continue working.
+ */
smr_seq_t
-smr_advance_deferred(smr_t smr, int limit)
+smr_advance(smr_t smr)
{
+ smr_t self;
+ smr_shared_t s;
smr_seq_t goal;
- smr_t csmr;
+ int flags;
+ /*
+ * It is illegal to enter while in an smr section.
+ */
SMR_ASSERT_NOT_ENTERED(smr);
+ /*
+ * Modifications not done in a smr section need to be visible
+ * before advancing the seq.
+ */
+ atomic_thread_fence_rel();
+
critical_enter();
- csmr = zpcpu_get(smr);
- if (++csmr->c_deferred >= limit) {
- goal = SMR_SEQ_INVALID;
- csmr->c_deferred = 0;
- } else
- goal = smr_shared_current(csmr->c_shared) + SMR_SEQ_INCR;
+ /* Try to touch the line once. */
+ self = zpcpu_get(smr);
+ s = self->c_shared;
+ flags = self->c_flags;
+ goal = SMR_SEQ_INVALID;
+ if ((flags & (SMR_LAZY | SMR_DEFERRED)) == 0)
+ goal = smr_default_advance(smr, s);
+ else if ((flags & SMR_LAZY) != 0)
+ goal = smr_lazy_advance(smr, s);
+ else if ((flags & SMR_DEFERRED) != 0)
+ goal = smr_deferred_advance(smr, s, self);
critical_exit();
- if (goal != SMR_SEQ_INVALID)
- return (goal);
- return (smr_advance(smr));
+ return (goal);
+}
+
+/*
+ * Poll to determine the currently observed sequence number on a cpu
+ * and spinwait if the 'wait' argument is true.
+ */
+static smr_seq_t
+smr_poll_cpu(smr_t c, smr_seq_t s_rd_seq, smr_seq_t goal, bool wait)
+{
+ smr_seq_t c_seq;
+
+ c_seq = SMR_SEQ_INVALID;
+ for (;;) {
+ c_seq = atomic_load_int(&c->c_seq);
+ if (c_seq == SMR_SEQ_INVALID)
+ break;
+
+ /*
+ * There is a race described in smr.h:smr_enter that
+ * can lead to a stale seq value but not stale data
+ * access. If we find a value out of range here we
+ * pin it to the current min to prevent it from
+ * advancing until that stale section has expired.
+ *
+ * The race is created when a cpu loads the s_wr_seq
+ * value in a local register and then another thread
+ * advances s_wr_seq and calls smr_poll() which will
+ * observe no value yet in c_seq and advance s_rd_seq
+ * up to s_wr_seq which is beyond the register
+ * cached value. This is only likely to happen on
+ * hypervisor or with a system management interrupt.
+ */
+ if (SMR_SEQ_LT(c_seq, s_rd_seq))
+ c_seq = s_rd_seq;
+
+ /*
+ * If the sequence number meets the goal we are done
+ * with this cpu.
+ */
+ if (SMR_SEQ_LEQ(goal, c_seq))
+ break;
+
+ if (!wait)
+ break;
+ cpu_spinwait();
+ }
+
+ return (c_seq);
+}
+
+/*
+ * Loop until all cores have observed the goal sequence or have
+ * gone inactive. Returns the oldest sequence currently active.
+ *
+ * This function assumes a snapshot of sequence values has
+ * been obtained and validated by smr_poll().
+ */
+static smr_seq_t
+smr_poll_scan(smr_t smr, smr_shared_t s, smr_seq_t s_rd_seq,
+ smr_seq_t s_wr_seq, smr_seq_t goal, bool wait)
+{
+ smr_seq_t rd_seq, c_seq;
+ int i;
+
+ CRITICAL_ASSERT(curthread);
+ counter_u64_add_protected(poll_scan, 1);
+
+ /*
+ * The read sequence can be no larger than the write sequence at
+ * the start of the poll.
+ */
+ rd_seq = s_wr_seq;
+ CPU_FOREACH(i) {
+ /*
+ * Query the active sequence on this cpu. If we're not
+ * waiting and we don't meet the goal we will still scan
+ * the rest of the cpus to update s_rd_seq before returning
+ * failure.
+ */
+ c_seq = smr_poll_cpu(zpcpu_get_cpu(smr, i), s_rd_seq, goal,
+ wait);
+
+ /*
+ * Limit the minimum observed rd_seq whether we met the goal
+ * or not.
+ */
+ if (c_seq != SMR_SEQ_INVALID)
+ rd_seq = SMR_SEQ_MIN(rd_seq, c_seq);
+ }
+
+ /*
+ * Advance the rd_seq only when we observed a strictly newer value.
+ */
+ s_rd_seq = atomic_load_int(&s->s_rd_seq);
+ if (SMR_SEQ_GT(rd_seq, s_rd_seq)) {
+ atomic_cmpset_int(&s->s_rd_seq, s_rd_seq, rd_seq);
+ s_rd_seq = rd_seq;
+ }
+
+ return (s_rd_seq);
+}
/*
@@ -268,9 +501,10 @@ bool
smr_poll(smr_t smr, smr_seq_t goal, bool wait)
{
smr_shared_t s;
- smr_t c;
- smr_seq_t s_wr_seq, s_rd_seq, rd_seq, c_seq;
- int i;
+ smr_t self;
+ smr_seq_t s_wr_seq, s_rd_seq;
+ smr_delta_t delta;
+ int flags;
bool success;
/*
@@ -278,6 +512,8 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
*/
KASSERT(!wait || !SMR_ENTERED(smr),
("smr_poll: Blocking not allowed in a SMR section."));
+ KASSERT(!wait || (zpcpu_get(smr)->c_flags & SMR_LAZY) == 0,
+ ("smr_poll: Blocking not allowed on lazy smrs."));
/*
* Use a critical section so that we can avoid ABA races
@@ -285,116 +521,79 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait)
*/
success = true;
critical_enter();
- s = zpcpu_get(smr)->c_shared;
+ /* Attempt to load from self only once. */
+ self = zpcpu_get(smr);
+ s = self->c_shared;
+ flags = self->c_flags;
counter_u64_add_protected(poll, 1);
/*
+ * Conditionally advance the lazy write clock on any writer
+ * activity. This may reset s_rd_seq.
+ */
+ if ((flags & SMR_LAZY) != 0)
+ smr_lazy_advance(smr, s);
+
+ /*
* Acquire barrier loads s_wr_seq after s_rd_seq so that we can not
* observe an updated read sequence that is larger than write.
*/
s_rd_seq = atomic_load_acq_int(&s->s_rd_seq);
/*
- * wr_seq must be loaded prior to any c_seq value so that a stale
- * c_seq can only reference time after this wr_seq.
+ * If we have already observed the sequence number we can immediately
+ * return success. Most polls should meet this criterion.
*/
- s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
+ if (SMR_SEQ_LEQ(goal, s_rd_seq))
+ goto out;
/*
- * This may have come from a deferred advance. Consider one
- * increment past the current wr_seq valid and make sure we
- * have advanced far enough to succeed. We simply add to avoid
- * an additional fence.
+ * wr_seq must be loaded prior to any c_seq value so that a
+ * stale c_seq can only reference time after this wr_seq.
*/
- if (goal == s_wr_seq + SMR_SEQ_INCR) {
- atomic_add_int(&s->s_wr_seq, SMR_SEQ_INCR);
- s_wr_seq = goal;
- }
+ s_wr_seq = atomic_load_acq_int(&s->s_wr_seq);
/*
- * Detect whether the goal is valid and has already been observed.
- *
- * The goal must be in the range of s_wr_seq >= goal >= s_rd_seq for
- * it to be valid. If it is not then the caller held on to it and
- * the integer wrapped. If we wrapped back within range the caller
- * will harmlessly scan.
- *
- * A valid goal must be greater than s_rd_seq or we have not verified
- * that it has been observed and must fall through to polling.
+ * This is the distance from s_wr_seq to goal. Positive values
+ * are in the future.
*/
- if (SMR_SEQ_GEQ(s_rd_seq, goal) || SMR_SEQ_LT(s_wr_seq, goal))
- goto out;
+ delta = SMR_SEQ_DELTA(goal, s_wr_seq);
/*
- * Loop until all cores have observed the goal sequence or have
- * gone inactive. Keep track of the oldest sequence currently
- * active as rd_seq.
+ * Detect a stale wr_seq.
+ *
+ * This goal may have come from a deferred advance or a lazy
+ * smr. If we are not blocking we can not succeed but the
+ * sequence number is valid.
*/
- counter_u64_add_protected(poll_scan, 1);
- rd_seq = s_wr_seq;
- CPU_FOREACH(i) {
- c = zpcpu_get_cpu(smr, i);
- c_seq = SMR_SEQ_INVALID;
- for (;;) {
- c_seq = atomic_load_int(&c->c_seq);
- if (c_seq == SMR_SEQ_INVALID)
- break;
-
- /*
- * There is a race described in smr.h:smr_enter that
- * can lead to a stale seq value but not stale data
- * access. If we find a value out of range here we
- * pin it to the current min to prevent it from
- * advancing until that stale section has expired.
- *
- * The race is created when a cpu loads the s_wr_seq
- * value in a local register and then another thread
- * advances s_wr_seq and calls smr_poll() which will
- * oberve no value yet in c_seq and advance s_rd_seq
- * up to s_wr_seq which is beyond the register
- * cached value. This is only likely to happen on
- * hypervisor or with a system management interrupt.
- */
- if (SMR_SEQ_LT(c_seq, s_rd_seq))
- c_seq = s_rd_seq;
-
- /*
- * If the sequence number meets the goal we are
- * done with this cpu.
- */
- if (SMR_SEQ_GEQ(c_seq, goal))
- break;
-
- /*
- * If we're not waiting we will still scan the rest
- * of the cpus and update s_rd_seq before returning
- * an error.
- */
- if (!wait) {
- success = false;
- break;
- }
- cpu_spinwait();
+ if (delta > 0 && delta <= SMR_SEQ_ADVANCE &&
+ (flags & (SMR_LAZY | SMR_DEFERRED)) != 0) {
+ if (!wait) {
+ success = false;
+ goto out;
}
-
- /*
- * Limit the minimum observed rd_seq whether we met the goal
- * or not.
- */
- if (c_seq != SMR_SEQ_INVALID && SMR_SEQ_GT(rd_seq, c_seq))
- rd_seq = c_seq;
+ /* LAZY is always !wait. */
+ s_wr_seq = smr_shared_advance(s);
+ delta = 0;
}
/*
- * Advance the rd_seq as long as we observed the most recent one.
+ * Detect an invalid goal.
+ *
+ * The goal must be in the range of s_wr_seq >= goal >= s_rd_seq for
+ * it to be valid. If it is not then the caller held on to it and
+ * the integer wrapped. If we wrapped back within range the caller
+ * will harmlessly scan.
*/
- s_rd_seq = atomic_load_int(&s->s_rd_seq);
- do {
- if (SMR_SEQ_LEQ(rd_seq, s_rd_seq))
- goto out;
- } while (atomic_fcmpset_int(&s->s_rd_seq, &s_rd_seq, rd_seq) == 0);
+ if (delta > 0)
+ goto out;
+ /* Determine the lowest visible sequence number. */
+ s_rd_seq = smr_poll_scan(smr, s, s_rd_seq, s_wr_seq, goal, wait);
+ success = SMR_SEQ_LEQ(goal, s_rd_seq);
out:
+ if (!success)
+ counter_u64_add_protected(poll_fail, 1);
critical_exit();
/*
@@ -407,7 +606,7 @@ out:
}
smr_t
-smr_create(const char *name)
+smr_create(const char *name, int limit, int flags)
{
smr_t smr, c;
smr_shared_t s;
@@ -417,13 +616,19 @@ smr_create(const char *name)
smr = uma_zalloc_pcpu(smr_zone, M_WAITOK);
s->s_name = name;
- s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
+ if ((flags & SMR_LAZY) == 0)
+ s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT;
+ else
+ s->s_rd_seq = s->s_wr_seq = ticks;
/* Initialize all CPUS, not just those running. */
for (i = 0; i <= mp_maxid; i++) {
c = zpcpu_get_cpu(smr, i);
c->c_seq = SMR_SEQ_INVALID;
c->c_shared = s;
+ c->c_deferred = 0;
+ c->c_limit = limit;
+ c->c_flags = flags;
}
atomic_thread_fence_seq_cst();
@@ -460,5 +665,6 @@ smr_init_counters(void *unused)
advance_wait = counter_u64_alloc(M_WAITOK);
poll = counter_u64_alloc(M_WAITOK);
poll_scan = counter_u64_alloc(M_WAITOK);
+ poll_fail = counter_u64_alloc(M_WAITOK);
}
SYSINIT(smr_counters, SI_SUB_CPU, SI_ORDER_ANY, smr_init_counters, NULL);
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 5ef3f1c6935f..8dd7b48e59ed 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -326,21 +326,23 @@ ast(struct trapframe *framep)
if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 ||
!SIGISEMPTY(p->p_siglist)) {
sigfastblock_fetch(td);
- PROC_LOCK(p);
- mtx_lock(&p->p_sigacts->ps_mtx);
if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0 &&
td->td_sigblock_val != 0) {
sigfastblock_setpend(td);
+ PROC_LOCK(p);
reschedule_signals(p, fastblock_mask,
- SIGPROCMASK_PS_LOCKED | SIGPROCMASK_FASTBLK);
+ SIGPROCMASK_FASTBLK);
+ PROC_UNLOCK(p);
} else {
+ PROC_LOCK(p);
+ mtx_lock(&p->p_sigacts->ps_mtx);
while ((sig = cursig(td)) != 0) {
KASSERT(sig >= 0, ("sig %d", sig));
postsig(sig);
}
+ mtx_unlock(&p->p_sigacts->ps_mtx);
+ PROC_UNLOCK(p);
}
- mtx_unlock(&p->p_sigacts->ps_mtx);
- PROC_UNLOCK(p);
}
/*
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 1659598d5e49..58a97a1337f0 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -403,14 +403,6 @@ namei(struct nameidata *ndp)
ndp->ni_rootdir = fdp->fd_rdir;
ndp->ni_topdir = fdp->fd_jdir;
- /*
- * If we are auditing the kernel pathname, save the user pathname.
- */
- if (cnp->cn_flags & AUDITVNODE1)
- AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf);
- if (cnp->cn_flags & AUDITVNODE2)
- AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf);
-
startdir_used = 0;
dp = NULL;
cnp->cn_nameptr = cnp->cn_pnbuf;
@@ -505,6 +497,13 @@ namei(struct nameidata *ndp)
ndp->ni_lcf |= NI_LCF_LATCH;
}
FILEDESC_SUNLOCK(fdp);
+ /*
+ * If we are auditing the kernel pathname, save the user pathname.
+ */
+ if (cnp->cn_flags & AUDITVNODE1)
+ AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf);
+ if (cnp->cn_flags & AUDITVNODE2)
+ AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf);
if (ndp->ni_startdir != NULL && !startdir_used)
vrele(ndp->ni_startdir);
if (error != 0) {
diff --git a/sys/modules/linuxkpi/Makefile b/sys/modules/linuxkpi/Makefile
index 7eac41ab9273..dea5512d2b9b 100644
--- a/sys/modules/linuxkpi/Makefile
+++ b/sys/modules/linuxkpi/Makefile
@@ -15,6 +15,7 @@ SRCS= linux_compat.c \
linux_rcu.c \
linux_seq_file.c \
linux_schedule.c \
+ linux_shmemfs.c \
linux_slab.c \
linux_tasklet.c \
linux_usb.c \
diff --git a/sys/net80211/ieee80211_alq.c b/sys/net80211/ieee80211_alq.c
index 77824515a1fc..d7e6a78319db 100644
--- a/sys/net80211/ieee80211_alq.c
+++ b/sys/net80211/ieee80211_alq.c
@@ -111,8 +111,10 @@ sysctl_ieee80211_alq_log(SYSCTL_HANDLER_ARGS)
return (ieee80211_alq_setlogging(enable));
}
-SYSCTL_PROC(_net_wlan, OID_AUTO, alq, CTLTYPE_INT|CTLFLAG_RW,
- 0, 0, sysctl_ieee80211_alq_log, "I", "Enable net80211 alq logging");
+SYSCTL_PROC(_net_wlan, OID_AUTO, alq,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, 0,
+ sysctl_ieee80211_alq_log, "I",
+ "Enable net80211 alq logging");
SYSCTL_INT(_net_wlan, OID_AUTO, alq_size, CTLFLAG_RW,
&ieee80211_alq_qsize, 0, "In-memory log size (bytes)");
SYSCTL_INT(_net_wlan, OID_AUTO, alq_lost, CTLFLAG_RW,
diff --git a/sys/net80211/ieee80211_amrr.c b/sys/net80211/ieee80211_amrr.c
index 384a5956fe4e..f89437d3e857 100644
--- a/sys/net80211/ieee80211_amrr.c
+++ b/sys/net80211/ieee80211_amrr.c
@@ -465,8 +465,8 @@ amrr_sysctlattach(struct ieee80211vap *vap,
return;
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "amrr_rate_interval", CTLTYPE_INT | CTLFLAG_RW, vap,
- 0, amrr_sysctl_interval, "I", "amrr operation interval (ms)");
+ "amrr_rate_interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ vap, 0, amrr_sysctl_interval, "I", "amrr operation interval (ms)");
/* XXX bounds check values */
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"amrr_max_sucess_threshold", CTLFLAG_RW,
diff --git a/sys/net80211/ieee80211_freebsd.c b/sys/net80211/ieee80211_freebsd.c
index 251b6b8d3c96..35c999587f58 100644
--- a/sys/net80211/ieee80211_freebsd.c
+++ b/sys/net80211/ieee80211_freebsd.c
@@ -60,7 +60,8 @@ __FBSDID("$FreeBSD$");
#include <net80211/ieee80211_var.h>
#include <net80211/ieee80211_input.h>
-SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD, 0, "IEEE 80211 parameters");
+SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "IEEE 80211 parameters");
#ifdef IEEE80211_DEBUG
static int ieee80211_debug = 0;
@@ -227,10 +228,10 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap)
sysctl_ctx_init(ctx);
snprintf(num, sizeof(num), "%u", ifp->if_dunit);
oid = SYSCTL_ADD_NODE(ctx, &SYSCTL_NODE_CHILDREN(_net, wlan),
- OID_AUTO, num, CTLFLAG_RD, NULL, "");
+ OID_AUTO, num, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "%parent", CTLTYPE_STRING | CTLFLAG_RD, vap->iv_ic, 0,
- ieee80211_sysctl_parent, "A", "parent device");
+ "%parent", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
+ vap->iv_ic, 0, ieee80211_sysctl_parent, "A", "parent device");
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
"driver_caps", CTLFLAG_RW, &vap->iv_caps, 0,
"driver capabilities");
@@ -245,21 +246,21 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap)
"consecutive beacon misses before scanning");
/* XXX inherit from tunables */
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "inact_run", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_run, 0,
- ieee80211_sysctl_inact, "I",
- "station inactivity timeout (sec)");
+ "inact_run", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &vap->iv_inact_run, 0, ieee80211_sysctl_inact, "I",
+ "station inactivity timeout (sec)");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "inact_probe", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_probe, 0,
- ieee80211_sysctl_inact, "I",
- "station inactivity probe timeout (sec)");
+ "inact_probe", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &vap->iv_inact_probe, 0, ieee80211_sysctl_inact, "I",
+ "station inactivity probe timeout (sec)");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "inact_auth", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_auth, 0,
- ieee80211_sysctl_inact, "I",
- "station authentication timeout (sec)");
+ "inact_auth", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &vap->iv_inact_auth, 0, ieee80211_sysctl_inact, "I",
+ "station authentication timeout (sec)");
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "inact_init", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_init, 0,
- ieee80211_sysctl_inact, "I",
- "station initial state timeout (sec)");
+ "inact_init", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &vap->iv_inact_init, 0, ieee80211_sysctl_inact, "I",
+ "station initial state timeout (sec)");
if (vap->iv_htcaps & IEEE80211_HTC_HT) {
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
"ampdu_mintraffic_bk", CTLFLAG_RW,
@@ -280,14 +281,14 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap)
}
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "force_restart", CTLTYPE_INT | CTLFLAG_RW, vap, 0,
- ieee80211_sysctl_vap_restart, "I",
- "force a VAP restart");
+ "force_restart", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ vap, 0, ieee80211_sysctl_vap_restart, "I", "force a VAP restart");
if (vap->iv_caps & IEEE80211_C_DFS) {
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
- "radar", CTLTYPE_INT | CTLFLAG_RW, vap->iv_ic, 0,
- ieee80211_sysctl_radar, "I", "simulate radar event");
+ "radar", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ vap->iv_ic, 0, ieee80211_sysctl_radar, "I",
+ "simulate radar event");
}
vap->iv_sysctl = ctx;
vap->iv_oid = oid;
diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c
index d0c6e7a4f415..8055adc5635c 100644
--- a/sys/net80211/ieee80211_ht.c
+++ b/sys/net80211/ieee80211_ht.c
@@ -139,22 +139,25 @@ const struct ieee80211_mcs_rates ieee80211_htrates[IEEE80211_HTRATE_MAXSIZE] = {
};
static int ieee80211_ampdu_age = -1; /* threshold for ampdu reorder q (ms) */
-SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age, CTLTYPE_INT | CTLFLAG_RW,
- &ieee80211_ampdu_age, 0, ieee80211_sysctl_msecs_ticks, "I",
- "AMPDU max reorder age (ms)");
+SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &ieee80211_ampdu_age, 0, ieee80211_sysctl_msecs_ticks, "I",
+ "AMPDU max reorder age (ms)");
static int ieee80211_recv_bar_ena = 1;
SYSCTL_INT(_net_wlan, OID_AUTO, recv_bar, CTLFLAG_RW, &ieee80211_recv_bar_ena,
0, "BAR frame processing (ena/dis)");
static int ieee80211_addba_timeout = -1;/* timeout for ADDBA response */
-SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout, CTLTYPE_INT | CTLFLAG_RW,
- &ieee80211_addba_timeout, 0, ieee80211_sysctl_msecs_ticks, "I",
- "ADDBA request timeout (ms)");
+SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &ieee80211_addba_timeout, 0, ieee80211_sysctl_msecs_ticks, "I",
+ "ADDBA request timeout (ms)");
static int ieee80211_addba_backoff = -1;/* backoff after max ADDBA requests */
-SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff, CTLTYPE_INT | CTLFLAG_RW,
- &ieee80211_addba_backoff, 0, ieee80211_sysctl_msecs_ticks, "I",
- "ADDBA request backoff (ms)");
+SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &ieee80211_addba_backoff, 0, ieee80211_sysctl_msecs_ticks, "I",
+ "ADDBA request backoff (ms)");
static int ieee80211_addba_maxtries = 3;/* max ADDBA requests before backoff */
SYSCTL_INT(_net_wlan, OID_AUTO, addba_maxtries, CTLFLAG_RW,
&ieee80211_addba_maxtries, 0, "max ADDBA requests sent before backoff");
diff --git a/sys/net80211/ieee80211_hwmp.c b/sys/net80211/ieee80211_hwmp.c
index 38d85d622028..9f1a9fef6818 100644
--- a/sys/net80211/ieee80211_hwmp.c
+++ b/sys/net80211/ieee80211_hwmp.c
@@ -154,39 +154,46 @@ struct ieee80211_hwmp_state {
uint8_t hs_maxhops; /* max hop count */
};
-static SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD, 0,
+static SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"IEEE 802.11s HWMP parameters");
static int ieee80211_hwmp_targetonly = 0;
SYSCTL_INT(_net_wlan_hwmp, OID_AUTO, targetonly, CTLFLAG_RW,
&ieee80211_hwmp_targetonly, 0, "Set TO bit on generated PREQs");
static int ieee80211_hwmp_pathtimeout = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, pathlifetime, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, pathlifetime,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&ieee80211_hwmp_pathtimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
"path entry lifetime (ms)");
static int ieee80211_hwmp_maxpreq_retries = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, maxpreq_retries, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, maxpreq_retries,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&ieee80211_hwmp_maxpreq_retries, 0, ieee80211_sysctl_msecs_ticks, "I",
"maximum number of preq retries");
static int ieee80211_hwmp_net_diameter_traversaltime = -1;
SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, net_diameter_traversal_time,
- CTLTYPE_INT | CTLFLAG_RW, &ieee80211_hwmp_net_diameter_traversaltime, 0,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ &ieee80211_hwmp_net_diameter_traversaltime, 0,
ieee80211_sysctl_msecs_ticks, "I",
"estimate travelse time across the MBSS (ms)");
static int ieee80211_hwmp_roottimeout = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, roottimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, roottimeout,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&ieee80211_hwmp_roottimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
"root PREQ timeout (ms)");
static int ieee80211_hwmp_rootint = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootint, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootint,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&ieee80211_hwmp_rootint, 0, ieee80211_sysctl_msecs_ticks, "I",
"root interval (ms)");
static int ieee80211_hwmp_rannint = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rannint, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rannint,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
&ieee80211_hwmp_rannint, 0, ieee80211_sysctl_msecs_ticks, "I",
"root announcement interval (ms)");
static struct timeval ieee80211_hwmp_rootconfint = { 0, 0 };
static int ieee80211_hwmp_rootconfint_internal = -1;
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootconfint, CTLTYPE_INT | CTLFLAG_RD,
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootconfint,
+ CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
&ieee80211_hwmp_rootconfint_internal, 0, ieee80211_sysctl_msecs_ticks, "I",
"root confirmation interval (ms) (read-only)");
@@ -205,9 +212,10 @@ static struct ieee80211_mesh_proto_path mesh_proto_hwmp = {
.mpp_newstate = hwmp_newstate,
.mpp_privlen = sizeof(struct ieee80211_hwmp_route),
};
-SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact, CTLTYPE_INT | CTLFLAG_RW,
- &mesh_proto_hwmp.mpp_inact, 0, ieee80211_sysctl_msecs_ticks, "I",
- "mesh route inactivity timeout (ms)");
+SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &mesh_proto_hwmp.mpp_inact, 0, ieee80211_sysctl_msecs_ticks, "I",
+ "mesh route inactivity timeout (ms)");
static void
diff --git a/sys/net80211/ieee80211_mesh.c b/sys/net80211/ieee80211_mesh.c
index 9816473b6ece..4ff3c9ba66d8 100644
--- a/sys/net80211/ieee80211_mesh.c
+++ b/sys/net80211/ieee80211_mesh.c
@@ -106,27 +106,32 @@ uint32_t mesh_airtime_calc(struct ieee80211_node *);
/*
* Timeout values come from the specification and are in milliseconds.
*/
-static SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD, 0,
+static SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"IEEE 802.11s parameters");
static int ieee80211_mesh_gateint = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, gateint, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, gateint,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
&ieee80211_mesh_gateint, 0, ieee80211_sysctl_msecs_ticks, "I",
"mesh gate interval (ms)");
static int ieee80211_mesh_retrytimeout = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, retrytimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, retrytimeout,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
&ieee80211_mesh_retrytimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
"Retry timeout (msec)");
static int ieee80211_mesh_holdingtimeout = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, holdingtimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, holdingtimeout,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
&ieee80211_mesh_holdingtimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
"Holding state timeout (msec)");
static int ieee80211_mesh_confirmtimeout = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, confirmtimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, confirmtimeout,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
&ieee80211_mesh_confirmtimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
"Confirm state timeout (msec)");
static int ieee80211_mesh_backofftimeout = -1;
-SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, backofftimeout, CTLTYPE_INT | CTLFLAG_RW,
+SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, backofftimeout,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
&ieee80211_mesh_backofftimeout, 0, ieee80211_sysctl_msecs_ticks, "I",
"Backoff timeout (msec). This is to throutles peering forever when "
"not receiving answer or is rejected by a neighbor");
diff --git a/sys/net80211/ieee80211_rssadapt.c b/sys/net80211/ieee80211_rssadapt.c
index 7d9158414a4c..4ca0b29fa569 100644
--- a/sys/net80211/ieee80211_rssadapt.c
+++ b/sys/net80211/ieee80211_rssadapt.c
@@ -381,6 +381,7 @@ rssadapt_sysctlattach(struct ieee80211vap *vap,
{
SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
- "rssadapt_rate_interval", CTLTYPE_INT | CTLFLAG_RW, vap,
- 0, rssadapt_sysctl_interval, "I", "rssadapt operation interval (ms)");
+ "rssadapt_rate_interval",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, vap, 0,
+ rssadapt_sysctl_interval, "I", "rssadapt operation interval (ms)");
}
diff --git a/sys/net80211/ieee80211_superg.c b/sys/net80211/ieee80211_superg.c
index c9c6d96e6b87..dacd466c3de7 100644
--- a/sys/net80211/ieee80211_superg.c
+++ b/sys/net80211/ieee80211_superg.c
@@ -92,9 +92,10 @@ static int ieee80211_ffppsmin = 2; /* pps threshold for ff aggregation */
SYSCTL_INT(_net_wlan, OID_AUTO, ffppsmin, CTLFLAG_RW,
&ieee80211_ffppsmin, 0, "min packet rate before fast-frame staging");
static int ieee80211_ffagemax = -1; /* max time frames held on stage q */
-SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax, CTLTYPE_INT | CTLFLAG_RW,
- &ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I",
- "max hold time for fast-frame staging (ms)");
+SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I",
+ "max hold time for fast-frame staging (ms)");
static void
ff_age_all(void *arg, int npending)
diff --git a/sys/netgraph/ng_socket.c b/sys/netgraph/ng_socket.c
index 6339ce0e2ccf..e772ace9c04a 100644
--- a/sys/netgraph/ng_socket.c
+++ b/sys/netgraph/ng_socket.c
@@ -219,7 +219,6 @@ static int
ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *td)
{
- struct epoch_tracker et;
struct ngpcb *const pcbp = sotongpcb(so);
struct ngsock *const priv = NG_NODE_PRIVATE(pcbp->sockdata->node);
struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr;
@@ -338,9 +337,7 @@ ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
item->apply = &apply;
priv->error = -1;
- NET_EPOCH_ENTER(et);
error = ng_snd_item(item, 0);
- NET_EPOCH_EXIT(et);
mtx_lock(&priv->mtx);
if (priv->error == -1)
@@ -413,6 +410,7 @@ ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr;
int len, error;
hook_p hook = NULL;
+ item_p item;
char hookname[NG_HOOKSIZ];
if ((pcbp == NULL) || (control != NULL)) {
@@ -465,8 +463,10 @@ ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
}
/* Send data. */
+ item = ng_package_data(m, NG_WAITOK);
+ m = NULL;
NET_EPOCH_ENTER(et);
- NG_SEND_DATA_FLAGS(error, hook, m, NG_WAITOK);
+ NG_FWD_ITEM_HOOK(error, item, hook);
NET_EPOCH_EXIT(et);
release:
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
index 1b257fb2a372..9d3c51a066b8 100644
--- a/sys/netinet/ip_carp.c
+++ b/sys/netinet/ip_carp.c
@@ -218,19 +218,22 @@ static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS);
static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS);
static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS);
-SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
+SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "CARP");
SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow,
- CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_allow_sysctl, "I",
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ 0, 0, carp_allow_sysctl, "I",
"Accept incoming CARP packets");
SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp,
- CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_dscp_sysctl, "I",
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ 0, 0, carp_dscp_sysctl, "I",
"DSCP value for carp packets");
SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(carp_log), 0, "CARP log level");
SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion,
- CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
+ CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
0, 0, carp_demote_adj_sysctl, "I",
"Adjust demotion factor (skew of advskew)");
SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor,
diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c
index 5ab0bbb925b6..e6c85dbb884a 100644
--- a/sys/netpfil/pf/if_pfsync.c
+++ b/sys/netpfil/pf/if_pfsync.c
@@ -273,7 +273,8 @@ static void pfsync_uninit(void);
static unsigned long pfsync_buckets;
-SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
+SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "PFSYNC");
SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(pfsyncstats), pfsyncstats,
"PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 19eec6690c08..975f9a2f1951 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -369,7 +369,8 @@ VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
-SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)");
+SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "pf(4)");
u_long pf_hashmask;
u_long pf_srchashmask;
diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c
index 82e5009ca888..5abb19102a2b 100644
--- a/sys/powerpc/booke/pmap.c
+++ b/sys/powerpc/booke/pmap.c
@@ -705,11 +705,10 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx,
req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED;
while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) {
+ if (nosleep)
+ return (NULL);
PMAP_UNLOCK(pmap);
rw_wunlock(&pvh_global_lock);
- if (nosleep) {
- return (NULL);
- }
vm_wait(NULL);
rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
@@ -905,8 +904,6 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
pidx = (PTBL_PAGES * pdir_idx) + i;
while ((m = vm_page_alloc(NULL, pidx,
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
- PMAP_UNLOCK(pmap);
- rw_wunlock(&pvh_global_lock);
if (nosleep) {
ptbl_free_pmap_ptbl(pmap, ptbl);
for (j = 0; j < i; j++)
@@ -914,6 +911,8 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep)
vm_wire_sub(i);
return (NULL);
}
+ PMAP_UNLOCK(pmap);
+ rw_wunlock(&pvh_global_lock);
vm_wait(NULL);
rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
@@ -2481,8 +2480,8 @@ mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start,
PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0);
m = TAILQ_NEXT(m, listq);
}
- rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ rw_wunlock(&pvh_global_lock);
}
static void
@@ -2495,8 +2494,8 @@ mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m,
mmu_booke_enter_locked(mmu, pmap, va, m,
prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP |
PMAP_ENTER_QUICK_LOCKED, 0);
- rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ rw_wunlock(&pvh_global_lock);
}
/*
diff --git a/sys/security/audit/audit.h b/sys/security/audit/audit.h
index a7600d5c1f90..7e13806a0171 100644
--- a/sys/security/audit/audit.h
+++ b/sys/security/audit/audit.h
@@ -120,6 +120,10 @@ void audit_arg_upath1(struct thread *td, int dirfd, char *upath);
void audit_arg_upath1_canon(char *upath);
void audit_arg_upath2(struct thread *td, int dirfd, char *upath);
void audit_arg_upath2_canon(char *upath);
+void audit_arg_upath1_vp(struct thread *td, struct vnode *rdir,
+ struct vnode *cdir, char *upath);
+void audit_arg_upath2_vp(struct thread *td, struct vnode *rdir,
+ struct vnode *cdir, char *upath);
void audit_arg_vnode1(struct vnode *vp);
void audit_arg_vnode2(struct vnode *vp);
void audit_arg_text(const char *text);
@@ -362,6 +366,16 @@ void audit_thread_free(struct thread *td);
audit_arg_upath2_canon((upath)); \
} while (0)
+#define AUDIT_ARG_UPATH1_VP(td, rdir, cdir, upath) do { \
+ if (AUDITING_TD(curthread)) \
+ audit_arg_upath1_vp((td), (rdir), (cdir), (upath)); \
+} while (0)
+
+#define AUDIT_ARG_UPATH2_VP(td, rdir, cdir, upath) do { \
+ if (AUDITING_TD(curthread)) \
+ audit_arg_upath2_vp((td), (rdir), (cdir), (upath)); \
+} while (0)
+
#define AUDIT_ARG_VALUE(value) do { \
if (AUDITING_TD(curthread)) \
audit_arg_value((value)); \
@@ -448,6 +462,8 @@ void audit_thread_free(struct thread *td);
#define AUDIT_ARG_UPATH1_CANON(upath)
#define AUDIT_ARG_UPATH2(td, dirfd, upath)
#define AUDIT_ARG_UPATH2_CANON(upath)
+#define AUDIT_ARG_UPATH1_VP(td, rdir, cdir, upath)
+#define AUDIT_ARG_UPATH2_VP(td, rdir, cdir, upath)
#define AUDIT_ARG_VALUE(value)
#define AUDIT_ARG_VNODE1(vp)
#define AUDIT_ARG_VNODE2(vp)
diff --git a/sys/security/audit/audit_arg.c b/sys/security/audit/audit_arg.c
index f50d77281095..fc5318750e3e 100644
--- a/sys/security/audit/audit_arg.c
+++ b/sys/security/audit/audit_arg.c
@@ -767,6 +767,44 @@ audit_arg_upath2(struct thread *td, int dirfd, char *upath)
ARG_SET_VALID(ar, ARG_UPATH2);
}
+static void
+audit_arg_upath_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir,
+ char *upath, char **pathp)
+{
+
+ if (*pathp == NULL)
+ *pathp = malloc(MAXPATHLEN, M_AUDITPATH, M_WAITOK);
+ audit_canon_path_vp(td, rdir, cdir, upath, *pathp);
+}
+
+void
+audit_arg_upath1_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir,
+ char *upath)
+{
+ struct kaudit_record *ar;
+
+ ar = currecord();
+ if (ar == NULL)
+ return;
+
+ audit_arg_upath_vp(td, rdir, cdir, upath, &ar->k_ar.ar_arg_upath1);
+ ARG_SET_VALID(ar, ARG_UPATH1);
+}
+
+void
+audit_arg_upath2_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir,
+ char *upath)
+{
+ struct kaudit_record *ar;
+
+ ar = currecord();
+ if (ar == NULL)
+ return;
+
+ audit_arg_upath_vp(td, rdir, cdir, upath, &ar->k_ar.ar_arg_upath2);
+ ARG_SET_VALID(ar, ARG_UPATH2);
+}
+
/*
* Variants on path auditing that do not canonicalise the path passed in;
* these are for use with filesystem-like subsystems that employ string names,
diff --git a/sys/security/audit/audit_bsm_klib.c b/sys/security/audit/audit_bsm_klib.c
index 3ce57d0af5c5..b10722a4e0e4 100644
--- a/sys/security/audit/audit_bsm_klib.c
+++ b/sys/security/audit/audit_bsm_klib.c
@@ -421,57 +421,23 @@ auditon_command_event(int cmd)
* leave the filename starting with '/' in the audit log in this case.
*/
void
-audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
+audit_canon_path_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir,
+ char *path, char *cpath)
{
- struct vnode *cvnp, *rvnp;
+ struct vnode *vp;
char *rbuf, *fbuf, *copy;
- struct filedesc *fdp;
struct sbuf sbf;
- cap_rights_t rights;
- int error, needslash;
+ int error;
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d",
__func__, __FILE__, __LINE__);
copy = path;
- rvnp = cvnp = NULL;
- fdp = td->td_proc->p_fd;
- FILEDESC_SLOCK(fdp);
- /*
- * Make sure that we handle the chroot(2) case. If there is an
- * alternate root directory, prepend it to the audited pathname.
- */
- if (fdp->fd_rdir != NULL && fdp->fd_rdir != rootvnode) {
- rvnp = fdp->fd_rdir;
- vhold(rvnp);
- }
- /*
- * If the supplied path is relative, make sure we capture the current
- * working directory so we can prepend it to the supplied relative
- * path.
- */
- if (*path != '/') {
- if (dirfd == AT_FDCWD) {
- cvnp = fdp->fd_cdir;
- vhold(cvnp);
- } else {
- /* XXX: fgetvp() that vhold()s vnode instead of vref()ing it would be better */
- error = fgetvp(td, dirfd, cap_rights_init(&rights), &cvnp);
- if (error) {
- FILEDESC_SUNLOCK(fdp);
- cpath[0] = '\0';
- if (rvnp != NULL)
- vdrop(rvnp);
- return;
- }
- vhold(cvnp);
- vrele(cvnp);
- }
- needslash = (fdp->fd_rdir != cvnp);
- } else {
- needslash = 1;
- }
- FILEDESC_SUNLOCK(fdp);
+ if (*path == '/')
+ vp = rdir;
+ else
+ vp = cdir;
+ MPASS(vp != NULL);
/*
* NB: We require that the supplied array be at least MAXPATHLEN bytes
* long. If this is not the case, then we can run into serious trouble.
@@ -479,6 +445,8 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
(void) sbuf_new(&sbf, cpath, MAXPATHLEN, SBUF_FIXEDLEN);
/*
* Strip leading forward slashes.
+ *
+ * Note this does nothing to fully canonicalize the path.
*/
while (*copy == '/')
copy++;
@@ -490,35 +458,25 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
* on Darwin. As a result, this may need some additional attention
* in the future.
*/
- if (rvnp != NULL) {
- error = vn_fullpath_global(td, rvnp, &rbuf, &fbuf);
- vdrop(rvnp);
- if (error) {
- cpath[0] = '\0';
- if (cvnp != NULL)
- vdrop(cvnp);
- return;
- }
- (void) sbuf_cat(&sbf, rbuf);
- free(fbuf, M_TEMP);
- }
- if (cvnp != NULL) {
- error = vn_fullpath(td, cvnp, &rbuf, &fbuf);
- vdrop(cvnp);
- if (error) {
- cpath[0] = '\0';
- return;
- }
- (void) sbuf_cat(&sbf, rbuf);
- free(fbuf, M_TEMP);
+ error = vn_fullpath_global(td, vp, &rbuf, &fbuf);
+ if (error) {
+ cpath[0] = '\0';
+ return;
}
- if (needslash)
+ (void) sbuf_cat(&sbf, rbuf);
+ /*
+ * We are going to concatenate the resolved path with the passed path
+ * with all slashes removed and we want them glued with a single slash.
+ * However, if the directory is /, the slash is already there.
+ */
+ if (rbuf[1] != '\0')
(void) sbuf_putc(&sbf, '/');
+ free(fbuf, M_TEMP);
/*
* Now that we have processed any alternate root and relative path
* names, add the supplied pathname.
*/
- (void) sbuf_cat(&sbf, copy);
+ (void) sbuf_cat(&sbf, copy);
/*
* One or more of the previous sbuf operations could have resulted in
* the supplied buffer being overflowed. Check to see if this is the
@@ -530,3 +488,43 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
}
sbuf_finish(&sbf);
}
+
+void
+audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath)
+{
+ struct vnode *cdir, *rdir;
+ struct filedesc *fdp;
+ cap_rights_t rights;
+ int error;
+
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d",
+ __func__, __FILE__, __LINE__);
+
+ rdir = cdir = NULL;
+ fdp = td->td_proc->p_fd;
+ FILEDESC_SLOCK(fdp);
+ if (*path == '/') {
+ rdir = fdp->fd_rdir;
+ vrefact(rdir);
+ } else {
+ if (dirfd == AT_FDCWD) {
+ cdir = fdp->fd_cdir;
+ vrefact(cdir);
+ } else {
+ error = fgetvp(td, dirfd, cap_rights_init(&rights), &cdir);
+ if (error != 0) {
+ FILEDESC_SUNLOCK(fdp);
+ cpath[0] = '\0';
+ return;
+ }
+ }
+ }
+ FILEDESC_SUNLOCK(fdp);
+
+ audit_canon_path_vp(td, rdir, cdir, path, cpath);
+
+ if (rdir != NULL)
+ vrele(rdir);
+ if (cdir != NULL)
+ vrele(cdir);
+}
diff --git a/sys/security/audit/audit_private.h b/sys/security/audit/audit_private.h
index 4aa811bc1516..890473722552 100644
--- a/sys/security/audit/audit_private.h
+++ b/sys/security/audit/audit_private.h
@@ -472,6 +472,8 @@ au_event_t audit_semsys_to_event(int which);
au_event_t audit_shmsys_to_event(int which);
void audit_canon_path(struct thread *td, int dirfd, char *path,
char *cpath);
+void audit_canon_path_vp(struct thread *td, struct vnode *rdir,
+ struct vnode *cdir, char *path, char *cpath);
au_event_t auditon_command_event(int cmd);
/*
diff --git a/sys/sys/_smr.h b/sys/sys/_smr.h
index 71658aa65e01..834d912783f5 100644
--- a/sys/sys/_smr.h
+++ b/sys/sys/_smr.h
@@ -32,6 +32,7 @@
#define _SYS__SMR_H_
typedef uint32_t smr_seq_t;
+typedef int32_t smr_delta_t;
typedef struct smr *smr_t;
#endif /* __SYS_SMR_H_ */
diff --git a/sys/sys/param.h b/sys/sys/param.h
index e10be84bd0f8..a690ad9ffdb3 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -60,7 +60,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1300080 /* Master, propagated to newvers */
+#define __FreeBSD_version 1300081 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/smr.h b/sys/sys/smr.h
index 378088f37cad..9e9cc3dec3a5 100644
--- a/sys/sys/smr.h
+++ b/sys/sys/smr.h
@@ -45,11 +45,13 @@
* Modular arithmetic for comparing sequence numbers that have
* potentially wrapped. Copied from tcp_seq.h.
*/
-#define SMR_SEQ_LT(a, b) ((int32_t)((a)-(b)) < 0)
-#define SMR_SEQ_LEQ(a, b) ((int32_t)((a)-(b)) <= 0)
-#define SMR_SEQ_GT(a, b) ((int32_t)((a)-(b)) > 0)
-#define SMR_SEQ_GEQ(a, b) ((int32_t)((a)-(b)) >= 0)
-#define SMR_SEQ_DELTA(a, b) ((int32_t)((a)-(b)))
+#define SMR_SEQ_LT(a, b) ((smr_delta_t)((a)-(b)) < 0)
+#define SMR_SEQ_LEQ(a, b) ((smr_delta_t)((a)-(b)) <= 0)
+#define SMR_SEQ_GT(a, b) ((smr_delta_t)((a)-(b)) > 0)
+#define SMR_SEQ_GEQ(a, b) ((smr_delta_t)((a)-(b)) >= 0)
+#define SMR_SEQ_DELTA(a, b) ((smr_delta_t)((a)-(b)))
+#define SMR_SEQ_MIN(a, b) (SMR_SEQ_LT((a), (b)) ? (a) : (b))
+#define SMR_SEQ_MAX(a, b) (SMR_SEQ_GT((a), (b)) ? (a) : (b))
#define SMR_SEQ_INVALID 0
@@ -66,8 +68,13 @@ struct smr {
smr_seq_t c_seq; /* Current observed sequence. */
smr_shared_t c_shared; /* Shared SMR state. */
int c_deferred; /* Deferred advance counter. */
+ int c_limit; /* Deferred advance limit. */
+ int c_flags; /* SMR Configuration */
};
+#define SMR_LAZY 0x0001 /* Higher latency write, fast read. */
+#define SMR_DEFERRED 0x0002 /* Aggregate updates to wr_seq. */
+
#define SMR_ENTERED(smr) \
(curthread->td_critnest != 0 && zpcpu_get((smr))->c_seq != SMR_SEQ_INVALID)
@@ -94,7 +101,7 @@ struct smr {
* All acceses include a parameter for an assert to verify the required
* synchronization. For example, a writer might use:
*
- * smr_serilized_store(pointer, value, mtx_owned(&writelock));
+ * smr_serialized_store(pointer, value, mtx_owned(&writelock));
*
* These are only enabled in INVARIANTS kernels.
*/
@@ -127,6 +134,9 @@ typedef struct { \
* Store 'v' to an SMR protected pointer while serialized by an
* external mechanism. 'ex' should contain an assert that the
* external mechanism is held. i.e. mtx_owned()
+ *
+ * Writers that are serialized with mutual exclusion or on a single
+ * thread should use smr_serialized_store() rather than swap.
*/
#define smr_serialized_store(p, v, ex) do { \
SMR_ASSERT(ex, "smr_serialized_store"); \
@@ -138,6 +148,8 @@ typedef struct { \
* swap 'v' with an SMR protected pointer and return the old value
* while serialized by an external mechanism. 'ex' should contain
* an assert that the external mechanism is provided. i.e. mtx_owned()
+ *
+ * Swap permits multiple writers to update a pointer concurrently.
*/
#define smr_serialized_swap(p, v, ex) ({ \
SMR_ASSERT(ex, "smr_serialized_swap"); \
@@ -170,7 +182,8 @@ typedef struct { \
} while (0)
/*
- * Return the current write sequence number.
+ * Return the current write sequence number. This is not the same as the
+ * current goal which may be in the future.
*/
static inline smr_seq_t
smr_shared_current(smr_shared_t s)
@@ -195,6 +208,8 @@ smr_enter(smr_t smr)
critical_enter();
smr = zpcpu_get(smr);
+ KASSERT((smr->c_flags & SMR_LAZY) == 0,
+ ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
KASSERT(smr->c_seq == 0,
("smr_enter(%s) does not support recursion.",
smr->c_shared->s_name));
@@ -228,6 +243,8 @@ smr_exit(smr_t smr)
smr = zpcpu_get(smr);
CRITICAL_ASSERT(curthread);
+ KASSERT((smr->c_flags & SMR_LAZY) == 0,
+ ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
KASSERT(smr->c_seq != SMR_SEQ_INVALID,
("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));
@@ -243,17 +260,61 @@ smr_exit(smr_t smr)
}
/*
- * Advances the write sequence number. Returns the sequence number
- * required to ensure that all modifications are visible to readers.
+ * Enter a lazy smr section. This is used for read-mostly state that
+ * can tolerate a high free latency.
*/
-smr_seq_t smr_advance(smr_t smr);
+static inline void
+smr_lazy_enter(smr_t smr)
+{
+
+ critical_enter();
+ smr = zpcpu_get(smr);
+ KASSERT((smr->c_flags & SMR_LAZY) != 0,
+ ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
+ KASSERT(smr->c_seq == 0,
+ ("smr_lazy_enter(%s) does not support recursion.",
+ smr->c_shared->s_name));
+
+ /*
+ * This needs no serialization. If an interrupt occurs before we
+ * assign wr_seq to c_seq any speculative loads will be discarded.
+ * If we assign a stale wr_seq value due to interrupt we use the
+ * same algorithm that renders smr_enter() safe.
+ */
+ smr->c_seq = smr_shared_current(smr->c_shared);
+}
/*
- * Advances the write sequence number only after N calls. Returns
- * the correct goal for a wr_seq that has not yet occurred. Used to
- * minimize shared cacheline invalidations for frequent writers.
+ * Exit a lazy smr section. This is used for read-mostly state that
+ * can tolerate a high free latency.
*/
-smr_seq_t smr_advance_deferred(smr_t smr, int limit);
+static inline void
+smr_lazy_exit(smr_t smr)
+{
+
+ smr = zpcpu_get(smr);
+ CRITICAL_ASSERT(curthread);
+ KASSERT((smr->c_flags & SMR_LAZY) != 0,
+ ("smr_lazy_exit(%s) non-lazy smr.", smr->c_shared->s_name));
+ KASSERT(smr->c_seq != SMR_SEQ_INVALID,
+ ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));
+
+ /*
+ * All loads/stores must be retired before the sequence becomes
+ * visible. The fence compiles away on amd64. Another
+ * alternative would be to omit the fence but store the exit
+ * time and wait 1 tick longer.
+ */
+ atomic_thread_fence_rel();
+ smr->c_seq = SMR_SEQ_INVALID;
+ critical_exit();
+}
+
+/*
+ * Advances the write sequence number. Returns the sequence number
+ * required to ensure that all modifications are visible to readers.
+ */
+smr_seq_t smr_advance(smr_t smr);
/*
* Returns true if a goal sequence has been reached. If
@@ -262,7 +323,9 @@ smr_seq_t smr_advance_deferred(smr_t smr, int limit);
bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);
/* Create a new SMR context. */
-smr_t smr_create(const char *name);
+smr_t smr_create(const char *name, int limit, int flags);
+
+/* Destroy the context. */
void smr_destroy(smr_t smr);
/*
diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c
index a077fd6f7c8a..1b72d85496a8 100644
--- a/sys/vm/uma_core.c
+++ b/sys/vm/uma_core.c
@@ -1140,7 +1140,6 @@ hash_free(struct uma_hash *hash)
* Returns:
* Nothing
*/
-
static void
bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
{
@@ -1200,7 +1199,7 @@ cache_drain(uma_zone_t zone)
*/
seq = SMR_SEQ_INVALID;
if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
- seq = smr_current(zone->uz_smr);
+ seq = smr_advance(zone->uz_smr);
CPU_FOREACH(cpu) {
cache = &zone->uz_cpu[cpu];
bucket = cache_bucket_unload_alloc(cache);
@@ -1329,7 +1328,7 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain)
* the item count. Reclaim it individually here.
*/
zdom = ZDOM_GET(zone, i);
- if ((zone->uz_flags & UMA_ZONE_SMR) == 0) {
+ if ((zone->uz_flags & UMA_ZONE_SMR) == 0 || drain) {
ZONE_CROSS_LOCK(zone);
bucket = zdom->uzd_cross;
zdom->uzd_cross = NULL;
@@ -2679,7 +2678,7 @@ out:
/* Caller requests a private SMR context. */
if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
- zone->uz_smr = smr_create(zone->uz_name);
+ zone->uz_smr = smr_create(zone->uz_name, 0, 0);
KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) !=
(UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET),
@@ -4137,22 +4136,21 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
"uma_zfree: zone %s(%p) draining cross bucket %p",
zone->uz_name, zone, bucket);
- STAILQ_INIT(&fullbuckets);
+ /*
+ * It is possible for buckets to arrive here out of order so we fetch
+ * the current smr seq rather than accepting the bucket's.
+ */
+ seq = SMR_SEQ_INVALID;
+ if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
+ seq = smr_advance(zone->uz_smr);
/*
* To avoid having ndomain * ndomain buckets for sorting we have a
* lock on the current crossfree bucket. A full matrix with
* per-domain locking could be used if necessary.
*/
+ STAILQ_INIT(&fullbuckets);
ZONE_CROSS_LOCK(zone);
-
- /*
- * It is possible for buckets to arrive here out of order so we fetch
- * the current smr seq rather than accepting the bucket's.
- */
- seq = SMR_SEQ_INVALID;
- if ((zone->uz_flags & UMA_ZONE_SMR) != 0)
- seq = smr_current(zone->uz_smr);
while (bucket->ub_cnt > 0) {
item = bucket->ub_bucket[bucket->ub_cnt - 1];
domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c
index 8c554c6f65d5..c05020d7dba4 100644
--- a/sys/x86/x86/identcpu.c
+++ b/sys/x86/x86/identcpu.c
@@ -284,7 +284,7 @@ printcpuinfo(void)
switch (cpu_id & 0xf00) {
case 0x400:
strcat(cpu_model, "i486 ");
- /* Check the particular flavor of 486 */
+ /* Check the particular flavor of 486 */
switch (cpu_id & 0xf0) {
case 0x00:
case 0x10:
@@ -312,32 +312,32 @@ printcpuinfo(void)
}
break;
case 0x500:
- /* Check the particular flavor of 586 */
- strcat(cpu_model, "Pentium");
- switch (cpu_id & 0xf0) {
+ /* Check the particular flavor of 586 */
+ strcat(cpu_model, "Pentium");
+ switch (cpu_id & 0xf0) {
case 0x00:
- strcat(cpu_model, " A-step");
+ strcat(cpu_model, " A-step");
break;
case 0x10:
- strcat(cpu_model, "/P5");
+ strcat(cpu_model, "/P5");
break;
case 0x20:
- strcat(cpu_model, "/P54C");
+ strcat(cpu_model, "/P54C");
break;
case 0x30:
- strcat(cpu_model, "/P24T");
+ strcat(cpu_model, "/P24T");
break;
case 0x40:
- strcat(cpu_model, "/P55C");
+ strcat(cpu_model, "/P55C");
break;
case 0x70:
- strcat(cpu_model, "/P54C");
+ strcat(cpu_model, "/P54C");
break;
case 0x80:
- strcat(cpu_model, "/P55C (quarter-micron)");
+ strcat(cpu_model, "/P55C (quarter-micron)");
break;
default:
- /* nothing */
+ /* nothing */
break;
}
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
@@ -350,18 +350,18 @@ printcpuinfo(void)
#endif
break;
case 0x600:
- /* Check the particular flavor of 686 */
- switch (cpu_id & 0xf0) {
+ /* Check the particular flavor of 686 */
+ switch (cpu_id & 0xf0) {
case 0x00:
- strcat(cpu_model, "Pentium Pro A-step");
+ strcat(cpu_model, "Pentium Pro A-step");
break;
case 0x10:
- strcat(cpu_model, "Pentium Pro");
+ strcat(cpu_model, "Pentium Pro");
break;
case 0x30:
case 0x50:
case 0x60:
- strcat(cpu_model,
+ strcat(cpu_model,
"Pentium II/Pentium II Xeon/Celeron");
cpu = CPU_PII;
break;
@@ -369,12 +369,12 @@ printcpuinfo(void)
case 0x80:
case 0xa0:
case 0xb0:
- strcat(cpu_model,
+ strcat(cpu_model,
"Pentium III/Pentium III Xeon/Celeron");
cpu = CPU_PIII;
break;
default:
- strcat(cpu_model, "Unknown 80686");
+ strcat(cpu_model, "Unknown 80686");
break;
}
break;
@@ -1411,7 +1411,7 @@ identify_hypervisor_cpuid_base(void)
if (regs[0] == 0 && regs[1] == 0x4b4d564b &&
regs[2] == 0x564b4d56 && regs[3] == 0x0000004d)
regs[0] = leaf + 1;
-
+
if (regs[0] >= leaf) {
for (i = 0; i < nitems(vm_cpuids); i++)
if (strncmp((const char *)&regs[1],
@@ -1471,7 +1471,7 @@ identify_hypervisor(void)
if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) {
vmware_hvcall(VMW_HVCMD_GETVERSION, regs);
if (regs[1] == VMW_HVMAGIC) {
- vm_guest = VM_GUEST_VMWARE;
+ vm_guest = VM_GUEST_VMWARE;
freeenv(p);
return;
}
@@ -2341,23 +2341,23 @@ print_svm_info(void)
comma = 0;
if (features & (1 << 0)) {
printf("%sNP", comma ? "," : "");
- comma = 1;
+ comma = 1;
}
if (features & (1 << 3)) {
printf("%sNRIP", comma ? "," : "");
- comma = 1;
+ comma = 1;
}
if (features & (1 << 5)) {
printf("%sVClean", comma ? "," : "");
- comma = 1;
+ comma = 1;
}
if (features & (1 << 6)) {
printf("%sAFlush", comma ? "," : "");
- comma = 1;
+ comma = 1;
}
if (features & (1 << 7)) {
printf("%sDAssist", comma ? "," : "");
- comma = 1;
+ comma = 1;
}
printf("%sNAsids=%d", comma ? "," : "", regs[1]);
return;
@@ -2375,7 +2375,7 @@ print_svm_info(void)
"\010DecodeAssist" /* Decode assist */
"\011<b8>"
"\012<b9>"
- "\013PauseFilter" /* PAUSE intercept filter */
+ "\013PauseFilter" /* PAUSE intercept filter */
"\014EncryptedMcodePatch"
"\015PauseFilterThreshold" /* PAUSE filter threshold */
"\016AVIC" /* virtual interrupt controller */
@@ -2385,7 +2385,7 @@ print_svm_info(void)
"\022GMET" /* Guest Mode Execute Trap */
"\023<b18>"
"\024<b19>"
- "\025<b20>"
+ "\025GuesSpecCtl" /* Guest Spec_ctl */
"\026<b21>"
"\027<b22>"
"\030<b23>"
@@ -2397,7 +2397,7 @@ print_svm_info(void)
"\036<b29>"
"\037<b30>"
"\040<b31>"
- );
+ );
printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]);
}
diff --git a/tools/bsdbox/Makefile.base b/tools/bsdbox/Makefile.base
index 027494905519..aac44480e2ca 100644
--- a/tools/bsdbox/Makefile.base
+++ b/tools/bsdbox/Makefile.base
@@ -20,6 +20,7 @@ CRUNCH_LIBS+= -ldevstat -lncursesw -lncurses -lmemstat -lkvm -lelf
CRUNCH_PROGS_usr.bin+= cpio
# XXX SSL ?
CRUNCH_LIBS+= -larchive -lbz2 -lz -llzma -lbsdxml -lssl -lcrypto
+CRUNCH_LIBS+= -lprivatezstd -lthr
# Clear requires tput, and it's a shell script so it won't be crunched
CRUNCH_PROGS_usr.bin+= tput
diff --git a/usr.bin/dtc/dtc.cc b/usr.bin/dtc/dtc.cc
index 0cda698011ef..14d3685ba9d4 100644
--- a/usr.bin/dtc/dtc.cc
+++ b/usr.bin/dtc/dtc.cc
@@ -304,7 +304,10 @@ main(int argc, char **argv)
}
break;
default:
- fprintf(stderr, "Unknown option %c\n", ch);
+ /*
+ * Since opterr is non-zero, getopt will have
+ * already printed an error message.
+ */
return EXIT_FAILURE;
}
}
diff --git a/usr.sbin/bhyve/iov.c b/usr.sbin/bhyve/iov.c
index 54ea22aa9498..af36cb056229 100644
--- a/usr.sbin/bhyve/iov.c
+++ b/usr.sbin/bhyve/iov.c
@@ -119,24 +119,25 @@ iov_to_buf(const struct iovec *iov, int niov, void **buf)
}
ssize_t
-buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov,
+buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov,
size_t seek)
{
struct iovec *diov;
- int ndiov, i;
size_t off = 0, len;
+ int i;
if (seek > 0) {
+ int ndiov;
+
diov = malloc(sizeof(struct iovec) * niov);
seek_iov(iov, niov, diov, &ndiov, seek);
- } else {
- diov = iov;
- ndiov = niov;
+ iov = diov;
+ niov = ndiov;
}
- for (i = 0; i < ndiov && off < buflen; i++) {
- len = MIN(diov[i].iov_len, buflen - off);
- memcpy(diov[i].iov_base, buf + off, len);
+ for (i = 0; i < niov && off < buflen; i++) {
+ len = MIN(iov[i].iov_len, buflen - off);
+ memcpy(iov[i].iov_base, buf + off, len);
off += len;
}
diff --git a/usr.sbin/bhyve/iov.h b/usr.sbin/bhyve/iov.h
index e3b5916edb10..f46b04b71eb5 100644
--- a/usr.sbin/bhyve/iov.h
+++ b/usr.sbin/bhyve/iov.h
@@ -38,7 +38,7 @@ void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2,
void truncate_iov(struct iovec *iov, int *niov, size_t length);
size_t count_iov(const struct iovec *iov, int niov);
ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf);
-ssize_t buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov,
- size_t seek);
+ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov,
+ int niov, size_t seek);
#endif /* _IOV_H_ */
diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c
index dcb5a27aa4a4..205c0cf535c4 100644
--- a/usr.sbin/bhyve/net_backends.c
+++ b/usr.sbin/bhyve/net_backends.c
@@ -103,6 +103,13 @@ struct net_backend {
int iovcnt);
/*
+ * Get the length of the next packet that can be received from
+ * the backend. If no packets are currently available, this
+ * function returns 0.
+ */
+ ssize_t (*peek_recvlen)(struct net_backend *be);
+
+ /*
* Called to receive a packet from the backend. When the function
* returns a positive value 'len', the scatter-gather vector
* provided by the caller contains a packet with such length.
@@ -167,6 +174,13 @@ SET_DECLARE(net_backend_set, struct net_backend);
struct tap_priv {
struct mevent *mevp;
+ /*
+ * A bounce buffer that allows us to implement the peek_recvlen
+ * callback. In the future we may get the same information from
+ * the kevent data.
+ */
+ char bbuf[1 << 16];
+ ssize_t bbuflen;
};
static void
@@ -223,6 +237,9 @@ tap_init(struct net_backend *be, const char *devname,
errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif
+ memset(priv->bbuf, 0, sizeof(priv->bbuf));
+ priv->bbuflen = 0;
+
priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
if (priv->mevp == NULL) {
WPRINTF(("Could not register event"));
@@ -246,15 +263,56 @@ tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
}
static ssize_t
+tap_peek_recvlen(struct net_backend *be)
+{
+ struct tap_priv *priv = (struct tap_priv *)be->opaque;
+ ssize_t ret;
+
+ if (priv->bbuflen > 0) {
+ /*
+ * We already have a packet in the bounce buffer.
+ * Just return its length.
+ */
+ return priv->bbuflen;
+ }
+
+ /*
+ * Read the next packet (if any) into the bounce buffer, so
+ * that we get to know its length and we can return that
+ * to the caller.
+ */
+ ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
+ if (ret < 0 && errno == EWOULDBLOCK) {
+ return (0);
+ }
+
+ if (ret > 0)
+ priv->bbuflen = ret;
+
+ return (ret);
+}
+
+static ssize_t
tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
+ struct tap_priv *priv = (struct tap_priv *)be->opaque;
ssize_t ret;
- /* Should never be called without a valid tap fd */
- assert(be->fd != -1);
+ if (priv->bbuflen > 0) {
+ /*
+ * A packet is available in the bounce buffer, so
+ * we read it from there.
+ */
+ ret = buf_to_iov(priv->bbuf, priv->bbuflen,
+ iov, iovcnt, 0);
- ret = readv(be->fd, iov, iovcnt);
+ /* Mark the bounce buffer as empty. */
+ priv->bbuflen = 0;
+ return (ret);
+ }
+
+ ret = readv(be->fd, iov, iovcnt);
if (ret < 0 && errno == EWOULDBLOCK) {
return (0);
}
@@ -299,6 +357,7 @@ static struct net_backend tap_backend = {
.init = tap_init,
.cleanup = tap_cleanup,
.send = tap_send,
+ .peek_recvlen = tap_peek_recvlen,
.recv = tap_recv,
.recv_enable = tap_recv_enable,
.recv_disable = tap_recv_disable,
@@ -313,6 +372,7 @@ static struct net_backend vmnet_backend = {
.init = tap_init,
.cleanup = tap_cleanup,
.send = tap_send,
+ .peek_recvlen = tap_peek_recvlen,
.recv = tap_recv,
.recv_enable = tap_recv_enable,
.recv_disable = tap_recv_disable,
@@ -331,8 +391,7 @@ DATA_SET(net_backend_set, vmnet_backend);
#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
- VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \
- VIRTIO_NET_F_MRG_RXBUF)
+ VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
struct netmap_priv {
char ifname[IFNAMSIZ];
@@ -540,6 +599,26 @@ txsync:
}
static ssize_t
+netmap_peek_recvlen(struct net_backend *be)
+{
+ struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
+ struct netmap_ring *ring = priv->rx;
+ uint32_t head = ring->head;
+ ssize_t totlen = 0;
+
+ while (head != ring->tail) {
+ struct netmap_slot *slot = ring->slot + head;
+
+ totlen += slot->len;
+ if ((slot->flags & NS_MOREFRAG) == 0)
+ break;
+ head = nm_ring_next(ring, head);
+ }
+
+ return (totlen);
+}
+
+static ssize_t
netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
@@ -628,6 +707,7 @@ static struct net_backend netmap_backend = {
.init = netmap_init,
.cleanup = netmap_cleanup,
.send = netmap_send,
+ .peek_recvlen = netmap_peek_recvlen,
.recv = netmap_recv,
.recv_enable = netmap_recv_enable,
.recv_disable = netmap_recv_disable,
@@ -642,6 +722,7 @@ static struct net_backend vale_backend = {
.init = netmap_init,
.cleanup = netmap_cleanup,
.send = netmap_send,
+ .peek_recvlen = netmap_peek_recvlen,
.recv = netmap_recv,
.recv_enable = netmap_recv_enable,
.recv_disable = netmap_recv_disable,
@@ -758,6 +839,13 @@ netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
return (be->send(be, iov, iovcnt));
}
+ssize_t
+netbe_peek_recvlen(struct net_backend *be)
+{
+
+ return (be->peek_recvlen(be));
+}
+
/*
* Try to read a packet from the backend, without blocking.
* If no packets are available, return 0. In case of success, return
diff --git a/usr.sbin/bhyve/net_backends.h b/usr.sbin/bhyve/net_backends.h
index de80692f1487..25c1468622dd 100644
--- a/usr.sbin/bhyve/net_backends.h
+++ b/usr.sbin/bhyve/net_backends.h
@@ -45,6 +45,7 @@ int netbe_set_cap(net_backend_t *be, uint64_t cap,
unsigned vnet_hdr_len);
size_t netbe_get_vnet_hdr_len(net_backend_t *be);
ssize_t netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt);
+ssize_t netbe_peek_recvlen(net_backend_t *be);
ssize_t netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt);
ssize_t netbe_rx_discard(net_backend_t *be);
void netbe_rx_disable(net_backend_t *be);
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
index eb35d088d568..52893f283919 100644
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -228,22 +228,34 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
struct iovec iov[VTNET_MAXSEGS + 1];
struct vqueue_info *vq;
- uint32_t riov_bytes;
- struct iovec *riov;
- int riov_len;
- uint32_t ulen;
- int n_chains;
- int len;
vq = &sc->vsc_queues[VTNET_RXQ];
for (;;) {
struct virtio_net_rxhdr *hdr;
+ uint32_t riov_bytes;
+ struct iovec *riov;
+ uint32_t ulen;
+ int riov_len;
+ int n_chains;
+ ssize_t rlen;
+ ssize_t plen;
+
+ plen = netbe_peek_recvlen(sc->vsc_be);
+ if (plen <= 0) {
+ /*
+ * No more packets (plen == 0), or backend errored
+ * (plen < 0). Interrupt if needed and stop.
+ */
+ vq_endchains(vq, /*used_all_avail=*/0);
+ return;
+ }
+ plen += prepend_hdr_len;
/*
* Get a descriptor chain to store the next ingress
* packet. In case of mergeable rx buffers, get as
* many chains as necessary in order to make room
- * for a maximum sized LRO packet.
+ * for plen bytes.
*/
riov_bytes = 0;
riov_len = 0;
@@ -287,8 +299,7 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
riov_bytes += info[n_chains].len;
riov += n;
n_chains++;
- } while (riov_bytes < VTNET_MAX_PKT_LEN &&
- riov_len < VTNET_MAXSEGS);
+ } while (riov_bytes < plen && riov_len < VTNET_MAXSEGS);
riov = iov;
hdr = riov[0].iov_base;
@@ -312,21 +323,20 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
memset(hdr, 0, prepend_hdr_len);
}
- len = netbe_recv(sc->vsc_be, riov, riov_len);
-
- if (len <= 0) {
+ rlen = netbe_recv(sc->vsc_be, riov, riov_len);
+ if (rlen != plen - prepend_hdr_len) {
/*
- * No more packets (len == 0), or backend errored
- * (err < 0). Return unused available buffers
- * and stop.
+ * If this happens it means there is something
+ * wrong with the backend (e.g., some other
+ * process is stealing our packets).
*/
+ WPRINTF(("netbe_recv: expected %zd bytes, "
+ "got %zd", plen - prepend_hdr_len, rlen));
vq_retchains(vq, n_chains);
- /* Interrupt if needed/appropriate and stop. */
- vq_endchains(vq, /*used_all_avail=*/0);
- return;
+ continue;
}
- ulen = (uint32_t)(len + prepend_hdr_len);
+ ulen = (uint32_t)plen;
/*
* Publish the used buffers to the guest, reporting the
@@ -346,12 +356,11 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
vq_relchain_prepare(vq, info[i].idx, iolen);
ulen -= iolen;
i++;
- assert(i <= n_chains);
} while (ulen > 0);
hdr->vrh_bufs = i;
vq_relchain_publish(vq);
- vq_retchains(vq, n_chains - i);
+ assert(i == n_chains);
}
}
@@ -592,7 +601,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
free(sc);
return (err);
}
- sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be);
+ sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF |
+ netbe_get_cap(sc->vsc_be);
}
if (!mac_provided) {
diff --git a/usr.sbin/iostat/iostat.c b/usr.sbin/iostat/iostat.c
index dbe7219095e8..4cbfcfcbcbd5 100644
--- a/usr.sbin/iostat/iostat.c
+++ b/usr.sbin/iostat/iostat.c
@@ -929,7 +929,7 @@ devstats(int perf_select, long double etime, int havelast)
}
free(devicename);
} else if (oflag > 0) {
- int msdig = (ms_per_transaction < 100.0) ? 1 : 0;
+ int msdig = (ms_per_transaction < 99.94) ? 1 : 0;
if (Iflag == 0)
printf("%4.0Lf%4.0Lf%5.*Lf ",
diff --git a/usr.sbin/pstat/pstat.c b/usr.sbin/pstat/pstat.c
index c704f1599fff..923357b2c24d 100644
--- a/usr.sbin/pstat/pstat.c
+++ b/usr.sbin/pstat/pstat.c
@@ -95,6 +95,8 @@ static struct {
#define NNAMES (sizeof(namelist) / sizeof(*namelist))
static struct nlist nl[NNAMES];
+#define SIZEHDR "Size"
+
static int humanflag;
static int usenumflag;
static int totalflag;
@@ -471,7 +473,12 @@ print_swap_header(void)
long blocksize;
const char *header;
- header = getbsize(&hlen, &blocksize);
+ if (humanflag) {
+ header = SIZEHDR;
+ hlen = sizeof(SIZEHDR);
+ } else {
+ header = getbsize(&hlen, &blocksize);
+ }
if (totalflag == 0)
(void)printf("%-15s %*s %8s %8s %8s\n",
"Device", hlen, header,
@@ -484,23 +491,30 @@ print_swap_line(const char *swdevname, intmax_t nblks, intmax_t bused,
{
char usedbuf[5];
char availbuf[5];
+ char sizebuf[5];
int hlen, pagesize;
long blocksize;
pagesize = getpagesize();
getbsize(&hlen, &blocksize);
- printf("%-15s %*jd ", swdevname, hlen, CONVERT(nblks));
+ printf("%-15s ", swdevname);
if (humanflag) {
+ humanize_number(sizebuf, sizeof(sizebuf),
+ CONVERT_BLOCKS(nblks), "",
+ HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
humanize_number(usedbuf, sizeof(usedbuf),
CONVERT_BLOCKS(bused), "",
HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
humanize_number(availbuf, sizeof(availbuf),
CONVERT_BLOCKS(bavail), "",
HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
- printf("%8s %8s %5.0f%%\n", usedbuf, availbuf, bpercent);
+ printf("%8s %8s %8s %5.0f%%\n", sizebuf,
+ usedbuf, availbuf, bpercent);
} else {
- printf("%8jd %8jd %5.0f%%\n", (intmax_t)CONVERT(bused),
+ printf("%*jd %8jd %8jd %5.0f%%\n", hlen,
+ (intmax_t)CONVERT(nblks),
+ (intmax_t)CONVERT(bused),
(intmax_t)CONVERT(bavail), bpercent);
}
}