53 files changed, 1158 insertions, 573 deletions
@@ -33,7 +33,7 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 13.x IS SLOW: using clang 3.5.0 or higher. 20200220: - ncurses has been updated to a newer version (6.1-20200118). Given the ABI + ncurses has been updated to a newer version (6.2-20200215). Given the ABI has changed, users will have to rebuild all the ports that are linked to ncurses. diff --git a/bin/sh/miscbltin.c b/bin/sh/miscbltin.c index ad3d862fb6ef..a49dd05d109f 100644 --- a/bin/sh/miscbltin.c +++ b/bin/sh/miscbltin.c @@ -117,7 +117,7 @@ fdgetc(struct fdctx *fdc, char *c) static void fdctx_destroy(struct fdctx *fdc) { - size_t residue; + off_t residue; if (fdc->buflen > 1) { /* diff --git a/lib/libc/sys/truncate.2 b/lib/libc/sys/truncate.2 index dfbe006965b2..a6ec8f44f44e 100644 --- a/lib/libc/sys/truncate.2 +++ b/lib/libc/sys/truncate.2 @@ -28,7 +28,7 @@ .\" @(#)truncate.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd May 4, 2015 +.Dd January 24, 2020 .Dt TRUNCATE 2 .Os .Sh NAME @@ -160,6 +160,9 @@ system calls appeared in These calls should be generalized to allow ranges of bytes in a file to be discarded. .Pp -Use of +Historically, the use of .Fn truncate -to extend a file is not portable. +or +.Fn ftruncate +to extend a file was not portable, but this behavior became required in +.St -p1003.1-2008 . diff --git a/lib/libfetch/common.c b/lib/libfetch/common.c index f6c026049f5c..ae119de32736 100644 --- a/lib/libfetch/common.c +++ b/lib/libfetch/common.c @@ -677,6 +677,7 @@ fetch_connect(const char *host, int port, int af, int verbose) if (sockshost) if (!fetch_socks5_init(conn, host, port, verbose)) goto fail; + free(sockshost); if (cais != NULL) freeaddrinfo(cais); if (sais != NULL) @@ -686,7 +687,10 @@ syserr: fetch_syserr(); fail: free(sockshost); - if (sd >= 0) + /* Fully close if it was opened; otherwise just don't leak the fd. 
*/ + if (conn != NULL) + fetch_close(conn); + else if (sd >= 0) close(sd); if (cais != NULL) freeaddrinfo(cais); diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 9c889f61cd45..25164bea55d4 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -342,7 +342,7 @@ struct da_softc { LIST_HEAD(, ccb_hdr) pending_ccbs; int refcount; /* Active xpt_action() calls */ da_state state; - da_flags flags; + u_int flags; da_quirks quirks; int minimum_cmd_size; int error_inject; @@ -2335,11 +2335,11 @@ dasysctlinit(void *context, int pending) "Flags for drive"); SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "rotating", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, - &softc->flags, DA_FLAG_ROTATING, dabitsysctl, "I", + &softc->flags, (u_int)DA_FLAG_ROTATING, dabitsysctl, "I", "Rotating media *DEPRECATED* gone in FreeBSD 14"); SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), OID_AUTO, "unmapped_io", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, - &softc->flags, DA_FLAG_UNMAPPEDIO, dabitsysctl, "I", + &softc->flags, (u_int)DA_FLAG_UNMAPPEDIO, dabitsysctl, "I", "Unmapped I/O support *DEPRECATED* gone in FreeBSD 14"); #ifdef CAM_TEST_FAILURE @@ -2619,11 +2619,11 @@ dadeletemethodchoose(struct da_softc *softc, da_delete_methods default_method) static int dabitsysctl(SYSCTL_HANDLER_ARGS) { - int flags = (intptr_t)arg1; - int test = arg2; + u_int *flags = arg1; + u_int test = arg2; int tmpout, error; - tmpout = !!(flags & test); + tmpout = !!(*flags & test); error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); if (error || !req->newptr) return (error); diff --git a/sys/compat/linuxkpi/common/include/linux/fs.h b/sys/compat/linuxkpi/common/include/linux/fs.h index f68febf36fd4..7f5993bd7754 100644 --- a/sys/compat/linuxkpi/common/include/linux/fs.h +++ b/sys/compat/linuxkpi/common/include/linux/fs.h @@ -302,25 +302,4 @@ call_mmap(struct linux_file *file, struct vm_area_struct *vma) return (file->f_op->mmap(file, vma)); } -/* Shared memory support */ -unsigned long linux_invalidate_mapping_pages(vm_object_t, pgoff_t, pgoff_t); -struct page *linux_shmem_read_mapping_page_gfp(vm_object_t, int, gfp_t); -struct linux_file *linux_shmem_file_setup(const char *, loff_t, unsigned long); -void linux_shmem_truncate_range(vm_object_t, loff_t, loff_t); - -#define invalidate_mapping_pages(...) \ - linux_invalidate_mapping_pages(__VA_ARGS__) - -#define shmem_read_mapping_page(...) \ - linux_shmem_read_mapping_page_gfp(__VA_ARGS__, 0) - -#define shmem_read_mapping_page_gfp(...) \ - linux_shmem_read_mapping_page_gfp(__VA_ARGS__) - -#define shmem_file_setup(...) \ - linux_shmem_file_setup(__VA_ARGS__) - -#define shmem_truncate_range(...) \ - linux_shmem_truncate_range(__VA_ARGS__) - #endif /* _LINUX_FS_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/shmem_fs.h b/sys/compat/linuxkpi/common/include/linux/shmem_fs.h new file mode 100644 index 000000000000..63aff012c6bb --- /dev/null +++ b/sys/compat/linuxkpi/common/include/linux/shmem_fs.h @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2010 Isilon Systems, Inc. + * Copyright (c) 2010 iX Systems, Inc. + * Copyright (c) 2010 Panasas, Inc. + * Copyright (c) 2013-2018 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _LINUX_SHMEM_FS_H_ +#define _LINUX_SHMEM_FS_H_ + +/* Shared memory support */ +unsigned long linux_invalidate_mapping_pages(vm_object_t, pgoff_t, pgoff_t); +struct page *linux_shmem_read_mapping_page_gfp(vm_object_t, int, gfp_t); +struct linux_file *linux_shmem_file_setup(const char *, loff_t, unsigned long); +void linux_shmem_truncate_range(vm_object_t, loff_t, loff_t); + +#define invalidate_mapping_pages(...) \ + linux_invalidate_mapping_pages(__VA_ARGS__) + +#define shmem_read_mapping_page(...) \ + linux_shmem_read_mapping_page_gfp(__VA_ARGS__, 0) + +#define shmem_read_mapping_page_gfp(...) \ + linux_shmem_read_mapping_page_gfp(__VA_ARGS__) + +#define shmem_file_setup(...) \ + linux_shmem_file_setup(__VA_ARGS__) + +#define shmem_truncate_range(...) 
\ + linux_shmem_truncate_range(__VA_ARGS__) + +#endif /* _LINUX_SHMEM_FS_H_ */ diff --git a/sys/compat/linuxkpi/common/src/linux_page.c b/sys/compat/linuxkpi/common/src/linux_page.c index ac8b1a2781aa..a6133a93b543 100644 --- a/sys/compat/linuxkpi/common/src/linux_page.c +++ b/sys/compat/linuxkpi/common/src/linux_page.c @@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$"); #include <linux/mm.h> #include <linux/preempt.h> #include <linux/fs.h> +#include <linux/shmem_fs.h> void si_meminfo(struct sysinfo *si) @@ -275,86 +276,3 @@ is_vmalloc_addr(const void *addr) { return (vtoslab((vm_offset_t)addr & ~UMA_SLAB_MASK) != NULL); } - -struct page * -linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp) -{ - vm_page_t page; - int rv; - - if ((gfp & GFP_NOWAIT) != 0) - panic("GFP_NOWAIT is unimplemented"); - - VM_OBJECT_WLOCK(obj); - rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL | - VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); - VM_OBJECT_WUNLOCK(obj); - if (rv != VM_PAGER_OK) - return (ERR_PTR(-EINVAL)); - return (page); -} - -struct linux_file * -linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags) -{ - struct fileobj { - struct linux_file file __aligned(sizeof(void *)); - struct vnode vnode __aligned(sizeof(void *)); - }; - struct fileobj *fileobj; - struct linux_file *filp; - struct vnode *vp; - int error; - - fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL); - if (fileobj == NULL) { - error = -ENOMEM; - goto err_0; - } - filp = &fileobj->file; - vp = &fileobj->vnode; - - filp->f_count = 1; - filp->f_vnode = vp; - filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size, - VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred); - if (filp->f_shmem == NULL) { - error = -ENOMEM; - goto err_1; - } - return (filp); -err_1: - kfree(filp); -err_0: - return (ERR_PTR(error)); -} - -static vm_ooffset_t -linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start, - vm_pindex_t end, int flags) -{ - int start_count, end_count; - - VM_OBJECT_WLOCK(obj); - start_count = obj->resident_page_count; - vm_object_page_remove(obj, start, end, flags); - end_count = obj->resident_page_count; - VM_OBJECT_WUNLOCK(obj); - return (start_count - end_count); -} - -unsigned long -linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end) -{ - - return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY)); -} - -void -linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend) -{ - vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1); - vm_pindex_t end = OFF_TO_IDX(lend + 1); - - (void) linux_invalidate_mapping_pages_sub(obj, start, end, 0); -} diff --git a/sys/compat/linuxkpi/common/src/linux_shmemfs.c b/sys/compat/linuxkpi/common/src/linux_shmemfs.c new file mode 100644 index 000000000000..ead9cc9d9f40 --- /dev/null +++ b/sys/compat/linuxkpi/common/src/linux_shmemfs.c @@ -0,0 +1,128 @@ +/*- + * Copyright (c) 2010 Isilon Systems, Inc. + * Copyright (c) 2016 Matthew Macy (mmacy@mattmacy.io) + * Copyright (c) 2017 Mellanox Technologies, Ltd. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/rwlock.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_object.h> +#include <vm/vm_map.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/shmem_fs.h> + +struct page * +linux_shmem_read_mapping_page_gfp(vm_object_t obj, int pindex, gfp_t gfp) +{ + vm_page_t page; + int rv; + + if ((gfp & GFP_NOWAIT) != 0) + panic("GFP_NOWAIT is unimplemented"); + + VM_OBJECT_WLOCK(obj); + rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL | + VM_ALLOC_NOBUSY | VM_ALLOC_WIRED); + VM_OBJECT_WUNLOCK(obj); + if (rv != VM_PAGER_OK) + return (ERR_PTR(-EINVAL)); + return (page); +} + +struct linux_file * +linux_shmem_file_setup(const char *name, loff_t size, unsigned long flags) +{ + struct fileobj { + struct linux_file file __aligned(sizeof(void *)); + struct vnode vnode __aligned(sizeof(void *)); + }; + struct fileobj *fileobj; + struct linux_file *filp; + struct vnode *vp; + int error; + + fileobj = kzalloc(sizeof(*fileobj), GFP_KERNEL); + if (fileobj == NULL) { + error = -ENOMEM; + goto err_0; + } + filp = &fileobj->file; + vp = &fileobj->vnode; + + filp->f_count = 1; + filp->f_vnode = vp; + filp->f_shmem = vm_pager_allocate(OBJT_DEFAULT, NULL, size, + VM_PROT_READ | VM_PROT_WRITE, 0, curthread->td_ucred); + if (filp->f_shmem == NULL) { + error = -ENOMEM; + goto err_1; + } + return (filp); +err_1: + kfree(filp); +err_0: + return (ERR_PTR(error)); +} + +static vm_ooffset_t +linux_invalidate_mapping_pages_sub(vm_object_t obj, vm_pindex_t start, + vm_pindex_t end, int flags) +{ + int start_count, end_count; + + VM_OBJECT_WLOCK(obj); + start_count = obj->resident_page_count; + vm_object_page_remove(obj, start, end, flags); + end_count = obj->resident_page_count; + VM_OBJECT_WUNLOCK(obj); + return (start_count - end_count); +} + +unsigned long +linux_invalidate_mapping_pages(vm_object_t obj, pgoff_t start, pgoff_t end) +{ + + return (linux_invalidate_mapping_pages_sub(obj, start, end, OBJPR_CLEANONLY)); +} + +void +linux_shmem_truncate_range(vm_object_t obj, loff_t lstart, loff_t lend) +{ + vm_pindex_t start = OFF_TO_IDX(lstart + PAGE_SIZE - 1); + vm_pindex_t end = OFF_TO_IDX(lend + 1); + + (void) linux_invalidate_mapping_pages_sub(obj, start, end, 0); +} diff --git a/sys/conf/files b/sys/conf/files index 9bb3b2a62565..b99b5838891f 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4476,6 +4476,8 @@ compat/linuxkpi/common/src/linux_rcu.c optional compat_linuxkpi \ 
compile-with "${LINUXKPI_C} -I$S/contrib/ck/include" compat/linuxkpi/common/src/linux_schedule.c optional compat_linuxkpi \ compile-with "${LINUXKPI_C}" +compat/linuxkpi/common/src/linux_shmemfs.c optional compat_linuxkpi \ + compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_slab.c optional compat_linuxkpi \ compile-with "${LINUXKPI_C}" compat/linuxkpi/common/src/linux_usb.c optional compat_linuxkpi usb \ diff --git a/sys/dev/acpica/acpi_lid.c b/sys/dev/acpica/acpi_lid.c index 80bc344d606d..5558b0f437e3 100644 --- a/sys/dev/acpica/acpi_lid.c +++ b/sys/dev/acpica/acpi_lid.c @@ -124,13 +124,16 @@ acpi_lid_attach(device_t dev) if (acpi_parse_prw(sc->lid_handle, &prw) == 0) AcpiEnableGpe(prw.gpe_handle, prw.gpe_bit); + /* Get the initial lid status, ignore failures */ + (void) acpi_GetInteger(sc->lid_handle, "_LID", &sc->lid_status); + /* * Export the lid status */ SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "state", CTLFLAG_RD, &sc->lid_status, 0, - "Device set to wake the system"); + "Device state (0 = closed, 1 = open)"); return (0); } @@ -144,6 +147,13 @@ acpi_lid_suspend(device_t dev) static int acpi_lid_resume(device_t dev) { + struct acpi_lid_softc *sc; + + sc = device_get_softc(dev); + + /* Get lid status after resume, ignore failures */ + (void) acpi_GetInteger(sc->lid_handle, "_LID", &sc->lid_status); + return (0); } diff --git a/sys/dev/ath/ah_osdep.c b/sys/dev/ath/ah_osdep.c index 23d967ec75e3..b141d7d66006 100644 --- a/sys/dev/ath/ah_osdep.c +++ b/sys/dev/ath/ah_osdep.c @@ -93,8 +93,9 @@ extern void DO_HALDEBUG(struct ath_hal *ah, u_int mask, const char* fmt, ...); #endif /* AH_DEBUG */ /* NB: put this here instead of the driver to avoid circular references */ -SYSCTL_NODE(_hw, OID_AUTO, ath, CTLFLAG_RD, 0, "Atheros driver parameters"); -static SYSCTL_NODE(_hw_ath, OID_AUTO, hal, CTLFLAG_RD, 0, +SYSCTL_NODE(_hw, OID_AUTO, ath, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "Atheros driver parameters"); +static SYSCTL_NODE(_hw_ath, OID_AUTO, hal, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Atheros HAL parameters"); #ifdef AH_DEBUG @@ -236,8 +237,10 @@ sysctl_hw_ath_hal_log(SYSCTL_HANDLER_ARGS) else return (ath_hal_setlogging(enable)); } -SYSCTL_PROC(_hw_ath_hal, OID_AUTO, alq, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hw_ath_hal_log, "I", "Enable HAL register logging"); +SYSCTL_PROC(_hw_ath_hal, OID_AUTO, alq, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + 0, 0, sysctl_hw_ath_hal_log, "I", + "Enable HAL register logging"); SYSCTL_INT(_hw_ath_hal, OID_AUTO, alq_size, CTLFLAG_RW, &ath_hal_alq_qsize, 0, "In-memory log size (#records)"); SYSCTL_INT(_hw_ath_hal, OID_AUTO, alq_lost, CTLFLAG_RW, diff --git a/sys/dev/ath/ath_rate/sample/sample.c b/sys/dev/ath/ath_rate/sample/sample.c index e0ab13eab719..ce22b36c539e 100644 --- a/sys/dev/ath/ath_rate/sample/sample.c +++ b/sys/dev/ath/ath_rate/sample/sample.c @@ -1364,17 +1364,17 @@ ath_rate_sysctlattach(struct ath_softc *sc, struct sample_softc *ssc) struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "smoothing_rate", CTLTYPE_INT | CTLFLAG_RW, ssc, 0, - ath_rate_sysctl_smoothing_rate, "I", + "smoothing_rate", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + ssc, 0, ath_rate_sysctl_smoothing_rate, "I", "sample: smoothing rate for avg tx time (%%)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "sample_rate", CTLTYPE_INT | CTLFLAG_RW, ssc, 0, - ath_rate_sysctl_sample_rate, "I", + "sample_rate", CTLTYPE_INT | CTLFLAG_RW | 
CTLFLAG_MPSAFE, + ssc, 0, ath_rate_sysctl_sample_rate, "I", "sample: percent air time devoted to sampling new rates (%%)"); /* XXX max_successive_failures, stale_failure_timeout, min_switch */ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "sample_stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_rate_sysctl_stats, "I", "sample: print statistics"); + "sample_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + sc, 0, ath_rate_sysctl_stats, "I", "sample: print statistics"); } struct ath_ratectrl * diff --git a/sys/dev/ath/if_ath_sysctl.c b/sys/dev/ath/if_ath_sysctl.c index 3e4e47246357..662aa77008a3 100644 --- a/sys/dev/ath/if_ath_sysctl.c +++ b/sys/dev/ath/if_ath_sysctl.c @@ -786,16 +786,17 @@ ath_sysctl_alq_attach(struct ath_softc *sc) struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); - tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "alq", CTLFLAG_RD, - NULL, "Atheros ALQ logging parameters"); + tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "alq", + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, + "Atheros ALQ logging parameters"); child = SYSCTL_CHILDREN(tree); SYSCTL_ADD_STRING(ctx, child, OID_AUTO, "filename", CTLFLAG_RW, sc->sc_alq.sc_alq_filename, 0, "ALQ filename"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "enable", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_alq_log, "I", ""); + "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_alq_log, "I", ""); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "debugmask", CTLFLAG_RW, &sc->sc_alq.sc_alq_debug, 0, @@ -831,21 +832,21 @@ ath_sysctlattach(struct ath_softc *sc) "control debugging KTR"); #endif /* ATH_DEBUG_ALQ */ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "slottime", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_slottime, "I", "802.11 slot time (us)"); + "slottime", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_slottime, "I", "802.11 slot time (us)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "acktimeout", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_acktimeout, "I", "802.11 ACK timeout (us)"); + "acktimeout", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_acktimeout, "I", "802.11 ACK timeout (us)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "ctstimeout", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_ctstimeout, "I", "802.11 CTS timeout (us)"); + "ctstimeout", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_ctstimeout, "I", "802.11 CTS timeout (us)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "softled", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_softled, "I", "enable/disable software LED support"); + "softled", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_softled, "I", "enable/disable software LED support"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "ledpin", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_ledpin, "I", "GPIO pin connected to LED"); + "ledpin", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_ledpin, "I", "GPIO pin connected to LED"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "ledon", CTLFLAG_RW, &sc->sc_ledon, 0, "setting to turn LED on"); @@ -854,8 +855,8 @@ ath_sysctlattach(struct ath_softc *sc) "idle time for inactivity LED (ticks)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "hardled", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_hardled, "I", "enable/disable hardware LED support"); + "hardled", CTLTYPE_INT | 
CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_hardled, "I", "enable/disable hardware LED support"); /* XXX Laziness - configure pins, then flip hardled off/on */ SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "led_net_pin", CTLFLAG_RW, &sc->sc_led_net_pin, 0, @@ -865,61 +866,61 @@ ath_sysctlattach(struct ath_softc *sc) "MAC Power LED pin, or -1 to disable"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "txantenna", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_txantenna, "I", "antenna switch"); + "txantenna", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_txantenna, "I", "antenna switch"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "rxantenna", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_rxantenna, "I", "default/rx antenna"); + "rxantenna", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_rxantenna, "I", "default/rx antenna"); if (ath_hal_hasdiversity(ah)) SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "diversity", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_diversity, "I", "antenna diversity"); + "diversity", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + sc, 0, ath_sysctl_diversity, "I", "antenna diversity"); sc->sc_txintrperiod = ATH_TXINTR_PERIOD; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "txintrperiod", CTLFLAG_RW, &sc->sc_txintrperiod, 0, "tx descriptor batching"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "diag", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_diag, "I", "h/w diagnostic control"); + "diag", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_diag, "I", "h/w diagnostic control"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "tpscale", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_tpscale, "I", "tx power scaling"); + "tpscale", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_tpscale, "I", "tx power scaling"); if (ath_hal_hastpc(ah)) { SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "tpc", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_tpc, "I", "enable/disable per-packet TPC"); + "tpc", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_tpc, "I", "enable/disable per-packet TPC"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "tpack", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_tpack, "I", "tx power for ack frames"); + "tpack", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, + 0, ath_sysctl_tpack, "I", "tx power for ack frames"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "tpcts", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_tpcts, "I", "tx power for cts frames"); + "tpcts", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, + 0, ath_sysctl_tpcts, "I", "tx power for cts frames"); } if (ath_hal_hasrfsilent(ah)) { SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "rfsilent", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_rfsilent, "I", "h/w RF silent config"); + "rfsilent", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + sc, 0, ath_sysctl_rfsilent, "I", "h/w RF silent config"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "rfkill", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_rfkill, "I", "enable/disable RF kill switch"); + "rfkill", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, + 0, ath_sysctl_rfkill, "I", "enable/disable RF kill switch"); } SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "txagg", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_txagg, "I", ""); + "txagg", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_txagg, 
"I", ""); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "forcebstuck", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_forcebstuck, "I", ""); + "forcebstuck", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, + 0, ath_sysctl_forcebstuck, "I", ""); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "hangcheck", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_hangcheck, "I", ""); + "hangcheck", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, 0, + ath_sysctl_hangcheck, "I", ""); if (ath_hal_hasintmit(ah)) { SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "intmit", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_intmit, "I", "interference mitigation"); + "intmit", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, + 0, ath_sysctl_intmit, "I", "interference mitigation"); } sc->sc_monpass = HAL_RXERR_DECRYPT | HAL_RXERR_MIC; SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, @@ -989,8 +990,8 @@ ath_sysctlattach(struct ath_softc *sc) "superframe", CTLFLAG_RD, &sc->sc_tdmabintval, 0, "TDMA calculated super frame"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "setcca", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_setcca, "I", "enable CCA control"); + "setcca", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + sc, 0, ath_sysctl_setcca, "I", "enable CCA control"); } #endif @@ -1028,7 +1029,8 @@ ath_sysctl_stats_attach_rxphyerr(struct ath_softc *sc, struct sysctl_oid_list *p int i; char sn[8]; - tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx_phy_err", CTLFLAG_RD, NULL, "Per-code RX PHY Errors"); + tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "rx_phy_err", + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Per-code RX PHY Errors"); child = SYSCTL_CHILDREN(tree); for (i = 0; i < 64; i++) { snprintf(sn, sizeof(sn), "%d", i); @@ -1047,7 +1049,7 @@ ath_sysctl_stats_attach_intr(struct ath_softc *sc, char sn[8]; tree = SYSCTL_ADD_NODE(ctx, parent, OID_AUTO, "sync_intr", - CTLFLAG_RD, NULL, "Sync interrupt statistics"); + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Sync interrupt statistics"); child = SYSCTL_CHILDREN(tree); for (i = 0; i < 32; i++) { snprintf(sn, sizeof(sn), "%d", i); @@ -1065,12 +1067,12 @@ ath_sysctl_stats_attach(struct ath_softc *sc) /* Create "clear" node */ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "clear_stats", CTLTYPE_INT | CTLFLAG_RW, sc, 0, - ath_sysctl_clearstats, "I", "clear stats"); + "clear_stats", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc, + 0, ath_sysctl_clearstats, "I", "clear stats"); /* Create stats node */ - tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", CTLFLAG_RD, - NULL, "Statistics"); + tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "stats", + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics"); child = SYSCTL_CHILDREN(tree); /* This was generated from if_athioctl.h */ @@ -1315,8 +1317,8 @@ ath_sysctl_hal_attach(struct ath_softc *sc) struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); - tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hal", CTLFLAG_RD, - NULL, "Atheros HAL parameters"); + tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hal", + CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Atheros HAL parameters"); child = SYSCTL_CHILDREN(tree); sc->sc_ah->ah_config.ah_debug = 0; diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h index 553b8b76f761..e24b28e41fc8 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h @@ -650,9 +650,13 @@ struct mlx5_ib_congestion { struct sysctl_ctx_list ctx; struct sx lock; struct delayed_work 
dwork; - u64 arg [0]; - MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR) - MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR) + union { + u64 arg[1]; + struct { + MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR) + MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR) + }; + }; }; struct mlx5_ib_dev { diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c index b11cd0b53403..14cac913779e 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2013-2020, Mellanox Technologies, Ltd. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -37,7 +37,9 @@ static const char *mlx5_ib_cong_stats_desc[] = { MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC) }; -#define MLX5_IB_INDEX(field) (__offsetof(struct mlx5_ib_congestion, field) / sizeof(u64)) +#define MLX5_IB_INDEX(field) ( \ + (__offsetof(struct mlx5_ib_congestion, field) - \ + __offsetof(struct mlx5_ib_congestion, arg[0])) / sizeof(u64)) #define MLX5_IB_FLD_MAX(type, field) ((1ULL << __mlx5_bit_sz(type, field)) - 1ULL) #define MLX5_IB_SET_CLIPPED(type, ptr, field, var) do { \ /* rangecheck */ \ diff --git a/sys/dev/otus/if_otus.c b/sys/dev/otus/if_otus.c index 0489b943820e..9e3903770714 100644 --- a/sys/dev/otus/if_otus.c +++ b/sys/dev/otus/if_otus.c @@ -75,7 +75,8 @@ __FBSDID("$FreeBSD$"); #include "if_otusreg.h" static int otus_debug = 0; -static SYSCTL_NODE(_hw_usb, OID_AUTO, otus, CTLFLAG_RW, 0, "USB otus"); +static SYSCTL_NODE(_hw_usb, OID_AUTO, otus, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "USB otus"); SYSCTL_INT(_hw_usb_otus, OID_AUTO, debug, CTLFLAG_RWTUN, &otus_debug, 0, "Debug level"); #define OTUS_DEBUG_XMIT 0x00000001 diff --git a/sys/kern/subr_compressor.c b/sys/kern/subr_compressor.c index 5950ade1d3ca..b202d271cfa3 100644 --- a/sys/kern/subr_compressor.c +++ b/sys/kern/subr_compressor.c @@ -117,6 +117,13 @@ gz_init(size_t maxiosize, int level) s->gz_stream.next_in = Z_NULL; s->gz_stream.avail_in = 0; + if (level != Z_DEFAULT_COMPRESSION) { + if (level < Z_BEST_SPEED) + level = Z_BEST_SPEED; + else if (level > Z_BEST_COMPRESSION) + level = Z_BEST_COMPRESSION; + } + error = deflateInit2(&s->gz_stream, level, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); if (error != 0) diff --git a/sys/kern/subr_smr.c b/sys/kern/subr_smr.c index 816e68966029..530cf5118dac 100644 --- a/sys/kern/subr_smr.c +++ b/sys/kern/subr_smr.c @@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$"); #include <vm/uma.h> /* + * Global Unbounded Sequences (GUS) + * * This is a novel safe memory reclamation technique inspired by * epoch based reclamation from Samy Al Bahra's concurrency kit which * in turn was based on work described in: @@ -53,7 +55,8 @@ __FBSDID("$FreeBSD$"); * This is not an implementation of hazard pointers or related * techniques. The term safe memory reclamation is used as a * generic descriptor for algorithms that defer frees to avoid - * use-after-free errors with lockless datastructures. + * use-after-free errors with lockless datastructures or as + * a mechanism to detect quiescence for writer synchronization. * * The basic approach is to maintain a monotonic write sequence * number that is updated on some application defined granularity. @@ -67,7 +70,7 @@ __FBSDID("$FreeBSD$"); * a global write clock that is used to mark memory on free. 
 * * The write and read sequence numbers can be thought of as a two - handed clock with readers always advancing towards writers. SMR + handed clock with readers always advancing towards writers. GUS * maintains the invariant that all readers can safely access memory * that was visible at the time they loaded their copy of the sequence * number. Periodically the read sequence or hand is polled and @@ -80,9 +83,12 @@ __FBSDID("$FreeBSD$"); * A stored sequence number that falls outside of this range has expired * and needs no scan to reclaim. * - * A notable distinction between this SMR and Epoch, qsbr, rcu, etc. is + * A notable distinction between GUS and Epoch, qsbr, rcu, etc. is * that advancing the sequence number is decoupled from detecting its - * observation. This results in a more granular assignment of sequence + * observation. That is to say, the delta between read and write + * sequence numbers is not bounded. This can be thought of as a more + * generalized form of epoch, which requires them to be at most one step + * apart. This results in a more granular assignment of sequence * numbers even as read latencies prohibit all or some expiration. * It also allows writers to advance the sequence number and save the * poll for expiration until a later time when it is likely to @@ -164,58 +170,143 @@ static uma_zone_t smr_zone; #define SMR_SEQ_MAX_ADVANCE SMR_SEQ_MAX_DELTA / 2 #endif +/* + * The grace period for lazy (tick-based) SMR. + * + * Hardclock is responsible for advancing ticks on a single CPU while every + * CPU receives a regular clock interrupt. The clock interrupts are flushing + * the store buffers and any speculative loads that may violate our invariants. + * Because these interrupts are not synchronized we must wait one additional + * tick in the future to be certain that all processors have had their state + * synchronized by an interrupt. + * + * This assumes that the clock interrupt will only be delayed by other causes + * that will flush the store buffer or prevent access to the section protected + * data. For example, an idle processor, a system management interrupt, + * or a vm exit. + * + * We must wait one additional tick if we are around the wrap condition + * because the write seq will move forward by two with one interrupt. + */ +#define SMR_LAZY_GRACE 2 +#define SMR_LAZY_GRACE_MAX (SMR_LAZY_GRACE + 1) + +/* + * The maximum sequence number ahead of wr_seq that may still be valid. The + * sequence may not be advanced on write for lazy or deferred SMRs. In this + * case poll needs to attempt to forward the sequence number if the goal is + * within wr_seq + SMR_SEQ_ADVANCE. */ +#define SMR_SEQ_ADVANCE MAX(SMR_SEQ_INCR, SMR_LAZY_GRACE_MAX) +
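(An aside on the sequence arithmetic used throughout this file: the SMR_SEQ_ comparisons stay correct across 32-bit wrap because they are defined over a signed delta. The standalone sketch below models how such macros are conventionally written; the exact definitions live in sys/smr.h and the bodies here are assumptions, not copied from it.)

#include <stdint.h>
#include <stdio.h>

typedef uint32_t smr_seq_t;
typedef int32_t smr_delta_t;

/* Signed difference makes the comparison immune to 32-bit wrap. */
#define SMR_SEQ_DELTA(a, b)	((smr_delta_t)((a) - (b)))
#define SMR_SEQ_GT(a, b)	(SMR_SEQ_DELTA(a, b) > 0)
#define SMR_SEQ_LEQ(a, b)	(SMR_SEQ_DELTA(a, b) <= 0)

int
main(void)
{
	smr_seq_t wr = 0xfffffffeU;	/* writer hand just below the wrap */
	smr_seq_t goal = wr + 4;	/* wraps around to 2 */

	/* goal is numerically smaller than wr but still "in the future". */
	printf("%d\n", SMR_SEQ_GT(goal, wr));	/* prints 1 */
	printf("%d\n", SMR_SEQ_LEQ(wr, goal));	/* prints 1 */
	return (0);
}

This works only while the two hands stay within half the sequence space of each other, which is exactly why the code bounds reader lag with SMR_SEQ_MAX_DELTA and SMR_SEQ_MAX_ADVANCE.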
 static SYSCTL_NODE(_debug, OID_AUTO, smr, CTLFLAG_RW, NULL, "SMR Stats"); static counter_u64_t advance = EARLY_COUNTER; -SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance, CTLFLAG_RD, &advance, ""); +SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance, CTLFLAG_RW, &advance, ""); static counter_u64_t advance_wait = EARLY_COUNTER; -SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance_wait, CTLFLAG_RD, &advance_wait, ""); +SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, advance_wait, CTLFLAG_RW, &advance_wait, ""); static counter_u64_t poll = EARLY_COUNTER; -SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll, CTLFLAG_RD, &poll, ""); +SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll, CTLFLAG_RW, &poll, ""); static counter_u64_t poll_scan = EARLY_COUNTER; -SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_scan, CTLFLAG_RD, &poll_scan, ""); - +SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_scan, CTLFLAG_RW, &poll_scan, ""); +static counter_u64_t poll_fail = EARLY_COUNTER; +SYSCTL_COUNTER_U64(_debug_smr, OID_AUTO, poll_fail, CTLFLAG_RW, &poll_fail, ""); /* - * Advance the write sequence and return the new value for use as the - * wait goal. This guarantees that any changes made by the calling - * thread prior to this call will be visible to all threads after - * rd_seq meets or exceeds the return value. + * Advance a lazy write sequence number. These move forward at the rate of + * ticks. Grace is two ticks in the future. Lazy write sequence numbers can + * be even but not SMR_SEQ_INVALID so we pause time for a tick when we wrap. * - * This function may busy loop if the readers are roughly 1 billion - * sequence numbers behind the writers. + * This returns the _current_ write sequence number. The lazy goal sequence + * number is SMR_LAZY_GRACE ticks ahead. */ -smr_seq_t -smr_advance(smr_t smr) +static smr_seq_t +smr_lazy_advance(smr_t smr, smr_shared_t s) { - smr_shared_t s; - smr_seq_t goal, s_rd_seq; + smr_seq_t s_rd_seq, s_wr_seq, goal; + int t; + + CRITICAL_ASSERT(curthread); /* - * It is illegal to enter while in an smr section. + * Load s_wr_seq prior to ticks to ensure that the thread that + * observes the largest value wins. */ - SMR_ASSERT_NOT_ENTERED(smr); + s_wr_seq = atomic_load_acq_int(&s->s_wr_seq); /* - * Modifications not done in a smr section need to be visible - * before advancing the seq. + * We must not allow a zero tick value. We go back in time one tick + * and advance the grace period forward one tick around zero. */ - atomic_thread_fence_rel(); + t = ticks; + if (t == SMR_SEQ_INVALID) + t--; /* - * Load the current read seq before incrementing the goal so - * we are guaranteed it is always < goal. + * The most probable condition is that the update already took place. */ - s = zpcpu_get(smr)->c_shared; - s_rd_seq = atomic_load_acq_int(&s->s_rd_seq); + if (__predict_true(t == s_wr_seq)) + goto out; /* - * Increment the shared write sequence by 2. Since it is - * initialized to 1 this means the only valid values are - * odd and an observed value of 0 in a particular CPU means - * it is not currently in a read section. + * After long idle periods the read sequence may fall too far + * behind write. Prevent poll from ever seeing this condition + * by updating the stale rd_seq. This assumes that there can + * be no valid section 2bn ticks old. The rd_seq update must + * be visible before wr_seq to avoid races with other advance + * callers. */
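(A compact illustration of the goal arithmetic in smr_lazy_advance(): assuming, consistently with this diff, that SMR_SEQ_INVALID is the reserved value 0 and that ticks is the raw hardclock counter, the goal is SMR_LAZY_GRACE ticks in the future with the invalid value stepped over at wrap:)

#include <stdint.h>

typedef uint32_t smr_seq_t;

#define SMR_SEQ_INVALID	0
#define SMR_LAZY_GRACE	2

/*
 * Map a tick count to a lazy wait goal: the goal sits SMR_LAZY_GRACE
 * ticks in the future, and the reserved SMR_SEQ_INVALID value is
 * stepped over when the counter wraps, mirroring the kernel logic.
 */
static smr_seq_t
lazy_goal(int t)
{
	smr_seq_t wr, goal;

	/* Never publish the invalid value; back up one tick instead. */
	wr = (t == SMR_SEQ_INVALID) ? (smr_seq_t)t - 1 : (smr_seq_t)t;
	goal = wr + SMR_LAZY_GRACE;
	if (goal < SMR_LAZY_GRACE)	/* the addition wrapped past 0 */
		goal++;
	return (goal);
}

A lazy clock would be obtained with the three-argument constructor this commit introduces, e.g. smr_create("lazy_example", 0, SMR_LAZY); the name and limit values here are purely illustrative.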
 - goal = atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR; + s_rd_seq = atomic_load_int(&s->s_rd_seq); + if (SMR_SEQ_GT(s_rd_seq, t)) + atomic_cmpset_rel_int(&s->s_rd_seq, s_rd_seq, t); + + /* + * Release to synchronize with the wr_seq load above. Ignore + * cmpset failures from simultaneous updates. + */ + atomic_cmpset_rel_int(&s->s_wr_seq, s_wr_seq, t); counter_u64_add(advance, 1); + /* If we lost either update race another thread did it. */ + s_wr_seq = t; +out: + goal = s_wr_seq + SMR_LAZY_GRACE; + /* Skip over the SMR_SEQ_INVALID tick. */ + if (goal < SMR_LAZY_GRACE) + goal++; + return (goal); +} + +/* + * Increment the shared write sequence by 2. Since it is initialized + * to 1 this means the only valid values are odd and an observed value + * of 0 in a particular CPU means it is not currently in a read section. + */ +static smr_seq_t +smr_shared_advance(smr_shared_t s) +{ + + return (atomic_fetchadd_int(&s->s_wr_seq, SMR_SEQ_INCR) + SMR_SEQ_INCR); +} + +/* + * Advance the write sequence number for a normal smr section. If the + * write sequence is too far behind the read sequence we have to poll + * to advance rd_seq and prevent undetectable wraps. + */ +static smr_seq_t +smr_default_advance(smr_t smr, smr_shared_t s) +{ + smr_seq_t goal, s_rd_seq; + + CRITICAL_ASSERT(curthread); + KASSERT((zpcpu_get(smr)->c_flags & SMR_LAZY) == 0, + ("smr_default_advance: called with lazy smr.")); + + /* + * Load the current read seq before incrementing the goal so + * we are guaranteed it is always < goal. + */ + s_rd_seq = atomic_load_acq_int(&s->s_rd_seq); + goal = smr_shared_advance(s); /* * Force a synchronization here if the goal is getting too @@ -226,30 +317,172 @@ smr_advance(smr_t smr) counter_u64_add(advance_wait, 1); smr_wait(smr, goal - SMR_SEQ_MAX_ADVANCE); } + counter_u64_add(advance, 1); return (goal); } +/* + * Deferred SMRs conditionally update s_wr_seq based on a + * cpu-local interval count. + */ +static smr_seq_t +smr_deferred_advance(smr_t smr, smr_shared_t s, smr_t self) +{ + + if (++self->c_deferred < self->c_limit) + return (smr_shared_current(s) + SMR_SEQ_INCR); + self->c_deferred = 0; + return (smr_default_advance(smr, s)); +} + +/* + * Advance the write sequence and return the value for use as the + * wait goal. This guarantees that any changes made by the calling + * thread prior to this call will be visible to all threads after + * rd_seq meets or exceeds the return value. + * + * This function may busy loop if the readers are roughly 1 billion + * sequence numbers behind the writers. + * + * Lazy SMRs will not busy loop and the wrap happens every 49.6 days + * at 1khz and 119 hours at 10khz. Readers can block for no longer + * than half of this for SMR_SEQ_ macros to continue working. + */ smr_seq_t -smr_advance_deferred(smr_t smr, int limit) +smr_advance(smr_t smr) { + smr_t self; + smr_shared_t s; smr_seq_t goal; - smr_t csmr; + int flags; + /* + * It is illegal to enter while in an smr section. + */ SMR_ASSERT_NOT_ENTERED(smr); + /* + * Modifications not done in a smr section need to be visible + * before advancing the seq. + */ + atomic_thread_fence_rel(); + critical_enter(); - csmr = zpcpu_get(smr); - if (++csmr->c_deferred >= limit) { - goal = SMR_SEQ_INVALID; - csmr->c_deferred = 0; - } else - goal = smr_shared_current(csmr->c_shared) + SMR_SEQ_INCR; + /* Try to touch the line once. 
*/ + self = zpcpu_get(smr); + s = self->c_shared; + flags = self->c_flags; + goal = SMR_SEQ_INVALID; + if ((flags & (SMR_LAZY | SMR_DEFERRED)) == 0) + goal = smr_default_advance(smr, s); + else if ((flags & SMR_LAZY) != 0) + goal = smr_lazy_advance(smr, s); + else if ((flags & SMR_DEFERRED) != 0) + goal = smr_deferred_advance(smr, s, self); critical_exit(); - if (goal != SMR_SEQ_INVALID) - return (goal); - return (smr_advance(smr)); + return (goal); +} + +/* + * Poll to determine the currently observed sequence number on a cpu + * and spinwait if the 'wait' argument is true. + */ +static smr_seq_t +smr_poll_cpu(smr_t c, smr_seq_t s_rd_seq, smr_seq_t goal, bool wait) +{ + smr_seq_t c_seq; + + c_seq = SMR_SEQ_INVALID; + for (;;) { + c_seq = atomic_load_int(&c->c_seq); + if (c_seq == SMR_SEQ_INVALID) + break; + + /* + * There is a race described in smr.h:smr_enter that + * can lead to a stale seq value but not stale data + * access. If we find a value out of range here we + * pin it to the current min to prevent it from + * advancing until that stale section has expired. + * + * The race is created when a cpu loads the s_wr_seq + * value in a local register and then another thread + * advances s_wr_seq and calls smr_poll() which will + * observe no value yet in c_seq and advance s_rd_seq + * up to s_wr_seq which is beyond the register + * cached value. This is only likely to happen on + * a hypervisor or with a system management interrupt. + */ + if (SMR_SEQ_LT(c_seq, s_rd_seq)) + c_seq = s_rd_seq; + + /* + * If the sequence number meets the goal we are done + * with this cpu. + */ + if (SMR_SEQ_LEQ(goal, c_seq)) + break; + + if (!wait) + break; + cpu_spinwait(); + } + + return (c_seq); +} + +/* + * Loop until all cores have observed the goal sequence or have + * gone inactive. Returns the oldest sequence currently active. + * + * This function assumes a snapshot of sequence values has + * been obtained and validated by smr_poll(). + */ +static smr_seq_t +smr_poll_scan(smr_t smr, smr_shared_t s, smr_seq_t s_rd_seq, + smr_seq_t s_wr_seq, smr_seq_t goal, bool wait) +{ + smr_seq_t rd_seq, c_seq; + int i; + + CRITICAL_ASSERT(curthread); + counter_u64_add_protected(poll_scan, 1); + + /* + * The read sequence can be no larger than the write sequence at + * the start of the poll. + */ + rd_seq = s_wr_seq; + CPU_FOREACH(i) { + /* + * Query the active sequence on this cpu. If we're not + * waiting and we don't meet the goal we will still scan + * the rest of the cpus to update s_rd_seq before returning + * failure. + */ + c_seq = smr_poll_cpu(zpcpu_get_cpu(smr, i), s_rd_seq, goal, + wait); + + /* + * Limit the minimum observed rd_seq whether we met the goal + * or not. + */ + if (c_seq != SMR_SEQ_INVALID) + rd_seq = SMR_SEQ_MIN(rd_seq, c_seq); + } + + /* + * Advance the rd_seq as long as we observed a more recent value. */
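(The rewritten smr_poll() that follows replaces the old range checks with arithmetic on a single signed delta from wr_seq to the goal. A standalone sketch of that classification; SMR_SEQ_MAX_DELTA is not shown in this diff, so the value below is an assumption for illustration only:)

#include <stdbool.h>
#include <stdint.h>

typedef uint32_t smr_seq_t;
typedef int32_t smr_delta_t;

#define SMR_SEQ_DELTA(a, b)	((smr_delta_t)((a) - (b)))
#define SMR_SEQ_MAX_DELTA	(1 << 30)	/* illustrative bound */
#define SMR_SEQ_MAX_ADVANCE	(SMR_SEQ_MAX_DELTA / 2)

enum poll_action {
	POLL_FORWARD_WR,	/* stale wr_seq from a lazy/deferred advance */
	POLL_WRAPPED,		/* ancient goal; long since satisfied */
	POLL_SCAN		/* scan the per-CPU read sequences */
};

/*
 * Classify a wait goal against the current write hand the way the
 * rewritten smr_poll() does: slightly ahead means wr_seq is stale
 * and must be pushed forward; far ahead means the goal wrapped and
 * has certainly expired; otherwise the per-CPU scan decides.
 */
static enum poll_action
classify(smr_seq_t goal, smr_seq_t wr_seq, bool lazy_or_deferred)
{
	smr_delta_t delta = SMR_SEQ_DELTA(goal, wr_seq);

	if (delta > 0 && delta <= SMR_SEQ_MAX_ADVANCE && lazy_or_deferred)
		return (POLL_FORWARD_WR);
	if (delta > 0)
		return (POLL_WRAPPED);
	return (POLL_SCAN);
}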
 + s_rd_seq = atomic_load_int(&s->s_rd_seq); + if (SMR_SEQ_GEQ(rd_seq, s_rd_seq)) { + atomic_cmpset_int(&s->s_rd_seq, s_rd_seq, rd_seq); + s_rd_seq = rd_seq; + } + + return (s_rd_seq); } /* @@ -268,9 +501,10 @@ bool smr_poll(smr_t smr, smr_seq_t goal, bool wait) { smr_shared_t s; - smr_t c; - smr_seq_t s_wr_seq, s_rd_seq, rd_seq, c_seq; - int i; + smr_t self; + smr_seq_t s_wr_seq, s_rd_seq; + smr_delta_t delta; + int flags; bool success; /* @@ -278,6 +512,8 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait) */ KASSERT(!wait || !SMR_ENTERED(smr), ("smr_poll: Blocking not allowed in a SMR section.")); + KASSERT(!wait || (zpcpu_get(smr)->c_flags & SMR_LAZY) == 0, + ("smr_poll: Blocking not allowed on lazy smrs.")); /* * Use a critical section so that we can avoid ABA races @@ -285,116 +521,79 @@ smr_poll(smr_t smr, smr_seq_t goal, bool wait) */ success = true; critical_enter(); - s = zpcpu_get(smr)->c_shared; + /* Attempt to load from self only once. */ + self = zpcpu_get(smr); + s = self->c_shared; + flags = self->c_flags; counter_u64_add_protected(poll, 1); /* + * Conditionally advance the lazy write clock on any writer + * activity. This may reset s_rd_seq. + */ + if ((flags & SMR_LAZY) != 0) + smr_lazy_advance(smr, s); + + /* * Acquire barrier loads s_wr_seq after s_rd_seq so that we can not * observe an updated read sequence that is larger than write. */ s_rd_seq = atomic_load_acq_int(&s->s_rd_seq); /* - * wr_seq must be loaded prior to any c_seq value so that a stale - * c_seq can only reference time after this wr_seq. + * If we have already observed the sequence number we can immediately + * return success. Most polls should meet this criterion. */ - s_wr_seq = atomic_load_acq_int(&s->s_wr_seq); + if (SMR_SEQ_LEQ(goal, s_rd_seq)) + goto out; /* - * This may have come from a deferred advance. Consider one - * increment past the current wr_seq valid and make sure we - * have advanced far enough to succeed. We simply add to avoid - * an additional fence. + * wr_seq must be loaded prior to any c_seq value so that a + * stale c_seq can only reference time after this wr_seq. */ - if (goal == s_wr_seq + SMR_SEQ_INCR) { - atomic_add_int(&s->s_wr_seq, SMR_SEQ_INCR); - s_wr_seq = goal; - } + s_wr_seq = atomic_load_acq_int(&s->s_wr_seq); /* - * Detect whether the goal is valid and has already been observed. - * - * The goal must be in the range of s_wr_seq >= goal >= s_rd_seq for - * it to be valid. If it is not then the caller held on to it and - * the integer wrapped. If we wrapped back within range the caller - * will harmlessly scan. - * - * A valid goal must be greater than s_rd_seq or we have not verified - * that it has been observed and must fall through to polling. + * This is the distance from s_wr_seq to goal. Positive values + * are in the future. */ - if (SMR_SEQ_GEQ(s_rd_seq, goal) || SMR_SEQ_LT(s_wr_seq, goal)) - goto out; + delta = SMR_SEQ_DELTA(goal, s_wr_seq); /* - * Loop until all cores have observed the goal sequence or have - * gone inactive. Keep track of the oldest sequence currently - * active as rd_seq. + * Detect a stale wr_seq. + * + * This goal may have come from a deferred advance or a lazy + * smr. If we are not blocking we cannot succeed but the + * sequence number is valid. 
*/ - counter_u64_add_protected(poll_scan, 1); - rd_seq = s_wr_seq; - CPU_FOREACH(i) { - c = zpcpu_get_cpu(smr, i); - c_seq = SMR_SEQ_INVALID; - for (;;) { - c_seq = atomic_load_int(&c->c_seq); - if (c_seq == SMR_SEQ_INVALID) - break; - - /* - * There is a race described in smr.h:smr_enter that - * can lead to a stale seq value but not stale data - * access. If we find a value out of range here we - * pin it to the current min to prevent it from - * advancing until that stale section has expired. - * - * The race is created when a cpu loads the s_wr_seq - * value in a local register and then another thread - * advances s_wr_seq and calls smr_poll() which will - * oberve no value yet in c_seq and advance s_rd_seq - * up to s_wr_seq which is beyond the register - * cached value. This is only likely to happen on - * hypervisor or with a system management interrupt. - */ - if (SMR_SEQ_LT(c_seq, s_rd_seq)) - c_seq = s_rd_seq; - - /* - * If the sequence number meets the goal we are - * done with this cpu. - */ - if (SMR_SEQ_GEQ(c_seq, goal)) - break; - - /* - * If we're not waiting we will still scan the rest - * of the cpus and update s_rd_seq before returning - * an error. - */ - if (!wait) { - success = false; - break; - } - cpu_spinwait(); + if (delta > 0 && delta <= SMR_SEQ_MAX_ADVANCE && + (flags & (SMR_LAZY | SMR_DEFERRED)) != 0) { + if (!wait) { + success = false; + goto out; } - - /* - * Limit the minimum observed rd_seq whether we met the goal - * or not. - */ - if (c_seq != SMR_SEQ_INVALID && SMR_SEQ_GT(rd_seq, c_seq)) - rd_seq = c_seq; + /* LAZY is always !wait. */ + s_wr_seq = smr_shared_advance(s); + delta = 0; } /* - * Advance the rd_seq as long as we observed the most recent one. + * Detect an invalid goal. + * + * The goal must be in the range of s_wr_seq >= goal >= s_rd_seq for + * it to be valid. If it is not then the caller held on to it and + * the integer wrapped. If we wrapped back within range the caller + * will harmlessly scan. */ - s_rd_seq = atomic_load_int(&s->s_rd_seq); - do { - if (SMR_SEQ_LEQ(rd_seq, s_rd_seq)) - goto out; - } while (atomic_fcmpset_int(&s->s_rd_seq, &s_rd_seq, rd_seq) == 0); + if (delta > 0) + goto out; + /* Determine the lowest visible sequence number. */ + s_rd_seq = smr_poll_scan(smr, s, s_rd_seq, s_wr_seq, goal, wait); + success = SMR_SEQ_LEQ(goal, s_rd_seq); out: + if (!success) + counter_u64_add_protected(poll_fail, 1); critical_exit(); /* @@ -407,7 +606,7 @@ out: } smr_t -smr_create(const char *name) +smr_create(const char *name, int limit, int flags) { smr_t smr, c; smr_shared_t s; @@ -417,13 +616,19 @@ smr_create(const char *name) smr = uma_zalloc_pcpu(smr_zone, M_WAITOK); s->s_name = name; - s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT; + if ((flags & SMR_LAZY) == 0) + s->s_rd_seq = s->s_wr_seq = SMR_SEQ_INIT; + else + s->s_rd_seq = s->s_wr_seq = ticks; /* Initialize all CPUS, not just those running. 
*/ for (i = 0; i <= mp_maxid; i++) { c = zpcpu_get_cpu(smr, i); c->c_seq = SMR_SEQ_INVALID; c->c_shared = s; + c->c_deferred = 0; + c->c_limit = limit; + c->c_flags = flags; } atomic_thread_fence_seq_cst(); @@ -460,5 +665,6 @@ smr_init_counters(void *unused) advance_wait = counter_u64_alloc(M_WAITOK); poll = counter_u64_alloc(M_WAITOK); poll_scan = counter_u64_alloc(M_WAITOK); + poll_fail = counter_u64_alloc(M_WAITOK); } SYSINIT(smr_counters, SI_SUB_CPU, SI_ORDER_ANY, smr_init_counters, NULL); diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 5ef3f1c6935f..8dd7b48e59ed 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -326,21 +326,23 @@ ast(struct trapframe *framep) if (flags & TDF_NEEDSIGCHK || p->p_pendingcnt > 0 || !SIGISEMPTY(p->p_siglist)) { sigfastblock_fetch(td); - PROC_LOCK(p); - mtx_lock(&p->p_sigacts->ps_mtx); if ((td->td_pflags & TDP_SIGFASTBLOCK) != 0 && td->td_sigblock_val != 0) { sigfastblock_setpend(td); + PROC_LOCK(p); reschedule_signals(p, fastblock_mask, - SIGPROCMASK_PS_LOCKED | SIGPROCMASK_FASTBLK); + SIGPROCMASK_FASTBLK); + PROC_UNLOCK(p); } else { + PROC_LOCK(p); + mtx_lock(&p->p_sigacts->ps_mtx); while ((sig = cursig(td)) != 0) { KASSERT(sig >= 0, ("sig %d", sig)); postsig(sig); } + mtx_unlock(&p->p_sigacts->ps_mtx); + PROC_UNLOCK(p); } - mtx_unlock(&p->p_sigacts->ps_mtx); - PROC_UNLOCK(p); } /* diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 1659598d5e49..58a97a1337f0 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -403,14 +403,6 @@ namei(struct nameidata *ndp) ndp->ni_rootdir = fdp->fd_rdir; ndp->ni_topdir = fdp->fd_jdir; - /* - * If we are auditing the kernel pathname, save the user pathname. - */ - if (cnp->cn_flags & AUDITVNODE1) - AUDIT_ARG_UPATH1(td, ndp->ni_dirfd, cnp->cn_pnbuf); - if (cnp->cn_flags & AUDITVNODE2) - AUDIT_ARG_UPATH2(td, ndp->ni_dirfd, cnp->cn_pnbuf); - startdir_used = 0; dp = NULL; cnp->cn_nameptr = cnp->cn_pnbuf; @@ -505,6 +497,13 @@ namei(struct nameidata *ndp) ndp->ni_lcf |= NI_LCF_LATCH; } FILEDESC_SUNLOCK(fdp); + /* + * If we are auditing the kernel pathname, save the user pathname. 
+ */ + if (cnp->cn_flags & AUDITVNODE1) + AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf); + if (cnp->cn_flags & AUDITVNODE2) + AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf); if (ndp->ni_startdir != NULL && !startdir_used) vrele(ndp->ni_startdir); if (error != 0) { diff --git a/sys/modules/linuxkpi/Makefile b/sys/modules/linuxkpi/Makefile index 7eac41ab9273..dea5512d2b9b 100644 --- a/sys/modules/linuxkpi/Makefile +++ b/sys/modules/linuxkpi/Makefile @@ -15,6 +15,7 @@ SRCS= linux_compat.c \ linux_rcu.c \ linux_seq_file.c \ linux_schedule.c \ + linux_shmemfs.c \ linux_slab.c \ linux_tasklet.c \ linux_usb.c \ diff --git a/sys/net80211/ieee80211_alq.c b/sys/net80211/ieee80211_alq.c index 77824515a1fc..d7e6a78319db 100644 --- a/sys/net80211/ieee80211_alq.c +++ b/sys/net80211/ieee80211_alq.c @@ -111,8 +111,10 @@ sysctl_ieee80211_alq_log(SYSCTL_HANDLER_ARGS) return (ieee80211_alq_setlogging(enable)); } -SYSCTL_PROC(_net_wlan, OID_AUTO, alq, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_ieee80211_alq_log, "I", "Enable net80211 alq logging"); +SYSCTL_PROC(_net_wlan, OID_AUTO, alq, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, 0, + sysctl_ieee80211_alq_log, "I", + "Enable net80211 alq logging"); SYSCTL_INT(_net_wlan, OID_AUTO, alq_size, CTLFLAG_RW, &ieee80211_alq_qsize, 0, "In-memory log size (bytes)"); SYSCTL_INT(_net_wlan, OID_AUTO, alq_lost, CTLFLAG_RW, diff --git a/sys/net80211/ieee80211_amrr.c b/sys/net80211/ieee80211_amrr.c index 384a5956fe4e..f89437d3e857 100644 --- a/sys/net80211/ieee80211_amrr.c +++ b/sys/net80211/ieee80211_amrr.c @@ -465,8 +465,8 @@ amrr_sysctlattach(struct ieee80211vap *vap, return; SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "amrr_rate_interval", CTLTYPE_INT | CTLFLAG_RW, vap, - 0, amrr_sysctl_interval, "I", "amrr operation interval (ms)"); + "amrr_rate_interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + vap, 0, amrr_sysctl_interval, "I", "amrr operation interval (ms)"); /* XXX bounds check values */ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "amrr_max_sucess_threshold", CTLFLAG_RW, diff --git a/sys/net80211/ieee80211_freebsd.c b/sys/net80211/ieee80211_freebsd.c index 251b6b8d3c96..35c999587f58 100644 --- a/sys/net80211/ieee80211_freebsd.c +++ b/sys/net80211/ieee80211_freebsd.c @@ -60,7 +60,8 @@ __FBSDID("$FreeBSD$"); #include <net80211/ieee80211_var.h> #include <net80211/ieee80211_input.h> -SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD, 0, "IEEE 80211 parameters"); +SYSCTL_NODE(_net, OID_AUTO, wlan, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "IEEE 80211 parameters"); #ifdef IEEE80211_DEBUG static int ieee80211_debug = 0; @@ -227,10 +228,10 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap) sysctl_ctx_init(ctx); snprintf(num, sizeof(num), "%u", ifp->if_dunit); oid = SYSCTL_ADD_NODE(ctx, &SYSCTL_NODE_CHILDREN(_net, wlan), - OID_AUTO, num, CTLFLAG_RD, NULL, ""); + OID_AUTO, num, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, ""); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "%parent", CTLTYPE_STRING | CTLFLAG_RD, vap->iv_ic, 0, - ieee80211_sysctl_parent, "A", "parent device"); + "%parent", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, + vap->iv_ic, 0, ieee80211_sysctl_parent, "A", "parent device"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "driver_caps", CTLFLAG_RW, &vap->iv_caps, 0, "driver capabilities"); @@ -245,21 +246,21 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap) "consecutive beacon misses before scanning"); /* XXX inherit from tunables */ SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - 
"inact_run", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_run, 0, - ieee80211_sysctl_inact, "I", - "station inactivity timeout (sec)"); + "inact_run", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &vap->iv_inact_run, 0, ieee80211_sysctl_inact, "I", + "station inactivity timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "inact_probe", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_probe, 0, - ieee80211_sysctl_inact, "I", - "station inactivity probe timeout (sec)"); + "inact_probe", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &vap->iv_inact_probe, 0, ieee80211_sysctl_inact, "I", + "station inactivity probe timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "inact_auth", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_auth, 0, - ieee80211_sysctl_inact, "I", - "station authentication timeout (sec)"); + "inact_auth", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &vap->iv_inact_auth, 0, ieee80211_sysctl_inact, "I", + "station authentication timeout (sec)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "inact_init", CTLTYPE_INT | CTLFLAG_RW, &vap->iv_inact_init, 0, - ieee80211_sysctl_inact, "I", - "station initial state timeout (sec)"); + "inact_init", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &vap->iv_inact_init, 0, ieee80211_sysctl_inact, "I", + "station initial state timeout (sec)"); if (vap->iv_htcaps & IEEE80211_HTC_HT) { SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "ampdu_mintraffic_bk", CTLFLAG_RW, @@ -280,14 +281,14 @@ ieee80211_sysctl_vattach(struct ieee80211vap *vap) } SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "force_restart", CTLTYPE_INT | CTLFLAG_RW, vap, 0, - ieee80211_sysctl_vap_restart, "I", - "force a VAP restart"); + "force_restart", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + vap, 0, ieee80211_sysctl_vap_restart, "I", "force a VAP restart"); if (vap->iv_caps & IEEE80211_C_DFS) { SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "radar", CTLTYPE_INT | CTLFLAG_RW, vap->iv_ic, 0, - ieee80211_sysctl_radar, "I", "simulate radar event"); + "radar", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + vap->iv_ic, 0, ieee80211_sysctl_radar, "I", + "simulate radar event"); } vap->iv_sysctl = ctx; vap->iv_oid = oid; diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index d0c6e7a4f415..8055adc5635c 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -139,22 +139,25 @@ const struct ieee80211_mcs_rates ieee80211_htrates[IEEE80211_HTRATE_MAXSIZE] = { }; static int ieee80211_ampdu_age = -1; /* threshold for ampdu reorder q (ms) */ -SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age, CTLTYPE_INT | CTLFLAG_RW, - &ieee80211_ampdu_age, 0, ieee80211_sysctl_msecs_ticks, "I", - "AMPDU max reorder age (ms)"); +SYSCTL_PROC(_net_wlan, OID_AUTO, ampdu_age, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &ieee80211_ampdu_age, 0, ieee80211_sysctl_msecs_ticks, "I", + "AMPDU max reorder age (ms)"); static int ieee80211_recv_bar_ena = 1; SYSCTL_INT(_net_wlan, OID_AUTO, recv_bar, CTLFLAG_RW, &ieee80211_recv_bar_ena, 0, "BAR frame processing (ena/dis)"); static int ieee80211_addba_timeout = -1;/* timeout for ADDBA response */ -SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout, CTLTYPE_INT | CTLFLAG_RW, - &ieee80211_addba_timeout, 0, ieee80211_sysctl_msecs_ticks, "I", - "ADDBA request timeout (ms)"); +SYSCTL_PROC(_net_wlan, OID_AUTO, addba_timeout, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &ieee80211_addba_timeout, 0, ieee80211_sysctl_msecs_ticks, "I", + "ADDBA request timeout (ms)"); static 
int ieee80211_addba_backoff = -1;/* backoff after max ADDBA requests */ -SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff, CTLTYPE_INT | CTLFLAG_RW, - &ieee80211_addba_backoff, 0, ieee80211_sysctl_msecs_ticks, "I", - "ADDBA request backoff (ms)"); +SYSCTL_PROC(_net_wlan, OID_AUTO, addba_backoff, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &ieee80211_addba_backoff, 0, ieee80211_sysctl_msecs_ticks, "I", + "ADDBA request backoff (ms)"); static int ieee80211_addba_maxtries = 3;/* max ADDBA requests before backoff */ SYSCTL_INT(_net_wlan, OID_AUTO, addba_maxtries, CTLFLAG_RW, &ieee80211_addba_maxtries, 0, "max ADDBA requests sent before backoff"); diff --git a/sys/net80211/ieee80211_hwmp.c b/sys/net80211/ieee80211_hwmp.c index 38d85d622028..9f1a9fef6818 100644 --- a/sys/net80211/ieee80211_hwmp.c +++ b/sys/net80211/ieee80211_hwmp.c @@ -154,39 +154,46 @@ struct ieee80211_hwmp_state { uint8_t hs_maxhops; /* max hop count */ }; -static SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD, 0, +static SYSCTL_NODE(_net_wlan, OID_AUTO, hwmp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "IEEE 802.11s HWMP parameters"); static int ieee80211_hwmp_targetonly = 0; SYSCTL_INT(_net_wlan_hwmp, OID_AUTO, targetonly, CTLFLAG_RW, &ieee80211_hwmp_targetonly, 0, "Set TO bit on generated PREQs"); static int ieee80211_hwmp_pathtimeout = -1; -SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, pathlifetime, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, pathlifetime, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &ieee80211_hwmp_pathtimeout, 0, ieee80211_sysctl_msecs_ticks, "I", "path entry lifetime (ms)"); static int ieee80211_hwmp_maxpreq_retries = -1; -SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, maxpreq_retries, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, maxpreq_retries, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &ieee80211_hwmp_maxpreq_retries, 0, ieee80211_sysctl_msecs_ticks, "I", "maximum number of preq retries"); static int ieee80211_hwmp_net_diameter_traversaltime = -1; SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, net_diameter_traversal_time, - CTLTYPE_INT | CTLFLAG_RW, &ieee80211_hwmp_net_diameter_traversaltime, 0, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + &ieee80211_hwmp_net_diameter_traversaltime, 0, ieee80211_sysctl_msecs_ticks, "I", "estimate traversal time across the MBSS (ms)"); static int ieee80211_hwmp_roottimeout = -1; -SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, roottimeout, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, roottimeout, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &ieee80211_hwmp_roottimeout, 0, ieee80211_sysctl_msecs_ticks, "I", "root PREQ timeout (ms)"); static int ieee80211_hwmp_rootint = -1; -SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootint, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootint, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &ieee80211_hwmp_rootint, 0, ieee80211_sysctl_msecs_ticks, "I", "root interval (ms)"); static int ieee80211_hwmp_rannint = -1; -SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rannint, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rannint, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &ieee80211_hwmp_rannint, 0, ieee80211_sysctl_msecs_ticks, "I", "root announcement interval (ms)"); static struct timeval ieee80211_hwmp_rootconfint = { 0, 0 }; static int ieee80211_hwmp_rootconfint_internal = -1; -SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootconfint, CTLTYPE_INT | CTLFLAG_RD, +SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, rootconfint, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &ieee80211_hwmp_rootconfint_internal, 0, ieee80211_sysctl_msecs_ticks, "I", "root confirmation interval (ms) (read-only)");
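The sweep through these net80211 files gives every sysctl an explicit locking disposition: CTLFLAG_MPSAFE where the handler needs no Giant protection, CTLFLAG_NEEDGIANT where a legacy handler still relies on it. Note that the churn here only touches nodes and CTLTYPE_PROC handlers; the fixed-type leaves (SYSCTL_INT and friends) go through the built-in handlers, which are already safe. A minimal sketch of declaring a new MPSAFE handler in the same style (the oid name, variable, and description are illustrative, not part of this commit):

    /* Illustrative MPSAFE sysctl: copy out, validate, copy in. */
    static int example_value;

    static int
    example_sysctl(SYSCTL_HANDLER_ARGS)
    {
            int error, v;

            v = example_value;
            error = sysctl_handle_int(oidp, &v, 0, req);
            if (error != 0 || req->newptr == NULL)
                    return (error);
            example_value = v;      /* no Giant-protected state touched */
            return (0);
    }
    SYSCTL_PROC(_net_wlan, OID_AUTO, example,
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
        example_sysctl, "I", "illustrative MPSAFE handler");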
@@ -205,9 +212,10 @@ static struct ieee80211_mesh_proto_path mesh_proto_hwmp = { .mpp_newstate = hwmp_newstate, .mpp_privlen = sizeof(struct ieee80211_hwmp_route), }; -SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact, CTLTYPE_INT | CTLFLAG_RW, - &mesh_proto_hwmp.mpp_inact, 0, ieee80211_sysctl_msecs_ticks, "I", - "mesh route inactivity timeout (ms)"); +SYSCTL_PROC(_net_wlan_hwmp, OID_AUTO, inact, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &mesh_proto_hwmp.mpp_inact, 0, ieee80211_sysctl_msecs_ticks, "I", + "mesh route inactivity timeout (ms)"); static void diff --git a/sys/net80211/ieee80211_mesh.c b/sys/net80211/ieee80211_mesh.c index 9816473b6ece..4ff3c9ba66d8 100644 --- a/sys/net80211/ieee80211_mesh.c +++ b/sys/net80211/ieee80211_mesh.c @@ -106,27 +106,32 @@ uint32_t mesh_airtime_calc(struct ieee80211_node *); /* * Timeout values come from the specification and are in milliseconds. */ -static SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD, 0, +static SYSCTL_NODE(_net_wlan, OID_AUTO, mesh, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "IEEE 802.11s parameters"); static int ieee80211_mesh_gateint = -1; -SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, gateint, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, gateint, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ieee80211_mesh_gateint, 0, ieee80211_sysctl_msecs_ticks, "I", "mesh gate interval (ms)"); static int ieee80211_mesh_retrytimeout = -1; -SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, retrytimeout, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, retrytimeout, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ieee80211_mesh_retrytimeout, 0, ieee80211_sysctl_msecs_ticks, "I", "Retry timeout (msec)"); static int ieee80211_mesh_holdingtimeout = -1; -SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, holdingtimeout, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, holdingtimeout, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ieee80211_mesh_holdingtimeout, 0, ieee80211_sysctl_msecs_ticks, "I", "Holding state timeout (msec)"); static int ieee80211_mesh_confirmtimeout = -1; -SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, confirmtimeout, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, confirmtimeout, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ieee80211_mesh_confirmtimeout, 0, ieee80211_sysctl_msecs_ticks, "I", "Confirm state timeout (msec)"); static int ieee80211_mesh_backofftimeout = -1; -SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, backofftimeout, CTLTYPE_INT | CTLFLAG_RW, +SYSCTL_PROC(_net_wlan_mesh, OID_AUTO, backofftimeout, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &ieee80211_mesh_backofftimeout, 0, ieee80211_sysctl_msecs_ticks, "I", "Backoff timeout (msec). 
This is to throttle peering retries when " "not receiving an answer or when rejected by a neighbor"); diff --git a/sys/net80211/ieee80211_rssadapt.c b/sys/net80211/ieee80211_rssadapt.c index 7d9158414a4c..4ca0b29fa569 100644 --- a/sys/net80211/ieee80211_rssadapt.c +++ b/sys/net80211/ieee80211_rssadapt.c @@ -381,6 +381,7 @@ rssadapt_sysctlattach(struct ieee80211vap *vap, { SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, - "rssadapt_rate_interval", CTLTYPE_INT | CTLFLAG_RW, vap, - 0, rssadapt_sysctl_interval, "I", "rssadapt operation interval (ms)"); + "rssadapt_rate_interval", + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, vap, 0, + rssadapt_sysctl_interval, "I", "rssadapt operation interval (ms)"); } diff --git a/sys/net80211/ieee80211_superg.c b/sys/net80211/ieee80211_superg.c index c9c6d96e6b87..dacd466c3de7 100644 --- a/sys/net80211/ieee80211_superg.c +++ b/sys/net80211/ieee80211_superg.c @@ -92,9 +92,10 @@ static int ieee80211_ffppsmin = 2; /* pps threshold for ff aggregation */ SYSCTL_INT(_net_wlan, OID_AUTO, ffppsmin, CTLFLAG_RW, &ieee80211_ffppsmin, 0, "min packet rate before fast-frame staging"); static int ieee80211_ffagemax = -1; /* max time frames held on stage q */ -SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax, CTLTYPE_INT | CTLFLAG_RW, - &ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I", - "max hold time for fast-frame staging (ms)"); +SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, + &ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I", + "max hold time for fast-frame staging (ms)"); static void ff_age_all(void *arg, int npending) diff --git a/sys/netgraph/ng_socket.c b/sys/netgraph/ng_socket.c index 6339ce0e2ccf..e772ace9c04a 100644 --- a/sys/netgraph/ng_socket.c +++ b/sys/netgraph/ng_socket.c @@ -219,7 +219,6 @@ static int ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { - struct epoch_tracker et; struct ngpcb *const pcbp = sotongpcb(so); struct ngsock *const priv = NG_NODE_PRIVATE(pcbp->sockdata->node); struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr; @@ -338,9 +337,7 @@ ngc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, item->apply = &apply; priv->error = -1; - NET_EPOCH_ENTER(et); error = ng_snd_item(item, 0); - NET_EPOCH_EXIT(et); mtx_lock(&priv->mtx); if (priv->error == -1) @@ -413,6 +410,7 @@ ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct sockaddr_ng *const sap = (struct sockaddr_ng *) addr; int len, error; hook_p hook = NULL; + item_p item; char hookname[NG_HOOKSIZ]; if ((pcbp == NULL) || (control != NULL)) { @@ -465,8 +463,10 @@ ngd_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, } /* Send data. 
*/ + item = ng_package_data(m, NG_WAITOK); + m = NULL; NET_EPOCH_ENTER(et); - NG_SEND_DATA_FLAGS(error, hook, m, NG_WAITOK); + NG_FWD_ITEM_HOOK(error, item, hook); NET_EPOCH_EXIT(et); release: diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 1b257fb2a372..9d3c51a066b8 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -218,19 +218,22 @@ static int carp_allow_sysctl(SYSCTL_HANDLER_ARGS); static int carp_dscp_sysctl(SYSCTL_HANDLER_ARGS); static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); -SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); +SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "CARP"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, allow, - CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_allow_sysctl, "I", + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + 0, 0, carp_allow_sysctl, "I", "Accept incoming CARP packets"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, dscp, - CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_dscp_sysctl, "I", + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + 0, 0, carp_dscp_sysctl, "I", "DSCP value for carp packets"); SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_log), 0, "CARP log level"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, - CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, carp_demote_adj_sysctl, "I", "Adjust demotion factor (skew of advskew)"); SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 5ab0bbb925b6..e6c85dbb884a 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -273,7 +273,8 @@ static void pfsync_uninit(void); static unsigned long pfsync_buckets; -SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); +SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "PFSYNC"); SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index 19eec6690c08..975f9a2f1951 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -369,7 +369,8 @@ VNET_DEFINE(struct pf_keyhash *, pf_keyhash); VNET_DEFINE(struct pf_idhash *, pf_idhash); VNET_DEFINE(struct pf_srchash *, pf_srchash); -SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)"); +SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "pf(4)"); u_long pf_hashmask; u_long pf_srchashmask; diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 82e5009ca888..5abb19102a2b 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -705,11 +705,10 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, pte_t ** pdir, unsigned int pdir_idx, req = VM_ALLOC_NOOBJ | VM_ALLOC_WIRED; while ((m = vm_page_alloc(NULL, pdir_idx, req)) == NULL) { + if (nosleep) + return (NULL); PMAP_UNLOCK(pmap); rw_wunlock(&pvh_global_lock); - if (nosleep) { - return (NULL); - } vm_wait(NULL); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); @@ -905,8 +904,6 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) pidx = (PTBL_PAGES * pdir_idx) + i; while ((m = vm_page_alloc(NULL, pidx, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { - PMAP_UNLOCK(pmap); - 
rw_wunlock(&pvh_global_lock); if (nosleep) { ptbl_free_pmap_ptbl(pmap, ptbl); for (j = 0; j < i; j++) @@ -914,6 +911,8 @@ ptbl_alloc(mmu_t mmu, pmap_t pmap, unsigned int pdir_idx, boolean_t nosleep) vm_wire_sub(i); return (NULL); } + PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); vm_wait(NULL); rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); @@ -2481,8 +2480,8 @@ mmu_booke_enter_object(mmu_t mmu, pmap_t pmap, vm_offset_t start, PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); m = TAILQ_NEXT(m, listq); } - rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); } static void @@ -2495,8 +2494,8 @@ mmu_booke_enter_quick(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, mmu_booke_enter_locked(mmu, pmap, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), PMAP_ENTER_NOSLEEP | PMAP_ENTER_QUICK_LOCKED, 0); - rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); + rw_wunlock(&pvh_global_lock); } /* diff --git a/sys/security/audit/audit.h b/sys/security/audit/audit.h index a7600d5c1f90..7e13806a0171 100644 --- a/sys/security/audit/audit.h +++ b/sys/security/audit/audit.h @@ -120,6 +120,10 @@ void audit_arg_upath1(struct thread *td, int dirfd, char *upath); void audit_arg_upath1_canon(char *upath); void audit_arg_upath2(struct thread *td, int dirfd, char *upath); void audit_arg_upath2_canon(char *upath); +void audit_arg_upath1_vp(struct thread *td, struct vnode *rdir, + struct vnode *cdir, char *upath); +void audit_arg_upath2_vp(struct thread *td, struct vnode *rdir, + struct vnode *cdir, char *upath); void audit_arg_vnode1(struct vnode *vp); void audit_arg_vnode2(struct vnode *vp); void audit_arg_text(const char *text); @@ -362,6 +366,16 @@ void audit_thread_free(struct thread *td); audit_arg_upath2_canon((upath)); \ } while (0) +#define AUDIT_ARG_UPATH1_VP(td, rdir, cdir, upath) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_upath1_vp((td), (rdir), (cdir), (upath)); \ +} while (0) + +#define AUDIT_ARG_UPATH2_VP(td, rdir, cdir, upath) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_upath2_vp((td), (rdir), (cdir), (upath)); \ +} while (0) + #define AUDIT_ARG_VALUE(value) do { \ if (AUDITING_TD(curthread)) \ audit_arg_value((value)); \ @@ -448,6 +462,8 @@ void audit_thread_free(struct thread *td); #define AUDIT_ARG_UPATH1_CANON(upath) #define AUDIT_ARG_UPATH2(td, dirfd, upath) #define AUDIT_ARG_UPATH2_CANON(upath) +#define AUDIT_ARG_UPATH1_VP(td, rdir, cdir, upath) +#define AUDIT_ARG_UPATH2_VP(td, rdir, cdir, upath) #define AUDIT_ARG_VALUE(value) #define AUDIT_ARG_VNODE1(vp) #define AUDIT_ARG_VNODE2(vp) diff --git a/sys/security/audit/audit_arg.c b/sys/security/audit/audit_arg.c index f50d77281095..fc5318750e3e 100644 --- a/sys/security/audit/audit_arg.c +++ b/sys/security/audit/audit_arg.c @@ -767,6 +767,44 @@ audit_arg_upath2(struct thread *td, int dirfd, char *upath) ARG_SET_VALID(ar, ARG_UPATH2); } +static void +audit_arg_upath_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir, + char *upath, char **pathp) +{ + + if (*pathp == NULL) + *pathp = malloc(MAXPATHLEN, M_AUDITPATH, M_WAITOK); + audit_canon_path_vp(td, rdir, cdir, upath, *pathp); +} + +void +audit_arg_upath1_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir, + char *upath) +{ + struct kaudit_record *ar; + + ar = currecord(); + if (ar == NULL) + return; + + audit_arg_upath_vp(td, rdir, cdir, upath, &ar->k_ar.ar_arg_upath1); + ARG_SET_VALID(ar, ARG_UPATH1); +} + +void +audit_arg_upath2_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir, + char *upath) +{ + struct kaudit_record 
*ar; + + ar = currecord(); + if (ar == NULL) + return; + + audit_arg_upath_vp(td, rdir, cdir, upath, &ar->k_ar.ar_arg_upath2); + ARG_SET_VALID(ar, ARG_UPATH2); +} + /* * Variants on path auditing that do not canonicalise the path passed in; * these are for use with filesystem-like subsystems that employ string names, diff --git a/sys/security/audit/audit_bsm_klib.c b/sys/security/audit/audit_bsm_klib.c index 3ce57d0af5c5..b10722a4e0e4 100644 --- a/sys/security/audit/audit_bsm_klib.c +++ b/sys/security/audit/audit_bsm_klib.c @@ -421,57 +421,23 @@ auditon_command_event(int cmd) * leave the filename starting with '/' in the audit log in this case. */ void -audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath) +audit_canon_path_vp(struct thread *td, struct vnode *rdir, struct vnode *cdir, + char *path, char *cpath) { - struct vnode *cvnp, *rvnp; + struct vnode *vp; char *rbuf, *fbuf, *copy; - struct filedesc *fdp; struct sbuf sbf; - cap_rights_t rights; - int error, needslash; + int error; WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d", __func__, __FILE__, __LINE__); copy = path; - rvnp = cvnp = NULL; - fdp = td->td_proc->p_fd; - FILEDESC_SLOCK(fdp); - /* - * Make sure that we handle the chroot(2) case. If there is an - * alternate root directory, prepend it to the audited pathname. - */ - if (fdp->fd_rdir != NULL && fdp->fd_rdir != rootvnode) { - rvnp = fdp->fd_rdir; - vhold(rvnp); - } - /* - * If the supplied path is relative, make sure we capture the current - * working directory so we can prepend it to the supplied relative - * path. - */ - if (*path != '/') { - if (dirfd == AT_FDCWD) { - cvnp = fdp->fd_cdir; - vhold(cvnp); - } else { - /* XXX: fgetvp() that vhold()s vnode instead of vref()ing it would be better */ - error = fgetvp(td, dirfd, cap_rights_init(&rights), &cvnp); - if (error) { - FILEDESC_SUNLOCK(fdp); - cpath[0] = '\0'; - if (rvnp != NULL) - vdrop(rvnp); - return; - } - vhold(cvnp); - vrele(cvnp); - } - needslash = (fdp->fd_rdir != cvnp); - } else { - needslash = 1; - } - FILEDESC_SUNLOCK(fdp); + if (*path == '/') + vp = rdir; + else + vp = cdir; + MPASS(vp != NULL); /* * NB: We require that the supplied array be at least MAXPATHLEN bytes * long. If this is not the case, then we can run into serious trouble. @@ -479,6 +445,8 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath) (void) sbuf_new(&sbf, cpath, MAXPATHLEN, SBUF_FIXEDLEN); /* * Strip leading forward slashes. + * + * Note this does nothing to fully canonicalize the path. */ while (*copy == '/') copy++; @@ -490,35 +458,25 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath) * on Darwin. As a result, this may need some additional attention * in the future. */ - if (rvnp != NULL) { - error = vn_fullpath_global(td, rvnp, &rbuf, &fbuf); - vdrop(rvnp); - if (error) { - cpath[0] = '\0'; - if (cvnp != NULL) - vdrop(cvnp); - return; - } - (void) sbuf_cat(&sbf, rbuf); - free(fbuf, M_TEMP); - } - if (cvnp != NULL) { - error = vn_fullpath(td, cvnp, &rbuf, &fbuf); - vdrop(cvnp); - if (error) { - cpath[0] = '\0'; - return; - } - (void) sbuf_cat(&sbf, rbuf); - free(fbuf, M_TEMP); + error = vn_fullpath_global(td, vp, &rbuf, &fbuf); + if (error) { + cpath[0] = '\0'; + return; } - if (needslash) + (void) sbuf_cat(&sbf, rbuf); + /* + * We are going to concatenate the resolved path with the passed path + * with all slashes removed and we want them glued with a single slash. + * However, if the directory is /, the slash is already there. 
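A userspace analogue of the slash-joining rule described above, with the leading-slash stripping from earlier in the function folded in (the function and names here are illustrative, not the kernel sbuf code):

    #include <stdio.h>
    #include <string.h>

    /*
     * Join a resolved directory and a relative remainder with exactly
     * one '/'.  When the directory is "/" itself, vn_fullpath-style
     * output already ends in the slash, so none is added.
     */
    static void
    join_path(char *out, size_t outlen, const char *rdir, const char *rel)
    {
            while (*rel == '/')             /* strip leading slashes */
                    rel++;
            if (rdir[1] != '\0')            /* rdir is more than "/" */
                    snprintf(out, outlen, "%s/%s", rdir, rel);
            else
                    snprintf(out, outlen, "/%s", rel);
    }

    int
    main(void)
    {
            char buf[1024];

            join_path(buf, sizeof(buf), "/", "etc/passwd");
            printf("%s\n", buf);            /* /etc/passwd */
            join_path(buf, sizeof(buf), "/var/jail", "//etc/passwd");
            printf("%s\n", buf);            /* /var/jail/etc/passwd */
            return (0);
    }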
+ */ + if (rbuf[1] != '\0') (void) sbuf_putc(&sbf, '/'); + free(fbuf, M_TEMP); /* * Now that we have processed any alternate root and relative path * names, add the supplied pathname. */ - (void) sbuf_cat(&sbf, copy); + (void) sbuf_cat(&sbf, copy); /* * One or more of the previous sbuf operations could have resulted in * the supplied buffer being overflowed. Check to see if this is the @@ -530,3 +488,43 @@ audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath) } sbuf_finish(&sbf); } + +void +audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath) +{ + struct vnode *cdir, *rdir; + struct filedesc *fdp; + cap_rights_t rights; + int error; + + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "%s: at %s:%d", + __func__, __FILE__, __LINE__); + + rdir = cdir = NULL; + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (*path == '/') { + rdir = fdp->fd_rdir; + vrefact(rdir); + } else { + if (dirfd == AT_FDCWD) { + cdir = fdp->fd_cdir; + vrefact(cdir); + } else { + error = fgetvp(td, dirfd, cap_rights_init(&rights), &cdir); + if (error != 0) { + FILEDESC_SUNLOCK(fdp); + cpath[0] = '\0'; + return; + } + } + } + FILEDESC_SUNLOCK(fdp); + + audit_canon_path_vp(td, rdir, cdir, path, cpath); + + if (rdir != NULL) + vrele(rdir); + if (cdir != NULL) + vrele(cdir); +} diff --git a/sys/security/audit/audit_private.h b/sys/security/audit/audit_private.h index 4aa811bc1516..890473722552 100644 --- a/sys/security/audit/audit_private.h +++ b/sys/security/audit/audit_private.h @@ -472,6 +472,8 @@ au_event_t audit_semsys_to_event(int which); au_event_t audit_shmsys_to_event(int which); void audit_canon_path(struct thread *td, int dirfd, char *path, char *cpath); +void audit_canon_path_vp(struct thread *td, struct vnode *rdir, + struct vnode *cdir, char *path, char *cpath); au_event_t auditon_command_event(int cmd); /* diff --git a/sys/sys/_smr.h b/sys/sys/_smr.h index 71658aa65e01..834d912783f5 100644 --- a/sys/sys/_smr.h +++ b/sys/sys/_smr.h @@ -32,6 +32,7 @@ #define _SYS__SMR_H_ typedef uint32_t smr_seq_t; +typedef int32_t smr_delta_t; typedef struct smr *smr_t; #endif /* __SYS_SMR_H_ */ diff --git a/sys/sys/param.h b/sys/sys/param.h index e10be84bd0f8..a690ad9ffdb3 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1300080 /* Master, propagated to newvers */ +#define __FreeBSD_version 1300081 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/smr.h b/sys/sys/smr.h index 378088f37cad..9e9cc3dec3a5 100644 --- a/sys/sys/smr.h +++ b/sys/sys/smr.h @@ -45,11 +45,13 @@ * Modular arithmetic for comparing sequence numbers that have * potentially wrapped. Copied from tcp_seq.h. */ -#define SMR_SEQ_LT(a, b) ((int32_t)((a)-(b)) < 0) -#define SMR_SEQ_LEQ(a, b) ((int32_t)((a)-(b)) <= 0) -#define SMR_SEQ_GT(a, b) ((int32_t)((a)-(b)) > 0) -#define SMR_SEQ_GEQ(a, b) ((int32_t)((a)-(b)) >= 0) -#define SMR_SEQ_DELTA(a, b) ((int32_t)((a)-(b))) +#define SMR_SEQ_LT(a, b) ((smr_delta_t)((a)-(b)) < 0) +#define SMR_SEQ_LEQ(a, b) ((smr_delta_t)((a)-(b)) <= 0) +#define SMR_SEQ_GT(a, b) ((smr_delta_t)((a)-(b)) > 0) +#define SMR_SEQ_GEQ(a, b) ((smr_delta_t)((a)-(b)) >= 0) +#define SMR_SEQ_DELTA(a, b) ((smr_delta_t)((a)-(b))) +#define SMR_SEQ_MIN(a, b) (SMR_SEQ_LT((a), (b)) ? (a) : (b)) +#define SMR_SEQ_MAX(a, b) (SMR_SEQ_GT((a), (b)) ? 
(a) : (b)) #define SMR_SEQ_INVALID 0 @@ -66,8 +68,13 @@ struct smr { smr_seq_t c_seq; /* Current observed sequence. */ smr_shared_t c_shared; /* Shared SMR state. */ int c_deferred; /* Deferred advance counter. */ + int c_limit; /* Deferred advance limit. */ + int c_flags; /* SMR Configuration */ }; +#define SMR_LAZY 0x0001 /* Higher latency write, fast read. */ +#define SMR_DEFERRED 0x0002 /* Aggregate updates to wr_seq. */ + #define SMR_ENTERED(smr) \ (curthread->td_critnest != 0 && zpcpu_get((smr))->c_seq != SMR_SEQ_INVALID) @@ -94,7 +101,7 @@ struct smr { * All acceses include a parameter for an assert to verify the required * synchronization. For example, a writer might use: * - * smr_serilized_store(pointer, value, mtx_owned(&writelock)); + * smr_serialized_store(pointer, value, mtx_owned(&writelock)); * * These are only enabled in INVARIANTS kernels. */ @@ -127,6 +134,9 @@ typedef struct { \ * Store 'v' to an SMR protected pointer while serialized by an * external mechanism. 'ex' should contain an assert that the * external mechanism is held. i.e. mtx_owned() + * + * Writers that are serialized with mutual exclusion or on a single + * thread should use smr_serialized_store() rather than swap. */ #define smr_serialized_store(p, v, ex) do { \ SMR_ASSERT(ex, "smr_serialized_store"); \ @@ -138,6 +148,8 @@ typedef struct { \ * swap 'v' with an SMR protected pointer and return the old value * while serialized by an external mechanism. 'ex' should contain * an assert that the external mechanism is provided. i.e. mtx_owned() + * + * Swap permits multiple writers to update a pointer concurrently. */ #define smr_serialized_swap(p, v, ex) ({ \ SMR_ASSERT(ex, "smr_serialized_swap"); \ @@ -170,7 +182,8 @@ typedef struct { \ } while (0) /* - * Return the current write sequence number. + * Return the current write sequence number. This is not the same as the + * current goal which may be in the future. */ static inline smr_seq_t smr_shared_current(smr_shared_t s) @@ -195,6 +208,8 @@ smr_enter(smr_t smr) critical_enter(); smr = zpcpu_get(smr); + KASSERT((smr->c_flags & SMR_LAZY) == 0, + ("smr_enter(%s) lazy smr.", smr->c_shared->s_name)); KASSERT(smr->c_seq == 0, ("smr_enter(%s) does not support recursion.", smr->c_shared->s_name)); @@ -228,6 +243,8 @@ smr_exit(smr_t smr) smr = zpcpu_get(smr); CRITICAL_ASSERT(curthread); + KASSERT((smr->c_flags & SMR_LAZY) == 0, + ("smr_exit(%s) lazy smr.", smr->c_shared->s_name)); KASSERT(smr->c_seq != SMR_SEQ_INVALID, ("smr_exit(%s) not in a smr section.", smr->c_shared->s_name)); @@ -243,17 +260,61 @@ smr_exit(smr_t smr) } /* - * Advances the write sequence number. Returns the sequence number - * required to ensure that all modifications are visible to readers. + * Enter a lazy smr section. This is used for read-mostly state that + * can tolerate a high free latency. */ -smr_seq_t smr_advance(smr_t smr); +static inline void +smr_lazy_enter(smr_t smr) +{ + + critical_enter(); + smr = zpcpu_get(smr); + KASSERT((smr->c_flags & SMR_LAZY) != 0, + ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name)); + KASSERT(smr->c_seq == 0, + ("smr_lazy_enter(%s) does not support recursion.", + smr->c_shared->s_name)); + + /* + * This needs no serialization. If an interrupt occurs before we + * assign sr_seq to c_seq any speculative loads will be discarded. + * If we assign a stale wr_seq value due to interrupt we use the + * same algorithm that renders smr_enter() safe. 
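These smr_delta_t comparisons (like the tcp_seq macros they were copied from) stay correct when the 32-bit sequence space wraps, because the subtraction is reduced mod 2^32 before the signed cast. A standalone illustration, assuming the usual two's-complement conversion:

    #include <assert.h>
    #include <stdint.h>

    typedef uint32_t smr_seq_t;
    typedef int32_t smr_delta_t;

    #define SMR_SEQ_LT(a, b)        ((smr_delta_t)((a)-(b)) < 0)

    int
    main(void)
    {
            smr_seq_t older = UINT32_MAX - 1;       /* about to wrap */
            smr_seq_t newer = older + 3;            /* wraps around to 1 */

            assert(newer < older);                  /* plain '<' is misled */
            assert(SMR_SEQ_LT(older, newer));       /* modular form is not */
            return (0);
    }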
+ */ + smr->c_seq = smr_shared_current(smr->c_shared); +} /* - * Advances the write sequence number only after N calls. Returns - * the correct goal for a wr_seq that has not yet occurred. Used to - * minimize shared cacheline invalidations for frequent writers. + * Exit a lazy smr section. This is used for read-mostly state that + * can tolerate a high free latency. */ -smr_seq_t smr_advance_deferred(smr_t smr, int limit); +static inline void +smr_lazy_exit(smr_t smr) +{ + + smr = zpcpu_get(smr); + CRITICAL_ASSERT(curthread); + KASSERT((smr->c_flags & SMR_LAZY) != 0, + ("smr_lazy_exit(%s) non-lazy smr.", smr->c_shared->s_name)); + KASSERT(smr->c_seq != SMR_SEQ_INVALID, + ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name)); + + /* + * All loads/stores must be retired before the sequence becomes + * visible. The fence compiles away on amd64. Another + * alternative would be to omit the fence but store the exit + * time and wait 1 tick longer. + */ + atomic_thread_fence_rel(); + smr->c_seq = SMR_SEQ_INVALID; + critical_exit(); +} + +/* + * Advances the write sequence number. Returns the sequence number + * required to ensure that all modifications are visible to readers. + */ +smr_seq_t smr_advance(smr_t smr); /* * Returns true if a goal sequence has been reached. If @@ -262,7 +323,9 @@ smr_seq_t smr_advance_deferred(smr_t smr, int limit); bool smr_poll(smr_t smr, smr_seq_t goal, bool wait); /* Create a new SMR context. */ -smr_t smr_create(const char *name); +smr_t smr_create(const char *name, int limit, int flags); + +/* Destroy the context. */ void smr_destroy(smr_t smr); /* diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index a077fd6f7c8a..1b72d85496a8 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1140,7 +1140,6 @@ hash_free(struct uma_hash *hash) * Returns: * Nothing */ - static void bucket_drain(uma_zone_t zone, uma_bucket_t bucket) { @@ -1200,7 +1199,7 @@ cache_drain(uma_zone_t zone) */ seq = SMR_SEQ_INVALID; if ((zone->uz_flags & UMA_ZONE_SMR) != 0) - seq = smr_current(zone->uz_smr); + seq = smr_advance(zone->uz_smr); CPU_FOREACH(cpu) { cache = &zone->uz_cpu[cpu]; bucket = cache_bucket_unload_alloc(cache); @@ -1329,7 +1328,7 @@ bucket_cache_reclaim(uma_zone_t zone, bool drain) * the item count. Reclaim it individually here. */ zdom = ZDOM_GET(zone, i); - if ((zone->uz_flags & UMA_ZONE_SMR) == 0) { + if ((zone->uz_flags & UMA_ZONE_SMR) == 0 || drain) { ZONE_CROSS_LOCK(zone); bucket = zdom->uzd_cross; zdom->uzd_cross = NULL; @@ -2679,7 +2678,7 @@ out: /* Caller requests a private SMR context. */ if ((zone->uz_flags & UMA_ZONE_SMR) != 0) - zone->uz_smr = smr_create(zone->uz_name); + zone->uz_smr = smr_create(zone->uz_name, 0, 0); KASSERT((arg->flags & (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET)) != (UMA_ZONE_MAXBUCKET | UMA_ZONE_NOBUCKET), @@ -4137,22 +4136,21 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata) "uma_zfree: zone %s(%p) draining cross bucket %p", zone->uz_name, zone, bucket); - STAILQ_INIT(&fullbuckets); + /* + * It is possible for buckets to arrive here out of order so we fetch + * the current smr seq rather than accepting the bucket's. + */ + seq = SMR_SEQ_INVALID; + if ((zone->uz_flags & UMA_ZONE_SMR) != 0) + seq = smr_advance(zone->uz_smr); /* * To avoid having ndomain * ndomain buckets for sorting we have a * lock on the current crossfree bucket. A full matrix with * per-domain locking could be used if necessary. 
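The uma_core.c hunks here switch cache_drain() and zone_free_cross() from smr_current() to smr_advance(), so the recorded goal is guaranteed to be ahead of anything a still-running reader could have observed. The general writer-side retire pattern with the declarations above looks roughly like this (struct obj, obj_unlink(), and obj_free() are placeholders, not kernel API):

    struct obj;                             /* some SMR-protected object */
    extern void obj_unlink(struct obj *);   /* placeholder: unpublish */
    extern void obj_free(struct obj *);     /* placeholder: reclaim */

    static void
    obj_retire(smr_t smr, struct obj *o)
    {
            smr_seq_t goal;

            obj_unlink(o);                  /* no new reader can find 'o' */
            goal = smr_advance(smr);        /* bump the write sequence */
            smr_poll(smr, goal, true);      /* wait until readers pass goal */
            obj_free(o);                    /* now safe to reclaim */
    }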
*/ + STAILQ_INIT(&fullbuckets); ZONE_CROSS_LOCK(zone); - - /* - * It is possible for buckets to arrive here out of order so we fetch - * the current smr seq rather than accepting the bucket's. - */ - seq = SMR_SEQ_INVALID; - if ((zone->uz_flags & UMA_ZONE_SMR) != 0) - seq = smr_current(zone->uz_smr); while (bucket->ub_cnt > 0) { item = bucket->ub_bucket[bucket->ub_cnt - 1]; domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item)); diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index 8c554c6f65d5..c05020d7dba4 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -284,7 +284,7 @@ printcpuinfo(void) switch (cpu_id & 0xf00) { case 0x400: strcat(cpu_model, "i486 "); - /* Check the particular flavor of 486 */ + /* Check the particular flavor of 486 */ switch (cpu_id & 0xf0) { case 0x00: case 0x10: @@ -312,32 +312,32 @@ printcpuinfo(void) } break; case 0x500: - /* Check the particular flavor of 586 */ - strcat(cpu_model, "Pentium"); - switch (cpu_id & 0xf0) { + /* Check the particular flavor of 586 */ + strcat(cpu_model, "Pentium"); + switch (cpu_id & 0xf0) { case 0x00: - strcat(cpu_model, " A-step"); + strcat(cpu_model, " A-step"); break; case 0x10: - strcat(cpu_model, "/P5"); + strcat(cpu_model, "/P5"); break; case 0x20: - strcat(cpu_model, "/P54C"); + strcat(cpu_model, "/P54C"); break; case 0x30: - strcat(cpu_model, "/P24T"); + strcat(cpu_model, "/P24T"); break; case 0x40: - strcat(cpu_model, "/P55C"); + strcat(cpu_model, "/P55C"); break; case 0x70: - strcat(cpu_model, "/P54C"); + strcat(cpu_model, "/P54C"); break; case 0x80: - strcat(cpu_model, "/P55C (quarter-micron)"); + strcat(cpu_model, "/P55C (quarter-micron)"); break; default: - /* nothing */ + /* nothing */ break; } #if defined(I586_CPU) && !defined(NO_F00F_HACK) @@ -350,18 +350,18 @@ printcpuinfo(void) #endif break; case 0x600: - /* Check the particular flavor of 686 */ - switch (cpu_id & 0xf0) { + /* Check the particular flavor of 686 */ + switch (cpu_id & 0xf0) { case 0x00: - strcat(cpu_model, "Pentium Pro A-step"); + strcat(cpu_model, "Pentium Pro A-step"); break; case 0x10: - strcat(cpu_model, "Pentium Pro"); + strcat(cpu_model, "Pentium Pro"); break; case 0x30: case 0x50: case 0x60: - strcat(cpu_model, + strcat(cpu_model, "Pentium II/Pentium II Xeon/Celeron"); cpu = CPU_PII; break; @@ -369,12 +369,12 @@ printcpuinfo(void) case 0x80: case 0xa0: case 0xb0: - strcat(cpu_model, + strcat(cpu_model, "Pentium III/Pentium III Xeon/Celeron"); cpu = CPU_PIII; break; default: - strcat(cpu_model, "Unknown 80686"); + strcat(cpu_model, "Unknown 80686"); break; } break; @@ -1411,7 +1411,7 @@ identify_hypervisor_cpuid_base(void) if (regs[0] == 0 && regs[1] == 0x4b4d564b && regs[2] == 0x564b4d56 && regs[3] == 0x0000004d) regs[0] = leaf + 1; - + if (regs[0] >= leaf) { for (i = 0; i < nitems(vm_cpuids); i++) if (strncmp((const char *)®s[1], @@ -1471,7 +1471,7 @@ identify_hypervisor(void) if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) { vmware_hvcall(VMW_HVCMD_GETVERSION, regs); if (regs[1] == VMW_HVMAGIC) { - vm_guest = VM_GUEST_VMWARE; + vm_guest = VM_GUEST_VMWARE; freeenv(p); return; } @@ -2341,23 +2341,23 @@ print_svm_info(void) comma = 0; if (features & (1 << 0)) { printf("%sNP", comma ? "," : ""); - comma = 1; + comma = 1; } if (features & (1 << 3)) { printf("%sNRIP", comma ? "," : ""); - comma = 1; + comma = 1; } if (features & (1 << 5)) { printf("%sVClean", comma ? "," : ""); - comma = 1; + comma = 1; } if (features & (1 << 6)) { printf("%sAFlush", comma ? 
"," : ""); - comma = 1; + comma = 1; } if (features & (1 << 7)) { printf("%sDAssist", comma ? "," : ""); - comma = 1; + comma = 1; } printf("%sNAsids=%d", comma ? "," : "", regs[1]); return; @@ -2375,7 +2375,7 @@ print_svm_info(void) "\010DecodeAssist" /* Decode assist */ "\011<b8>" "\012<b9>" - "\013PauseFilter" /* PAUSE intercept filter */ + "\013PauseFilter" /* PAUSE intercept filter */ "\014EncryptedMcodePatch" "\015PauseFilterThreshold" /* PAUSE filter threshold */ "\016AVIC" /* virtual interrupt controller */ @@ -2385,7 +2385,7 @@ print_svm_info(void) "\022GMET" /* Guest Mode Execute Trap */ "\023<b18>" "\024<b19>" - "\025<b20>" + "\025GuesSpecCtl" /* Guest Spec_ctl */ "\026<b21>" "\027<b22>" "\030<b23>" @@ -2397,7 +2397,7 @@ print_svm_info(void) "\036<b29>" "\037<b30>" "\040<b31>" - ); + ); printf("\nRevision=%d, ASIDs=%d", regs[0] & 0xff, regs[1]); } diff --git a/tools/bsdbox/Makefile.base b/tools/bsdbox/Makefile.base index 027494905519..aac44480e2ca 100644 --- a/tools/bsdbox/Makefile.base +++ b/tools/bsdbox/Makefile.base @@ -20,6 +20,7 @@ CRUNCH_LIBS+= -ldevstat -lncursesw -lncurses -lmemstat -lkvm -lelf CRUNCH_PROGS_usr.bin+= cpio # XXX SSL ? CRUNCH_LIBS+= -larchive -lbz2 -lz -llzma -lbsdxml -lssl -lcrypto +CRUNCH_LIBS+= -lprivatezstd -lthr # Clear requires tput, and it's a shell script so it won't be crunched CRUNCH_PROGS_usr.bin+= tput diff --git a/usr.bin/dtc/dtc.cc b/usr.bin/dtc/dtc.cc index 0cda698011ef..14d3685ba9d4 100644 --- a/usr.bin/dtc/dtc.cc +++ b/usr.bin/dtc/dtc.cc @@ -304,7 +304,10 @@ main(int argc, char **argv) } break; default: - fprintf(stderr, "Unknown option %c\n", ch); + /* + * Since opterr is non-zero, getopt will have + * already printed an error message. + */ return EXIT_FAILURE; } } diff --git a/usr.sbin/bhyve/iov.c b/usr.sbin/bhyve/iov.c index 54ea22aa9498..af36cb056229 100644 --- a/usr.sbin/bhyve/iov.c +++ b/usr.sbin/bhyve/iov.c @@ -119,24 +119,25 @@ iov_to_buf(const struct iovec *iov, int niov, void **buf) } ssize_t -buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov, +buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, int niov, size_t seek) { struct iovec *diov; - int ndiov, i; size_t off = 0, len; + int i; if (seek > 0) { + int ndiov; + diov = malloc(sizeof(struct iovec) * niov); seek_iov(iov, niov, diov, &ndiov, seek); - } else { - diov = iov; - ndiov = niov; + iov = diov; + niov = ndiov; } - for (i = 0; i < ndiov && off < buflen; i++) { - len = MIN(diov[i].iov_len, buflen - off); - memcpy(diov[i].iov_base, buf + off, len); + for (i = 0; i < niov && off < buflen; i++) { + len = MIN(iov[i].iov_len, buflen - off); + memcpy(iov[i].iov_base, buf + off, len); off += len; } diff --git a/usr.sbin/bhyve/iov.h b/usr.sbin/bhyve/iov.h index e3b5916edb10..f46b04b71eb5 100644 --- a/usr.sbin/bhyve/iov.h +++ b/usr.sbin/bhyve/iov.h @@ -38,7 +38,7 @@ void seek_iov(const struct iovec *iov1, int niov1, struct iovec *iov2, void truncate_iov(struct iovec *iov, int *niov, size_t length); size_t count_iov(const struct iovec *iov, int niov); ssize_t iov_to_buf(const struct iovec *iov, int niov, void **buf); -ssize_t buf_to_iov(const void *buf, size_t buflen, struct iovec *iov, int niov, - size_t seek); +ssize_t buf_to_iov(const void *buf, size_t buflen, const struct iovec *iov, + int niov, size_t seek); #endif /* _IOV_H_ */ diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c index dcb5a27aa4a4..205c0cf535c4 100644 --- a/usr.sbin/bhyve/net_backends.c +++ b/usr.sbin/bhyve/net_backends.c @@ -103,6 +103,13 @@ struct 
net_backend { int iovcnt); /* + * Get the length of the next packet that can be received from + * the backend. If no packets are currently available, this + * function returns 0. + */ + ssize_t (*peek_recvlen)(struct net_backend *be); + + /* * Called to receive a packet from the backend. When the function * returns a positive value 'len', the scatter-gather vector * provided by the caller contains a packet with such length. @@ -167,6 +174,13 @@ SET_DECLARE(net_backend_set, struct net_backend); struct tap_priv { struct mevent *mevp; + /* + * A bounce buffer that allows us to implement the peek_recvlen + * callback. In the future we may get the same information from + * the kevent data. + */ + char bbuf[1 << 16]; + ssize_t bbuflen; }; static void @@ -223,6 +237,9 @@ tap_init(struct net_backend *be, const char *devname, errx(EX_OSERR, "Unable to apply rights for sandbox"); #endif + memset(priv->bbuf, 0, sizeof(priv->bbuf)); + priv->bbuflen = 0; + priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (priv->mevp == NULL) { WPRINTF(("Could not register event")); @@ -246,15 +263,56 @@ tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt) } static ssize_t +tap_peek_recvlen(struct net_backend *be) +{ + struct tap_priv *priv = (struct tap_priv *)be->opaque; + ssize_t ret; + + if (priv->bbuflen > 0) { + /* + * We already have a packet in the bounce buffer. + * Just return its length. + */ + return priv->bbuflen; + } + + /* + * Read the next packet (if any) into the bounce buffer, so + * that we get to know its length and we can return that + * to the caller. + */ + ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf)); + if (ret < 0 && errno == EWOULDBLOCK) { + return (0); + } + + if (ret > 0) + priv->bbuflen = ret; + + return (ret); +} + +static ssize_t tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) { + struct tap_priv *priv = (struct tap_priv *)be->opaque; ssize_t ret; - /* Should never be called without a valid tap fd */ - assert(be->fd != -1); + if (priv->bbuflen > 0) { + /* + * A packet is available in the bounce buffer, so + * we read it from there. + */ + ret = buf_to_iov(priv->bbuf, priv->bbuflen, + iov, iovcnt, 0); - ret = readv(be->fd, iov, iovcnt); + /* Mark the bounce buffer as empty. 
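tap_recv() above drains the bounce buffer through buf_to_iov() (made const-correct earlier in this batch). The heart of that helper is a bounded scatter copy; a self-contained userspace analogue of the seek == 0 case (the function name is mine):

    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/uio.h>

    /* Copy a linear buffer out into a scatter-gather list; return the
     * number of bytes actually placed. */
    static ssize_t
    scatter(const void *buf, size_t buflen, const struct iovec *iov, int niov)
    {
            size_t off = 0, len;
            int i;

            for (i = 0; i < niov && off < buflen; i++) {
                    len = buflen - off;
                    if (len > iov[i].iov_len)
                            len = iov[i].iov_len;
                    memcpy(iov[i].iov_base, (const char *)buf + off, len);
                    off += len;
            }
            return ((ssize_t)off);
    }

    int
    main(void)
    {
            char a[4], b[8];
            struct iovec iov[2] = {
                    { .iov_base = a, .iov_len = sizeof(a) },
                    { .iov_base = b, .iov_len = sizeof(b) },
            };

            printf("%zd\n", scatter("hello world", 11, iov, 2)); /* 11 */
            return (0);
    }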
*/ + priv->bbuflen = 0; + return (ret); + } + + ret = readv(be->fd, iov, iovcnt); if (ret < 0 && errno == EWOULDBLOCK) { return (0); } @@ -299,6 +357,7 @@ static struct net_backend tap_backend = { .init = tap_init, .cleanup = tap_cleanup, .send = tap_send, + .peek_recvlen = tap_peek_recvlen, .recv = tap_recv, .recv_enable = tap_recv_enable, .recv_disable = tap_recv_disable, @@ -313,6 +372,7 @@ static struct net_backend vmnet_backend = { .init = tap_init, .cleanup = tap_cleanup, .send = tap_send, + .peek_recvlen = tap_peek_recvlen, .recv = tap_recv, .recv_enable = tap_recv_enable, .recv_disable = tap_recv_disable, @@ -331,8 +391,7 @@ DATA_SET(net_backend_set, vmnet_backend); #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \ VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \ VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \ - VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO | \ - VIRTIO_NET_F_MRG_RXBUF) + VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO) struct netmap_priv { char ifname[IFNAMSIZ]; @@ -540,6 +599,26 @@ txsync: } static ssize_t +netmap_peek_recvlen(struct net_backend *be) +{ + struct netmap_priv *priv = (struct netmap_priv *)be->opaque; + struct netmap_ring *ring = priv->rx; + uint32_t head = ring->head; + ssize_t totlen = 0; + + while (head != ring->tail) { + struct netmap_slot *slot = ring->slot + head; + + totlen += slot->len; + if ((slot->flags & NS_MOREFRAG) == 0) + break; + head = nm_ring_next(ring, head); + } + + return (totlen); +} + +static ssize_t netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) { struct netmap_priv *priv = (struct netmap_priv *)be->opaque; @@ -628,6 +707,7 @@ static struct net_backend netmap_backend = { .init = netmap_init, .cleanup = netmap_cleanup, .send = netmap_send, + .peek_recvlen = netmap_peek_recvlen, .recv = netmap_recv, .recv_enable = netmap_recv_enable, .recv_disable = netmap_recv_disable, @@ -642,6 +722,7 @@ static struct net_backend vale_backend = { .init = netmap_init, .cleanup = netmap_cleanup, .send = netmap_send, + .peek_recvlen = netmap_peek_recvlen, .recv = netmap_recv, .recv_enable = netmap_recv_enable, .recv_disable = netmap_recv_disable, @@ -758,6 +839,13 @@ netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt) return (be->send(be, iov, iovcnt)); } +ssize_t +netbe_peek_recvlen(struct net_backend *be) +{ + + return (be->peek_recvlen(be)); +} + /* * Try to read a packet from the backend, without blocking. * If no packets are available, return 0. 
In case of success, return diff --git a/usr.sbin/bhyve/net_backends.h b/usr.sbin/bhyve/net_backends.h index de80692f1487..25c1468622dd 100644 --- a/usr.sbin/bhyve/net_backends.h +++ b/usr.sbin/bhyve/net_backends.h @@ -45,6 +45,7 @@ int netbe_set_cap(net_backend_t *be, uint64_t cap, unsigned vnet_hdr_len); size_t netbe_get_vnet_hdr_len(net_backend_t *be); ssize_t netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt); +ssize_t netbe_peek_recvlen(net_backend_t *be); ssize_t netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt); ssize_t netbe_rx_discard(net_backend_t *be); void netbe_rx_disable(net_backend_t *be); diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c index eb35d088d568..52893f283919 100644 --- a/usr.sbin/bhyve/pci_virtio_net.c +++ b/usr.sbin/bhyve/pci_virtio_net.c @@ -228,22 +228,34 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc) struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS]; struct iovec iov[VTNET_MAXSEGS + 1]; struct vqueue_info *vq; - uint32_t riov_bytes; - struct iovec *riov; - int riov_len; - uint32_t ulen; - int n_chains; - int len; vq = &sc->vsc_queues[VTNET_RXQ]; for (;;) { struct virtio_net_rxhdr *hdr; + uint32_t riov_bytes; + struct iovec *riov; + uint32_t ulen; + int riov_len; + int n_chains; + ssize_t rlen; + ssize_t plen; + + plen = netbe_peek_recvlen(sc->vsc_be); + if (plen <= 0) { + /* + * No more packets (plen == 0), or backend errored + * (plen < 0). Interrupt if needed and stop. + */ + vq_endchains(vq, /*used_all_avail=*/0); + return; + } + plen += prepend_hdr_len; /* * Get a descriptor chain to store the next ingress * packet. In case of mergeable rx buffers, get as * many chains as necessary in order to make room - * for a maximum sized LRO packet. + * for plen bytes. */ riov_bytes = 0; riov_len = 0; @@ -287,8 +299,7 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc) riov_bytes += info[n_chains].len; riov += n; n_chains++; - } while (riov_bytes < VTNET_MAX_PKT_LEN && - riov_len < VTNET_MAXSEGS); + } while (riov_bytes < plen && riov_len < VTNET_MAXSEGS); riov = iov; hdr = riov[0].iov_base; @@ -312,21 +323,20 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc) memset(hdr, 0, prepend_hdr_len); } - len = netbe_recv(sc->vsc_be, riov, riov_len); - - if (len <= 0) { + rlen = netbe_recv(sc->vsc_be, riov, riov_len); + if (rlen != plen - prepend_hdr_len) { /* - * No more packets (len == 0), or backend errored - * (err < 0). Return unused available buffers - * and stop. + * If this happens it means there is something + * wrong with the backend (e.g., some other + * process is stealing our packets). */ + WPRINTF(("netbe_recv: expected %zd bytes, " + "got %zd", plen - prepend_hdr_len, rlen)); vq_retchains(vq, n_chains); - /* Interrupt if needed/appropriate and stop. 
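Taken together, the new peek_recvlen callbacks and the pci_vtnet_rx() rework implement a peek-then-commit receive loop: learn the pending packet's length, gather only enough guest descriptor chains to cover it, then require the backend to deliver exactly those bytes. In outline (a sketch; collect_chains(), drop_chains(), and publish_chains() are placeholders for the vq_getchain()/vq_relchain_*() bookkeeping, not bhyve functions):

    extern int collect_chains(struct iovec *, int, size_t);     /* placeholder */
    extern void drop_chains(int);                               /* placeholder */
    extern void publish_chains(int, uint32_t);                  /* placeholder */

    static void
    rx_loop(struct net_backend *be, size_t hdr_len)
    {
            struct iovec iov[64];
            ssize_t plen, rlen;
            int niov;

            for (;;) {
                    plen = netbe_peek_recvlen(be);
                    if (plen <= 0)
                            return;         /* no packets, or backend error */
                    plen += hdr_len;        /* room for the virtio-net header */
                    niov = collect_chains(iov, 64, (size_t)plen);
                    rlen = netbe_recv(be, iov, niov);
                    if (rlen != plen - (ssize_t)hdr_len) {
                            drop_chains(niov);      /* backend anomaly */
                            continue;
                    }
                    publish_chains(niov, (uint32_t)plen);
            }
    }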
*/ - vq_endchains(vq, /*used_all_avail=*/0); - return; + continue; } - ulen = (uint32_t)(len + prepend_hdr_len); + ulen = (uint32_t)plen; /* * Publish the used buffers to the guest, reporting the @@ -346,12 +356,11 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc) vq_relchain_prepare(vq, info[i].idx, iolen); ulen -= iolen; i++; - assert(i <= n_chains); } while (ulen > 0); hdr->vrh_bufs = i; vq_relchain_publish(vq); - vq_retchains(vq, n_chains - i); + assert(i == n_chains); } } @@ -592,7 +601,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) free(sc); return (err); } - sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be); + sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF | + netbe_get_cap(sc->vsc_be); } if (!mac_provided) { diff --git a/usr.sbin/iostat/iostat.c b/usr.sbin/iostat/iostat.c index dbe7219095e8..4cbfcfcbcbd5 100644 --- a/usr.sbin/iostat/iostat.c +++ b/usr.sbin/iostat/iostat.c @@ -929,7 +929,7 @@ devstats(int perf_select, long double etime, int havelast) } free(devicename); } else if (oflag > 0) { - int msdig = (ms_per_transaction < 100.0) ? 1 : 0; + int msdig = (ms_per_transaction < 99.94) ? 1 : 0; if (Iflag == 0) printf("%4.0Lf%4.0Lf%5.*Lf ", diff --git a/usr.sbin/pstat/pstat.c b/usr.sbin/pstat/pstat.c index c704f1599fff..923357b2c24d 100644 --- a/usr.sbin/pstat/pstat.c +++ b/usr.sbin/pstat/pstat.c @@ -95,6 +95,8 @@ static struct { #define NNAMES (sizeof(namelist) / sizeof(*namelist)) static struct nlist nl[NNAMES]; +#define SIZEHDR "Size" + static int humanflag; static int usenumflag; static int totalflag; @@ -471,7 +473,12 @@ print_swap_header(void) long blocksize; const char *header; - header = getbsize(&hlen, &blocksize); + if (humanflag) { + header = SIZEHDR; + hlen = sizeof(SIZEHDR); + } else { + header = getbsize(&hlen, &blocksize); + } if (totalflag == 0) (void)printf("%-15s %*s %8s %8s %8s\n", "Device", hlen, header, @@ -484,23 +491,30 @@ print_swap_line(const char *swdevname, intmax_t nblks, intmax_t bused, { char usedbuf[5]; char availbuf[5]; + char sizebuf[5]; int hlen, pagesize; long blocksize; pagesize = getpagesize(); getbsize(&hlen, &blocksize); - printf("%-15s %*jd ", swdevname, hlen, CONVERT(nblks)); + printf("%-15s ", swdevname); if (humanflag) { + humanize_number(sizebuf, sizeof(sizebuf), + CONVERT_BLOCKS(nblks), "", + HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); humanize_number(usedbuf, sizeof(usedbuf), CONVERT_BLOCKS(bused), "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); humanize_number(availbuf, sizeof(availbuf), CONVERT_BLOCKS(bavail), "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL); - printf("%8s %8s %5.0f%%\n", usedbuf, availbuf, bpercent); + printf("%8s %8s %8s %5.0f%%\n", sizebuf, + usedbuf, availbuf, bpercent); } else { - printf("%8jd %8jd %5.0f%%\n", (intmax_t)CONVERT(bused), + printf("%*jd %8jd %8jd %5.0f%%\n", hlen, + (intmax_t)CONVERT(nblks), + (intmax_t)CONVERT(bused), + (intmax_t)CONVERT(bavail), bpercent); } }
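For reference, the four-character column text produced in print_swap_line() comes straight from humanize_number(3); a minimal standalone use with the same flags (the value is illustrative; link with -lutil on FreeBSD):

    #include <stdio.h>
    #include <stdint.h>
    #include <libutil.h>

    int
    main(void)
    {
            char buf[5];    /* 4 visible chars + NUL, as in pstat */

            humanize_number(buf, sizeof(buf), 4LL * 1024 * 1024 * 1024,
                "", HN_AUTOSCALE, HN_B | HN_NOSPACE | HN_DECIMAL);
            printf("%s\n", buf);            /* prints "4.0G" */
            return (0);
    }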