diff options
105 files changed, 2360 insertions, 1687 deletions
diff --git a/Makefile.inc1 b/Makefile.inc1 index cdd01a5ea3d2..d54d26b1ad7b 100644 --- a/Makefile.inc1 +++ b/Makefile.inc1 @@ -1287,7 +1287,7 @@ ITOOLS+=makewhatis # # Non-base distributions produced by the base system -EXTRA_DISTRIBUTIONS= doc +EXTRA_DISTRIBUTIONS= .if defined(LIBCOMPAT) EXTRA_DISTRIBUTIONS+= lib${libcompat} .endif @@ -1297,7 +1297,7 @@ EXTRA_DISTRIBUTIONS+= tests DEBUG_DISTRIBUTIONS= .if ${MK_DEBUG_FILES} != "no" -DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,} +DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,tests,,} .endif MTREE_MAGIC?= mtree 2.0 diff --git a/Makefile.libcompat b/Makefile.libcompat index ad3a3b683344..d66d4ee6f39f 100644 --- a/Makefile.libcompat +++ b/Makefile.libcompat @@ -17,7 +17,7 @@ LIB32CPUFLAGS= -march=${TARGET_CPUTYPE} .if ${WANT_COMPILER_TYPE} == gcc || \ (defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc) .else -LIB32CPUFLAGS+= -target x86_64-unknown-freebsd12.0 +LIB32CPUFLAGS+= -target x86_64-unknown-freebsd13.0 .endif LIB32CPUFLAGS+= -m32 LIB32WMAKEENV= MACHINE=i386 MACHINE_ARCH=i386 \ @@ -49,9 +49,9 @@ LIB32CPUFLAGS= -march=${TARGET_CPUTYPE} .endif .else .if ${TARGET_ARCH:Mmips64el*} != "" -LIB32CPUFLAGS= -target mipsel-unknown-freebsd12.0 +LIB32CPUFLAGS= -target mipsel-unknown-freebsd13.0 .else -LIB32CPUFLAGS= -target mips-unknown-freebsd12.0 +LIB32CPUFLAGS= -target mips-unknown-freebsd13.0 .endif .endif LIB32CPUFLAGS+= -mabi=32 diff --git a/lib/Makefile b/lib/Makefile index 58c3753ec8e5..1c82f4142648 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,7 +42,7 @@ SUBDIR= ${SUBDIR_BOOTSTRAP} \ libdevctl \ libdevinfo \ libdevstat \ - ${_libdl} \ + libdl \ libdwarf \ libedit \ libelftc \ @@ -193,10 +193,6 @@ _libproc= libproc _librtld_db= librtld_db .endif -.if defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mfilter} -_libdl= libdl -.endif - SUBDIR.${MK_OPENSSL}+= libmp SUBDIR.${MK_PMC}+= libpmc libpmcstat SUBDIR.${MK_RADIUS_SUPPORT}+= libradius diff --git a/lib/csu/common/crtbegin.c 
b/lib/csu/common/crtbegin.c index 35231fa904f9..859582c37305 100644 --- a/lib/csu/common/crtbegin.c +++ b/lib/csu/common/crtbegin.c @@ -32,10 +32,27 @@ typedef void (*crt_func)(void); extern void *__dso_handle __hidden; -#ifdef SHARED -void *__dso_handle = &__dso_handle; -#else +#ifndef SHARED void *__dso_handle = 0; +#else +void *__dso_handle = &__dso_handle; +void __cxa_finalize(void *) __weak_symbol; + +/* + * Call __cxa_finalize with the dso handle in shared objects. + * When we have ctors/dtors call from the dtor handler before calling + * any dtors, otherwise use a destructor. + */ +#ifndef HAVE_CTORS +__attribute__((destructor)) +#endif +static void +run_cxa_finalize(void) +{ + + if (__cxa_finalize != NULL) + __cxa_finalize(__dso_handle); +} #endif /* @@ -58,6 +75,10 @@ __do_global_dtors_aux(void) crt_func fn; int n; +#ifdef SHARED + run_cxa_finalize(); +#endif + for (n = 1;; n++) { fn = __DTOR_LIST__[n]; if (fn == (crt_func)0 || fn == (crt_func)-1) diff --git a/lib/libc/Versions.def b/lib/libc/Versions.def index e348308a7906..fce33f53a475 100644 --- a/lib/libc/Versions.def +++ b/lib/libc/Versions.def @@ -31,6 +31,9 @@ FBSD_1.4 { FBSD_1.5 { } FBSD_1.4; +# This version was first added to 13.0-current. +FBSD_1.6 { +} FBSD_1.5; # This is our private namespace. Any global interfaces that are # strictly for use only by other FreeBSD applications and libraries @@ -39,4 +42,4 @@ FBSD_1.5 { # # Please do NOT increment the version of this namespace. 
FBSDprivate_1.0 { -} FBSD_1.5; +} FBSD_1.6; diff --git a/lib/libcasper/services/Makefile b/lib/libcasper/services/Makefile index c735063d4050..973805ea99aa 100644 --- a/lib/libcasper/services/Makefile +++ b/lib/libcasper/services/Makefile @@ -3,6 +3,7 @@ .include <src.opts.mk> SUBDIR= cap_dns +SUBDIR+= cap_fileargs SUBDIR+= cap_grp SUBDIR+= cap_pwd SUBDIR+= cap_random diff --git a/lib/libcasper/services/cap_fileargs/Makefile b/lib/libcasper/services/cap_fileargs/Makefile new file mode 100644 index 000000000000..2b4236183bd5 --- /dev/null +++ b/lib/libcasper/services/cap_fileargs/Makefile @@ -0,0 +1,35 @@ +# $FreeBSD$ + +SHLIBDIR?= /lib/casper + +.include <src.opts.mk> + +PACKAGE=libcasper + +SHLIB_MAJOR= 1 +INCSDIR?= ${INCLUDEDIR}/casper + +.if ${MK_CASPER} != "no" +SHLIB= cap_fileargs + +SRCS= cap_fileargs.c +.endif + +INCS= cap_fileargs.h + +LIBADD= nv + +CFLAGS+=-I${.CURDIR} + +MAN+= cap_fileargs.3 + +MLINKS+=cap_fileargs.3 libcap_fileargs.3 +MLINKS+=cap_fileargs.3 fileargs_cinit.3 +MLINKS+=cap_fileargs.3 fileargs_cinitnv.3 +MLINKS+=cap_fileargs.3 fileargs_fopen.3 +MLINKS+=cap_fileargs.3 fileargs_free.3 +MLINKS+=cap_fileargs.3 fileargs_init.3 +MLINKS+=cap_fileargs.3 fileargs_initnv.3 +MLINKS+=cap_fileargs.3 fileargs_open.3 + +.include <bsd.lib.mk> diff --git a/lib/libcasper/services/cap_fileargs/cap_fileargs.3 b/lib/libcasper/services/cap_fileargs/cap_fileargs.3 new file mode 100644 index 000000000000..1d0ea8d43688 --- /dev/null +++ b/lib/libcasper/services/cap_fileargs/cap_fileargs.3 @@ -0,0 +1,241 @@ +.\" Copyright (c) 2018 Mariusz Zaborski <oshogbo@FreeBSD.org> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.\" +.Dd November 12, 2018 +.Dt CAP_FILEARGS 3 +.Os +.Sh NAME +.Nm fileargs_cinit , +.Nm fileargs_cinitnv , +.Nm fileargs_init , +.Nm fileargs_initnv , +.Nm fileargs_free , +.Nm fileargs_open , +.Nm fileargs_fopen +.Nd "library for handling files in capability mode" +.Sh LIBRARY +.Lb libcap_fileargs +.Sh SYNOPSIS +.In sys/nv.h +.In libcasper.h +.In casper/cap_fileargs.h +.Ft "fileargs_t *" +.Fn fileargs_init "int argc" "char *argv[]" "int flags" "mode_t mode" "cap_rights_t *rightsp" +.Ft "fileargs_t *" +.Fn fileargs_cinit "cap_channel_t *cas" "int argc" "char *argv[]" "int flags" "mode_t mode" "cap_rights_t *rightsp" +.Ft "fileargs_t *" +.Fn fileargs_cinitnv "cap_channel_t *cas" "nvlist_t *limits" +.Ft "fileargs_t *" +.Fn fileargs_initnv "nvlist_t *limits" +.Ft "void" +.Fn fileargs_free "fileargs_t *fa" +.Ft "int" +.Fn fileargs_open "fileargs_t *fa" "const char *name" +.Ft "FILE *" +.Fn fileargs_fopen "fileargs_t *fa" "const char *name" "const char *mode" +.Sh DESCRIPTION +The library is used to simplify Capsicumizing tools that use the file system. +The idea behind the library is that we pass the remaining +.Fa argc +and +.Fa argv +which contain a list of files that should be opened for this program. +The library will create a service that will serve those files. +.Pp +The function +.Fn fileargs_init +creates a service to the +.Nm system.fileargs . +The +.Fa argv +contains a list of files that should be opened. +The argument can be set to +.Dv NULL +which will not create a service and all files will be prohibited from being opened. +The +.Fa argc +argument contains the number of passed files. +The +.Fa flags +argument limits opened files for either execution or reading and/or writing. +The +.Fa mode +argument tells with what mode the file should be created if the +.Dv O_CREAT +flag is present. +For more details of the +.Fa flags +and +.Fa mode +arguments see +.Xr open 2 . 
+The +.Fa rightsp +argument contains a list of the capability rights which the file should be limited to. +For more details of the capability rights see +.Xr cap_rights_init 3 . +.Pp +The function +.Fn fileargs_cinit +is equivalent to +.Fn fileargs_init +except that the connection to the Casper needs to be provided. +.Pp +The functions +.Fn fileargs_initnv +and +.Fn fileargs_cinitnv +are respectively equivalent to +.Fn fileargs_init +and +.Fn fileargs_cinit +except that all arguments are provided as +.Xr nvlist 9 . +For details see +.Sx LIMITS . +.Pp +The +.Fn fileargs_free +function closes the connection to the +.Nm system.fileargs +service and frees all structures. +The function handles a +.Dv NULL +argument. +.Pp +The functions +.Fn fileargs_open +and +.Fn fileargs_fopen +are respectively equivalent to +.Xr open 2 +and +.Xr fopen 3 +except that all arguments are fetched from the +.Va fileargs_t +structure. +.Sh LIMITS +This section describes which values and types should be used to pass arguments to the +.Nm system.fileargs +through the +.Fn fileargs_initnv +and +.Fn fileargs_cinitnv +functions. +The +.Xr nvlist 9 +for these functions must contain the following values and types: +.Bl -ohang -offset indent +.It flags ( NV_TYPE_NUMBER ) +The +.Va flags +limits opened files for either execution or reading and/or writing. +.It mode ( NV_TYPE_NUMBER ) +If in the +.Va flags +argument the +.Dv O_CREAT +flag was defined the +.Xr nvlist 9 +must contain the +.Va mode . +The +.Va mode +argument tells with what mode the file should be created. +.El +.Pp +The +.Xr nvlist 9 +for these functions may contain the following values and types: +.Bl -ohang -offset indent +.It cap_rights ( NV_TYPE_BINARY ) +The +.Va cap_rights +argument contains a list of the capability rights which the file should be limited to. +.It ( NV_TYPE_NULL ) +Any number of +.Dv NV_TYPE_NULL +where the name of the element is the name of a file which can be opened. 
+.Sh EXAMPLES +The following example first parses some options and then creates the +.Nm system.fileargs +service with the remaining arguments. +.Bd -literal +int ch, fd, i; +cap_rights_t rights; +fileargs_t *fa; + +while ((ch = getopt(argc, argv, "h")) != -1) { + switch (ch) { + case 'h': + default: + usage(); + } +} + +argc -= optind; +argv += optind; + +/* Create capability to the system.fileargs service. */ +fa = fileargs_init(argc, argv, O_RDONLY, 0, + cap_rights_init(&rights, CAP_READ)); +if (fa == NULL) + err(1, "unable to open system.fileargs service"); + +/* Enter capability mode sandbox. */ +if (cap_enter() < 0 && errno != ENOSYS) + err(1, "unable to enter capability mode"); + +/* Open files. */ +for (i = 0; i < argc; i++) { + fd = fileargs_open(fa, argv[i]); + if (fd < 0) + err(1, "unable to open file %s", argv[i]); + printf("File %s opened in capability mode\n", argv[i]); + close(fd); +} + +fileargs_free(fa); +.Ed +.Sh SEE ALSO +.Xr cap_enter 2 , +.Xr open 2 , +.Xr cap_rights_init 3 , +.Xr err 3 , +.Xr fopen 3 , +.Xr getopt 3 , +.Xr capsicum 4 , +.Xr nv 9 +.Sh BUGS +The +.Lb cap_fileargs +included in +.Fx +is considered experimental, and should not be deployed in production +environments without careful consideration of the risks associated with +the use of experimental operating system features. +.Sh AUTHORS +.An Mariusz Zaborski Aq Mt oshogbo@FreeBSD.org diff --git a/lib/libcasper/services/cap_fileargs/cap_fileargs.c b/lib/libcasper/services/cap_fileargs/cap_fileargs.c new file mode 100644 index 000000000000..0d9144231be6 --- /dev/null +++ b/lib/libcasper/services/cap_fileargs/cap_fileargs.c @@ -0,0 +1,505 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 Mariusz Zaborski <oshogbo@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/capsicum.h> +#include <sys/sysctl.h> +#include <sys/cnv.h> +#include <sys/dnv.h> +#include <sys/nv.h> + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <libcasper.h> +#include <libcasper_service.h> + +#include "cap_fileargs.h" + +#define CACHE_SIZE 128 + +#define FILEARGS_MAGIC 0xFA00FA00 + +struct fileargs { + uint32_t fa_magic; + nvlist_t *fa_cache; + cap_channel_t *fa_chann; + int fa_fdflags; +}; + +static int +fileargs_get_cache(fileargs_t *fa, const char *name) +{ + int fd; + const nvlist_t *nvl; + nvlist_t *tnvl; + + assert(fa != NULL); + assert(fa->fa_magic == FILEARGS_MAGIC); + assert(name != NULL); + + if (fa->fa_cache == NULL) + return (-1); + + if ((fa->fa_fdflags & O_CREAT) != 0) + return (-1); + + nvl = dnvlist_get_nvlist(fa->fa_cache, name, NULL); + if (nvl == NULL) + return (-1); + + tnvl = nvlist_take_nvlist(fa->fa_cache, name); + fd = nvlist_take_descriptor(tnvl, "fd"); + nvlist_destroy(tnvl); + + if ((fa->fa_fdflags & O_CLOEXEC) != O_CLOEXEC) { + if (fcntl(fd, F_SETFD, fa->fa_fdflags) == -1) { + close(fd); + return (-1); + } + } + + return (fd); +} + +static void +fileargs_set_cache(fileargs_t *fa, nvlist_t *nvl) +{ + + nvlist_destroy(fa->fa_cache); + fa->fa_cache = nvl; +} + +static nvlist_t* +fileargs_fetch(fileargs_t *fa, const char *name) +{ + nvlist_t *nvl; + int serrno; + + assert(fa != NULL); + assert(name != NULL); + + nvl = nvlist_create(NV_FLAG_NO_UNIQUE); + nvlist_add_string(nvl, "cmd", "open"); + nvlist_add_string(nvl, "name", name); + + nvl = cap_xfer_nvlist(fa->fa_chann, nvl); + if (nvl == NULL) + return (NULL); + + if (nvlist_get_number(nvl, "error") != 0) { + serrno = (int)nvlist_get_number(nvl, "error"); + nvlist_destroy(nvl); + errno = serrno; + return (NULL); + } + + return (nvl); +} + +static nvlist_t * +fileargs_create_limit(int argc, const char * const *argv, int 
flags, + mode_t mode, cap_rights_t *rightsp) +{ + nvlist_t *limits; + int i; + + limits = nvlist_create(NV_FLAG_NO_UNIQUE); + if (limits == NULL) + return (NULL); + + nvlist_add_number(limits, "flags", flags); + if (rightsp != NULL) { + nvlist_add_binary(limits, "cap_rights", rightsp, + sizeof(*rightsp)); + } + if ((flags & O_CREAT) != 0) + nvlist_add_number(limits, "mode", (uint64_t)mode); + + for (i = 0; i < argc; i++) { + nvlist_add_null(limits, argv[i]); + } + + return (limits); +} + +static fileargs_t * +fileargs_create(cap_channel_t *chan, int fdflags) +{ + fileargs_t *fa; + + fa = malloc(sizeof(*fa)); + if (fa != NULL) { + fa->fa_cache = NULL; + fa->fa_chann = chan; + fa->fa_fdflags = fdflags; + fa->fa_magic = FILEARGS_MAGIC; + } + + return (fa); +} + +fileargs_t * +fileargs_init(int argc, char *argv[], int flags, mode_t mode, + cap_rights_t *rightsp) +{ + nvlist_t *limits; + + if (argc <= 0 || argv == NULL) { + return (fileargs_create(NULL, 0)); + } + + limits = fileargs_create_limit(argc, (const char * const *)argv, flags, + mode, rightsp); + if (limits == NULL) + return (NULL); + + return (fileargs_initnv(limits)); +} + +fileargs_t * +fileargs_cinit(cap_channel_t *cas, int argc, char *argv[], int flags, + mode_t mode, cap_rights_t *rightsp) +{ + nvlist_t *limits; + + if (argc <= 0 || argv == NULL) { + return (fileargs_create(NULL, 0)); + } + + limits = fileargs_create_limit(argc, (const char * const *)argv, flags, + mode, rightsp); + if (limits == NULL) + return (NULL); + + return (fileargs_cinitnv(cas, limits)); +} + +fileargs_t * +fileargs_initnv(nvlist_t *limits) +{ + cap_channel_t *cas; + fileargs_t *fa; + + if (limits == NULL) { + return (fileargs_create(NULL, 0)); + } + + cas = cap_init(); + if (cas == NULL) { + nvlist_destroy(limits); + return (NULL); + } + + fa = fileargs_cinitnv(cas, limits); + cap_close(cas); + + return (fa); +} + +fileargs_t * +fileargs_cinitnv(cap_channel_t *cas, nvlist_t *limits) +{ + cap_channel_t *chann; + fileargs_t *fa; + 
int serrno, ret; + int flags; + + assert(cas != NULL); + + if (limits == NULL) { + return (fileargs_create(NULL, 0)); + } + + chann = NULL; + fa = NULL; + + chann = cap_service_open(cas, "system.fileargs"); + if (chann == NULL) { + nvlist_destroy(limits); + return (NULL); + } + + flags = nvlist_get_number(limits, "flags"); + + /* Limits are consumed no need to free them. */ + ret = cap_limit_set(chann, limits); + if (ret < 0) + goto out; + + fa = fileargs_create(chann, flags); + if (fa == NULL) + goto out; + + return (fa); +out: + serrno = errno; + if (chann != NULL) + cap_close(chann); + errno = serrno; + return (NULL); +} + +int +fileargs_open(fileargs_t *fa, const char *name) +{ + int fd; + nvlist_t *nvl; + char *cmd; + + assert(fa != NULL); + assert(fa->fa_magic == FILEARGS_MAGIC); + + if (name == NULL) { + errno = EINVAL; + return (-1); + } + + if (fa->fa_chann == NULL) { + errno = ENOTCAPABLE; + return (-1); + } + + fd = fileargs_get_cache(fa, name); + if (fd != -1) + return (fd); + + nvl = fileargs_fetch(fa, name); + if (nvl == NULL) + return (-1); + + fd = nvlist_take_descriptor(nvl, "fd"); + cmd = nvlist_take_string(nvl, "cmd"); + if (strcmp(cmd, "cache") == 0) + fileargs_set_cache(fa, nvl); + else + nvlist_destroy(nvl); + free(cmd); + + return (fd); +} + +FILE * +fileargs_fopen(fileargs_t *fa, const char *name, const char *mode) +{ + int fd; + + if ((fd = fileargs_open(fa, name)) < 0) { + return (NULL); + } + + return (fdopen(fd, mode)); +} + +void +fileargs_free(fileargs_t *fa) +{ + + if (fa == NULL) + return; + + assert(fa->fa_magic == FILEARGS_MAGIC); + + nvlist_destroy(fa->fa_cache); + if (fa->fa_chann != NULL) { + cap_close(fa->fa_chann); + } + explicit_bzero(&fa->fa_magic, sizeof(fa->fa_magic)); + free(fa); +} + +/* + * Service functions. 
+ */ + +static const char *lastname; +static void *cacheposition; +static bool allcached; +static const cap_rights_t *caprightsp; +static int capflags; +static mode_t capmode; + +static int +open_file(const char *name) +{ + int fd, serrno; + + if ((capflags & O_CREAT) == 0) + fd = open(name, capflags); + else + fd = open(name, capflags, capmode); + if (fd < 0) + return (-1); + + if (caprightsp != NULL) { + if (cap_rights_limit(fd, caprightsp) < 0) { + serrno = errno; + close(fd); + errno = serrno; + return (-1); + } + } + + return (fd); +} + +static void +fileargs_add_cache(nvlist_t *nvlout, const nvlist_t *limits, + const char *curent_name) +{ + int type, i, fd; + void *cookie; + nvlist_t *new; + const char *fname; + + if ((capflags & O_CREAT) != 0) { + allcached = true; + return; + } + + cookie = cacheposition; + for (i = 0; i < CACHE_SIZE + 1; i++) { + fname = nvlist_next(limits, &type, &cookie); + if (fname == NULL) { + cacheposition = NULL; + lastname = NULL; + allcached = true; + return; + } + /* We doing that to catch next element name. */ + if (i == CACHE_SIZE) { + break; + } + + if (type != NV_TYPE_NULL || + (curent_name != NULL && strcmp(fname, curent_name) == 0)) { + curent_name = NULL; + i--; + continue; + } + + fd = open_file(fname); + if (fd < 0) { + i--; + continue; + } + + new = nvlist_create(NV_FLAG_NO_UNIQUE); + nvlist_move_descriptor(new, "fd", fd); + nvlist_add_nvlist(nvlout, fname, new); + } + cacheposition = cookie; + lastname = fname; +} + +static bool +fileargs_allowed(const nvlist_t *limits, const nvlist_t *request) +{ + const char *name; + + name = dnvlist_get_string(request, "name", NULL); + if (name == NULL) + return (false); + + /* Fast path. 
*/ + if (lastname != NULL && strcmp(name, lastname) == 0) + return (true); + + if (!nvlist_exists_null(limits, name)) + return (false); + + return (true); +} + +static int +fileargs_limit(const nvlist_t *oldlimits, const nvlist_t *newlimits) +{ + + if (oldlimits != NULL) + return (ENOTCAPABLE); + + capflags = (int)dnvlist_get_number(newlimits, "flags", 0); + if ((capflags & O_CREAT) != 0) + capmode = (mode_t)nvlist_get_number(newlimits, "mode"); + else + capmode = 0; + + caprightsp = dnvlist_get_binary(newlimits, "cap_rights", NULL, NULL, 0); + + return (0); +} + +static int +fileargs_command_open(const nvlist_t *limits, nvlist_t *nvlin, + nvlist_t *nvlout) +{ + int fd; + const char *name; + + if (limits == NULL) + return (ENOTCAPABLE); + + if (!fileargs_allowed(limits, nvlin)) + return (ENOTCAPABLE); + + name = nvlist_get_string(nvlin, "name"); + + fd = open_file(name); + if (fd < 0) + return (errno); + + if (!allcached && (lastname == NULL || + strcmp(name, lastname) == 0)) { + nvlist_add_string(nvlout, "cmd", "cache"); + fileargs_add_cache(nvlout, limits, name); + } else { + nvlist_add_string(nvlout, "cmd", "open"); + } + nvlist_move_descriptor(nvlout, "fd", fd); + return (0); +} + +static int +fileargs_command(const char *cmd, const nvlist_t *limits, + nvlist_t *nvlin, nvlist_t *nvlout) +{ + + if (strcmp(cmd, "open") == 0) + return (fileargs_command_open(limits, nvlin, nvlout)); + + return (EINVAL); +} + +CREATE_SERVICE("system.fileargs", fileargs_limit, fileargs_command, + CASPER_SERVICE_FD | CASPER_SERVICE_STDIO | CASPER_SERVICE_NO_UNIQ_LIMITS); diff --git a/lib/libcasper/services/cap_fileargs/cap_fileargs.h b/lib/libcasper/services/cap_fileargs/cap_fileargs.h new file mode 100644 index 000000000000..979759072419 --- /dev/null +++ b/lib/libcasper/services/cap_fileargs/cap_fileargs.h @@ -0,0 +1,108 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 Mariusz Zaborski <oshogbo@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _FILEARGS_H_ +#define _FILEARGS_H_ + +#include <sys/dnv.h> +#include <sys/nv.h> + +#include <stdbool.h> + +#ifdef WITH_CASPER +struct fileargs; +typedef struct fileargs fileargs_t; + +fileargs_t *fileargs_init(int argc, char *argv[], int flags, mode_t mode, + cap_rights_t *rightsp); +fileargs_t *fileargs_cinit(cap_channel_t *cas, int argc, char *argv[], + int flags, mode_t mode, cap_rights_t *rightsp); +fileargs_t *fileargs_initnv(nvlist_t *limits); +fileargs_t *fileargs_cinitnv(cap_channel_t *cas, nvlist_t *limits); +int fileargs_open(fileargs_t *fa, const char *name); +void fileargs_free(fileargs_t *fa); +FILE *fileargs_fopen(fileargs_t *fa, const char *name, const char *mode); +#else +typedef struct fileargs { + int fa_flags; + mode_t fa_mode; +} fileargs_t; + +static inline fileargs_t * +fileargs_init(int argc __unused, char *argv[] __unused, int flags, mode_t mode, + cap_rights_t *rightsp __unused) { + fileargs_t *fa; + + fa = malloc(sizeof(*fa)); + if (fa != NULL) { + fa->fa_flags = flags; + fa->fa_mode = mode; + } + + return (fa); +} + +static inline fileargs_t * +fileargs_cinit(cap_channel_t *cas __unused, int argc, char *argv[], int flags, + mode_t mode, cap_rights_t *rightsp) +{ + + return (fileargs_init(argc, argv, flags, mode, rightsp)); +} + +static inline fileargs_t * +fileargs_initnv(nvlist_t *limits) +{ + fileargs_t *fa; + + fa = fileargs_init(0, NULL, + nvlist_get_number(limits, "flags"), + dnvlist_get_number(limits, "mode", 0), + NULL); + nvlist_destroy(limits); + + return (fa); +} + +static inline fileargs_t * +fileargs_cinitnv(cap_channel_t *cas __unused, nvlist_t *limits) +{ + + return (fileargs_initnv(limits)); +} + +#define fileargs_open(fa, name) \ + open(name, fa->fa_flags, fa->fa_mode) +#define fileargs_fopen(fa, name, mode) \ + fopen(name, mode) +#define fileargs_free(fa) (free(fa)) +#endif + +#endif /* !_FILEARGS_H_ */ diff --git a/lib/libnv/common_impl.h b/lib/libnv/common_impl.h index 
a50902d221ce..69154466203a 100644 --- a/lib/libnv/common_impl.h +++ b/lib/libnv/common_impl.h @@ -34,6 +34,15 @@ #ifndef _COMMON_IMPL_H_ #define _COMMON_IMPL_H_ -#define fd_is_valid(fd) (fcntl((fd), F_GETFL) != -1 || errno != EBADF) +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> + +static inline bool +fd_is_valid(int fd) +{ + + return (fcntl(fd, F_GETFD) != -1 || errno != EBADF); +} #endif /* !_COMMON_IMPL_H_ */ diff --git a/lib/libnv/msgio.c b/lib/libnv/msgio.c index 300a437a7421..938bca81b0ac 100644 --- a/lib/libnv/msgio.c +++ b/lib/libnv/msgio.c @@ -66,11 +66,6 @@ msghdr_add_fd(struct cmsghdr *cmsg, int fd) PJDLOG_ASSERT(fd >= 0); - if (!fd_is_valid(fd)) { - errno = EBADF; - return (-1); - } - cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); diff --git a/lib/libnv/tests/nvlist_send_recv_test.c b/lib/libnv/tests/nvlist_send_recv_test.c index f4bc1c4464bc..1b673b5d4259 100644 --- a/lib/libnv/tests/nvlist_send_recv_test.c +++ b/lib/libnv/tests/nvlist_send_recv_test.c @@ -306,15 +306,12 @@ parent(int sock) CHECK(name == NULL); } -int -main(void) +static void +send_nvlist(void) { int status, socks[2]; pid_t pid; - printf("1..134\n"); - fflush(stdout); - if (socketpair(PF_UNIX, SOCK_STREAM, 0, socks) < 0) err(1, "socketpair() failed"); pid = fork(); @@ -326,7 +323,7 @@ main(void) /* Child. */ close(socks[0]); child(socks[1]); - return (0); + _exit(0); default: /* Parent. 
*/ close(socks[1]); @@ -336,6 +333,35 @@ main(void) if (waitpid(pid, &status, 0) < 0) err(1, "waitpid() failed"); +} + +static void +send_closed_fd(void) +{ + nvlist_t *nvl; + int error, socks[2]; + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, socks) < 0) + err(1, "socketpair() failed"); + + nvl = nvlist_create(0); + nvlist_add_descriptor(nvl, "fd", 12345); + error = nvlist_error(nvl); + CHECK(error == EBADF); + + error = nvlist_send(socks[1], nvl); + CHECK(error != 0 && errno == EBADF); +} + +int +main(void) +{ + + printf("1..136\n"); + fflush(stdout); + + send_nvlist(); + send_closed_fd(); return (0); } diff --git a/lib/libufs/Makefile b/lib/libufs/Makefile index 16487bebc957..2685f7318ac1 100644 --- a/lib/libufs/Makefile +++ b/lib/libufs/Makefile @@ -3,12 +3,12 @@ PACKAGE=lib${LIB} LIB= ufs SHLIBDIR?= /lib -SHLIB_MAJOR= 6 +SHLIB_MAJOR= 7 SRCS= block.c cgroup.c crc32.c inode.c sblock.c type.c ffs_subr.c ffs_tables.c INCS= libufs.h -MAN= bread.3 cgread.3 libufs.3 sbread.3 ufs_disk_close.3 +MAN= bread.3 cgread.3 getinode.3 libufs.3 sbread.3 ufs_disk_close.3 MLINKS+= bread.3 bwrite.3 MLINKS+= bread.3 berase.3 MLINKS+= cgread.3 cgread1.3 @@ -16,6 +16,7 @@ MLINKS+= cgread.3 cgget.3 MLINKS+= cgread.3 cgwrite.3 MLINKS+= cgread.3 cgwrite1.3 MLINKS+= cgread.3 cgput.3 +MLINKS+= getinode.3 putinode.3 MLINKS+= sbread.3 sbwrite.3 MLINKS+= sbread.3 sbget.3 MLINKS+= sbread.3 sbput.3 diff --git a/lib/libufs/getinode.3 b/lib/libufs/getinode.3 new file mode 100644 index 000000000000..6aa5388cbd6d --- /dev/null +++ b/lib/libufs/getinode.3 @@ -0,0 +1,131 @@ +.\" Author: Marshall Kirk McKusick <mckusick@freebsd.org> +.\" Date: January 19, 2018 +.\" Description: +.\" Manual page for libufs functions: +.\" getinode(3) +.\" putinode(3) +.\" +.\" This file is in the public domain. 
+.\" +.\" $FreeBSD$ +.\" +.Dd November 10, 2018 +.Dt GETINODE 3 +.Os +.Sh NAME +.Nm getinode , putinode +.Nd fetch and store inodes on a UFS file system +.Sh LIBRARY +.Lb libufs +.Sh SYNOPSIS +.In ufs/ufs/dinode.h +.In ufs/ffs/fs.h +.In libufs.h +.Ft int +.Fn getinode "struct uufsd *disk" "union dinodep *dp" "ino_t inumber" +.Ft int +.Fn putinode "struct uufsd *disk" +.Sh DESCRIPTION +The +.Fn getinode +and +.Fn putinode +functions provide an inode fetch and store API for +.Xr libufs 3 +consumers. +They operate on a userland UFS disk structure. +The +.Fn getinode +function fetches the specified inode from the filesystem. +The +.Fn putinode +function stores the most recently fetched inode to the filesystem. +.Pp +The +.Va dinodep +union is defined as: +.Bd -literal -offset indent +union dinodep { + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; +}; +.Ed +.Pp +Sample code to clear write permissions for inode number +.Fa inumber +stored on the filesystem described by +.Fa diskp . +.Bd -literal -offset indent +#include <sys/stat.h> +#include <err.h> + +#include <ufs/ufs/dinode.h> +#include <ufs/ffs/fs.h> +#include <libufs.h> + +void +clearwrite(struct uufsd *diskp, ino_t inumber) +{ + union dinodep dp; + + if (getinode(diskp, &dp, inumber) == -1) + err(1, "getinode: %s", diskp->d_error); + switch (diskp->d_ufs) { + case 1: /* UFS 1 filesystem */ + dp.dp1->di_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + break; + case 2: /* UFS 2 filesystem */ + dp.dp2->di_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + break; + default: + errx(1, "unknown filesystem type"); + } + if (putinode(diskp) == -1) + err(1, "putinode: %s", diskp->d_error); +} +.Ed +.Sh RETURN VALUES +The +.Fn getinode +and +.Fn putinode +functions return 0 on success, or \-1 in case of any error. +A string describing the error is stored in +.Fa diskp->d_error . +The global +.Fa errno +often provides additional information. 
+.Sh ERRORS +The function +.Fn getinode +may fail and set +.Va errno +for any of the errors specified for the library function +.Xr pread 2 . +It can also fail if the inode number is out of the range of inodes +in the filesystem. +.Pp +The function +.Fn putinode +may fail and set +.Va errno +for any of the errors specified for the library functions +.Xr ufs_disk_write 3 +or +.Xr pwrite 2 . +.Pp +Additionally both functions may follow the +.Xr libufs 3 +error methodologies in case of a device error. +.Sh SEE ALSO +.Xr pread 2 , +.Xr pwrite 2 , +.Xr libufs 3 , +.Xr ufs_disk_write 3 +.Sh HISTORY +These functions first appeared as part of +.Xr libufs 3 +in +.Fx 13.0 . +.Sh AUTHORS +.An Marshall Kirk McKusick Aq Mt mckusick@freebsd.org diff --git a/lib/libufs/inode.c b/lib/libufs/inode.c index 83a6cf0e36c7..11e263f08ad5 100644 --- a/lib/libufs/inode.c +++ b/lib/libufs/inode.c @@ -49,18 +49,16 @@ __FBSDID("$FreeBSD$"); #include <libufs.h> int -getino(struct uufsd *disk, void **dino, ino_t inode, int *mode) +getinode(struct uufsd *disk, union dinodep *dp, ino_t inum) { ino_t min, max; caddr_t inoblock; - struct ufs1_dinode *dp1; - struct ufs2_dinode *dp2; struct fs *fs; ERROR(disk, NULL); fs = &disk->d_fs; - if (inode >= (ino_t)fs->fs_ipg * fs->fs_ncg) { + if (inum >= (ino_t)fs->fs_ipg * fs->fs_ncg) { ERROR(disk, "inode number out of range"); return (-1); } @@ -76,26 +74,22 @@ getino(struct uufsd *disk, void **dino, ino_t inode, int *mode) } disk->d_inoblock = inoblock; } - if (inode >= min && inode < max) + if (inum >= min && inum < max) goto gotit; - bread(disk, fsbtodb(fs, ino_to_fsba(fs, inode)), inoblock, + bread(disk, fsbtodb(fs, ino_to_fsba(fs, inum)), inoblock, fs->fs_bsize); - disk->d_inomin = min = inode - (inode % INOPB(fs)); + disk->d_inomin = min = inum - (inum % INOPB(fs)); disk->d_inomax = max = min + INOPB(fs); gotit: switch (disk->d_ufs) { case 1: - dp1 = &((struct ufs1_dinode *)inoblock)[inode - min]; - if (mode != NULL) - *mode = dp1->di_mode & IFMT; - 
if (dino != NULL) - *dino = dp1; + disk->d_dp.dp1 = &((struct ufs1_dinode *)inoblock)[inum - min]; + if (dp != NULL) + *dp = disk->d_dp; return (0); case 2: - dp2 = &((struct ufs2_dinode *)inoblock)[inode - min]; - if (mode != NULL) - *mode = dp2->di_mode & IFMT; - if (dino != NULL) - *dino = dp2; + disk->d_dp.dp2 = &((struct ufs2_dinode *)inoblock)[inum - min]; + if (dp != NULL) + *dp = disk->d_dp; return (0); default: break; @@ -105,7 +99,7 @@ gotit: switch (disk->d_ufs) { } int -putino(struct uufsd *disk) +putinode(struct uufsd *disk) { struct fs *fs; diff --git a/lib/libufs/libufs.h b/lib/libufs/libufs.h index 4598a9999ce2..dbd378949877 100644 --- a/lib/libufs/libufs.h +++ b/lib/libufs/libufs.h @@ -35,6 +35,10 @@ /* * libufs structures. */ +union dinodep { + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; +}; /* * userland ufs disk. @@ -49,6 +53,7 @@ struct uufsd { caddr_t d_inoblock; /* inode block */ uint32_t d_inomin; /* low inode (not ino_t for ABI compat) */ uint32_t d_inomax; /* high inode (not ino_t for ABI compat) */ + union dinodep d_dp; /* pointer to currently active inode */ union { struct fs d_fs; /* filesystem information */ char d_sb[MAXBSIZE]; @@ -135,8 +140,8 @@ int cgwrite1(struct uufsd *, int); /* * inode.c */ -int getino(struct uufsd *, void **, ino_t, int *); -int putino(struct uufsd *); +int getinode(struct uufsd *, union dinodep *, ino_t); +int putinode(struct uufsd *); /* * sblock.c diff --git a/release/scripts/make-manifest.sh b/release/scripts/make-manifest.sh index 0ac5720dcd13..8dc182a55bf1 100755 --- a/release/scripts/make-manifest.sh +++ b/release/scripts/make-manifest.sh @@ -10,7 +10,6 @@ # $FreeBSD$ base="Base system" -doc="Additional Documentation" kernel="Kernel" ports="Ports tree" src="System source tree" @@ -19,7 +18,6 @@ tests="Test suite" desc_base="${base} (MANDATORY)" desc_base_dbg="${base} (Debugging)" -desc_doc="${doc}" desc_kernel="${kernel} (MANDATORY)" desc_kernel_dbg="${kernel} (Debugging)" 
desc_kernel_alt="Alternate ${kernel}" @@ -30,7 +28,6 @@ desc_ports="${ports}" desc_src="${src}" desc_tests="${tests}" -default_doc=off default_src=off default_ports=off default_tests=off @@ -51,6 +48,9 @@ for i in ${*}; do desc="$(eval echo \"\${desc_${distname}}\")" case ${i} in + doc.txz) + continue + ;; kernel-dbg.txz) desc="${desc_kernel_dbg}" ;; diff --git a/sbin/clri/clri.c b/sbin/clri/clri.c index 0c0f8f0947a3..7f9a4f09bfe0 100644 --- a/sbin/clri/clri.c +++ b/sbin/clri/clri.c @@ -62,11 +62,6 @@ __FBSDID("$FreeBSD$"); #include <stdio.h> #include <unistd.h> -union dinodep { - struct ufs1_dinode *dp1; - struct ufs2_dinode *dp2; -}; - static void usage(void) { @@ -104,8 +99,8 @@ main(int argc, char *argv[]) } (void)printf("clearing %d\n", inonum); - if (getino(&disk, (void **)&dp, inonum, NULL) == -1) { - printf("getino: %s\n", disk.d_error); + if (getinode(&disk, &dp, inonum) == -1) { + printf("getinode: %s\n", disk.d_error); exitval = 1; continue; } @@ -119,7 +114,7 @@ main(int argc, char *argv[]) memset(dp.dp2, 0, sizeof(*dp.dp2)); dp.dp2->di_gen = generation; } - putino(&disk); + putinode(&disk); (void)fsync(disk.d_fd); } (void)ufs_disk_close(&disk); diff --git a/sbin/dump/dump.h b/sbin/dump/dump.h index 7119df41b42b..680ff8a9b7a9 100644 --- a/sbin/dump/dump.h +++ b/sbin/dump/dump.h @@ -126,7 +126,7 @@ void dumpabort(int signo) __dead2; void dump_getfstab(void); char *rawname(char *cp); -union dinode *getinode(ino_t inum, int *mode); +union dinode *getino(ino_t inum, int *mode); /* rdump routines */ #ifdef RDUMP diff --git a/sbin/dump/main.c b/sbin/dump/main.c index 127b5ca33f97..7301ca318d80 100644 --- a/sbin/dump/main.c +++ b/sbin/dump/main.c @@ -549,7 +549,7 @@ main(int argc, char *argv[]) /* * Skip directory inodes deleted and maybe reallocated */ - dp = getinode(ino, &mode); + dp = getino(ino, &mode); if (mode != IFDIR) continue; (void)dumpino(dp, ino); @@ -568,7 +568,7 @@ main(int argc, char *argv[]) /* * Skip inodes deleted and reallocated as 
directories. */ - dp = getinode(ino, &mode); + dp = getino(ino, &mode); if (mode == IFDIR) continue; (void)dumpino(dp, ino); diff --git a/sbin/dump/traverse.c b/sbin/dump/traverse.c index 46f7e8a854a8..d094a08a7eb0 100644 --- a/sbin/dump/traverse.c +++ b/sbin/dump/traverse.c @@ -195,7 +195,7 @@ mapfiles(ino_t maxino, long *tapesize) } for (i = 0; i < inosused; i++, ino++) { if (ino < UFS_ROOTINO || - (dp = getinode(ino, &mode)) == NULL || + (dp = getino(ino, &mode)) == NULL || (mode & IFMT) == 0) continue; if (ino >= maxino) { @@ -277,7 +277,7 @@ mapdirs(ino_t maxino, long *tapesize) nodump = !nonodump && (TSTINO(ino, usedinomap) == 0); if ((isdir & 1) == 0 || (TSTINO(ino, dumpinomap) && !nodump)) continue; - dp = getinode(ino, &i); + dp = getino(ino, &i); /* * inode buf may change in searchdir(). */ @@ -421,7 +421,7 @@ searchdir( continue; } if (nodump) { - ip = getinode(dp->d_ino, &mode); + ip = getino(dp->d_ino, &mode); if (TSTINO(dp->d_ino, dumpinomap)) { CLRINO(dp->d_ino, dumpinomap); *tapesize -= blockest(ip); @@ -875,7 +875,7 @@ writeheader(ino_t ino) } union dinode * -getinode(ino_t inum, int *modep) +getino(ino_t inum, int *modep) { static ino_t minino, maxino; static caddr_t inoblock; diff --git a/sbin/ffsinfo/ffsinfo.c b/sbin/ffsinfo/ffsinfo.c index 5e08d35e5d91..9bd3210986a9 100644 --- a/sbin/ffsinfo/ffsinfo.c +++ b/sbin/ffsinfo/ffsinfo.c @@ -262,7 +262,7 @@ main(int argc, char **argv) dbg_csp = fscs; /* ... and dump it */ - for(dbg_csc=0; dbg_csc<sblock.fs_ncg; dbg_csc++) { + for (dbg_csc = 0; dbg_csc < sblock.fs_ncg; dbg_csc++) { snprintf(dbg_line, sizeof(dbg_line), "%d. 
csum in fscs", dbg_csc); DBG_DUMP_CSUM(&sblock, @@ -342,8 +342,8 @@ void dump_whole_ufs1_inode(ino_t inode, int level) { DBG_FUNC("dump_whole_ufs1_inode") - struct ufs1_dinode *ino; - int rb, mode; + union dinodep dp; + int rb; unsigned int ind2ctr, ind3ctr; ufs1_daddr_t *ind2ptr, *ind3ptr; char comment[80]; @@ -353,10 +353,10 @@ dump_whole_ufs1_inode(ino_t inode, int level) /* * Read the inode from disk/cache. */ - if (getino(&disk, (void **)&ino, inode, &mode) == -1) - err(1, "getino: %s", disk.d_error); + if (getinode(&disk, &dp, inode) == -1) + err(1, "getinode: %s", disk.d_error); - if(ino->di_nlink==0) { + if (dp.dp1->di_nlink == 0) { DBG_LEAVE; return; /* inode not in use */ } @@ -368,7 +368,7 @@ dump_whole_ufs1_inode(ino_t inode, int level) if (level & 0x100) { DBG_DUMP_INO(&sblock, comment, - ino); + dp.dp1); } if (!(level & 0x200)) { @@ -379,13 +379,13 @@ dump_whole_ufs1_inode(ino_t inode, int level) /* * Ok, now prepare for dumping all direct and indirect pointers. */ - rb = howmany(ino->di_size, sblock.fs_bsize) - UFS_NDADDR; - if(rb>0) { + rb = howmany(dp.dp1->di_size, sblock.fs_bsize) - UFS_NDADDR; + if (rb > 0) { /* * Dump single indirect block. */ - if (bread(&disk, fsbtodb(&sblock, ino->di_ib[0]), (void *)&i1blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, dp.dp1->di_ib[0]), + (void *)&i1blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 0", @@ -394,14 +394,14 @@ dump_whole_ufs1_inode(ino_t inode, int level) comment, i1blk, (size_t)rb); - rb-=howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t)); + rb -= howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t)); } - if(rb>0) { + if (rb > 0) { /* * Dump double indirect blocks. 
*/ - if (bread(&disk, fsbtodb(&sblock, ino->di_ib[1]), (void *)&i2blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, dp.dp1->di_ib[1]), + (void *)&i2blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 1", @@ -410,12 +410,12 @@ dump_whole_ufs1_inode(ino_t inode, int level) comment, i2blk, howmany(rb, howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t)))); - for(ind2ctr=0; ((ind2ctr < howmany(sblock.fs_bsize, - sizeof(ufs1_daddr_t))) && (rb>0)); ind2ctr++) { - ind2ptr=&((ufs1_daddr_t *)(void *)&i2blk)[ind2ctr]; + for (ind2ctr = 0; ((ind2ctr < howmany(sblock.fs_bsize, + sizeof(ufs1_daddr_t))) && (rb > 0)); ind2ctr++) { + ind2ptr = &((ufs1_daddr_t *)(void *)&i2blk)[ind2ctr]; - if (bread(&disk, fsbtodb(&sblock, *ind2ptr), (void *)&i1blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, *ind2ptr), + (void *)&i1blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), @@ -425,15 +425,15 @@ dump_whole_ufs1_inode(ino_t inode, int level) comment, i1blk, (size_t)rb); - rb-=howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t)); + rb -= howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t)); } } - if(rb>0) { + if (rb > 0) { /* * Dump triple indirect blocks. 
*/ - if (bread(&disk, fsbtodb(&sblock, ino->di_ib[2]), (void *)&i3blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, dp.dp1->di_ib[2]), + (void *)&i3blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 2", @@ -445,12 +445,12 @@ dump_whole_ufs1_inode(ino_t inode, int level) howmany(rb, SQUARE(howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t))))); #undef SQUARE - for(ind3ctr=0; ((ind3ctr<howmany(sblock.fs_bsize, - sizeof(ufs1_daddr_t)))&&(rb>0)); ind3ctr++) { - ind3ptr=&((ufs1_daddr_t *)(void *)&i3blk)[ind3ctr]; + for (ind3ctr = 0; ((ind3ctr < howmany(sblock.fs_bsize, + sizeof(ufs1_daddr_t))) && (rb > 0)); ind3ctr++) { + ind3ptr = &((ufs1_daddr_t *)(void *)&i3blk)[ind3ctr]; - if (bread(&disk, fsbtodb(&sblock, *ind3ptr), (void *)&i2blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, *ind3ptr), + (void *)&i2blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), @@ -461,8 +461,8 @@ dump_whole_ufs1_inode(ino_t inode, int level) i2blk, howmany(rb, howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t)))); - for(ind2ctr=0; ((ind2ctr < howmany(sblock.fs_bsize, - sizeof(ufs1_daddr_t)))&&(rb>0)); ind2ctr++) { + for (ind2ctr = 0; ((ind2ctr < howmany(sblock.fs_bsize, + sizeof(ufs1_daddr_t))) && (rb > 0)); ind2ctr++) { ind2ptr=&((ufs1_daddr_t *)(void *)&i2blk) [ind2ctr]; if (bread(&disk, fsbtodb(&sblock, *ind2ptr), @@ -477,7 +477,7 @@ dump_whole_ufs1_inode(ino_t inode, int level) comment, i1blk, (size_t)rb); - rb-=howmany(sblock.fs_bsize, + rb -= howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t)); } } @@ -496,8 +496,8 @@ void dump_whole_ufs2_inode(ino_t inode, int level) { DBG_FUNC("dump_whole_ufs2_inode") - struct ufs2_dinode *ino; - int rb, mode; + union dinodep dp; + int rb; unsigned int ind2ctr, ind3ctr; ufs2_daddr_t *ind2ptr, *ind3ptr; char comment[80]; @@ -507,10 +507,10 @@ dump_whole_ufs2_inode(ino_t 
inode, int level) /* * Read the inode from disk/cache. */ - if (getino(&disk, (void **)&ino, inode, &mode) == -1) - err(1, "getino: %s", disk.d_error); + if (getinode(&disk, &dp, inode) == -1) + err(1, "getinode: %s", disk.d_error); - if (ino->di_nlink == 0) { + if (dp.dp2->di_nlink == 0) { DBG_LEAVE; return; /* inode not in use */ } @@ -520,7 +520,7 @@ dump_whole_ufs2_inode(ino_t inode, int level) */ snprintf(comment, sizeof(comment), "Inode 0x%08jx", (uintmax_t)inode); if (level & 0x100) { - DBG_DUMP_INO(&sblock, comment, ino); + DBG_DUMP_INO(&sblock, comment, dp.dp2); } if (!(level & 0x200)) { @@ -531,13 +531,13 @@ dump_whole_ufs2_inode(ino_t inode, int level) /* * Ok, now prepare for dumping all direct and indirect pointers. */ - rb = howmany(ino->di_size, sblock.fs_bsize) - UFS_NDADDR; + rb = howmany(dp.dp2->di_size, sblock.fs_bsize) - UFS_NDADDR; if (rb > 0) { /* * Dump single indirect block. */ - if (bread(&disk, fsbtodb(&sblock, ino->di_ib[0]), (void *)&i1blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, dp.dp2->di_ib[0]), + (void *)&i1blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 0", @@ -549,8 +549,8 @@ dump_whole_ufs2_inode(ino_t inode, int level) /* * Dump double indirect blocks. 
*/ - if (bread(&disk, fsbtodb(&sblock, ino->di_ib[1]), (void *)&i2blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, dp.dp2->di_ib[1]), + (void *)&i2blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 1", @@ -563,8 +563,8 @@ dump_whole_ufs2_inode(ino_t inode, int level) sizeof(ufs2_daddr_t))) && (rb>0)); ind2ctr++) { ind2ptr = &((ufs2_daddr_t *)(void *)&i2blk)[ind2ctr]; - if (bread(&disk, fsbtodb(&sblock, *ind2ptr), (void *)&i1blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, *ind2ptr), + (void *)&i1blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), @@ -578,8 +578,8 @@ dump_whole_ufs2_inode(ino_t inode, int level) /* * Dump triple indirect blocks. */ - if (bread(&disk, fsbtodb(&sblock, ino->di_ib[2]), (void *)&i3blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, dp.dp2->di_ib[2]), + (void *)&i3blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 2", @@ -595,8 +595,8 @@ dump_whole_ufs2_inode(ino_t inode, int level) sizeof(ufs2_daddr_t))) && (rb > 0)); ind3ctr++) { ind3ptr = &((ufs2_daddr_t *)(void *)&i3blk)[ind3ctr]; - if (bread(&disk, fsbtodb(&sblock, *ind3ptr), (void *)&i2blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, *ind3ptr), + (void *)&i2blk, (size_t)sblock.fs_bsize) == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), @@ -610,8 +610,9 @@ dump_whole_ufs2_inode(ino_t inode, int level) for (ind2ctr = 0; ((ind2ctr < howmany(sblock.fs_bsize, sizeof(ufs2_daddr_t))) && (rb > 0)); ind2ctr++) { ind2ptr = &((ufs2_daddr_t *)(void *)&i2blk) [ind2ctr]; - if (bread(&disk, fsbtodb(&sblock, *ind2ptr), (void *)&i1blk, - (size_t)sblock.fs_bsize) == -1) { + if (bread(&disk, fsbtodb(&sblock, *ind2ptr), + (void *)&i1blk, 
(size_t)sblock.fs_bsize) + == -1) { err(1, "bread: %s", disk.d_error); } snprintf(comment, sizeof(comment), diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c index a590ee74227b..e923714c3c72 100644 --- a/sbin/fsck_ffs/dir.c +++ b/sbin/fsck_ffs/dir.c @@ -254,14 +254,14 @@ fileerror(ino_t cwd, ino_t ino, const char *errmesg) char pathbuf[MAXPATHLEN + 1]; pwarn("%s ", errmesg); - pinode(ino); - printf("\n"); - getpathname(pathbuf, cwd, ino); if (ino < UFS_ROOTINO || ino > maxino) { - pfatal("NAME=%s\n", pathbuf); + pfatal("out-of-range inode number %ju", (uintmax_t)ino); return; } dp = ginode(ino); + prtinode(ino, dp); + printf("\n"); + getpathname(pathbuf, cwd, ino); if (ftypeok(dp)) pfatal("%s=%s\n", (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE", @@ -309,7 +309,7 @@ adjust(struct inodesc *idesc, int lcnt) if (lcnt != 0) { pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname : ((DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE")); - pinode(idesc->id_number); + prtinode(idesc->id_number, dp); printf(" COUNT %d SHOULD BE %d", DIP(dp, di_nlink), DIP(dp, di_nlink) - lcnt); if (preen || usedsoftdep) { @@ -390,7 +390,8 @@ linkup(ino_t orphan, ino_t parentdir, char *name) dp = ginode(orphan); lostdir = (DIP(dp, di_mode) & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); - pinode(orphan); + prtinode(orphan, dp); + printf("\n"); if (preen && DIP(dp, di_size) == 0) return (0); if (cursnapshot != 0) { diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h index cfac25bdfe3d..ffe41be6cf3b 100644 --- a/sbin/fsck_ffs/fsck.h +++ b/sbin/fsck_ffs/fsck.h @@ -463,8 +463,8 @@ void pass4(void); int pass4check(struct inodesc *); void pass5(void); void pfatal(const char *fmt, ...) __printflike(1, 2); -void pinode(ino_t ino); void propagate(void); +void prtinode(ino_t ino, union dinode *dp); void pwarn(const char *fmt, ...) 
__printflike(1, 2); int readsb(int listerr); int reply(const char *question); diff --git a/sbin/fsck_ffs/gjournal.c b/sbin/fsck_ffs/gjournal.c index 79f670c00567..17361f78a058 100644 --- a/sbin/fsck_ffs/gjournal.c +++ b/sbin/fsck_ffs/gjournal.c @@ -392,13 +392,12 @@ clear_inode(struct ufs2_dinode *dino) void gjournal_check(const char *filesys) { - struct ufs2_dinode *dino; - void *p; + union dinodep dp; struct cgchain *cgc; struct cg *cgp; uint8_t *inosused; ino_t cino, ino; - int cg, mode; + int cg; devnam = filesys; opendisk(); @@ -444,19 +443,20 @@ gjournal_check(const char *filesys) /* Unallocated? Skip it. */ if (isclr(inosused, cino)) continue; - if (getino(diskp, &p, ino, &mode) == -1) - err(1, "getino(cg=%d ino=%ju)", - cg, (uintmax_t)ino); - dino = p; + if (getinode(diskp, &dp, ino) == -1) + err(1, "getinode (cg=%d ino=%ju) %s", + cg, (uintmax_t)ino, diskp->d_error); /* Not a regular file nor directory? Skip it. */ - if (!S_ISREG(dino->di_mode) && !S_ISDIR(dino->di_mode)) + if (!S_ISREG(dp.dp2->di_mode) && + !S_ISDIR(dp.dp2->di_mode)) continue; /* Has reference(s)? Skip it. */ - if (dino->di_nlink > 0) + if (dp.dp2->di_nlink > 0) continue; - //printf("Clearing inode=%d (size=%jd)\n", ino, (intmax_t)dino->di_size); + /* printf("Clearing inode=%d (size=%jd)\n", ino, + (intmax_t)dp.dp2->di_size); */ /* Free inode's blocks. */ - clear_inode(dino); + clear_inode(dp.dp2); /* Deallocate it. */ clrbit(inosused, cino); /* Update position of last used inode. */ @@ -469,17 +469,17 @@ gjournal_check(const char *filesys) cgp->cg_unrefs--; fs->fs_unrefs--; /* If this is directory, update related statistics. */ - if (S_ISDIR(dino->di_mode)) { + if (S_ISDIR(dp.dp2->di_mode)) { cgp->cg_cs.cs_ndir--; fs->fs_cs(fs, cg).cs_ndir--; fs->fs_cstotal.cs_ndir--; } /* Zero-fill the inode. */ - *dino = ufs2_zino; + *dp.dp2 = ufs2_zino; /* Write the inode back. 
*/ - if (putino(diskp) == -1) - err(1, "putino(cg=%d ino=%ju)", - cg, (uintmax_t)ino); + if (putinode(diskp) == -1) + err(1, "putinode (cg=%d ino=%ju) %s", + cg, (uintmax_t)ino, diskp->d_error); if (cgp->cg_unrefs == 0) { //printf("No more unreferenced inodes in cg=%d.\n", cg); break; diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c index 7cce44953a69..3f1cab8b3bd5 100644 --- a/sbin/fsck_ffs/inode.c +++ b/sbin/fsck_ffs/inode.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <pwd.h> #include <string.h> #include <time.h> +#include <libufs.h> #include "fsck.h" @@ -342,7 +343,11 @@ getnextinode(ino_t inumber, int rebuildcg) nextinop = inobuf.b_un.b_buf; } dp = (union dinode *)nextinop; - if (rebuildcg && nextinop == inobuf.b_un.b_buf) { + if (sblock.fs_magic == FS_UFS1_MAGIC) + nextinop += sizeof(struct ufs1_dinode); + else + nextinop += sizeof(struct ufs2_dinode); + if (rebuildcg && (char *)dp == inobuf.b_un.b_buf) { /* * Try to determine if we have reached the end of the * allocated inodes. @@ -355,7 +360,7 @@ getnextinode(ino_t inumber, int rebuildcg) UFS_NIADDR * sizeof(ufs2_daddr_t)) || dp->dp2.di_mode || dp->dp2.di_size) return (NULL); - goto inodegood; + return (dp); } if (!ftypeok(dp)) return (NULL); @@ -389,11 +394,6 @@ getnextinode(ino_t inumber, int rebuildcg) if (DIP(dp, di_ib[j]) != 0) return (NULL); } -inodegood: - if (sblock.fs_magic == FS_UFS1_MAGIC) - nextinop += sizeof(struct ufs1_dinode); - else - nextinop += sizeof(struct ufs2_dinode); return (dp); } @@ -534,7 +534,8 @@ clri(struct inodesc *idesc, const char *type, int flag) if (flag == 1) { pwarn("%s %s", type, (DIP(dp, di_mode) & IFMT) == IFDIR ? 
"DIR" : "FILE"); - pinode(idesc->id_number); + prtinode(idesc->id_number, dp); + printf("\n"); } if (preen || reply("CLEAR") == 1) { if (preen) @@ -600,9 +601,8 @@ clearentry(struct inodesc *idesc) } void -pinode(ino_t ino) +prtinode(ino_t ino, union dinode *dp) { - union dinode *dp; char *p; struct passwd *pw; time_t t; @@ -610,7 +610,6 @@ pinode(ino_t ino) printf(" I=%lu ", (u_long)ino); if (ino < UFS_ROOTINO || ino > maxino) return; - dp = ginode(ino); printf(" OWNER="); if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL) printf("%s ", pw->pw_name); diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c index e5118619475b..ce4d0a89a36a 100644 --- a/sbin/fsck_ffs/main.c +++ b/sbin/fsck_ffs/main.c @@ -458,30 +458,40 @@ checkfilesys(char *filesys) if (preen == 0 && yflag == 0 && sblock.fs_magic != FS_UFS1_MAGIC && fswritefd != -1 && getosreldate() >= P_OSREL_CK_CYLGRP) { if ((sblock.fs_metackhash & CK_CYLGRP) == 0 && - reply("ADD CYLINDER GROUP CHECK-HASH PROTECTION") != 0) + reply("ADD CYLINDER GROUP CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_CYLGRP; + sblock.fs_metackhash |= CK_CYLGRP; + } if ((sblock.fs_metackhash & CK_SUPERBLOCK) == 0 && getosreldate() >= P_OSREL_CK_SUPERBLOCK && reply("ADD SUPERBLOCK CHECK-HASH PROTECTION") != 0) { + ckhashadd |= CK_SUPERBLOCK; sblock.fs_metackhash |= CK_SUPERBLOCK; - sbdirty(); } #ifdef notyet if ((sblock.fs_metackhash & CK_INODE) == 0 && getosreldate() >= P_OSREL_CK_INODE && - reply("ADD INODE CHECK-HASH PROTECTION") != 0) + reply("ADD INODE CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_INODE; + sblock.fs_metackhash |= CK_INODE; + } if ((sblock.fs_metackhash & CK_INDIR) == 0 && getosreldate() >= P_OSREL_CK_INDIR && - reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0) + reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_INDIR; + sblock.fs_metackhash |= CK_INDIR; + } if ((sblock.fs_metackhash & CK_DIR) == 0 && getosreldate() >= P_OSREL_CK_DIR && - reply("ADD DIRECTORY CHECK-HASH PROTECTION") 
!= 0) + reply("ADD DIRECTORY CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_DIR; + sblock.fs_metackhash |= CK_DIR; + } #endif /* notyet */ - if (ckhashadd != 0) + if (ckhashadd != 0) { sblock.fs_flags |= FS_METACKHASH; + sbdirty(); + } } /* * Cleared if any questions answered no. Used to decide if diff --git a/sbin/fsck_ffs/pass5.c b/sbin/fsck_ffs/pass5.c index 71e3eda74c1b..436d184c327b 100644 --- a/sbin/fsck_ffs/pass5.c +++ b/sbin/fsck_ffs/pass5.c @@ -74,11 +74,8 @@ pass5(void) memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; /* check to see if we are to add a cylinder group check hash */ - if ((ckhashadd & CK_CYLGRP) != 0) { - fs->fs_metackhash |= CK_CYLGRP; + if ((ckhashadd & CK_CYLGRP) != 0) rewritecg = 1; - sbdirty(); - } if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) diff --git a/sbin/fsirand/fsirand.c b/sbin/fsirand/fsirand.c index bb0f27a59301..f5e26571f903 100644 --- a/sbin/fsirand/fsirand.c +++ b/sbin/fsirand/fsirand.c @@ -175,7 +175,7 @@ fsirand(char *device) } /* For each cylinder group, randomize inodes and update backup sblock */ - for (cg = 0, inumber = 0; cg < (int)sblock->fs_ncg; cg++) { + for (cg = 0, inumber = UFS_ROOTINO; cg < (int)sblock->fs_ncg; cg++) { /* Read in inodes, then print or randomize generation nums */ dblk = fsbtodb(sblock, ino_to_fsba(sblock, inumber)); if (lseek(devfd, (off_t)dblk * bsize, SEEK_SET) < 0) { @@ -187,21 +187,22 @@ fsirand(char *device) return (1); } - for (n = 0; n < (int)sblock->fs_ipg; n++, inumber++) { - if (sblock->fs_magic == FS_UFS1_MAGIC) - dp1 = &((struct ufs1_dinode *)inodebuf)[n]; - else - dp2 = &((struct ufs2_dinode *)inodebuf)[n]; - if (inumber >= UFS_ROOTINO) { - if (printonly) - (void)printf("ino %ju gen %08x\n", - (uintmax_t)inumber, - sblock->fs_magic == FS_UFS1_MAGIC ? 
- dp1->di_gen : dp2->di_gen); - else if (sblock->fs_magic == FS_UFS1_MAGIC) - dp1->di_gen = random(); - else - dp2->di_gen = random(); + dp1 = (struct ufs1_dinode *)(void *)inodebuf; + dp2 = (struct ufs2_dinode *)(void *)inodebuf; + for (n = cg > 0 ? 0 : UFS_ROOTINO; + n < (int)sblock->fs_ipg; + n++, inumber++) { + if (printonly) { + (void)printf("ino %ju gen %08x\n", + (uintmax_t)inumber, + sblock->fs_magic == FS_UFS1_MAGIC ? + dp1->di_gen : dp2->di_gen); + } else if (sblock->fs_magic == FS_UFS1_MAGIC) { + dp1->di_gen = arc4random(); + dp1++; + } else { + dp2->di_gen = arc4random(); + dp2++; } } diff --git a/sbin/growfs/growfs.c b/sbin/growfs/growfs.c index 61f90de7ea68..b75f377841ef 100644 --- a/sbin/growfs/growfs.c +++ b/sbin/growfs/growfs.c @@ -301,16 +301,21 @@ initcg(int cylno, time_t modtime, int fso, unsigned int Nflag) { DBG_FUNC("initcg") static caddr_t iobuf; + static long iobufsize; long blkno, start; ino_t ino; ufs2_daddr_t i, cbase, dmax; struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; struct csum *cs; uint j, d, dupper, dlower; - if (iobuf == NULL && (iobuf = malloc(sblock.fs_bsize * 3)) == NULL) - errx(37, "panic: cannot allocate I/O buffer"); - + if (iobuf == NULL) { + iobufsize = 2 * sblock.fs_bsize; + if ((iobuf = malloc(iobufsize)) == NULL) + errx(37, "panic: cannot allocate I/O buffer"); + memset(iobuf, '\0', iobufsize); + } /* * Determine block bounds for cylinder group. * Allow space for super block summary information in first @@ -375,12 +380,29 @@ initcg(int cylno, time_t modtime, int fso, unsigned int Nflag) acg.cg_cs.cs_nifree--; } /* + * Initialize the initial inode blocks. 
+ */ + dp1 = (struct ufs1_dinode *)(void *)iobuf; + dp2 = (struct ufs2_dinode *)(void *)iobuf; + for (i = 0; i < acg.cg_initediblk; i++) { + if (sblock.fs_magic == FS_UFS1_MAGIC) { + dp1->di_gen = arc4random(); + dp1++; + } else { + dp2->di_gen = arc4random(); + dp2++; + } + } + wtfs(fsbtodb(&sblock, cgimin(&sblock, cylno)), iobufsize, iobuf, + fso, Nflag); + /* * For the old file system, we have to initialize all the inodes. */ - if (sblock.fs_magic == FS_UFS1_MAGIC) { - bzero(iobuf, sblock.fs_bsize); - for (i = 0; i < sblock.fs_ipg / INOPF(&sblock); - i += sblock.fs_frag) { + if (sblock.fs_magic == FS_UFS1_MAGIC && + sblock.fs_ipg > 2 * INOPB(&sblock)) { + for (i = 2 * sblock.fs_frag; + i < sblock.fs_ipg / INOPF(&sblock); + i += sblock.fs_frag) { dp1 = (struct ufs1_dinode *)(void *)iobuf; for (j = 0; j < INOPB(&sblock); j++) { dp1->di_gen = arc4random(); @@ -463,12 +485,8 @@ initcg(int cylno, time_t modtime, int fso, unsigned int Nflag) *cs = acg.cg_cs; cgckhash(&acg); - memcpy(iobuf, &acg, sblock.fs_cgsize); - memset(iobuf + sblock.fs_cgsize, '\0', - sblock.fs_bsize * 3 - sblock.fs_cgsize); - - wtfs(fsbtodb(&sblock, cgtod(&sblock, cylno)), - sblock.fs_bsize * 3, iobuf, fso, Nflag); + wtfs(fsbtodb(&sblock, cgtod(&sblock, cylno)), sblock.fs_cgsize, &acg, + fso, Nflag); DBG_DUMP_CG(&sblock, "new cg", &acg); DBG_LEAVE; diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8 index e19b8a1efec4..c4ba67701f4e 100644 --- a/sbin/ipfw/ipfw.8 +++ b/sbin/ipfw/ipfw.8 @@ -1,7 +1,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 12, 2018 +.Dd November 13, 2018 .Dt IPFW 8 .Os .Sh NAME @@ -105,16 +105,6 @@ in-kernel NAT. .Ar number .Cm config .Ar config-options -.Pp -.Nm -.Op Fl cfnNqS -.Oo -.Fl p Ar preproc -.Oo -.Ar preproc-flags -.Oc -.Oc -.Ar pathname .Ss STATEFUL IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION .Nm .Oo Cm set Ar N Oc Cm nat64lsn Ar name Cm create Ar create-options @@ -166,6 +156,16 @@ in-kernel NAT. 
.Cm internal talist .Nm .Cm internal vlist +.Ss LIST OF RULES AND PREPROCESSING +.Nm +.Op Fl cfnNqS +.Oo +.Fl p Ar preproc +.Oo +.Ar preproc-flags +.Oc +.Oc +.Ar pathname .Sh DESCRIPTION The .Nm diff --git a/sbin/newfs/mkfs.c b/sbin/newfs/mkfs.c index 74a0f9511a85..2bea89cf2bc9 100644 --- a/sbin/newfs/mkfs.c +++ b/sbin/newfs/mkfs.c @@ -1029,7 +1029,7 @@ goth: void iput(union dinode *ip, ino_t ino) { - ufs2_daddr_t d; + union dinodep dp; bread(&disk, part_ofs + fsbtodb(&sblock, cgtod(&sblock, 0)), (char *)&acg, sblock.fs_cgsize); @@ -1043,20 +1043,15 @@ iput(union dinode *ip, ino_t ino) err(1, "iput: cgput: %s", disk.d_error); sblock.fs_cstotal.cs_nifree--; fscs[0].cs_nifree--; - if (ino >= (unsigned long)sblock.fs_ipg * sblock.fs_ncg) { - printf("fsinit: inode value out of range (%ju).\n", - (uintmax_t)ino); + if (getinode(&disk, &dp, ino) == -1) { + printf("iput: %s\n", disk.d_error); exit(32); } - d = fsbtodb(&sblock, ino_to_fsba(&sblock, ino)); - bread(&disk, part_ofs + d, (char *)iobuf, sblock.fs_bsize); if (sblock.fs_magic == FS_UFS1_MAGIC) - ((struct ufs1_dinode *)iobuf)[ino_to_fsbo(&sblock, ino)] = - ip->dp1; + *dp.dp1 = ip->dp1; else - ((struct ufs2_dinode *)iobuf)[ino_to_fsbo(&sblock, ino)] = - ip->dp2; - wtfs(d, sblock.fs_bsize, (char *)iobuf); + *dp.dp2 = ip->dp2; + putinode(&disk); } /* diff --git a/sbin/tunefs/tunefs.c b/sbin/tunefs/tunefs.c index ef27b60f6bef..6a6b7f767ede 100644 --- a/sbin/tunefs/tunefs.c +++ b/sbin/tunefs/tunefs.c @@ -679,41 +679,36 @@ dir_search(ufs2_daddr_t blk, int bytes) static ino_t journal_findfile(void) { - struct ufs1_dinode *dp1; - struct ufs2_dinode *dp2; + union dinodep dp; ino_t ino; - int mode; - void *ip; int i; - if (getino(&disk, &ip, UFS_ROOTINO, &mode) != 0) { - warn("Failed to get root inode"); + if (getinode(&disk, &dp, UFS_ROOTINO) != 0) { + warn("Failed to get root inode: %s", disk.d_error); return (-1); } - dp2 = ip; - dp1 = ip; if (sblock.fs_magic == FS_UFS1_MAGIC) { - if ((off_t)dp1->di_size >= 
lblktosize(&sblock, UFS_NDADDR)) { + if ((off_t)dp.dp1->di_size >= lblktosize(&sblock, UFS_NDADDR)) { warnx("UFS_ROOTINO extends beyond direct blocks."); return (-1); } for (i = 0; i < UFS_NDADDR; i++) { - if (dp1->di_db[i] == 0) + if (dp.dp1->di_db[i] == 0) break; - if ((ino = dir_search(dp1->di_db[i], - sblksize(&sblock, (off_t)dp1->di_size, i))) != 0) + if ((ino = dir_search(dp.dp1->di_db[i], + sblksize(&sblock, (off_t)dp.dp1->di_size, i))) != 0) return (ino); } } else { - if ((off_t)dp2->di_size >= lblktosize(&sblock, UFS_NDADDR)) { + if ((off_t)dp.dp2->di_size >= lblktosize(&sblock, UFS_NDADDR)) { warnx("UFS_ROOTINO extends beyond direct blocks."); return (-1); } for (i = 0; i < UFS_NDADDR; i++) { - if (dp2->di_db[i] == 0) + if (dp.dp2->di_db[i] == 0) break; - if ((ino = dir_search(dp2->di_db[i], - sblksize(&sblock, (off_t)dp2->di_size, i))) != 0) + if ((ino = dir_search(dp.dp2->di_db[i], + sblksize(&sblock, (off_t)dp.dp2->di_size, i))) != 0) return (ino); } } @@ -795,23 +790,18 @@ dir_extend(ufs2_daddr_t blk, ufs2_daddr_t nblk, off_t size, ino_t ino) static int journal_insertfile(ino_t ino) { - struct ufs1_dinode *dp1; - struct ufs2_dinode *dp2; - void *ip; + union dinodep dp; ufs2_daddr_t nblk; ufs2_daddr_t blk; ufs_lbn_t lbn; int size; - int mode; int off; - if (getino(&disk, &ip, UFS_ROOTINO, &mode) != 0) { - warn("Failed to get root inode"); + if (getinode(&disk, &dp, UFS_ROOTINO) != 0) { + warn("Failed to get root inode: %s", disk.d_error); sbdirty(); return (-1); } - dp2 = ip; - dp1 = ip; blk = 0; size = 0; nblk = journal_balloc(); @@ -824,15 +814,15 @@ journal_insertfile(ino_t ino) * have to free them and extend the block. 
*/ if (sblock.fs_magic == FS_UFS1_MAGIC) { - lbn = lblkno(&sblock, dp1->di_size); - off = blkoff(&sblock, dp1->di_size); - blk = dp1->di_db[lbn]; - size = sblksize(&sblock, (off_t)dp1->di_size, lbn); + lbn = lblkno(&sblock, dp.dp1->di_size); + off = blkoff(&sblock, dp.dp1->di_size); + blk = dp.dp1->di_db[lbn]; + size = sblksize(&sblock, (off_t)dp.dp1->di_size, lbn); } else { - lbn = lblkno(&sblock, dp2->di_size); - off = blkoff(&sblock, dp2->di_size); - blk = dp2->di_db[lbn]; - size = sblksize(&sblock, (off_t)dp2->di_size, lbn); + lbn = lblkno(&sblock, dp.dp2->di_size); + off = blkoff(&sblock, dp.dp2->di_size); + blk = dp.dp2->di_db[lbn]; + size = sblksize(&sblock, (off_t)dp.dp2->di_size, lbn); } if (off != 0) { if (dir_extend(blk, nblk, off, ino) == -1) @@ -843,16 +833,16 @@ journal_insertfile(ino_t ino) return (-1); } if (sblock.fs_magic == FS_UFS1_MAGIC) { - dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE; - dp1->di_db[lbn] = nblk; - dp1->di_size = lblktosize(&sblock, lbn+1); + dp.dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE; + dp.dp1->di_db[lbn] = nblk; + dp.dp1->di_size = lblktosize(&sblock, lbn+1); } else { - dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE; - dp2->di_db[lbn] = nblk; - dp2->di_size = lblktosize(&sblock, lbn+1); + dp.dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE; + dp.dp2->di_db[lbn] = nblk; + dp.dp2->di_size = lblktosize(&sblock, lbn+1); } - if (putino(&disk) < 0) { - warn("Failed to write root inode"); + if (putinode(&disk) < 0) { + warn("Failed to write root inode: %s", disk.d_error); return (-1); } if (cgwrite(&disk) < 0) { @@ -916,11 +906,8 @@ indir_fill(ufs2_daddr_t blk, int level, int *resid) static void journal_clear(void) { - struct ufs1_dinode *dp1; - struct ufs2_dinode *dp2; + union dinodep dp; ino_t ino; - int mode; - void *ip; ino = journal_findfile(); if (ino == (ino_t)-1 || ino == 0) { @@ -928,18 +915,16 @@ journal_clear(void) return; } printf("Clearing journal flags from inode %ju\n", 
(uintmax_t)ino); - if (getino(&disk, &ip, ino, &mode) != 0) { - warn("Failed to get journal inode"); + if (getinode(&disk, &dp, ino) != 0) { + warn("Failed to get journal inode: %s", disk.d_error); return; } - dp2 = ip; - dp1 = ip; if (sblock.fs_magic == FS_UFS1_MAGIC) - dp1->di_flags = 0; + dp.dp1->di_flags = 0; else - dp2->di_flags = 0; - if (putino(&disk) < 0) { - warn("Failed to write journal inode"); + dp.dp2->di_flags = 0; + if (putinode(&disk) < 0) { + warn("Failed to write journal inode: %s", disk.d_error); return; } } @@ -947,15 +932,12 @@ journal_clear(void) static int journal_alloc(int64_t size) { - struct ufs1_dinode *dp1; - struct ufs2_dinode *dp2; + union dinodep dp; ufs2_daddr_t blk; - void *ip; struct cg *cgp; int resid; ino_t ino; int blks; - int mode; time_t utime; int i; @@ -1007,8 +989,8 @@ journal_alloc(int64_t size) break; printf("Using inode %ju in cg %d for %jd byte journal\n", (uintmax_t)ino, cgp->cg_cgx, size); - if (getino(&disk, &ip, ino, &mode) != 0) { - warn("Failed to get allocated inode"); + if (getinode(&disk, &dp, ino) != 0) { + warn("Failed to get allocated inode: %s", disk.d_error); sbdirty(); goto out; } @@ -1017,39 +999,39 @@ journal_alloc(int64_t size) * blocks and size uninitialized. This causes legacy * fsck implementations to clear the inode. 
*/ - dp2 = ip; - dp1 = ip; time(&utime); if (sblock.fs_magic == FS_UFS1_MAGIC) { - bzero(dp1, sizeof(*dp1)); - dp1->di_size = size; - dp1->di_mode = IFREG | IREAD; - dp1->di_nlink = 1; - dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; - dp1->di_atime = utime; - dp1->di_mtime = utime; - dp1->di_ctime = utime; + bzero(dp.dp1, sizeof(*dp.dp1)); + dp.dp1->di_size = size; + dp.dp1->di_mode = IFREG | IREAD; + dp.dp1->di_nlink = 1; + dp.dp1->di_flags = + SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; + dp.dp1->di_atime = utime; + dp.dp1->di_mtime = utime; + dp.dp1->di_ctime = utime; } else { - bzero(dp2, sizeof(*dp2)); - dp2->di_size = size; - dp2->di_mode = IFREG | IREAD; - dp2->di_nlink = 1; - dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; - dp2->di_atime = utime; - dp2->di_mtime = utime; - dp2->di_ctime = utime; - dp2->di_birthtime = utime; + bzero(dp.dp2, sizeof(*dp.dp2)); + dp.dp2->di_size = size; + dp.dp2->di_mode = IFREG | IREAD; + dp.dp2->di_nlink = 1; + dp.dp2->di_flags = + SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP; + dp.dp2->di_atime = utime; + dp.dp2->di_mtime = utime; + dp.dp2->di_ctime = utime; + dp.dp2->di_birthtime = utime; } for (i = 0; i < UFS_NDADDR && resid; i++, resid--) { blk = journal_balloc(); if (blk <= 0) goto out; if (sblock.fs_magic == FS_UFS1_MAGIC) { - dp1->di_db[i] = blk; - dp1->di_blocks++; + dp.dp1->di_db[i] = blk; + dp.dp1->di_blocks++; } else { - dp2->di_db[i] = blk; - dp2->di_blocks++; + dp.dp2->di_db[i] = blk; + dp.dp2->di_blocks++; } } for (i = 0; i < UFS_NIADDR && resid; i++) { @@ -1062,19 +1044,20 @@ journal_alloc(int64_t size) goto out; } if (sblock.fs_magic == FS_UFS1_MAGIC) { - dp1->di_ib[i] = blk; - dp1->di_blocks += blks; + dp.dp1->di_ib[i] = blk; + dp.dp1->di_blocks += blks; } else { - dp2->di_ib[i] = blk; - dp2->di_blocks += blks; + dp.dp2->di_ib[i] = blk; + dp.dp2->di_blocks += blks; } } if (sblock.fs_magic == FS_UFS1_MAGIC) - dp1->di_blocks *= sblock.fs_bsize / disk.d_bsize; + dp.dp1->di_blocks *= sblock.fs_bsize / 
disk.d_bsize; else - dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize; - if (putino(&disk) < 0) { - warn("Failed to write inode"); + dp.dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize; + if (putinode(&disk) < 0) { + warn("Failed to write allocated inode: %s", + disk.d_error); sbdirty(); return (-1); } diff --git a/share/mk/bsd.linker.mk b/share/mk/bsd.linker.mk index 35cfe7128202..8d6c28a74d0e 100644 --- a/share/mk/bsd.linker.mk +++ b/share/mk/bsd.linker.mk @@ -86,9 +86,6 @@ ${X_}LINKER_FEATURES= ${X_}LINKER_FEATURES+= build-id ${X_}LINKER_FEATURES+= ifunc .endif -.if ${${X_}LINKER_TYPE} != "lld" || ${${X_}LINKER_VERSION} >= 50000 -${X_}LINKER_FEATURES+= filter -.endif .if ${${X_}LINKER_TYPE} == "lld" && ${${X_}LINKER_VERSION} >= 60000 ${X_}LINKER_FEATURES+= retpoline .endif diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk index 72e3a69e80ff..886922b94e0b 100644 --- a/share/mk/src.libnames.mk +++ b/share/mk/src.libnames.mk @@ -75,6 +75,7 @@ _LIBRARIES= \ cam \ casper \ cap_dns \ + cap_fileargs \ cap_grp \ cap_pwd \ cap_random \ @@ -238,6 +239,7 @@ _DP_cam= sbuf _DP_kvm= elf _DP_casper= nv _DP_cap_dns= nv +_DP_cap_fileargs= nv _DP_cap_grp= nv _DP_cap_pwd= nv _DP_cap_random= nv diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 6e36ae975239..55db65ffc63c 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1581,6 +1581,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) identify_cpu1(); identify_hypervisor(); + identify_cpu_fixup_bsp(); identify_cpu2(); initializecpucache(); diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 34fb1e8e4370..fb4754ef4fcc 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -372,3 +372,8 @@ device vmx # VMware VMXNET3 Ethernet # Netmap provides direct access to TX/RX rings on supported NICs device netmap # netmap(4) support + +# evdev interface +options EVDEV_SUPPORT # evdev support in legacy drivers +device evdev # input event device support 
+device uinput # install /dev/uinput cdev diff --git a/sys/amd64/conf/MINIMAL b/sys/amd64/conf/MINIMAL index 4a2bb7a0920b..4b0833377a26 100644 --- a/sys/amd64/conf/MINIMAL +++ b/sys/amd64/conf/MINIMAL @@ -147,3 +147,8 @@ device bpf # Berkeley packet filter # NOTE: XENHVM depends on xenpci. They must be added or removed together. options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver + +# evdev interface +options EVDEV_SUPPORT # evdev support in legacy drivers +device evdev # input event device support +device uinput # install /dev/uinput cdev diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES index fdf691e05f86..4367b0358210 100644 --- a/sys/amd64/conf/NOTES +++ b/sys/amd64/conf/NOTES @@ -642,6 +642,11 @@ options LINPROCFS options LINSYSFS ##################################################################### +# ZFS support + +options ZFS + +##################################################################### # VM OPTIONS # KSTACK_PAGES is the number of memory pages to assign to the kernel diff --git a/sys/compat/freebsd32/Makefile b/sys/compat/freebsd32/Makefile index bb933dd4697e..67901e6efcf6 100644 --- a/sys/compat/freebsd32/Makefile +++ b/sys/compat/freebsd32/Makefile @@ -11,7 +11,7 @@ all: sysent: freebsd32_sysent.c freebsd32_syscall.h freebsd32_proto.h freebsd32_systrace_args.c freebsd32_sysent.c freebsd32_syscalls.c freebsd32_syscall.h freebsd32_proto.h freebsd32_systrace_args.c : \ - ../../kern/makesyscalls.sh syscalls.master syscalls.conf capabilities.conf + ../../kern/makesyscalls.sh syscalls.master syscalls.conf ../../kern/capabilities.conf sh ../../kern/makesyscalls.sh syscalls.master syscalls.conf clean: diff --git a/sys/compat/freebsd32/capabilities.conf b/sys/compat/freebsd32/capabilities.conf deleted file mode 100644 index 12272c4bd251..000000000000 --- a/sys/compat/freebsd32/capabilities.conf +++ /dev/null @@ -1,298 +0,0 @@ -## -## Copyright (c) 2008-2010 Robert N. M. 
Watson -## Copyright (c) 2016 The FreeBSD Foundation -## All rights reserved. -## -## This software was developed at the University of Cambridge Computer -## Laboratory with support from a grant from Google, Inc. -## -## Portions of this software were developed by Konstantin Belousov -## under sponsorship from the FreeBSD Foundation. -## -## Redistribution and use in source and binary forms, with or without -## modification, are permitted provided that the following conditions -## are met: -## 1. Redistributions of source code must retain the above copyright -## notice, this list of conditions and the following disclaimer. -## 2. Redistributions in binary form must reproduce the above copyright -## notice, this list of conditions and the following disclaimer in the -## documentation and/or other materials provided with the distribution. -## -## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -## ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -## SUCH DAMAGE. -## -## List of system calls enabled in freebsd32 capability mode, one name -## per line. See the original list in the sys/kern/capabilities.conf. -## Position of the compat syscall in this file must be identical to -## the master, to facilitate comparision and diagnostic. 
-## -## $FreeBSD$ -## - -__acl_aclcheck_fd -__acl_delete_fd -__acl_get_fd -__acl_set_fd -__mac_get_fd -#__mac_get_pid -__mac_get_proc -__mac_set_fd -__mac_set_proc -freebsd32___sysctl -freebsd32__umtx_op -abort2 -accept -accept4 -aio_cancel -freebsd32_aio_error -freebsd32_aio_fsync -freebsd32_aio_read -freebsd32_aio_return -freebsd32_aio_suspend -freebsd32_aio_waitcomplete -freebsd32_aio_write -#audit -bindat -cap_enter -cap_fcntls_get -cap_fcntls_limit -cap_getmode -freebsd32_cap_ioctls_get -freebsd32_cap_ioctls_limit -__cap_rights_get -cap_rights_limit -freebsd32_clock_getres -freebsd32_clock_gettime -close -closefrom -connectat -#cpuset -freebsd32_cpuset_getaffinity -#freebsd32_cpuset_getid -freebsd32_cpuset_setaffinity -#freebsd32_cpuset_setid -dup -dup2 -extattr_delete_fd -extattr_get_fd -extattr_list_fd -extattr_set_fd -fchflags -fchmod -fchown -freebsd32_fcntl -freebsd32_fexecve -flock -fork -fpathconf -freebsd32_fstat -freebsd32_fstatat -freebsd32_getdirentries -freebsd32_fstatfs -freebsd32_mknodat -freebsd32_ftruncate -freebsd32_lseek -freebsd32_mmap -mmap -freebsd32_pread -freebsd32_pwrite -freebsd32_fstat -fstatfs -fsync -ftruncate -freebsd32_ftruncate -freebsd32_futimens -freebsd32_futimes -getaudit -getaudit_addr -getauid -freebsd32_getcontext -freebsd32_getdents -freebsd32_getdirentries -getdirentries -getdomainname -getdtablesize -getegid -geteuid -gethostid -gethostname -freebsd32_getitimer -getgid -getgroups -getlogin -freebsd32_getpagesize -getpeername -getpgid -getpgrp -getpid -getppid -getpriority -getrandom -getresgid -getresuid -getrlimit -freebsd32_getrusage -getsid -getsockname -getsockopt -freebsd32_gettimeofday -getuid -freebsd32_ioctl -issetugid -freebsd32_kevent -kill -freebsd32_kmq_notify -freebsd32_kmq_setattr -freebsd32_kmq_timedreceive -freebsd32_kmq_timedsend -kqueue -freebsd32_ktimer_create -ktimer_delete -ktimer_getoverrun -freebsd32_ktimer_gettime -freebsd32_ktimer_settime -#ktrace -freebsd32_lio_listio -listen -freebsd32_lseek 
-madvise -mincore -minherit -mlock -mlockall -freebsd32_mmap -freebsd32_mprotect -msync -munlock -munlockall -munmap -freebsd32_nanosleep -ntp_gettime -freebsd6_freebsd32_aio_read -freebsd6_freebsd32_aio_write -break -freebsd6_freebsd32_lio_listio -chflagsat -faccessat -fchmodat -fchownat -freebsd32_fstatat -freebsd32_futimesat -linkat -mkdirat -mkfifoat -mknodat -openat -readlinkat -renameat -symlinkat -unlinkat -freebsd32_utimensat -pdfork -pdgetpid -pdkill -#pdwait4 # not yet implemented -freebsd32_pipe -pipe2 -poll -freebsd32_ppoll -freebsd32_posix_fallocate -freebsd32_pread -freebsd32_preadv -profil -#ptrace -freebsd32_pwrite -freebsd32_pwritev -read -freebsd32_readv -freebsd6_freebsd32_recv -freebsd32_recvfrom -freebsd32_recvmsg -rtprio -rtprio_thread -sbrk -sched_get_priority_max -sched_get_priority_min -sched_getparam -sched_getscheduler -freebsd32_sched_rr_get_interval -sched_setparam -sched_setscheduler -sched_yield -sctp_generic_recvmsg -sctp_generic_sendmsg -sctp_generic_sendmsg_iov -sctp_peeloff -freebsd32_pselect -freebsd32_select -freebsd6_freebsd32_send -freebsd32_sendfile -freebsd32_sendmsg -sendto -setaudit -setaudit_addr -setauid -freebsd32_setcontext -setegid -seteuid -setgid -freebsd32_setitimer -setpriority -setregid -setresgid -setresuid -setreuid -setrlimit -setsid -setsockopt -setuid -shm_open -shutdown -freebsd32_sigaction -freebsd32_sigaltstack -freebsd32_sigblock -freebsd32_sigpending -sigpending -freebsd32_sigprocmask -sigprocmask -freebsd32_sigqueue -sigqueue -freebsd32_sigreturn -freebsd32_sigsetmask -freebsd32_sigstack -freebsd32_sigsuspend -sigsuspend -freebsd32_sigtimedwait -freebsd32_sigvec -freebsd32_sigwaitinfo -sigwait -socket -socketpair -sstk -sync -sys_exit -freebsd32_sysarch -thr_create -thr_exit -thr_kill -#thr_kill2 -freebsd32_thr_new -thr_self -thr_set_name -freebsd32_thr_suspend -thr_wake -umask -utrace -uuidgen -write -freebsd32_writev -yield diff --git a/sys/compat/freebsd32/syscalls.conf 
b/sys/compat/freebsd32/syscalls.conf index 3715400fff8b..0b3d59f2fcf3 100644 --- a/sys/compat/freebsd32/syscalls.conf +++ b/sys/compat/freebsd32/syscalls.conf @@ -9,3 +9,5 @@ syscallprefix="FREEBSD32_SYS_" switchname="freebsd32_sysent" namesname="freebsd32_syscallnames" systrace="freebsd32_systrace_args.c" +abi_func_prefix="freebsd32_" +capabilities_conf="../../kern/capabilities.conf" diff --git a/sys/conf/dtb.mk b/sys/conf/dtb.mk index f072bd200557..003ecc62ddb6 100644 --- a/sys/conf/dtb.mk +++ b/sys/conf/dtb.mk @@ -55,21 +55,21 @@ DTBO=${DTSO:R:S/$/.dtbo/} all: ${DTB} ${DTBO} .if defined(DTS) -.export DTC +.export DTC ECHO .for _dts in ${DTS} ${_dts:R:S/$/.dtb/}: ${_dts} ${OP_META} @${ECHO} Generating ${.TARGET} from ${_dts} - @env ECHO=${ECHO} ${SYSDIR}/tools/fdt/make_dtb.sh ${SYSDIR} ${_dts} ${.OBJDIR} + @${SYSDIR}/tools/fdt/make_dtb.sh ${SYSDIR} ${_dts} ${.OBJDIR} CLEANFILES+=${_dts:R:S/$/.dtb/} .endfor .endif .if defined(DTSO) -.export DTC +.export DTC ECHO .for _dtso in ${DTSO} ${_dtso:R:S/$/.dtbo/}: ${_dtso} ${OP_META} @${ECHO} Generating ${.TARGET} from ${_dtso} - @env ECHO=${ECHO} ${SYSDIR}/tools/fdt/make_dtbo.sh ${SYSDIR} overlays/${_dtso} ${.OBJDIR} + @${SYSDIR}/tools/fdt/make_dtbo.sh ${SYSDIR} overlays/${_dtso} ${.OBJDIR} CLEANFILES+=${_dtso:R:S/$/.dtbo/} .endfor .endif diff --git a/sys/contrib/libnv/nvpair.c b/sys/contrib/libnv/nvpair.c index fc2350d9f0db..23038c5c5b71 100644 --- a/sys/contrib/libnv/nvpair.c +++ b/sys/contrib/libnv/nvpair.c @@ -1276,11 +1276,6 @@ nvpair_create_descriptor(const char *name, int value) { nvpair_t *nvp; - if (value < 0 || !fd_is_valid(value)) { - ERRNO_SET(EBADF); - return (NULL); - } - value = fcntl(value, F_DUPFD_CLOEXEC, 0); if (value < 0) return (NULL); @@ -1517,11 +1512,6 @@ nvpair_create_descriptor_array(const char *name, const int *value, if (value[ii] == -1) { fds[ii] = -1; } else { - if (!fd_is_valid(value[ii])) { - ERRNO_SET(EBADF); - goto fail; - } - fds[ii] = fcntl(value[ii], F_DUPFD_CLOEXEC, 0); if (fds[ii] 
== -1) goto fail; @@ -2035,10 +2025,6 @@ nvpair_append_descriptor_array(nvpair_t *nvp, const int value) NVPAIR_ASSERT(nvp); PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR_ARRAY); - if (value < 0 || !fd_is_valid(value)) { - ERRNO_SET(EBADF); - return -1; - } fd = fcntl(value, F_DUPFD_CLOEXEC, 0); if (fd == -1) { return (-1); diff --git a/sys/dev/amdsmn/amdsmn.c b/sys/dev/amdsmn/amdsmn.c index 17792dd922cd..245c375ac202 100644 --- a/sys/dev/amdsmn/amdsmn.c +++ b/sys/dev/amdsmn/amdsmn.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <machine/cpufunc.h> +#include <machine/cputypes.h> #include <machine/md_var.h> #include <machine/specialreg.h> @@ -53,14 +54,21 @@ __FBSDID("$FreeBSD$"); #define SMN_ADDR_REG 0x60 #define SMN_DATA_REG 0x64 +#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 +#define PCI_DEVICE_ID_AMD_17H_ROOT_DF_F3 0x1463 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT_DF_F3 0x15eb + struct amdsmn_softc { struct mtx smn_lock; }; static struct pciid { - uint32_t device_id; + uint16_t amdsmn_vendorid; + uint16_t amdsmn_deviceid; } amdsmn_ids[] = { - { 0x14501022 }, + { CPU_VENDOR_AMD, PCI_DEVICE_ID_AMD_17H_ROOT }, + { CPU_VENDOR_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT }, }; /* @@ -89,18 +97,21 @@ static driver_t amdsmn_driver = { static devclass_t amdsmn_devclass; DRIVER_MODULE(amdsmn, hostb, amdsmn_driver, amdsmn_devclass, NULL, NULL); MODULE_VERSION(amdsmn, 1); -MODULE_PNP_INFO("W32:vendor/device", pci, amdsmn, amdsmn_ids, +MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmn, amdsmn_ids, nitems(amdsmn_ids)); static bool amdsmn_match(device_t parent) { - uint32_t devid; + uint16_t vendor, device; size_t i; - devid = pci_get_devid(parent); + vendor = pci_get_vendor(parent); + device = pci_get_device(parent); + for (i = 0; i < nitems(amdsmn_ids); i++) - if (amdsmn_ids[i].device_id == devid) + if (vendor == amdsmn_ids[i].amdsmn_vendorid && + device == amdsmn_ids[i].amdsmn_deviceid) return (true); 
return (false); } diff --git a/sys/dev/amdtemp/amdtemp.c b/sys/dev/amdtemp/amdtemp.c index 2463212c25f5..2c7afcd0c10b 100644 --- a/sys/dev/amdtemp/amdtemp.c +++ b/sys/dev/amdtemp/amdtemp.c @@ -86,7 +86,10 @@ struct amdtemp_softc { #define DEVICEID_AMD_MISC16 0x1533 #define DEVICEID_AMD_MISC16_M30H 0x1583 #define DEVICEID_AMD_MISC17 0x141d -#define DEVICEID_AMD_HOSTB17H 0x1450 +#define DEVICEID_AMD_HOSTB17H_ROOT 0x1450 +#define DEVICEID_AMD_HOSTB17H_DF_F3 0x1463 +#define DEVICEID_AMD_HOSTB17H_M10H_ROOT 0x15d0 +#define DEVICEID_AMD_HOSTB17H_M10H_DF_F3 0x15eb static struct amdtemp_product { uint16_t amdtemp_vendorid; @@ -101,7 +104,8 @@ static struct amdtemp_product { { VENDORID_AMD, DEVICEID_AMD_MISC16 }, { VENDORID_AMD, DEVICEID_AMD_MISC16_M30H }, { VENDORID_AMD, DEVICEID_AMD_MISC17 }, - { VENDORID_AMD, DEVICEID_AMD_HOSTB17H }, + { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_ROOT }, + { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M10H_ROOT }, }; /* @@ -111,8 +115,15 @@ static struct amdtemp_product { /* * Reported Temperature, Family 17h + * + * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register + * provide the current temp. bit 19, when clear, means the temp is reported in + * a range 0.."225C" (probable typo for 255C), and when set changes the range + * to -49..206C. 
*/ -#define AMDTEMP_17H_CUR_TMP 0x59800 +#define AMDTEMP_17H_CUR_TMP 0x59800 +#define AMDTEMP_17H_CUR_TMP_RANGE_SEL (1 << 19) +#define AMDTEMP_17H_CUR_TMP_RANGE_OFF 490 /* * Thermaltrip Status Register (Family 0Fh only) @@ -591,13 +602,15 @@ static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); - uint32_t temp; + uint32_t temp, val; int error; - error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &temp); + error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val); KASSERT(error == 0, ("amdsmn_read")); - temp = ((temp >> 21) & 0x7ff) * 5 / 4; + temp = ((val >> 21) & 0x7ff) * 5 / 4; + if ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0) + temp -= AMDTEMP_17H_CUR_TMP_RANGE_OFF; temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10; return (temp); diff --git a/sys/dev/cxgbe/t4_netmap.c b/sys/dev/cxgbe/t4_netmap.c index adcd82034277..a27d54ec07f6 100644 --- a/sys/dev/cxgbe/t4_netmap.c +++ b/sys/dev/cxgbe/t4_netmap.c @@ -492,6 +492,9 @@ cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp, ASSERT_SYNCHRONIZED_OP(sc); + if (!nm_netmap_on(na)) + return (0); + if ((vi->flags & VI_INIT_DONE) == 0) return (0); diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c index fd95a29ee180..d701b473b00c 100644 --- a/sys/dev/nvme/nvme_ns.c +++ b/sys/dev/nvme/nvme_ns.c @@ -535,11 +535,11 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id, if (!mtx_initialized(&ns->lock)) mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF); - status.done = FALSE; + status.done = 0; nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data, nvme_completion_poll_cb, &status); - while (status.done == FALSE) - DELAY(5); + while (!atomic_load_acq_int(&status.done)) + pause("nvme", 1); if (nvme_completion_is_error(&status.cpl)) { nvme_printf(ctrlr, "nvme_identify_namespace failed\n"); return (ENXIO); diff --git a/sys/dev/tws/tws.c b/sys/dev/tws/tws.c index 6b89966e7033..6cb814533ac6 100644 --- a/sys/dev/tws/tws.c +++ 
b/sys/dev/tws/tws.c @@ -445,9 +445,7 @@ tws_setup_intr(struct tws_softc *sc, int irqs) if (!(sc->intr_handle[i])) { if ((error = bus_setup_intr(sc->tws_dev, sc->irq_res[i], INTR_TYPE_CAM | INTR_MPSAFE, -#if (__FreeBSD_version >= 700000) NULL, -#endif tws_intr, sc, &sc->intr_handle[i]))) { tws_log(sc, SETUP_INTR_RES); return(FAILURE); diff --git a/sys/dev/tws/tws_cam.c b/sys/dev/tws/tws_cam.c index f948af092819..a8d75cc8fc20 100644 --- a/sys/dev/tws/tws_cam.c +++ b/sys/dev/tws/tws_cam.c @@ -160,9 +160,7 @@ tws_cam_attach(struct tws_softc *sc) */ sc->sim = cam_sim_alloc(tws_action, tws_poll, "tws", sc, device_get_unit(sc->tws_dev), -#if (__FreeBSD_version >= 700000) &sc->sim_lock, -#endif tws_cam_depth, 1, devq); /* 1, 1, devq); */ if (sc->sim == NULL) { @@ -172,9 +170,7 @@ tws_cam_attach(struct tws_softc *sc) /* Register the bus. */ mtx_lock(&sc->sim_lock); if (xpt_bus_register(sc->sim, -#if (__FreeBSD_version >= 700000) sc->tws_dev, -#endif 0) != CAM_SUCCESS) { cam_sim_free(sc->sim, TRUE); /* passing true will free the devq */ sc->sim = NULL; /* so cam_detach will not try to free it */ @@ -269,7 +265,6 @@ tws_action(struct cam_sim *sim, union ccb *ccb) { TWS_TRACE_DEBUG(sc, "get tran settings", sim, ccb); -#if (__FreeBSD_version >= 700000 ) ccb->cts.protocol = PROTO_SCSI; ccb->cts.protocol_version = SCSI_REV_2; ccb->cts.transport = XPORT_SPI; @@ -279,10 +274,6 @@ tws_action(struct cam_sim *sim, union ccb *ccb) ccb->cts.xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB; ccb->cts.proto_specific.scsi.valid = CTS_SCSI_VALID_TQ; ccb->cts.proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB; -#else - ccb->cts.valid = (CCB_TRANS_DISC_VALID | CCB_TRANS_TQ_VALID); - ccb->cts.flags &= ~(CCB_TRANS_DISC_ENB | CCB_TRANS_TAG_ENB); -#endif ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); @@ -314,13 +305,11 @@ tws_action(struct cam_sim *sim, union ccb *ccb) strlcpy(ccb->cpi.sim_vid, "FreeBSD", SIM_IDLEN); strlcpy(ccb->cpi.hba_vid, "3ware", HBA_IDLEN); strlcpy(ccb->cpi.dev_name, 
cam_sim_name(sim), DEV_IDLEN); -#if (__FreeBSD_version >= 700000 ) ccb->cpi.transport = XPORT_SPI; ccb->cpi.transport_version = 2; ccb->cpi.protocol = PROTO_SCSI; ccb->cpi.protocol_version = SCSI_REV_2; ccb->cpi.maxio = TWS_MAX_IO_SIZE; -#endif ccb->ccb_h.status = CAM_REQ_CMP; xpt_done(ccb); diff --git a/sys/dev/tws/tws_services.h b/sys/dev/tws/tws_services.h index 9fa3c3799994..22d0b50207dc 100644 --- a/sys/dev/tws/tws_services.h +++ b/sys/dev/tws/tws_services.h @@ -131,12 +131,5 @@ struct error_desc { /* ------------------------ */ -#if (__FreeBSD_version >= 700000) #include <sys/clock.h> #define TWS_LOCAL_TIME (time_second - utc_offset()) -#else -#include <machine/clock.h> -#define TWS_LOCAL_TIME (time_second - (tz_minuteswest * 60) - \ - (wall_cmos_clock ? adjkerntz : 0)) -#endif - diff --git a/sys/dts/Makefile.inc b/sys/dts/Makefile.inc index 2161751dc237..bb6fef4bbd20 100644 --- a/sys/dts/Makefile.inc +++ b/sys/dts/Makefile.inc @@ -5,9 +5,9 @@ SYSDIR?=${SRCTOP}/sys test-dts: .for dts in ${DTS} @env MACHINE=`basename ${.CURDIR}` ${SYSDIR}/tools/fdt/make_dtb.sh ${SYSDIR} ${dts} /tmp +.endfor test-dtso: .for dtso in ${DTSO} @env MACHINE=`basename ${.CURDIR}` ${SYSDIR}/tools/fdt/make_dtbo.sh ${SYSDIR} ${dtso} /tmp - .endfor diff --git a/sys/dts/arm/Makefile b/sys/dts/arm/Makefile index 03d156d94770..855f8847d514 100644 --- a/sys/dts/arm/Makefile +++ b/sys/dts/arm/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ -DTS!=ls *.dts +DTS!=ls ${.CURDIR}/*.dts all: test-dts diff --git a/sys/dts/arm/Makefile.inc b/sys/dts/arm/Makefile.inc new file mode 100644 index 000000000000..265f86d1ed55 --- /dev/null +++ b/sys/dts/arm/Makefile.inc @@ -0,0 +1,3 @@ +# $FreeBSD$ + +.include "../Makefile.inc" diff --git a/sys/dts/arm/overlays/Makefile b/sys/dts/arm/overlays/Makefile index aff6f3d459bc..cfc655929ef3 100644 --- a/sys/dts/arm/overlays/Makefile +++ b/sys/dts/arm/overlays/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ -DTSO!=ls *.dtso +DTSO!=ls ${.CURDIR}/*.dtso all: test-dtso diff --git 
a/sys/dts/arm64/Makefile b/sys/dts/arm64/Makefile index 03d156d94770..855f8847d514 100644 --- a/sys/dts/arm64/Makefile +++ b/sys/dts/arm64/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ -DTS!=ls *.dts +DTS!=ls ${.CURDIR}/*.dts all: test-dts diff --git a/sys/dts/arm64/Makefile.inc b/sys/dts/arm64/Makefile.inc new file mode 100644 index 000000000000..265f86d1ed55 --- /dev/null +++ b/sys/dts/arm64/Makefile.inc @@ -0,0 +1,3 @@ +# $FreeBSD$ + +.include "../Makefile.inc" diff --git a/sys/dts/arm64/overlays/Makefile b/sys/dts/arm64/overlays/Makefile index aff6f3d459bc..cfc655929ef3 100644 --- a/sys/dts/arm64/overlays/Makefile +++ b/sys/dts/arm64/overlays/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ -DTSO!=ls *.dtso +DTSO!=ls ${.CURDIR}/*.dtso all: test-dtso diff --git a/sys/dts/mips/Makefile b/sys/dts/mips/Makefile index 03d156d94770..855f8847d514 100644 --- a/sys/dts/mips/Makefile +++ b/sys/dts/mips/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ -DTS!=ls *.dts +DTS!=ls ${.CURDIR}/*.dts all: test-dts diff --git a/sys/dts/powerpc/Makefile b/sys/dts/powerpc/Makefile index 03d156d94770..855f8847d514 100644 --- a/sys/dts/powerpc/Makefile +++ b/sys/dts/powerpc/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ -DTS!=ls *.dts +DTS!=ls ${.CURDIR}/*.dts all: test-dts diff --git a/sys/i386/conf/GENERIC b/sys/i386/conf/GENERIC index e3f0a62d7e00..46e6a599fb40 100644 --- a/sys/i386/conf/GENERIC +++ b/sys/i386/conf/GENERIC @@ -366,3 +366,8 @@ device xenpci # Xen HVM Hypervisor services driver # VMware support device vmx # VMware VMXNET3 Ethernet + +# evdev interface +options EVDEV_SUPPORT # evdev support in legacy drivers +device evdev # input event device support +device uinput # install /dev/uinput cdev diff --git a/sys/i386/conf/MINIMAL b/sys/i386/conf/MINIMAL index 60869f93e804..6e578223ba55 100644 --- a/sys/i386/conf/MINIMAL +++ b/sys/i386/conf/MINIMAL @@ -148,3 +148,8 @@ device bpf # Berkeley packet filter # NOTE: XENHVM depends on xenpci. They must be added or removed together. 
options XENHVM # Xen HVM kernel infrastructure device xenpci # Xen HVM Hypervisor services driver + +# evdev interface +options EVDEV_SUPPORT # evdev support in legacy drivers +device evdev # input event device support +device uinput # install /dev/uinput cdev diff --git a/sys/kern/genoffset.c b/sys/kern/genoffset.c index 4a1e12a1d072..aadb909e943c 100644 --- a/sys/kern/genoffset.c +++ b/sys/kern/genoffset.c @@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$"); #include <sys/assym.h> #include <sys/proc.h> -OFFSYM(td_pre_epoch_prio, thread, u_char); OFFSYM(td_priority, thread, u_char); OFFSYM(td_epochnest, thread, u_char); OFFSYM(td_critnest, thread, u_int); diff --git a/sys/kern/kern_environment.c b/sys/kern/kern_environment.c index b8eebc217721..a2480c3d689e 100644 --- a/sys/kern/kern_environment.c +++ b/sys/kern/kern_environment.c @@ -249,6 +249,7 @@ init_static_kenv(char *buf, size_t len) { char *eval; + KASSERT(!dynamic_kenv, ("kenv: dynamic_kenv already initialized")); /* * Give the static environment a chance to disable the loader(8) * environment first. This is done with loader_env.disabled=1. 
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index 65c2e299bbc7..a35cd3b834fb 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -486,7 +486,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v) int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) - int doing_lockprof; + int doing_lockprof = 0; #endif td = curthread; @@ -690,7 +690,7 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v) int64_t spin_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) - int doing_lockprof; + int doing_lockprof = 0; #endif tid = (uintptr_t)curthread; diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c index 80f54006b693..5cb18fbfa582 100644 --- a/sys/kern/kern_rmlock.c +++ b/sys/kern/kern_rmlock.c @@ -742,7 +742,7 @@ _rm_assert(const struct rmlock *rm, int what, const char *file, int line) { int count; - if (panicstr != NULL) + if (SCHEDULER_STOPPED()) return; switch (what) { case RA_LOCKED: diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index 9ec967b94192..2d3342e497d1 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -445,7 +445,7 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) - uintptr_t state; + uintptr_t state = 0; int doing_lockprof = 0; #endif @@ -913,7 +913,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF) int64_t all_time = 0; #endif #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING) - uintptr_t state; + uintptr_t state = 0; int doing_lockprof = 0; #endif int extra_work = 0; @@ -1439,7 +1439,7 @@ __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line) { const struct rwlock *rw; - if (panicstr != NULL) + if (SCHEDULER_STOPPED()) return; rw = rwlock2rw(c); diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c index c1ebfb2f715a..600a3d03558a 100644 --- a/sys/kern/kern_sx.c +++ 
b/sys/kern/kern_sx.c @@ -1416,7 +1416,7 @@ _sx_assert(const struct sx *sx, int what, const char *file, int line) int slocked = 0; #endif - if (panicstr != NULL) + if (SCHEDULER_STOPPED()) return; switch (what) { case SA_SLOCKED: diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index b60c2d5b40ef..ee456fc6920e 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/epoch.h> #include <sys/rangelock.h> #include <sys/resourcevar.h> #include <sys/sdt.h> @@ -272,6 +273,7 @@ thread_init(void *mem, int size, int flags) td->td_rlqe = NULL; EVENTHANDLER_DIRECT_INVOKE(thread_init, td); umtx_thread_init(td); + epoch_thread_init(td); td->td_kstack = 0; td->td_sel = NULL; return (0); @@ -291,6 +293,7 @@ thread_fini(void *mem, int size) turnstile_free(td->td_turnstile); sleepq_free(td->td_sleepqueue); umtx_thread_fini(td); + epoch_thread_fini(td); seltdfini(td); } diff --git a/sys/kern/makesyscalls.sh b/sys/kern/makesyscalls.sh index 4030ed321693..0e95a79c7bad 100644 --- a/sys/kern/makesyscalls.sh +++ b/sys/kern/makesyscalls.sh @@ -45,13 +45,7 @@ sysarg="sysarg.switch.$$" sysprotoend="sysprotoend.$$" systracetmp="systrace.$$" systraceret="systraceret.$$" - -if [ -r capabilities.conf ]; then - capenabled=`egrep -v '^#|^$' capabilities.conf` - capenabled=`echo $capenabled | sed 's/ /,/g'` -else - capenabled="" -fi +capabilities_conf="capabilities.conf" trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret" 0 @@ -67,6 +61,13 @@ if [ -n "$2" ]; then . 
$2 fi +if [ -r $capabilities_conf ]; then + capenabled=`egrep -v '^#|^$' $capabilities_conf` + capenabled=`echo $capenabled | sed 's/ /,/g'` +else + capenabled="" +fi + sed -e ' # FreeBSD ID, includes, comments, and blank lines /.*\$FreeBSD/b done_joining @@ -137,6 +138,7 @@ sed -e ' switchname = \"$switchname\" namesname = \"$namesname\" infile = \"$1\" + abi_func_prefix = \"$abi_func_prefix\" capenabled_string = \"$capenabled\" "' @@ -381,7 +383,8 @@ sed -e ' # from it. # for (cap in capenabled) { - if (funcname == capenabled[cap]) { + if (funcname == capenabled[cap] || + funcname == abi_func_prefix capenabled[cap]) { flags = "SYF_CAPENABLED"; break; } diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c index db7df6932633..0e5156c5053c 100644 --- a/sys/kern/subr_blist.c +++ b/sys/kern/subr_blist.c @@ -46,11 +46,11 @@ * upper bound on a potential allocation, but not necessarily a tight upper * bound. * - * The radix tree also implements two collapsed states for meta nodes: - * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is - * in either of these two states, all information contained underneath - * the node is considered stale. These states are used to optimize - * allocation and freeing operations. + * The bitmap field in each node directs the search for available blocks. + * For a leaf node, a bit is set if the corresponding block is free. For a + * meta node, a bit is set if the corresponding subtree contains a free + * block somewhere within it. The search at a meta node considers only + * children of that node that represent a range that includes a free block. * * The hinting greatly increases code efficiency for allocations while * the general radix structure optimizes both allocations and frees. The @@ -59,19 +59,19 @@ * * The blist code wires all necessary memory at creation time. Neither * allocations nor frees require interaction with the memory subsystem. 
- * The non-blocking features of the blist code are used in the swap code - * (vm/swap_pager.c). + * The non-blocking nature of allocations and frees is required by swap + * code (vm/swap_pager.c). * - * LAYOUT: The radix tree is laid out recursively using a - * linear array. Each meta node is immediately followed (laid out - * sequentially in memory) by BLIST_META_RADIX lower level nodes. This - * is a recursive structure but one that can be easily scanned through - * a very simple 'skip' calculation. In order to support large radixes, - * portions of the tree may reside outside our memory allocation. We - * handle this with an early-termination optimization (when bighint is - * set to -1) on the scan. The memory allocation is only large enough - * to cover the number of blocks requested at creation time even if it - * must be encompassed in larger root-node radix. + * LAYOUT: The radix tree is laid out recursively using a linear array. + * Each meta node is immediately followed (laid out sequentially in + * memory) by BLIST_META_RADIX lower level nodes. This is a recursive + * structure but one that can be easily scanned through a very simple + * 'skip' calculation. The memory allocation is only large enough to + * cover the number of blocks requested at creation time. Nodes that + * represent blocks beyond that limit, nodes that would never be read + * or written, are not allocated, so that the last of the + * BLIST_META_RADIX lower level nodes of a some nodes may not be + * allocated. * * NOTE: the allocator cannot currently allocate more than * BLIST_BMAP_RADIX blocks per call. 
It will panic with 'allocation too @@ -105,6 +105,7 @@ __FBSDID("$FreeBSD$"); #define BLIST_DEBUG #endif +#include <sys/errno.h> #include <sys/types.h> #include <sys/malloc.h> #include <sys/sbuf.h> @@ -118,7 +119,7 @@ __FBSDID("$FreeBSD$"); #define bitcount64(x) __bitcount64((uint64_t)(x)) #define malloc(a,b,c) calloc(a, 1) #define free(a,b) free(a) -static __inline int imax(int a, int b) { return (a > b ? a : b); } +#define ummin(a,b) ((a) < (b) ? (a) : (b)) #include <sys/blist.h> @@ -179,6 +180,18 @@ radix_to_skip(daddr_t radix) } /* + * Provide a mask with count bits set, starting as position n. + */ +static inline u_daddr_t +bitrange(int n, int count) +{ + + return (((u_daddr_t)-1 << n) & + ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count)))); +} + + +/* * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t. * Assumes that the argument has only one bit set. */ @@ -220,9 +233,7 @@ blist_t blist_create(daddr_t blocks, int flags) { blist_t bl; - daddr_t i, last_block; - u_daddr_t nodes, radix, skip; - int digit; + u_daddr_t nodes, radix; if (blocks == 0) panic("invalid block count"); @@ -230,30 +241,13 @@ blist_create(daddr_t blocks, int flags) /* * Calculate the radix and node count used for scanning. */ - last_block = blocks - 1; + nodes = 1; radix = BLIST_BMAP_RADIX; - while (radix < blocks) { - if (((last_block / radix + 1) & BLIST_META_MASK) != 0) - /* - * We must widen the blist to avoid partially - * filled nodes. - */ - last_block |= radix - 1; + while (radix <= blocks) { + nodes += 1 + (blocks - 1) / radix; radix *= BLIST_META_RADIX; } - /* - * Count the meta-nodes in the expanded tree, including the final - * terminator, from the bottom level up to the root. 
- */ - nodes = 1; - if (radix - blocks >= BLIST_BMAP_RADIX) - nodes++; - last_block /= BLIST_BMAP_RADIX; - while (last_block > 0) { - nodes += last_block + 1; - last_block /= BLIST_META_RADIX; - } bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags | M_ZERO); if (bl == NULL) @@ -261,33 +255,6 @@ blist_create(daddr_t blocks, int flags) bl->bl_blocks = blocks; bl->bl_radix = radix; - bl->bl_cursor = 0; - - /* - * Initialize the empty tree by filling in root values, then initialize - * just the terminators in the rest of the tree. - */ - bl->bl_root[0].bm_bighint = 0; - if (radix == BLIST_BMAP_RADIX) - bl->bl_root[0].u.bmu_bitmap = 0; - else - bl->bl_root[0].u.bmu_avail = 0; - last_block = blocks - 1; - i = 0; - while (radix > BLIST_BMAP_RADIX) { - radix /= BLIST_META_RADIX; - skip = radix_to_skip(radix); - digit = last_block / radix; - i += 1 + digit * skip; - if (digit != BLIST_META_MASK) { - /* - * Add a terminator. - */ - bl->bl_root[i + skip].bm_bighint = (daddr_t)-1; - bl->bl_root[i + skip].u.bmu_bitmap = 0; - } - last_block %= radix; - } #if defined(BLIST_DEBUG) printf( @@ -321,6 +288,9 @@ blist_alloc(blist_t bl, daddr_t count) { daddr_t blk; + if (count > BLIST_MAX_ALLOC) + panic("allocation too large"); + /* * This loop iterates at most twice. 
An allocation failure in the * first iteration leads to a second iteration only if the cursor was @@ -331,12 +301,13 @@ blist_alloc(blist_t bl, daddr_t count) blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count, bl->bl_radix); if (blk != SWAPBLK_NONE) { + bl->bl_avail -= count; bl->bl_cursor = blk + count; if (bl->bl_cursor == bl->bl_blocks) bl->bl_cursor = 0; return (blk); - } else if (bl->bl_cursor != 0) - bl->bl_cursor = 0; + } + bl->bl_cursor = 0; } return (SWAPBLK_NONE); } @@ -348,10 +319,7 @@ daddr_t blist_avail(blist_t bl) { - if (bl->bl_radix == BLIST_BMAP_RADIX) - return (bitcount64(bl->bl_root->u.bmu_bitmap)); - else - return (bl->bl_root->u.bmu_avail); + return (bl->bl_avail); } /* @@ -363,7 +331,10 @@ void blist_free(blist_t bl, daddr_t blkno, daddr_t count) { + if (blkno < 0 || blkno + count > bl->bl_blocks) + panic("freeing invalid range"); blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix); + bl->bl_avail += count; } /* @@ -375,8 +346,13 @@ blist_free(blist_t bl, daddr_t blkno, daddr_t count) daddr_t blist_fill(blist_t bl, daddr_t blkno, daddr_t count) { + daddr_t filled; - return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix)); + if (blkno < 0 || blkno + count > bl->bl_blocks) + panic("filling invalid range"); + filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix); + bl->bl_avail -= filled; + return (filled); } /* @@ -414,8 +390,11 @@ blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags) void blist_print(blist_t bl) { - printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor); - blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4); + printf("BLIST avail = %jd, cursor = %08jx {\n", + (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor); + + if (bl->bl_root->bm_bitmap != 0) + blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4); printf("}\n"); } @@ -569,16 +548,11 @@ blist_stats(blist_t bl, struct sbuf *s) * Check for skippable subtrees starting at i. 
*/ while (radix > BLIST_BMAP_RADIX) { - if (bl->bl_root[nodes].u.bmu_avail == 0) { + if (bl->bl_root[nodes].bm_bitmap == 0) { if (gap_stats_counting(stats)) update_gap_stats(stats, i); break; } - if (bl->bl_root[nodes].u.bmu_avail == radix) { - if (!gap_stats_counting(stats)) - update_gap_stats(stats, i); - break; - } /* * Skip subtree root. @@ -590,7 +564,7 @@ blist_stats(blist_t bl, struct sbuf *s) /* * Scan leaf. */ - mask = bl->bl_root[nodes].u.bmu_bitmap; + mask = bl->bl_root[nodes].bm_bitmap; diff = mask ^ (mask << 1); if (gap_stats_counting(stats)) diff ^= 1; @@ -618,7 +592,57 @@ blist_stats(blist_t bl, struct sbuf *s) */ /* - * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). + * BLST_NEXT_LEAF_ALLOC() - allocate the first few blocks in the next leaf. + * + * 'scan' is a leaf node, associated with a block containing 'blk'. + * The next leaf node could be adjacent, or several nodes away if the + * least common ancestor of 'scan' and its neighbor is several levels + * up. Use 'blk' to determine how many meta-nodes lie between the + * leaves. If the next leaf has enough initial bits set, clear them + * and clear the bits in the meta nodes on the path up to the least + * common ancestor to mark any subtrees made completely empty. + */ +static int +blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) +{ + blmeta_t *next; + daddr_t skip; + u_daddr_t radix; + int digit; + + next = scan + 1; + blk += BLIST_BMAP_RADIX; + radix = BLIST_BMAP_RADIX; + while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 && + (next->bm_bitmap & 1) == 1) { + next++; + radix *= BLIST_META_RADIX; + } + if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) { + /* + * The next leaf doesn't have enough free blocks at the + * beginning to complete the spanning allocation. + */ + return (ENOMEM); + } + /* Clear the first 'count' bits in the next leaf to allocate. 
*/ + next->bm_bitmap &= (u_daddr_t)-1 << count; + + /* + * Update bitmaps of next-ancestors, up to least common ancestor. + */ + skip = radix_to_skip(radix); + while (radix != BLIST_BMAP_RADIX && next->bm_bitmap == 0) { + (--next)->bm_bitmap ^= 1; + radix /= BLIST_META_RADIX; + } + if (next->bm_bitmap == 0) + scan[-digit * skip].bm_bitmap ^= (u_daddr_t)1 << digit; + return (0); +} + +/* + * BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap). * * This is the core of the allocator and is optimized for the * BLIST_BMAP_RADIX block allocation case. Otherwise, execution @@ -633,15 +657,15 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) range1 = 0; count1 = count - 1; num_shifts = fls(count1); - mask = scan->u.bmu_bitmap; + mask = scan->bm_bitmap; while ((-mask & ~mask) != 0 && num_shifts > 0) { /* * If bit i is set in mask, then bits in [i, i+range1] are set - * in scan->u.bmu_bitmap. The value of range1 is equal to + * in scan->bm_bitmap. The value of range1 is equal to * count1 >> num_shifts. Grow range and reduce num_shifts to 0, * while preserving these invariants. The updates to mask leave * fewer bits set, but each bit that remains set represents a - * longer string of consecutive bits set in scan->u.bmu_bitmap. + * longer string of consecutive bits set in scan->bm_bitmap. * If more updates to mask cannot clear more bits, because mask * is partitioned with all 0 bits preceding all 1 bits, the loop * terminates immediately. @@ -685,31 +709,14 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) * An allocation within this leaf is impossible, so a successful * allocation depends on the next leaf providing some of the blocks. */ - if (((blk / BLIST_BMAP_RADIX + 1) & BLIST_META_MASK) == 0) { - /* - * The next leaf has a different meta-node parent, so it - * is not necessarily initialized. 
Update bighint, - * comparing the range found at the end of mask to the - * largest earlier range that could have been made to - * vanish in the initial processing of mask. - */ - scan->bm_bighint = imax(BLIST_BMAP_RADIX - lo, range1); - return (SWAPBLK_NONE); - } - hi -= BLIST_BMAP_RADIX; - if (((scan[1].u.bmu_bitmap + 1) & ~((u_daddr_t)-1 << hi)) != 0) { + if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0) /* - * The next leaf doesn't have enough free blocks at the - * beginning to complete the spanning allocation. The - * hint cannot be updated, because the same allocation - * request could be satisfied later, by this leaf, if - * the state of the next leaf changes, and without any - * changes to this leaf. + * The hint cannot be updated, because the same + * allocation request could be satisfied later, by this + * leaf, if the state of the next leaf changes, and + * without any changes to this leaf. */ return (SWAPBLK_NONE); - } - /* Clear the first 'hi' bits in the next leaf, allocating them. */ - scan[1].u.bmu_bitmap &= (u_daddr_t)-1 << hi; hi = BLIST_BMAP_RADIX; } @@ -724,12 +731,9 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) } else { /* Clear the bits of mask at position 'hi' and higher. */ mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi); - /* If this allocation uses all the bits, clear the hint. */ - if (mask == scan->u.bmu_bitmap) - scan->bm_bighint = 0; } /* Clear the allocated bits from this leaf. 
*/ - scan->u.bmu_bitmap &= ~mask; + scan->bm_bitmap &= ~mask; return ((blk & ~BLIST_BMAP_MASK) + lo); } @@ -744,81 +748,61 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix) { - daddr_t blk, i, next_skip, r, skip; - int child; + daddr_t blk, i, r, skip; + u_daddr_t bit, mask; bool scan_from_start; + int digit; if (radix == BLIST_BMAP_RADIX) return (blst_leaf_alloc(scan, cursor, count)); - if (scan->u.bmu_avail < count) { - /* - * The meta node's hint must be too large if the allocation - * exceeds the number of free blocks. Reduce the hint, and - * return failure. - */ - scan->bm_bighint = scan->u.bmu_avail; - return (SWAPBLK_NONE); - } blk = cursor & -radix; + scan_from_start = (cursor == blk); + radix /= BLIST_META_RADIX; skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; + mask = scan->bm_bitmap; + + /* Discard any candidates that appear before cursor. */ + digit = (cursor / radix) & BLIST_META_MASK; + mask &= (u_daddr_t)-1 << digit; /* - * An ALL-FREE meta node requires special handling before allocating - * any of its blocks. + * If the first try is for a block that includes the cursor, pre-undo + * the digit * radix offset in the first call; otherwise, ignore the + * cursor entirely. */ - if (scan->u.bmu_avail == radix) { - radix /= BLIST_META_RADIX; - - /* - * Reinitialize each of the meta node's children. An ALL-FREE - * meta node cannot have a terminator in any subtree. - */ - for (i = 1; i < skip; i += next_skip) { - if (next_skip == 1) - scan[i].u.bmu_bitmap = (u_daddr_t)-1; - else - scan[i].u.bmu_avail = radix; - scan[i].bm_bighint = radix; - } - } else { - radix /= BLIST_META_RADIX; - } + if (((mask >> digit) & 1) == 1) + cursor -= digit * radix; + else + cursor = blk; - if (count > radix) { - /* - * The allocation exceeds the number of blocks that are - * managed by a subtree of this meta node. 
- */ - panic("allocation too large"); - } - scan_from_start = cursor == blk; - child = (cursor - blk) / radix; - blk += child * radix; - for (i = 1 + child * next_skip; i < skip; i += next_skip) { + /* + * Examine the nonempty subtree associated with each bit set in mask. + */ + do { + bit = mask & -mask; + digit = bitpos(bit); + i = 1 + digit * skip; if (count <= scan[i].bm_bighint) { /* * The allocation might fit beginning in the i'th subtree. */ - r = blst_meta_alloc(&scan[i], - cursor > blk ? cursor : blk, count, radix); + r = blst_meta_alloc(&scan[i], cursor + digit * radix, + count, radix); if (r != SWAPBLK_NONE) { - scan->u.bmu_avail -= count; + if (scan[i].bm_bitmap == 0) + scan->bm_bitmap ^= bit; return (r); } - } else if (scan[i].bm_bighint == (daddr_t)-1) { - /* - * Terminator - */ - break; } - blk += radix; - } + cursor = blk; + } while ((mask ^= bit) != 0); /* - * We couldn't allocate count in this subtree, update bighint. + * We couldn't allocate count in this subtree. If the whole tree was + * scanned, and the last tree node is allocated, update bighint. */ - if (scan_from_start && scan->bm_bighint >= count) + if (scan_from_start && !(digit == BLIST_META_RADIX - 1 && + scan[i].bm_bighint == BLIST_MAX_ALLOC)) scan->bm_bighint = count - 1; return (SWAPBLK_NONE); @@ -832,7 +816,6 @@ static void blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) { u_daddr_t mask; - int n; /* * free some data in this bitmap @@ -840,20 +823,10 @@ blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) * \_________/\__/ * count n */ - n = blk & BLIST_BMAP_MASK; - mask = ((u_daddr_t)-1 << n) & - ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); - if (scan->u.bmu_bitmap & mask) + mask = bitrange(blk & BLIST_BMAP_MASK, count); + if (scan->bm_bitmap & mask) panic("freeing free block"); - scan->u.bmu_bitmap |= mask; - - /* - * We could probably do a better job here. We are required to make - * bighint at least as large as the biggest contiguous block of - * data. 
If we just shoehorn it, a little extra overhead will - * be incured on the next allocation (but only that one typically). - */ - scan->bm_bighint = BLIST_BMAP_RADIX; + scan->bm_bitmap |= mask; } /* @@ -869,79 +842,37 @@ blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix) { - daddr_t blk, i, next_skip, skip, v; - int child; - - if (scan->bm_bighint == (daddr_t)-1) - panic("freeing invalid range"); - if (radix == BLIST_BMAP_RADIX) - return (blst_leaf_free(scan, freeBlk, count)); - skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; - - if (scan->u.bmu_avail == 0) { - /* - * ALL-ALLOCATED special case, with possible - * shortcut to ALL-FREE special case. - */ - scan->u.bmu_avail = count; - scan->bm_bighint = count; - - if (count != radix) { - for (i = 1; i < skip; i += next_skip) { - if (scan[i].bm_bighint == (daddr_t)-1) - break; - scan[i].bm_bighint = 0; - if (next_skip == 1) { - scan[i].u.bmu_bitmap = 0; - } else { - scan[i].u.bmu_avail = 0; - } - } - /* fall through */ - } - } else { - scan->u.bmu_avail += count; - /* scan->bm_bighint = radix; */ - } + daddr_t blk, endBlk, i, skip; + int digit, endDigit; /* - * ALL-FREE special case. + * We could probably do a better job here. We are required to make + * bighint at least as large as the biggest allocable block of data. + * If we just shoehorn it, a little extra overhead will be incurred + * on the next allocation (but only that one typically). 
*/ + scan->bm_bighint = BLIST_MAX_ALLOC; - if (scan->u.bmu_avail == radix) - return; - if (scan->u.bmu_avail > radix) - panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld", - (long long)count, (long long)scan->u.bmu_avail, - (long long)radix); - - /* - * Break the free down into its components - */ + if (radix == BLIST_BMAP_RADIX) + return (blst_leaf_free(scan, freeBlk, count)); - blk = freeBlk & -radix; + endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix); radix /= BLIST_META_RADIX; - - child = (freeBlk - blk) / radix; - blk += child * radix; - i = 1 + child * next_skip; - while (i < skip && blk < freeBlk + count) { - v = blk + radix - freeBlk; - if (v > count) - v = count; - blst_meta_free(&scan[i], freeBlk, v, radix); - if (scan->bm_bighint < scan[i].bm_bighint) - scan->bm_bighint = scan[i].bm_bighint; - count -= v; - freeBlk += v; + skip = radix_to_skip(radix); + blk = freeBlk & -radix; + digit = (blk / radix) & BLIST_META_MASK; + endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK); + scan->bm_bitmap |= bitrange(digit, endDigit - digit); + for (i = 1 + digit * skip; blk < endBlk; i += skip) { blk += radix; - i += next_skip; + count = ummin(blk, endBlk) - freeBlk; + blst_meta_free(&scan[i], freeBlk, count, radix); + freeBlk = blk; } } /* - * BLIST_RADIX_COPY() - copy one radix tree to another + * BLST_COPY() - copy one radix tree to another * * Locates free space in the source tree and frees it in the destination * tree. The space may not already be free in the destination. 
@@ -950,21 +881,21 @@ static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, daddr_t count) { - daddr_t i, next_skip, skip; + daddr_t endBlk, i, skip; /* * Leaf node */ if (radix == BLIST_BMAP_RADIX) { - u_daddr_t v = scan->u.bmu_bitmap; + u_daddr_t v = scan->bm_bitmap; if (v == (u_daddr_t)-1) { blist_free(dest, blk, count); } else if (v != 0) { int i; - for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { + for (i = 0; i < count; ++i) { if (v & ((u_daddr_t)1 << i)) blist_free(dest, blk + i, 1); } @@ -976,42 +907,22 @@ blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, * Meta node */ - if (scan->u.bmu_avail == 0) { + if (scan->bm_bitmap == 0) { /* * Source all allocated, leave dest allocated */ return; } - if (scan->u.bmu_avail == radix) { - /* - * Source all free, free entire dest - */ - if (count < radix) - blist_free(dest, blk, count); - else - blist_free(dest, blk, radix); - return; - } - - skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; + endBlk = blk + count; radix /= BLIST_META_RADIX; - - for (i = 1; count && i < skip; i += next_skip) { - if (scan[i].bm_bighint == (daddr_t)-1) - break; - - if (count >= radix) { - blst_copy(&scan[i], blk, radix, dest, radix); - count -= radix; - } else { - if (count) { - blst_copy(&scan[i], blk, radix, dest, count); - } - count = 0; - } + skip = radix_to_skip(radix); + for (i = 1; blk < endBlk; i += skip) { blk += radix; + count = radix; + if (blk >= endBlk) + count -= blk - endBlk; + blst_copy(&scan[i], blk - radix, radix, dest, count); } } @@ -1027,16 +938,13 @@ blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) { daddr_t nblks; u_daddr_t mask; - int n; - n = blk & BLIST_BMAP_MASK; - mask = ((u_daddr_t)-1 << n) & - ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); + mask = bitrange(blk & BLIST_BMAP_MASK, count); /* Count the number of blocks that we are allocating. 
*/ - nblks = bitcount64(scan->u.bmu_bitmap & mask); + nblks = bitcount64(scan->bm_bitmap & mask); - scan->u.bmu_bitmap &= ~mask; + scan->bm_bitmap &= ~mask; return (nblks); } @@ -1051,70 +959,27 @@ blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix) { - daddr_t blk, i, nblks, next_skip, skip, v; - int child; + daddr_t blk, endBlk, i, nblks, skip; + int digit; - if (scan->bm_bighint == (daddr_t)-1) - panic("filling invalid range"); - if (count > radix) { - /* - * The allocation exceeds the number of blocks that are - * managed by this node. - */ - panic("fill too large"); - } if (radix == BLIST_BMAP_RADIX) return (blst_leaf_fill(scan, allocBlk, count)); - if (count == radix || scan->u.bmu_avail == 0) { - /* - * ALL-ALLOCATED special case - */ - nblks = scan->u.bmu_avail; - scan->u.bmu_avail = 0; - scan->bm_bighint = 0; - return (nblks); - } + + endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix); + radix /= BLIST_META_RADIX; skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; blk = allocBlk & -radix; - - /* - * An ALL-FREE meta node requires special handling before allocating - * any of its blocks. - */ - if (scan->u.bmu_avail == radix) { - radix /= BLIST_META_RADIX; - - /* - * Reinitialize each of the meta node's children. An ALL-FREE - * meta node cannot have a terminator in any subtree. 
- */ - for (i = 1; i < skip; i += next_skip) { - if (next_skip == 1) - scan[i].u.bmu_bitmap = (u_daddr_t)-1; - else - scan[i].u.bmu_avail = radix; - scan[i].bm_bighint = radix; - } - } else { - radix /= BLIST_META_RADIX; - } - nblks = 0; - child = (allocBlk - blk) / radix; - blk += child * radix; - i = 1 + child * next_skip; - while (i < skip && blk < allocBlk + count) { - v = blk + radix - allocBlk; - if (v > count) - v = count; - nblks += blst_meta_fill(&scan[i], allocBlk, v, radix); - count -= v; - allocBlk += v; + while (blk < endBlk) { + digit = (blk / radix) & BLIST_META_MASK; + i = 1 + digit * skip; blk += radix; - i += next_skip; + count = ummin(blk, endBlk) - allocBlk; + nblks += blst_meta_fill(&scan[i], allocBlk, count, radix); + if (scan[i].bm_bitmap == 0) + scan->bm_bitmap &= ~((u_daddr_t)1 << digit); + allocBlk = blk; } - scan->u.bmu_avail -= nblks; return (nblks); } @@ -1123,64 +988,44 @@ blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix) static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) { - daddr_t i, next_skip, skip; + daddr_t skip; + u_daddr_t bit, mask; + int digit; if (radix == BLIST_BMAP_RADIX) { printf( - "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", + "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n", tab, tab, "", (long long)blk, (long long)radix, - (long long)scan->u.bmu_bitmap, + 1 + (BLIST_BMAP_RADIX - 1) / 4, + (long long)scan->bm_bitmap, (long long)scan->bm_bighint ); return; } - if (scan->u.bmu_avail == 0) { - printf( - "%*.*s(%08llx,%lld) ALL ALLOCATED\n", - tab, tab, "", - (long long)blk, - (long long)radix - ); - return; - } - if (scan->u.bmu_avail == radix) { - printf( - "%*.*s(%08llx,%lld) ALL FREE\n", - tab, tab, "", - (long long)blk, - (long long)radix - ); - return; - } - printf( - "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n", + "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n", tab, tab, "", (long long)blk, (long long)radix, - (long 
long)scan->u.bmu_avail, (long long)radix, + 1 + (BLIST_META_RADIX - 1) / 4, + (long long)scan->bm_bitmap, (long long)scan->bm_bighint ); - skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; radix /= BLIST_META_RADIX; + skip = radix_to_skip(radix); tab += 4; - for (i = 1; i < skip; i += next_skip) { - if (scan[i].bm_bighint == (daddr_t)-1) { - printf( - "%*.*s(%08llx,%lld): Terminator\n", - tab, tab, "", - (long long)blk, (long long)radix - ); - break; - } - blst_radix_print(&scan[i], blk, radix, tab); - blk += radix; - } + mask = scan->bm_bitmap; + /* Examine the nonempty subtree associated with each bit set in mask */ + do { + bit = mask & -mask; + digit = bitpos(bit); + blst_radix_print(&scan[1 + digit * skip], blk + digit * radix, + radix, tab); + } while ((mask ^= bit) != 0); tab -= 4; printf( @@ -1196,7 +1041,7 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) int main(int ac, char **av) { - int size = 1024; + int size = BLIST_META_RADIX * BLIST_BMAP_RADIX; int i; blist_t bl; struct sbuf *s; diff --git a/sys/kern/subr_epoch.c b/sys/kern/subr_epoch.c index a63f669fea75..9104f1e0880a 100644 --- a/sys/kern/subr_epoch.c +++ b/sys/kern/subr_epoch.c @@ -55,6 +55,27 @@ __FBSDID("$FreeBSD$"); static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based reclamation"); +#ifdef __amd64__ +#define EPOCH_ALIGN CACHE_LINE_SIZE*2 +#else +#define EPOCH_ALIGN CACHE_LINE_SIZE +#endif + +TAILQ_HEAD (epoch_tdlist, epoch_tracker); +typedef struct epoch_record { + ck_epoch_record_t er_record; + volatile struct epoch_tdlist er_tdlist; + volatile uint32_t er_gen; + uint32_t er_cpuid; +} __aligned(EPOCH_ALIGN) *epoch_record_t; + +struct epoch { + struct ck_epoch e_epoch __aligned(EPOCH_ALIGN); + epoch_record_t e_pcpu_record; + int e_idx; + int e_flags; +}; + /* arbitrary --- needs benchmarking */ #define MAX_ADAPTIVE_SPIN 100 #define MAX_EPOCHS 64 @@ -119,11 +140,15 @@ epoch_init(void *arg __unused) epoch_call_count = counter_u64_alloc(M_WAITOK); 
epoch_call_task_count = counter_u64_alloc(M_WAITOK); - pcpu_zone_record = uma_zcreate("epoch_record pcpu", sizeof(struct epoch_record), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU); + pcpu_zone_record = uma_zcreate("epoch_record pcpu", + sizeof(struct epoch_record), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_PCPU); CPU_FOREACH(cpu) { - GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, epoch_call_task, NULL); - taskqgroup_attach_cpu(qgroup_softirq, DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1, "epoch call task"); + GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, + epoch_call_task, NULL); + taskqgroup_attach_cpu(qgroup_softirq, + DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1, + "epoch call task"); } inited = 1; global_epoch = epoch_alloc(0); @@ -150,13 +175,21 @@ epoch_ctor(epoch_t epoch) CPU_FOREACH(cpu) { er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu); bzero(er, sizeof(*er)); - ck_epoch_register(&epoch->e_epoch, &er->er_read_record, NULL); - ck_epoch_register(&epoch->e_epoch, &er->er_write_record, NULL); + ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL); TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist); er->er_cpuid = cpu; } } +static void +epoch_adjust_prio(struct thread *td, u_char prio) +{ + + thread_lock(td); + sched_prio(td, prio); + thread_unlock(td); +} + epoch_t epoch_alloc(int flags) { @@ -192,51 +225,126 @@ epoch_free(epoch_t epoch) free(epoch, M_EPOCH); } +static epoch_record_t +epoch_currecord(epoch_t epoch) +{ + + return (zpcpu_get_cpu(epoch->e_pcpu_record, curcpu)); +} + +#define INIT_CHECK(epoch) \ + do { \ + if (__predict_false((epoch) == NULL)) \ + return; \ + } while (0) + void -epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et) +epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et) { + struct epoch_record *er; + struct thread *td; + + MPASS(cold || epoch != NULL); + INIT_CHECK(epoch); + MPASS(epoch->e_flags & EPOCH_PREEMPT); +#ifdef EPOCH_TRACKER_DEBUG + et->et_magic_pre = EPOCH_MAGIC0; + 
et->et_magic_post = EPOCH_MAGIC1; +#endif + td = curthread; + et->et_td = td; + td->td_epochnest++; + critical_enter(); + sched_pin(); - epoch_enter_preempt(epoch, et); + td->td_pre_epoch_prio = td->td_priority; + er = epoch_currecord(epoch); + TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link); + ck_epoch_begin(&er->er_record, &et->et_section); + critical_exit(); } void -epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et) +epoch_enter(epoch_t epoch) { + struct thread *td; + epoch_record_t er; + + MPASS(cold || epoch != NULL); + INIT_CHECK(epoch); + td = curthread; - epoch_exit_preempt(epoch, et); + td->td_epochnest++; + critical_enter(); + er = epoch_currecord(epoch); + ck_epoch_begin(&er->er_record, NULL); } void -epoch_enter_KBI(epoch_t epoch) +epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et) { + struct epoch_record *er; + struct thread *td; - epoch_enter(epoch); + INIT_CHECK(epoch); + td = curthread; + critical_enter(); + sched_unpin(); + MPASS(td->td_epochnest); + td->td_epochnest--; + er = epoch_currecord(epoch); + MPASS(epoch->e_flags & EPOCH_PREEMPT); + MPASS(et != NULL); + MPASS(et->et_td == td); +#ifdef EPOCH_TRACKER_DEBUG + MPASS(et->et_magic_pre == EPOCH_MAGIC0); + MPASS(et->et_magic_post == EPOCH_MAGIC1); + et->et_magic_pre = 0; + et->et_magic_post = 0; +#endif +#ifdef INVARIANTS + et->et_td = (void*)0xDEADBEEF; +#endif + ck_epoch_end(&er->er_record, &et->et_section); + TAILQ_REMOVE(&er->er_tdlist, et, et_link); + er->er_gen++; + if (__predict_false(td->td_pre_epoch_prio != td->td_priority)) + epoch_adjust_prio(td, td->td_pre_epoch_prio); + critical_exit(); } void -epoch_exit_KBI(epoch_t epoch) +epoch_exit(epoch_t epoch) { + struct thread *td; + epoch_record_t er; - epoch_exit(epoch); + INIT_CHECK(epoch); + td = curthread; + MPASS(td->td_epochnest); + td->td_epochnest--; + er = epoch_currecord(epoch); + ck_epoch_end(&er->er_record, NULL); + critical_exit(); } /* - * epoch_block_handler_preempt is a callback from the ck code when another thread 
is - * currently in an epoch section. + * epoch_block_handler_preempt() is a callback from the CK code when another + * thread is currently in an epoch section. */ static void -epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t *cr, - void *arg __unused) +epoch_block_handler_preempt(struct ck_epoch *global __unused, + ck_epoch_record_t *cr, void *arg __unused) { epoch_record_t record; struct thread *td, *owner, *curwaittd; - struct epoch_thread *tdwait; + struct epoch_tracker *tdwait; struct turnstile *ts; struct lock_object *lock; int spincount, gen; int locksheld __unused; - record = __containerof(cr, struct epoch_record, er_read_record); + record = __containerof(cr, struct epoch_record, er_record); td = curthread; locksheld = td->td_locks; spincount = 0; @@ -318,25 +426,27 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) && ((ts = curwaittd->td_blocked) != NULL)) { /* - * We unlock td to allow turnstile_wait to reacquire the - * the thread lock. Before unlocking it we enter a critical - * section to prevent preemption after we reenable interrupts - * by dropping the thread lock in order to prevent curwaittd - * from getting to run. + * We unlock td to allow turnstile_wait to reacquire + * the thread lock. Before unlocking it we enter a + * critical section to prevent preemption after we + * reenable interrupts by dropping the thread lock in + * order to prevent curwaittd from getting to run. */ critical_enter(); thread_unlock(td); owner = turnstile_lock(ts, &lock); /* - * The owner pointer indicates that the lock succeeded. Only - * in case we hold the lock and the turnstile we locked is still - * the one that curwaittd is blocked on can we continue. 
Otherwise - * The turnstile pointer has been changed out from underneath - * us, as in the case where the lock holder has signalled curwaittd, + * The owner pointer indicates that the lock succeeded. + * Only in case we hold the lock and the turnstile we + * locked is still the one that curwaittd is blocked on + * can we continue. Otherwise the turnstile pointer has + * been changed out from underneath us, as in the case + * where the lock holder has signalled curwaittd, * and we need to continue. */ if (owner != NULL && ts == curwaittd->td_blocked) { - MPASS(TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd)); + MPASS(TD_IS_INHIBITED(curwaittd) && + TD_ON_LOCK(curwaittd)); critical_exit(); turnstile_wait(ts, owner, curwaittd->td_tsqueue); counter_u64_add(turnstile_count, 1); @@ -386,9 +496,8 @@ epoch_wait_preempt(epoch_t epoch) if ((epoch->e_flags & EPOCH_LOCKED) == 0) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "epoch_wait() can be long running"); - KASSERT(!in_epoch(epoch), - ("epoch_wait_preempt() called in the middle " - "of an epoch section of the same epoch")); + KASSERT(!in_epoch(epoch), ("epoch_wait_preempt() called in the middle " + "of an epoch section of the same epoch")); #endif thread_lock(td); DROP_GIANT(); @@ -401,7 +510,8 @@ epoch_wait_preempt(epoch_t epoch) td->td_pinned = 0; sched_bind(td, old_cpu); - ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt, NULL); + ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt, + NULL); /* restore CPU binding, if any */ if (was_bound != 0) { @@ -462,7 +572,7 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t critical_enter(); *DPCPU_PTR(epoch_cb_count) += 1; er = epoch_currecord(epoch); - ck_epoch_call(&er->er_write_record, cb, (ck_epoch_cb_t *)callback); + ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback); critical_exit(); return; boottime: @@ -486,7 +596,7 @@ epoch_call_task(void *arg __unused) if (__predict_false((epoch = 
allepochs[i]) == NULL)) continue; er = epoch_currecord(epoch); - record = &er->er_write_record; + record = &er->er_record; if ((npending = record->n_pending) == 0) continue; ck_epoch_poll_deferred(record, &cb_stack); @@ -502,7 +612,7 @@ epoch_call_task(void *arg __unused) head = ck_stack_batch_pop_npsc(&cb_stack); for (cursor = head; cursor != NULL; cursor = next) { struct ck_epoch_entry *entry = - ck_epoch_entry_container(cursor); + ck_epoch_entry_container(cursor); next = CK_STACK_NEXT(cursor); entry->function(entry); @@ -512,7 +622,7 @@ epoch_call_task(void *arg __unused) int in_epoch_verbose(epoch_t epoch, int dump_onfail) { - struct epoch_thread *tdwait; + struct epoch_tracker *tdwait; struct thread *td; epoch_record_t er; @@ -548,9 +658,15 @@ in_epoch(epoch_t epoch) } void -epoch_adjust_prio(struct thread *td, u_char prio) +epoch_thread_init(struct thread *td) { - thread_lock(td); - sched_prio(td, prio); - thread_unlock(td); + + td->td_et = malloc(sizeof(struct epoch_tracker), M_EPOCH, M_WAITOK); +} + +void +epoch_thread_fini(struct thread *td) +{ + + free(td->td_et, M_EPOCH); } diff --git a/sys/net/if.c b/sys/net/if.c index 4a89e37e5f53..15b4d7c12d1e 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1767,35 +1767,29 @@ if_data_copy(struct ifnet *ifp, struct if_data *ifd) void if_addr_rlock(struct ifnet *ifp) { - MPASS(*(uint64_t *)&ifp->if_addr_et == 0); - epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et); + + epoch_enter_preempt(net_epoch_preempt, curthread->td_et); } void if_addr_runlock(struct ifnet *ifp) { - epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et); -#ifdef INVARIANTS - bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker)); -#endif + + epoch_exit_preempt(net_epoch_preempt, curthread->td_et); } void if_maddr_rlock(if_t ifp) { - MPASS(*(uint64_t *)&ifp->if_maddr_et == 0); - epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et); + epoch_enter_preempt(net_epoch_preempt, curthread->td_et); } void if_maddr_runlock(if_t ifp) { - 
epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et); -#ifdef INVARIANTS - bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker)); -#endif + epoch_exit_preempt(net_epoch_preempt, curthread->td_et); } /* diff --git a/sys/net/if_var.h b/sys/net/if_var.h index a7edae305223..37c08e68156a 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -381,8 +381,6 @@ struct ifnet { */ struct netdump_methods *if_netdump_methods; struct epoch_context if_epoch_ctx; - struct epoch_tracker if_addr_et; - struct epoch_tracker if_maddr_et; /* * Spare fields to be added before branching a stable branch, so diff --git a/sys/powerpc/conf/GENERIC64 b/sys/powerpc/conf/GENERIC64 index fd24daff91e4..b395bccf7778 100644 --- a/sys/powerpc/conf/GENERIC64 +++ b/sys/powerpc/conf/GENERIC64 @@ -39,6 +39,10 @@ options PREEMPTION #Enable kernel thread preemption options VIMAGE # Subsystem virtualization, e.g. VNET options INET #InterNETworking options INET6 #IPv6 communications protocols +options IPSEC # IP (v4/v6) security +options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5 +options TCP_OFFLOAD # TCP offload +options TCP_BLACKBOX # Enhanced TCP event logging options TCP_HHOOK # hhook(9) framework for TCP options TCP_RFC7413 # TCP Fast Open options SCTP #Stream Control Transmission Protocol @@ -83,6 +87,9 @@ options MAC # TrustedBSD MAC Framework options KDTRACE_HOOKS # Kernel DTrace hooks options DDB_CTF # Kernel ELF linker loads CTF data options INCLUDE_CONFIG_FILE # Include this file in kernel +options RACCT # Resource accounting framework +options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default +options RCTL # Resource limits # Debugging support. Always need this: options KDB # Enable kernel debugger support. 
@@ -136,10 +143,12 @@ device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D # ATA/SCSI peripherals device scbus # SCSI bus (required for ATA/SCSI) +device ch # SCSI media changers device da # Direct Access (disks) device sa # Sequential Access (tape etc) device cd # CD device pass # Passthrough device (direct ATA/SCSI access) +device ses # Enclosure Service (SES and SAF-TE) # vt is the default console driver, resembling an SCO console device vt # Core console driver @@ -168,6 +177,7 @@ device re # RealTek 8139C+/8169/8169S/8110S device rl # RealTek 8129/8139 # Pseudo devices. +device crypto # core crypto support device loop # Network loopback device random # Entropy device device ether # Ethernet support @@ -193,10 +203,8 @@ device usb # USB Bus (required) device uhid # "Human Interface Devices" device ukbd # Keyboard options KBD_INSTALL_CDEV # install a CDEV entry in /dev -device ulpt # Printer device umass # Disks/Mass storage - Requires scbus and da0 device ums # Mouse -device urio # Diamond Rio 500 MP3 player # USB Ethernet device aue # ADMtek USB Ethernet device axe # ASIX Electronics USB Ethernet @@ -236,3 +244,5 @@ device sound # Generic sound driver (required) device snd_ai2s # Apple I2S audio device snd_uaudio # USB Audio +# Netmap provides direct access to TX/RX rings on supported NICs +device netmap # netmap(4) support diff --git a/sys/riscv/include/cpu.h b/sys/riscv/include/cpu.h index 989f8946333d..79c6f730f2a6 100644 --- a/sys/riscv/include/cpu.h +++ b/sys/riscv/include/cpu.h @@ -38,6 +38,7 @@ #define _MACHINE_CPU_H_ #include <machine/atomic.h> +#include <machine/cpufunc.h> #include <machine/frame.h> #define TRAPF_PC(tfp) ((tfp)->tf_ra) @@ -86,8 +87,7 @@ static __inline uint64_t get_cyclecount(void) { - /* TODO: This is bogus */ - return (1); + return (rdcycle()); } #endif diff --git a/sys/riscv/include/cpufunc.h b/sys/riscv/include/cpufunc.h index e1979d5d2c3f..625c17ebf7dc 100644 --- a/sys/riscv/include/cpufunc.h +++ 
b/sys/riscv/include/cpufunc.h @@ -104,6 +104,11 @@ sfence_vma_page(uintptr_t addr) __asm __volatile("sfence.vma %0" :: "r" (addr) : "memory"); } +#define rdcycle() csr_read64(cycle) +#define rdtime() csr_read64(time) +#define rdinstret() csr_read64(instret) +#define rdhpmcounter(n) csr_read64(hpmcounter##n) + #define cpufunc_nullop() riscv_nullop() void riscv_nullop(void); diff --git a/sys/riscv/include/riscvreg.h b/sys/riscv/include/riscvreg.h index fae70d9e0441..12e4a8fbbefa 100644 --- a/sys/riscv/include/riscvreg.h +++ b/sys/riscv/include/riscvreg.h @@ -223,4 +223,23 @@ val; \ }) +#if __riscv_xlen == 32 +#define csr_read64(csr) \ +({ uint64_t val; \ + uint32_t high, low; \ + __asm __volatile("1: " \ + "csrr t0, " #csr "h\n" \ + "csrr %0, " #csr "\n" \ + "csrr %1, " #csr "h\n" \ + "bne t0, %1, 1b" \ + : "=r" (low), "=r" (high) \ + : \ + : "t0"); \ + val = (low | ((uint64_t)high << 32)); \ + val; \ +}) +#else +#define csr_read64(csr) ((uint64_t)csr_read(csr)) +#endif + #endif /* !_MACHINE_RISCVREG_H_ */ diff --git a/sys/sys/blist.h b/sys/sys/blist.h index 22d834f71544..595d3477aa08 100644 --- a/sys/sys/blist.h +++ b/sys/sys/blist.h @@ -73,22 +73,20 @@ typedef uint64_t u_daddr_t; /* unsigned disk address */ */ typedef struct blmeta { - union { - daddr_t bmu_avail; /* space available under us */ - u_daddr_t bmu_bitmap; /* bitmap if we are a leaf */ - } u; + u_daddr_t bm_bitmap; /* bitmap if we are a leaf */ daddr_t bm_bighint; /* biggest contiguous block hint*/ } blmeta_t; typedef struct blist { daddr_t bl_blocks; /* area of coverage */ + daddr_t bl_avail; /* # available blocks */ u_daddr_t bl_radix; /* coverage radix */ daddr_t bl_cursor; /* next-fit search starts at */ blmeta_t bl_root[1]; /* root of radix tree */ } *blist_t; -#define BLIST_META_RADIX 16 #define BLIST_BMAP_RADIX (sizeof(u_daddr_t)*8) +#define BLIST_META_RADIX BLIST_BMAP_RADIX #define BLIST_MAX_ALLOC BLIST_BMAP_RADIX diff --git a/sys/sys/epoch.h b/sys/sys/epoch.h index 8d05c81e964b..3ed67a775625 
100644 --- a/sys/sys/epoch.h +++ b/sys/sys/epoch.h @@ -29,10 +29,17 @@ #ifndef _SYS_EPOCH_H_ #define _SYS_EPOCH_H_ + +struct epoch_context { + void *data[2]; +} __aligned(sizeof(void *)); + +typedef struct epoch_context *epoch_context_t; + #ifdef _KERNEL #include <sys/lock.h> #include <sys/pcpu.h> -#endif +#include <ck_epoch.h> struct epoch; typedef struct epoch *epoch_t; @@ -43,22 +50,19 @@ typedef struct epoch *epoch_t; extern epoch_t global_epoch; extern epoch_t global_epoch_preempt; -struct epoch_context { - void *data[2]; -} __aligned(sizeof(void *)); - -typedef struct epoch_context *epoch_context_t; - - struct epoch_tracker { - void *datap[3]; -#ifdef EPOCH_TRACKER_DEBUG - int datai[5]; -#else - int datai[1]; +#ifdef EPOCH_TRACKER_DEBUG +#define EPOCH_MAGIC0 0xFADECAFEF00DD00D +#define EPOCH_MAGIC1 0xBADDBABEDEEDFEED + uint64_t et_magic_pre; +#endif + TAILQ_ENTRY(epoch_tracker) et_link; + struct thread *et_td; + ck_epoch_section_t et_section; +#ifdef EPOCH_TRACKER_DEBUG + uint64_t et_magic_post; #endif } __aligned(sizeof(void *)); - typedef struct epoch_tracker *epoch_tracker_t; epoch_t epoch_alloc(int flags); @@ -68,26 +72,18 @@ void epoch_wait_preempt(epoch_t epoch); void epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t)); int in_epoch(epoch_t epoch); int in_epoch_verbose(epoch_t epoch, int dump_onfail); -#ifdef _KERNEL DPCPU_DECLARE(int, epoch_cb_count); DPCPU_DECLARE(struct grouptask, epoch_cb_task); #define EPOCH_MAGIC0 0xFADECAFEF00DD00D #define EPOCH_MAGIC1 0xBADDBABEDEEDFEED -void epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et); -void epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et); -void epoch_enter_KBI(epoch_t epoch); -void epoch_exit_KBI(epoch_t epoch); +void epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et); +void epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et); +void epoch_enter(epoch_t epoch); +void epoch_exit(epoch_t epoch); +void epoch_thread_init(struct thread *); +void 
epoch_thread_fini(struct thread *); -#if defined(KLD_MODULE) && !defined(KLD_TIED) -#define epoch_enter_preempt(e, t) epoch_enter_preempt_KBI((e), (t)) -#define epoch_exit_preempt(e, t) epoch_exit_preempt_KBI((e), (t)) -#define epoch_enter(e) epoch_enter_KBI((e)) -#define epoch_exit(e) epoch_exit_KBI((e)) -#else -#include <sys/epoch_private.h> -#endif /* KLD_MODULE */ - -#endif /* _KERNEL */ -#endif +#endif /* _KERNEL */ +#endif /* _SYS_EPOCH_H_ */ diff --git a/sys/sys/epoch_private.h b/sys/sys/epoch_private.h deleted file mode 100644 index f475ca3aa200..000000000000 --- a/sys/sys/epoch_private.h +++ /dev/null @@ -1,211 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _SYS_EPOCH_PRIVATE_H_ -#define _SYS_EPOCH_PRIVATE_H_ -#ifndef _KERNEL -#error "no user serviceable parts" -#else -#include <ck_epoch.h> -#include <sys/kpilite.h> - -#include <sys/mutex.h> - -extern void epoch_adjust_prio(struct thread *td, u_char prio); -#ifndef _SYS_SYSTM_H_ -extern void critical_exit_preempt(void); -#endif - -#ifdef __amd64__ -#define EPOCH_ALIGN CACHE_LINE_SIZE*2 -#else -#define EPOCH_ALIGN CACHE_LINE_SIZE -#endif - -/* - * Standalone (_sa) routines for thread state manipulation - */ -static __inline void -critical_enter_sa(void *tdarg) -{ - struct thread_lite *td; - - td = tdarg; - td->td_critnest++; - __compiler_membar(); -} - -static __inline void -critical_exit_sa(void *tdarg) -{ - struct thread_lite *td; - - td = tdarg; - MPASS(td->td_critnest > 0); - __compiler_membar(); - td->td_critnest--; - __compiler_membar(); - if (__predict_false(td->td_owepreempt != 0)) - critical_exit_preempt(); -} - -typedef struct epoch_thread { -#ifdef EPOCH_TRACKER_DEBUG - uint64_t et_magic_pre; -#endif - TAILQ_ENTRY(epoch_thread) et_link; /* Epoch queue. 
*/ - struct thread *et_td; /* pointer to thread in section */ - ck_epoch_section_t et_section; /* epoch section object */ -#ifdef EPOCH_TRACKER_DEBUG - uint64_t et_magic_post; -#endif -} *epoch_thread_t; -TAILQ_HEAD (epoch_tdlist, epoch_thread); - -typedef struct epoch_record { - ck_epoch_record_t er_read_record; - ck_epoch_record_t er_write_record; - volatile struct epoch_tdlist er_tdlist; - volatile uint32_t er_gen; - uint32_t er_cpuid; -} __aligned(EPOCH_ALIGN) *epoch_record_t; - -struct epoch { - struct ck_epoch e_epoch __aligned(EPOCH_ALIGN); - epoch_record_t e_pcpu_record; - int e_idx; - int e_flags; -}; - -static epoch_record_t -epoch_currecord(epoch_t epoch) -{ - return zpcpu_get_cpu(epoch->e_pcpu_record, curcpu); -} - -#define INIT_CHECK(epoch) \ - do { \ - if (__predict_false((epoch) == NULL)) \ - return; \ - } while (0) - -static __inline void -epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et) -{ - struct epoch_record *er; - struct epoch_thread *etd; - struct thread_lite *td; - - MPASS(cold || epoch != NULL); - INIT_CHECK(epoch); - etd = (void *)et; - MPASS(epoch->e_flags & EPOCH_PREEMPT); -#ifdef EPOCH_TRACKER_DEBUG - etd->et_magic_pre = EPOCH_MAGIC0; - etd->et_magic_post = EPOCH_MAGIC1; -#endif - td = (struct thread_lite *)curthread; - etd->et_td = (void*)td; - td->td_epochnest++; - critical_enter_sa(td); - sched_pin_lite(td); - - td->td_pre_epoch_prio = td->td_priority; - er = epoch_currecord(epoch); - TAILQ_INSERT_TAIL(&er->er_tdlist, etd, et_link); - ck_epoch_begin(&er->er_read_record, (ck_epoch_section_t *)&etd->et_section); - critical_exit_sa(td); -} - -static __inline void -epoch_enter(epoch_t epoch) -{ - struct thread_lite *td; - epoch_record_t er; - - MPASS(cold || epoch != NULL); - INIT_CHECK(epoch); - td = (struct thread_lite *)curthread; - - td->td_epochnest++; - critical_enter_sa(td); - er = epoch_currecord(epoch); - ck_epoch_begin(&er->er_read_record, NULL); -} - -static __inline void -epoch_exit_preempt(epoch_t epoch, epoch_tracker_t 
et) -{ - struct epoch_record *er; - struct epoch_thread *etd; - struct thread_lite *td; - - INIT_CHECK(epoch); - td = (struct thread_lite *)curthread; - critical_enter_sa(td); - sched_unpin_lite(td); - MPASS(td->td_epochnest); - td->td_epochnest--; - er = epoch_currecord(epoch); - MPASS(epoch->e_flags & EPOCH_PREEMPT); - etd = (void *)et; - MPASS(etd != NULL); - MPASS(etd->et_td == (struct thread *)td); -#ifdef EPOCH_TRACKER_DEBUG - MPASS(etd->et_magic_pre == EPOCH_MAGIC0); - MPASS(etd->et_magic_post == EPOCH_MAGIC1); - etd->et_magic_pre = 0; - etd->et_magic_post = 0; -#endif - etd->et_td = (void*)0xDEADBEEF; - ck_epoch_end(&er->er_read_record, - (ck_epoch_section_t *)&etd->et_section); - TAILQ_REMOVE(&er->er_tdlist, etd, et_link); - er->er_gen++; - if (__predict_false(td->td_pre_epoch_prio != td->td_priority)) - epoch_adjust_prio((struct thread *)td, td->td_pre_epoch_prio); - critical_exit_sa(td); -} - -static __inline void -epoch_exit(epoch_t epoch) -{ - struct thread_lite *td; - epoch_record_t er; - - INIT_CHECK(epoch); - td = (struct thread_lite *)curthread; - MPASS(td->td_epochnest); - td->td_epochnest--; - er = epoch_currecord(epoch); - ck_epoch_end(&er->er_read_record, NULL); - critical_exit_sa(td); -} -#endif /* _KERNEL */ -#endif /* _SYS_EPOCH_PRIVATE_H_ */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index d5351dd043c1..b43758c4e568 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -193,6 +193,7 @@ struct trapframe; struct turnstile; struct vm_map; struct vm_map_entry; +struct epoch_tracker; /* * XXX: Does this belong in resource.h or resourcevar.h instead? @@ -360,6 +361,7 @@ struct thread { int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. 
*/ void *td_lkpi_task; /* LinuxKPI task struct pointer */ + struct epoch_tracker *td_et; /* (k) compat KPI spare tracker */ int td_pmcpend; }; diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 6b8e708f7371..c2f6c91bcc4b 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -82,7 +82,7 @@ int ffs_getcg(struct fs *, struct vnode *, u_int, struct buf **, struct cg **); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); -void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); +int ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); void ffs_oldfscompat_write(struct fs *, struct ufsmount *); int ffs_own_mount(const struct mount *mp); int ffs_reallocblks(struct vop_reallocblks_args *); diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index a1be71511434..d146d14980c3 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -148,12 +148,18 @@ loop: if (I_IS_UFS1(ip)) { *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; - /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ + /* + * XXX: FIX? The entropy here is desirable, + * but the harvesting may be expensive + */ random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME); } else { *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; - /* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */ + /* + * XXX: FIX? 
The entropy here is desirable, + * but the harvesting may be expensive + */ random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME); } if (waitfor) diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 4453c59517df..09b1d4982c49 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -1333,12 +1333,12 @@ expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) */ dip = (struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, cancelip->i_number); - if (clearmode || cancelip->i_effnlink == 0) - dip->di_mode = 0; dip->di_size = 0; dip->di_blocks = 0; dip->di_flags &= ~SF_SNAPSHOT; bzero(&dip->di_db[0], (UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t)); + if (clearmode || cancelip->i_effnlink == 0) + dip->di_mode = 0; bdwrite(bp); /* * Now go through and expunge all the blocks in the file diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index dcc72b2fdfd3..3fe9e205f858 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -6698,12 +6698,13 @@ softdep_journal_freeblocks(ip, cred, length, flags) if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; softdep_update_inodeblock(ip, bp, 0); - if (ump->um_fstype == UFS1) + if (ump->um_fstype == UFS1) { *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; - else + } else { *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; + } ACQUIRE_LOCK(ump); (void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); if ((inodedep->id_state & IOSTARTED) != 0) @@ -9640,6 +9641,7 @@ static void clear_unlinked_inodedep(inodedep) struct inodedep *inodedep; { + struct ufs2_dinode *dip; struct ufsmount *ump; struct inodedep *idp; struct inodedep *idn; @@ -9743,12 +9745,14 @@ clear_unlinked_inodedep(inodedep) ffs_oldfscompat_write((struct fs *)bp->b_data, ump); softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); - } else if (fs->fs_magic == FS_UFS1_MAGIC) + } else if 
(fs->fs_magic == FS_UFS1_MAGIC) { ((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, pino))->di_freelink = nino; - else - ((struct ufs2_dinode *)bp->b_data + - ino_to_fsbo(fs, pino))->di_freelink = nino; + } else { + dip = (struct ufs2_dinode *)bp->b_data + + ino_to_fsbo(fs, pino); + dip->di_freelink = nino; + } /* * If the bwrite fails we have no recourse to recover. The * filesystem is corrupted already. diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c index de70a3f84283..541d44c887db 100644 --- a/sys/ufs/ffs/ffs_subr.c +++ b/sys/ufs/ffs/ffs_subr.c @@ -108,31 +108,35 @@ ffs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp) * Load up the contents of an inode and copy the appropriate pieces * to the incore copy. */ -void +int ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino) { + struct ufs1_dinode *dip1; + struct ufs2_dinode *dip2; if (I_IS_UFS1(ip)) { - *ip->i_din1 = + dip1 = ip->i_din1; + *dip1 = *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); - ip->i_mode = ip->i_din1->di_mode; - ip->i_nlink = ip->i_din1->di_nlink; - ip->i_size = ip->i_din1->di_size; - ip->i_flags = ip->i_din1->di_flags; - ip->i_gen = ip->i_din1->di_gen; - ip->i_uid = ip->i_din1->di_uid; - ip->i_gid = ip->i_din1->di_gid; - } else { - *ip->i_din2 = - *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); - ip->i_mode = ip->i_din2->di_mode; - ip->i_nlink = ip->i_din2->di_nlink; - ip->i_size = ip->i_din2->di_size; - ip->i_flags = ip->i_din2->di_flags; - ip->i_gen = ip->i_din2->di_gen; - ip->i_uid = ip->i_din2->di_uid; - ip->i_gid = ip->i_din2->di_gid; + ip->i_mode = dip1->di_mode; + ip->i_nlink = dip1->di_nlink; + ip->i_size = dip1->di_size; + ip->i_flags = dip1->di_flags; + ip->i_gen = dip1->di_gen; + ip->i_uid = dip1->di_uid; + ip->i_gid = dip1->di_gid; + return (0); } + dip2 = ip->i_din2; + *dip2 = *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + ip->i_mode = dip2->di_mode; + ip->i_nlink = dip2->di_nlink; + 
ip->i_size = dip2->di_size; + ip->i_flags = dip2->di_flags; + ip->i_gen = dip2->di_gen; + ip->i_uid = dip2->di_uid; + ip->i_gid = dip2->di_gid; + return (0); } #endif /* KERNEL */ diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index b69dd20da113..7ecaae231f07 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -740,16 +740,19 @@ loop: bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { - VOP_UNLOCK(vp, 0); - vrele(vp); + vput(vp); + MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + return (error); + } + if ((error = ffs_load_inode(bp, ip, fs, ip->i_number)) != 0) { + brelse(bp); + vput(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } - ffs_load_inode(bp, ip, fs, ip->i_number); ip->i_effnlink = ip->i_nlink; brelse(bp); - VOP_UNLOCK(vp, 0); - vrele(vp); + vput(vp); } return (0); } @@ -1729,7 +1732,12 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags) ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK); else ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK); - ffs_load_inode(bp, ip, fs, ino); + if ((error = ffs_load_inode(bp, ip, fs, ino)) != 0) { + bqrelse(bp); + vput(vp); + *vpp = NULL; + return (error); + } if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 9db39ea2ac80..7d14586a31cd 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -459,6 +459,36 @@ bucket_zone_drain(void) zone_drain(ubz->ubz_zone); } +static uma_bucket_t +zone_try_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, const bool ws) +{ + uma_bucket_t bucket; + + ZONE_LOCK_ASSERT(zone); + + if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { + MPASS(zdom->uzd_nitems >= bucket->ub_cnt); + LIST_REMOVE(bucket, ub_link); + zdom->uzd_nitems -= bucket->ub_cnt; + if (ws && zdom->uzd_imin > zdom->uzd_nitems) + zdom->uzd_imin = zdom->uzd_nitems; + } + return (bucket); +} + +static void +zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket, 
+ const bool ws) +{ + + ZONE_LOCK_ASSERT(zone); + + LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); + zdom->uzd_nitems += bucket->ub_cnt; + if (ws && zdom->uzd_imax < zdom->uzd_nitems) + zdom->uzd_imax = zdom->uzd_nitems; +} + static void zone_log_warning(uma_zone_t zone) { @@ -509,6 +539,23 @@ uma_timeout(void *unused) } /* + * Update the working set size estimate for the zone's bucket cache. + * The constants chosen here are somewhat arbitrary. With an update period of + * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the + * last 100s. + */ +static void +zone_domain_update_wss(uma_zone_domain_t zdom) +{ + long wss; + + MPASS(zdom->uzd_imax >= zdom->uzd_imin); + wss = zdom->uzd_imax - zdom->uzd_imin; + zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems; + zdom->uzd_wss = (3 * wss + 2 * zdom->uzd_wss) / 5; +} + +/* * Routine to perform timeout driven calculations. This expands the * hashes and does per cpu statistics aggregation. * @@ -560,8 +607,14 @@ keg_timeout(uma_keg_t keg) static void zone_timeout(uma_zone_t zone) { + int i; zone_foreach_keg(zone, &keg_timeout); + + ZONE_LOCK(zone); + for (i = 0; i < vm_ndomains; i++) + zone_domain_update_wss(&zone->uz_domain[i]); + ZONE_UNLOCK(zone); } /* @@ -772,16 +825,16 @@ cache_drain_safe_cpu(uma_zone_t zone) cache = &zone->uz_cpu[curcpu]; if (cache->uc_allocbucket) { if (cache->uc_allocbucket->ub_cnt != 0) - LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets, - cache->uc_allocbucket, ub_link); + zone_put_bucket(zone, &zone->uz_domain[domain], + cache->uc_allocbucket, false); else b1 = cache->uc_allocbucket; cache->uc_allocbucket = NULL; } if (cache->uc_freebucket) { if (cache->uc_freebucket->ub_cnt != 0) - LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets, - cache->uc_freebucket, ub_link); + zone_put_bucket(zone, &zone->uz_domain[domain], + cache->uc_freebucket, false); else b2 = cache->uc_freebucket; cache->uc_freebucket = NULL; @@ -844,8 +897,8 @@ bucket_cache_drain(uma_zone_t 
zone) */ for (i = 0; i < vm_ndomains; i++) { zdom = &zone->uz_domain[i]; - while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { - LIST_REMOVE(bucket, ub_link); + while ((bucket = zone_try_fetch_bucket(zone, zdom, false)) != + NULL) { ZONE_UNLOCK(zone); bucket_drain(zone, bucket); bucket_free(zone, bucket, NULL); @@ -2523,11 +2576,9 @@ zalloc_start: zdom = &zone->uz_domain[0]; else zdom = &zone->uz_domain[domain]; - if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { + if ((bucket = zone_try_fetch_bucket(zone, zdom, true)) != NULL) { KASSERT(bucket->ub_cnt != 0, ("uma_zalloc_arg: Returning an empty bucket.")); - - LIST_REMOVE(bucket, ub_link); cache->uc_allocbucket = bucket; ZONE_UNLOCK(zone); goto zalloc_start; @@ -2556,6 +2607,7 @@ zalloc_start: critical_enter(); cpu = curcpu; cache = &zone->uz_cpu[cpu]; + /* * See if we lost the race or were migrated. Cache the * initialized bucket to make this less likely or claim @@ -2565,6 +2617,7 @@ zalloc_start: ((zone->uz_flags & UMA_ZONE_NUMA) == 0 || domain == PCPU_GET(domain))) { cache->uc_allocbucket = bucket; + zdom->uzd_imax += bucket->ub_cnt; } else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) { critical_exit(); ZONE_UNLOCK(zone); @@ -2572,7 +2625,7 @@ zalloc_start: bucket_free(zone, bucket, udata); goto zalloc_restart; } else - LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); + zone_put_bucket(zone, zdom, bucket, false); ZONE_UNLOCK(zone); goto zalloc_start; } @@ -3200,7 +3253,7 @@ zfree_start: bucket_free(zone, bucket, udata); goto zfree_restart; } else - LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); + zone_put_bucket(zone, zdom, bucket, true); } /* @@ -3649,6 +3702,7 @@ uma_reclaim_locked(bool kmem_danger) cache_drain_safe(NULL); zone_foreach(zone_drain); } + /* * Some slabs may have been freed but this zone will be visited early * we visit again so that we can free pages that are empty once other @@ -3882,7 +3936,7 @@ uma_print_zone(uma_zone_t zone) * directly so that we don't 
have to. */ static void -uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp, +uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp, uint64_t *freesp, uint64_t *sleepsp) { uma_cache_t cache; @@ -3937,7 +3991,6 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) struct uma_stream_header ush; struct uma_type_header uth; struct uma_percpu_stat *ups; - uma_bucket_t bucket; uma_zone_domain_t zdom; struct sbuf sbuf; uma_cache_t cache; @@ -3997,9 +4050,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) for (i = 0; i < vm_ndomains; i++) { zdom = &z->uz_domain[i]; - LIST_FOREACH(bucket, &zdom->uzd_buckets, - ub_link) - uth.uth_zone_free += bucket->ub_cnt; + uth.uth_zone_free += zdom->uzd_nitems; } uth.uth_allocs = z->uz_allocs; uth.uth_frees = z->uz_frees; @@ -4199,12 +4250,11 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) #ifdef DDB DB_SHOW_COMMAND(uma, db_show_uma) { - uma_bucket_t bucket; uma_keg_t kz; uma_zone_t z; - uma_zone_domain_t zdom; uint64_t allocs, frees, sleeps; - int cachefree, i; + long cachefree; + int i; db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used", "Free", "Requests", "Sleeps", "Bucket"); @@ -4221,13 +4271,10 @@ DB_SHOW_COMMAND(uma, db_show_uma) if (!((z->uz_flags & UMA_ZONE_SECONDARY) && (LIST_FIRST(&kz->uk_zones) != z))) cachefree += kz->uk_free; - for (i = 0; i < vm_ndomains; i++) { - zdom = &z->uz_domain[i]; - LIST_FOREACH(bucket, &zdom->uzd_buckets, - ub_link) - cachefree += bucket->ub_cnt; - } - db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n", + for (i = 0; i < vm_ndomains; i++) + cachefree += z->uz_domain[i].uzd_nitems; + + db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u\n", z->uz_name, (uintmax_t)kz->uk_size, (intmax_t)(allocs - frees), cachefree, (uintmax_t)allocs, sleeps, z->uz_count); @@ -4239,22 +4286,18 @@ DB_SHOW_COMMAND(uma, db_show_uma) DB_SHOW_COMMAND(umacache, db_show_umacache) { - uma_bucket_t bucket; uma_zone_t z; - uma_zone_domain_t zdom; uint64_t allocs, frees; - int cachefree, i; 
+ long cachefree; + int i; db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free", "Requests", "Bucket"); LIST_FOREACH(z, &uma_cachezones, uz_link) { uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL); - for (i = 0; i < vm_ndomains; i++) { - zdom = &z->uz_domain[i]; - LIST_FOREACH(bucket, &zdom->uzd_buckets, ub_link) - cachefree += bucket->ub_cnt; - } - db_printf("%18s %8ju %8jd %8d %12ju %8u\n", + for (i = 0; i < vm_ndomains; i++) + cachefree += z->uz_domain[i].uzd_nitems; + db_printf("%18s %8ju %8jd %8ld %12ju %8u\n", z->uz_name, (uintmax_t)z->uz_size, (intmax_t)(allocs - frees), cachefree, (uintmax_t)allocs, z->uz_count); diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h index 5b28df3701bf..a97f51734ef6 100644 --- a/sys/vm/uma_int.h +++ b/sys/vm/uma_int.h @@ -304,6 +304,10 @@ typedef struct uma_klink *uma_klink_t; struct uma_zone_domain { LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */ + long uzd_nitems; /* total item count */ + long uzd_imax; /* maximum item count this period */ + long uzd_imin; /* minimum item count this period */ + long uzd_wss; /* working set size estimate */ }; typedef struct uma_zone_domain * uma_zone_domain_t; @@ -423,11 +427,12 @@ void uma_large_free(uma_slab_t slab); mtx_init(&(z)->uz_lock, (z)->uz_name, \ "UMA zone", MTX_DEF | MTX_DUPOK); \ } while (0) - + #define ZONE_LOCK(z) mtx_lock((z)->uz_lockptr) #define ZONE_TRYLOCK(z) mtx_trylock((z)->uz_lockptr) #define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lockptr) #define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock) +#define ZONE_LOCK_ASSERT(z) mtx_assert((z)->uz_lockptr, MA_OWNED) /* * Find a slab within a hash table. 
This is used for OFFPAGE zones to lookup diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h index 286c01261f0f..8f53b41fd197 100644 --- a/sys/x86/include/x86_var.h +++ b/sys/x86/include/x86_var.h @@ -129,6 +129,7 @@ void dump_drop_page(vm_paddr_t); void finishidentcpu(void); void identify_cpu1(void); void identify_cpu2(void); +void identify_cpu_fixup_bsp(void); void identify_hypervisor(void); void initializecpu(void); void initializecpucache(void); diff --git a/sys/x86/x86/identcpu.c b/sys/x86/x86/identcpu.c index 8e406f053baa..56186b2f3ba4 100644 --- a/sys/x86/x86/identcpu.c +++ b/sys/x86/x86/identcpu.c @@ -1467,6 +1467,19 @@ identify_cpu2(void) } } +void +identify_cpu_fixup_bsp(void) +{ + u_int regs[4]; + + cpu_vendor_id = find_cpu_vendor_id(); + + if (fix_cpuid()) { + do_cpuid(0, regs); + cpu_high = regs[0]; + } +} + /* * Final stage of CPU identification. */ @@ -1478,12 +1491,7 @@ finishidentcpu(void) u_char ccr3; #endif - cpu_vendor_id = find_cpu_vendor_id(); - - if (fix_cpuid()) { - do_cpuid(0, regs); - cpu_high = regs[0]; - } + identify_cpu_fixup_bsp(); if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) { do_cpuid(5, regs); diff --git a/targets/pseudo/userland/lib/Makefile.depend b/targets/pseudo/userland/lib/Makefile.depend index 9f702cb53fc3..2139d18204ca 100644 --- a/targets/pseudo/userland/lib/Makefile.depend +++ b/targets/pseudo/userland/lib/Makefile.depend @@ -60,6 +60,7 @@ DIRDEPS = \ lib/libdevdctl \ lib/libdevinfo \ lib/libdevstat \ + lib/libdl \ lib/libdwarf \ lib/libedit/edit/readline \ lib/libelf \ @@ -214,10 +215,6 @@ DIRDEPS+= \ DIRDEPS+= stand/libsa32 .endif -.if defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mfilter} -DIRDEPS+= lib/libdl -.endif - .if ${MK_NAND} != "no" DIRDEPS+= lib/libnandfs .endif diff --git a/tools/diag/prtblknos/main.c b/tools/diag/prtblknos/main.c index 51d5b135a5ea..c3abb2babad1 100644 --- a/tools/diag/prtblknos/main.c +++ b/tools/diag/prtblknos/main.c @@ -47,7 +47,7 @@ main(argc, argv) char *argv[]; 
{ struct uufsd disk; - union dinode *dp; + union dinodep dp; struct fs *fs; struct stat sb; struct statfs sfb; @@ -98,11 +98,11 @@ main(argc, argv) (void)printf("%s (inode #%jd): ", filename, (intmax_t)inonum); - if ((error = getino(&disk, (void **)&dp, inonum, NULL)) < 0) - warn("Read of inode %jd on %s failed", - (intmax_t)inonum, fsname); + if ((error = getinode(&disk, &dp, inonum)) < 0) + warn("Read of inode %jd on %s failed: %s", + (intmax_t)inonum, fsname, disk.d_error); - prtblknos(&disk, dp); + prtblknos(&disk, (union dinode *)dp.dp1); } exit(0); } diff --git a/usr.bin/head/Makefile b/usr.bin/head/Makefile index 5536d6c5a53d..b966f8165795 100644 --- a/usr.bin/head/Makefile +++ b/usr.bin/head/Makefile @@ -8,4 +8,10 @@ PROG= head HAS_TESTS= SUBDIR.${MK_TESTS}+= tests +.if ${MK_CASPER} != "no" && !defined(RESCUE) +LIBADD+= casper +LIBADD+= cap_fileargs +CFLAGS+=-DWITH_CASPER +.endif + .include <bsd.prog.mk> diff --git a/usr.bin/head/head.c b/usr.bin/head/head.c index 10423dd96be0..a27acea565a0 100644 --- a/usr.bin/head/head.c +++ b/usr.bin/head/head.c @@ -43,10 +43,13 @@ static char sccsid[] = "@(#)head.c 8.2 (Berkeley) 5/4/95"; #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <sys/capsicum.h> #include <sys/types.h> +#include <capsicum_helpers.h> #include <ctype.h> #include <err.h> +#include <errno.h> #include <getopt.h> #include <inttypes.h> #include <stdio.h> @@ -54,6 +57,9 @@ __FBSDID("$FreeBSD$"); #include <string.h> #include <unistd.h> +#include <libcasper.h> +#include <casper/cap_fileargs.h> + /* * head - give the first few lines of a stream or of each of a set of files * @@ -75,14 +81,19 @@ static const struct option long_opts[] = int main(int argc, char *argv[]) { - int ch; FILE *fp; - int first, linecnt = -1, eval = 0; - off_t bytecnt = -1; char *ep; + off_t bytecnt; + int ch, first, linecnt, eval; + fileargs_t *fa; + cap_rights_t rights; + + linecnt = -1; + eval = 0; + bytecnt = -1; obsolete(argv); - while ((ch = getopt_long(argc, argv, 
"+n:c:", long_opts, NULL)) != -1) + while ((ch = getopt_long(argc, argv, "+n:c:", long_opts, NULL)) != -1) { switch(ch) { case 'c': bytecnt = strtoimax(optarg, &ep, 10); @@ -97,17 +108,28 @@ main(int argc, char *argv[]) case '?': default: usage(); + /* NOTREACHED */ } + } argc -= optind; argv += optind; + fa = fileargs_init(argc, argv, O_RDONLY, 0, + cap_rights_init(&rights, CAP_READ, CAP_FSTAT, CAP_FCNTL)); + if (fa == NULL) + errx(1, "unable to init casper"); + + caph_cache_catpages(); + if (caph_limit_stdio() < 0 || caph_enter_casper() < 0) + err(1, "unable to enter capability mode"); + if (linecnt != -1 && bytecnt != -1) errx(1, "can't combine line and byte counts"); - if (linecnt == -1 ) + if (linecnt == -1) linecnt = 10; - if (*argv) { - for (first = 1; *argv; ++argv) { - if ((fp = fopen(*argv, "r")) == NULL) { + if (*argv != NULL) { + for (first = 1; *argv != NULL; ++argv) { + if ((fp = fileargs_fopen(fa, *argv, "r")) == NULL) { warn("%s", *argv); eval = 1; continue; @@ -128,6 +150,7 @@ main(int argc, char *argv[]) else head_bytes(stdin, bytecnt); + fileargs_free(fa); exit(eval); } @@ -137,7 +160,7 @@ head(FILE *fp, int cnt) char *cp; size_t error, readlen; - while (cnt && (cp = fgetln(fp, &readlen)) != NULL) { + while (cnt != 0 && (cp = fgetln(fp, &readlen)) != NULL) { error = fwrite(cp, sizeof(char), readlen, stdout); if (error != readlen) err(1, "stdout"); diff --git a/usr.bin/wc/Makefile b/usr.bin/wc/Makefile index 540e33d15c92..550b718e1478 100644 --- a/usr.bin/wc/Makefile +++ b/usr.bin/wc/Makefile @@ -1,7 +1,15 @@ # @(#)Makefile 8.1 (Berkeley) 6/6/93 # $FreeBSD$ +.include <src.opts.mk> + PROG= wc LIBADD= xo +.if ${MK_CASPER} != "no" +LIBADD+= casper +LIBADD+= cap_fileargs +CFLAGS+=-DWITH_CASPER +.endif + .include <bsd.prog.mk> diff --git a/usr.bin/wc/wc.c b/usr.bin/wc/wc.c index c2990035c0c7..0517b67de69d 100644 --- a/usr.bin/wc/wc.c +++ b/usr.bin/wc/wc.c @@ -44,9 +44,11 @@ static char sccsid[] = "@(#)wc.c 8.1 (Berkeley) 6/6/93"; #include <sys/cdefs.h> 
__FBSDID("$FreeBSD$"); +#include <sys/capsicum.h> #include <sys/param.h> #include <sys/stat.h> +#include <capsicum_helpers.h> #include <ctype.h> #include <err.h> #include <errno.h> @@ -61,6 +63,10 @@ __FBSDID("$FreeBSD$"); #include <wctype.h> #include <libxo/xo.h> +#include <libcasper.h> +#include <casper/cap_fileargs.h> + +static fileargs_t *fa; static uintmax_t tlinect, twordct, tcharct, tlongline; static int doline, doword, dochar, domulti, dolongline; static volatile sig_atomic_t siginfo; @@ -90,6 +96,7 @@ int main(int argc, char *argv[]) { int ch, errors, total; + cap_rights_t rights; (void) setlocale(LC_CTYPE, ""); @@ -125,6 +132,26 @@ main(int argc, char *argv[]) (void)signal(SIGINFO, siginfo_handler); + fa = fileargs_init(argc, argv, O_RDONLY, 0, + cap_rights_init(&rights, CAP_READ, CAP_FSTAT)); + if (fa == NULL) { + xo_warn("Unable to init casper"); + exit(1); + } + + caph_cache_catpages(); + if (caph_limit_stdio() < 0) { + xo_warn("Unable to limit stdio"); + fileargs_free(fa); + exit(1); + } + + if (caph_enter_casper() < 0) { + xo_warn("Unable to enter capability mode"); + fileargs_free(fa); + exit(1); + } + /* Wc's flags are on by default. */ if (doline + doword + dochar + domulti + dolongline == 0) doline = doword = dochar = 1; @@ -158,6 +185,7 @@ main(int argc, char *argv[]) xo_close_container("total"); } + fileargs_free(fa); xo_close_container("wc"); xo_finish(); exit(errors == 0 ? 
0 : 1); @@ -206,7 +234,7 @@ cnt(const char *file) linect = wordct = charct = llct = tmpll = 0; if (file == NULL) fd = STDIN_FILENO; - else if ((fd = open(file, O_RDONLY, 0)) < 0) { + else if ((fd = fileargs_open(fa, file)) < 0) { xo_warn("%s: open", file); return (1); } diff --git a/usr.sbin/cpucontrol/amd.c b/usr.sbin/cpucontrol/amd.c index a875afd85325..9191f261564b 100644 --- a/usr.sbin/cpucontrol/amd.c +++ b/usr.sbin/cpucontrol/amd.c @@ -73,16 +73,16 @@ amd_probe(int fd) } void -amd_update(const char *dev, const char *path) +amd_update(const struct ucode_update_params *params) { - int fd, devfd; + int devfd; unsigned int i; - struct stat st; - uint32_t *fw_image; - amd_fw_header_t *fw_header; + const char *dev, *path; + const uint32_t *fw_image; + const amd_fw_header_t *fw_header; uint32_t sum; uint32_t signature; - uint32_t *fw_data; + const uint32_t *fw_data; size_t fw_size; cpuctl_cpuid_args_t idargs = { .level = 1, /* Request signature. */ @@ -90,16 +90,14 @@ amd_update(const char *dev, const char *path) cpuctl_update_args_t args; int error; + dev = params->dev_path; + path = params->fw_path; + devfd = params->devfd; + fw_image = params->fwimage; + assert(path); assert(dev); - fd = -1; - fw_image = MAP_FAILED; - devfd = open(dev, O_RDWR); - if (devfd < 0) { - WARN(0, "could not open %s for writing", dev); - return; - } error = ioctl(devfd, CPUCTL_CPUID, &idargs); if (error < 0) { WARN(0, "ioctl()"); @@ -115,37 +113,18 @@ amd_update(const char *dev, const char *path) /* * Open the firmware file. */ - fd = open(path, O_RDONLY, 0); - if (fd < 0) { - WARN(0, "open(%s)", path); - goto fail; - } - error = fstat(fd, &st); - if (error != 0) { - WARN(0, "fstat(%s)", path); - goto fail; - } - if (st.st_size < 0 || (unsigned)st.st_size < sizeof(*fw_header)) { + if (params->fwsize < sizeof(*fw_header)) { WARNX(2, "file too short: %s", path); goto fail; } - /* - * mmap the whole image. 
- */ - fw_image = (uint32_t *)mmap(NULL, st.st_size, PROT_READ, - MAP_PRIVATE, fd, 0); - if (fw_image == MAP_FAILED) { - WARN(0, "mmap(%s)", path); - goto fail; - } - fw_header = (amd_fw_header_t *)fw_image; + fw_header = (const amd_fw_header_t *)fw_image; if ((fw_header->magic >> 8) != AMD_MAGIC) { WARNX(2, "%s is not a valid amd firmware: version mismatch", path); goto fail; } - fw_data = (uint32_t *)(fw_header + 1); - fw_size = (st.st_size - sizeof(*fw_header)) / sizeof(uint32_t); + fw_data = (const uint32_t *)(fw_header + 1); + fw_size = (params->fwsize - sizeof(*fw_header)) / sizeof(uint32_t); /* * Check the primary checksum. @@ -160,8 +139,8 @@ amd_update(const char *dev, const char *path) if (signature == fw_header->signature) { fprintf(stderr, "%s: updating cpu %s... ", path, dev); - args.data = fw_image; - args.size = st.st_size; + args.data = __DECONST(void *, fw_image); + args.size = params->fwsize; error = ioctl(devfd, CPUCTL_UPDATE, &args); if (error < 0) { fprintf(stderr, "failed.\n"); @@ -172,12 +151,5 @@ amd_update(const char *dev, const char *path) } fail: - if (fd >= 0) - close(fd); - if (devfd >= 0) - close(devfd); - if (fw_image != MAP_FAILED) - if(munmap(fw_image, st.st_size) != 0) - warn("munmap(%s)", path); return; } diff --git a/usr.sbin/cpucontrol/amd10h.c b/usr.sbin/cpucontrol/amd10h.c index c8364f1d7890..ca2bba70dc47 100644 --- a/usr.sbin/cpucontrol/amd10h.c +++ b/usr.sbin/cpucontrol/amd10h.c @@ -88,9 +88,8 @@ amd10h_probe(int fd) * source code. 
*/ void -amd10h_update(const char *dev, const char *path) +amd10h_update(const struct ucode_update_params *params) { - struct stat st; cpuctl_cpuid_args_t idargs; cpuctl_msr_args_t msrargs; cpuctl_update_args_t args; @@ -100,27 +99,27 @@ amd10h_update(const char *dev, const char *path) const section_header_t *section_header; const container_header_t *container_header; const uint8_t *fw_data; - uint8_t *fw_image; + const uint8_t *fw_image; + const char *dev, *path; size_t fw_size; size_t selected_size; uint32_t revision; uint32_t new_rev; uint32_t signature; uint16_t equiv_id; - int fd, devfd; + int devfd; unsigned int i; int error; + dev = params->dev_path; + path = params->fw_path; + devfd = params->devfd; + fw_image = params->fwimage; + fw_size = params->fwsize; + assert(path); assert(dev); - fd = -1; - fw_image = MAP_FAILED; - devfd = open(dev, O_RDWR); - if (devfd < 0) { - WARN(0, "could not open %s for writing", dev); - return; - } idargs.level = 1; error = ioctl(devfd, CPUCTL_CPUID, &idargs); if (error < 0) { @@ -149,33 +148,15 @@ amd10h_update(const char *dev, const char *path) * Open the firmware file. */ WARNX(1, "checking %s for update.", path); - fd = open(path, O_RDONLY, 0); - if (fd < 0) { - WARN(0, "open(%s)", path); - goto done; - } - error = fstat(fd, &st); - if (error != 0) { - WARN(0, "fstat(%s)", path); - goto done; - } - if (st.st_size < 0 || (size_t)st.st_size < + if (fw_size < (sizeof(*container_header) + sizeof(*section_header))) { WARNX(2, "file too short: %s", path); goto done; } - fw_size = st.st_size; /* * mmap the whole image. 
*/ - fw_image = (uint8_t *)mmap(NULL, st.st_size, PROT_READ, - MAP_PRIVATE, fd, 0); - if (fw_image == MAP_FAILED) { - WARN(0, "mmap(%s)", path); - goto done; - } - fw_data = fw_image; container_header = (const container_header_t *)fw_data; if (container_header->magic != AMD_10H_MAGIC) { @@ -306,12 +287,5 @@ amd10h_update(const char *dev, const char *path) WARNX(0, "revision after update %#x", new_rev); done: - if (fd >= 0) - close(fd); - if (devfd >= 0) - close(devfd); - if (fw_image != MAP_FAILED) - if (munmap(fw_image, st.st_size) != 0) - warn("munmap(%s)", path); return; } diff --git a/usr.sbin/cpucontrol/cpucontrol.c b/usr.sbin/cpucontrol/cpucontrol.c index b20247dfba87..5904a912bc32 100644 --- a/usr.sbin/cpucontrol/cpucontrol.c +++ b/usr.sbin/cpucontrol/cpucontrol.c @@ -34,18 +34,20 @@ __FBSDID("$FreeBSD$"); #include <assert.h> +#include <err.h> +#include <errno.h> +#include <dirent.h> +#include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <fcntl.h> -#include <err.h> #include <sysexits.h> -#include <dirent.h> #include <sys/queue.h> #include <sys/param.h> #include <sys/types.h> +#include <sys/mman.h> #include <sys/stat.h> #include <sys/ioctl.h> #include <sys/cpuctl.h> @@ -74,16 +76,6 @@ int verbosity_level = 0; #define HIGH(val) (uint32_t)(((val) >> 32) & 0xffffffff) #define LOW(val) (uint32_t)((val) & 0xffffffff) -/* - * Macros for freeing SLISTs, probably must be in /sys/queue.h - */ -#define SLIST_FREE(head, field, freef) do { \ - typeof(SLIST_FIRST(head)) __elm0; \ - typeof(SLIST_FIRST(head)) __elm; \ - SLIST_FOREACH_SAFE(__elm, (head), field, __elm0) \ - (void)(freef)(__elm); \ -} while(0); - struct datadir { const char *path; SLIST_ENTRY(datadir) next; @@ -102,7 +94,6 @@ static struct ucode_handler { #define NHANDLERS (sizeof(handlers) / sizeof(*handlers)) static void usage(void); -static int isdir(const char *path); static int do_cpuid(const char *cmdarg, const char *dev); static int 
do_cpuid_count(const char *cmdarg, const char *dev); static int do_msr(const char *cmdarg, const char *dev); @@ -123,20 +114,6 @@ usage(void) } static int -isdir(const char *path) -{ - int error; - struct stat st; - - error = stat(path, &st); - if (error < 0) { - WARN(0, "stat(%s)", path); - return (error); - } - return (st.st_mode & S_IFDIR); -} - -static int do_cpuid(const char *cmdarg, const char *dev) { unsigned int level; @@ -361,16 +338,77 @@ do_eval_cpu_features(const char *dev) } static int +try_a_fw_image(const char *dev_path, int devfd, int fwdfd, const char *dpath, + const char *fname, struct ucode_handler *handler) +{ + struct ucode_update_params parm; + struct stat st; + char *fw_path; + void *fw_map; + int fwfd, rc; + + rc = 0; + fw_path = NULL; + fw_map = MAP_FAILED; + fwfd = openat(fwdfd, fname, O_RDONLY); + if (fwfd < 0) { + WARN(0, "openat(%s, %s)", dpath, fname); + goto out; + } + + rc = asprintf(&fw_path, "%s/%s", dpath, fname); + if (rc == -1) { + WARNX(0, "out of memory"); + rc = ENOMEM; + goto out; + } + + rc = fstat(fwfd, &st); + if (rc != 0) { + WARN(0, "fstat(%s)", fw_path); + rc = 0; + goto out; + } + if (st.st_size <= 0) { + WARN(0, "%s: empty", fw_path); + goto out; + } + + fw_map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fwfd, 0); + if (fw_map == MAP_FAILED) { + WARN(0, "mmap(%s)", fw_path); + goto out; + } + + + memset(&parm, 0, sizeof(parm)); + parm.devfd = devfd; + parm.fwimage = fw_map; + parm.fwsize = st.st_size; + parm.dev_path = dev_path; + parm.fw_path = fw_path; + + handler->update(&parm); + +out: + if (fw_map != MAP_FAILED) + munmap(fw_map, st.st_size); + free(fw_path); + if (fwfd >= 0) + close(fwfd); + return (rc); +} + +static int do_update(const char *dev) { - int fd; + int fd, fwdfd; unsigned int i; int error; struct ucode_handler *handler; struct datadir *dir; DIR *dirp; struct dirent *direntry; - char buf[MAXPATHLEN]; fd = open(dev, O_RDONLY); if (fd < 0) { @@ -379,7 +417,7 @@ do_update(const char *dev) } /* - * 
Find the appropriate handler for device. + * Find the appropriate handler for CPU. */ for (i = 0; i < NHANDLERS; i++) if (handlers[i].probe(fd) == 0) @@ -387,39 +425,54 @@ do_update(const char *dev) if (i < NHANDLERS) handler = &handlers[i]; else { - WARNX(0, "cannot find the appropriate handler for device"); + WARNX(0, "cannot find the appropriate handler for %s", dev); close(fd); return (1); } close(fd); + fd = open(dev, O_RDWR); + if (fd < 0) { + WARN(0, "error opening %s for writing", dev); + return (1); + } + /* * Process every image in specified data directories. */ SLIST_FOREACH(dir, &datadirs, next) { - dirp = opendir(dir->path); - if (dirp == NULL) { - WARNX(1, "skipping directory %s: not accessible", dir->path); + fwdfd = open(dir->path, O_RDONLY); + if (fwdfd < 0) { + WARN(1, "skipping directory %s: not accessible", dir->path); continue; } + dirp = fdopendir(fwdfd); + if (dirp == NULL) { + WARNX(0, "out of memory"); + close(fwdfd); + close(fd); + return (1); + } + while ((direntry = readdir(dirp)) != NULL) { if (direntry->d_namlen == 0) continue; - error = snprintf(buf, sizeof(buf), "%s/%s", dir->path, - direntry->d_name); - if ((unsigned)error >= sizeof(buf)) - WARNX(0, "skipping %s, buffer too short", - direntry->d_name); - if (isdir(buf) != 0) { - WARNX(2, "skipping %s: is a directory", buf); + if (direntry->d_type == DT_DIR) continue; + + error = try_a_fw_image(dev, fd, fwdfd, dir->path, + direntry->d_name, handler); + if (error != 0) { + closedir(dirp); + close(fd); + return (1); } - handler->update(dev, buf); } error = closedir(dirp); if (error != 0) WARN(0, "closedir(%s)", dir->path); } + close(fd); return (0); } @@ -441,6 +494,7 @@ datadir_add(const char *path) int main(int argc, char *argv[]) { + struct datadir *elm; int c, flags; const char *cmdarg; const char *dev; @@ -511,6 +565,9 @@ main(int argc, char *argv[]) default: usage(); /* Only one command can be selected. 
*/ } - SLIST_FREE(&datadirs, next, free); + while ((elm = SLIST_FIRST(&datadirs)) != NULL) { + SLIST_REMOVE_HEAD(&datadirs, next); + free(elm); + } return (error == 0 ? 0 : 1); } diff --git a/usr.sbin/cpucontrol/cpucontrol.h b/usr.sbin/cpucontrol/cpucontrol.h index 555e931734d4..8620d5ade19d 100644 --- a/usr.sbin/cpucontrol/cpucontrol.h +++ b/usr.sbin/cpucontrol/cpucontrol.h @@ -30,8 +30,17 @@ #ifndef CPUCONTROL_H #define CPUCONTROL_H +#include <stddef.h> + typedef int ucode_probe_t(int fd); -typedef void ucode_update_t(const char *dev, const char *image); +struct ucode_update_params { + int devfd; /* RDWR handle to cpucontrol device */ + const void *fwimage; /* READ mapping of firmware image */ + size_t fwsize; /* Non-zero size of firmware image */ + const char *dev_path; /* cpucontrol device path, for logging */ + const char *fw_path; /* firmware image path, for logging */ +}; +typedef void ucode_update_t(const struct ucode_update_params *params); extern int verbosity_level; diff --git a/usr.sbin/cpucontrol/intel.c b/usr.sbin/cpucontrol/intel.c index 8ad1908bd386..dfe86f7185c6 100644 --- a/usr.sbin/cpucontrol/intel.c +++ b/usr.sbin/cpucontrol/intel.c @@ -76,23 +76,23 @@ intel_probe(int fd) } void -intel_update(const char *dev, const char *path) +intel_update(const struct ucode_update_params *params) { - int fd, devfd; - struct stat st; - uint32_t *fw_image; + int devfd; + const char *dev, *path; + const uint32_t *fw_image; int have_ext_table; uint32_t sum; unsigned int i; size_t payload_size; - intel_fw_header_t *fw_header; - intel_cpu_signature_t *ext_table; - intel_ext_header_t *ext_header; + const intel_fw_header_t *fw_header; + const intel_cpu_signature_t *ext_table; + const intel_ext_header_t *ext_header; uint32_t sig, signature, flags; int32_t revision; ssize_t ext_size; size_t ext_table_size; - void *fw_data; + const void *fw_data; size_t data_size, total_size; cpuctl_msr_args_t msrargs = { .msr = MSR_BIOS_SIGN, @@ -104,18 +104,17 @@ intel_update(const 
char *dev, const char *path) cpuctl_update_args_t args; int error; + dev = params->dev_path; + path = params->fw_path; + devfd = params->devfd; + fw_image = params->fwimage; + assert(path); assert(dev); - fd = -1; - fw_image = MAP_FAILED; ext_table = NULL; ext_header = NULL; - devfd = open(dev, O_RDWR); - if (devfd < 0) { - WARN(0, "could not open %s for writing", dev); - return; - } + error = ioctl(devfd, CPUCTL_WRMSR, &msrargs); if (error < 0) { WARN(0, "ioctl(%s)", dev); @@ -151,31 +150,12 @@ intel_update(const char *dev, const char *path) /* * Open firmware image. */ - fd = open(path, O_RDONLY, 0); - if (fd < 0) { - WARN(0, "open(%s)", path); - goto fail; - } - error = fstat(fd, &st); - if (error != 0) { - WARN(0, "fstat(%s)", path); - goto fail; - } - if (st.st_size < 0 || (unsigned)st.st_size < sizeof(*fw_header)) { + if (params->fwsize < sizeof(*fw_header)) { WARNX(2, "file too short: %s", path); goto fail; } - /* - * mmap the whole image. - */ - fw_image = (uint32_t *)mmap(NULL, st.st_size, PROT_READ, - MAP_PRIVATE, fd, 0); - if (fw_image == MAP_FAILED) { - WARN(0, "mmap(%s)", path); - goto fail; - } - fw_header = (intel_fw_header_t *)fw_image; + fw_header = (const intel_fw_header_t *)fw_image; if (fw_header->header_version != INTEL_HEADER_VERSION || fw_header->loader_revision != INTEL_LOADER_REVISION) { WARNX(2, "%s is not a valid intel firmware: version mismatch", @@ -193,7 +173,7 @@ intel_update(const char *dev, const char *path) total_size = data_size + sizeof(*fw_header); else total_size = fw_header->total_size; - if (total_size > (unsigned)st.st_size || st.st_size < 0) { + if (total_size > params->fwsize) { WARNX(2, "file too short: %s", path); goto fail; } @@ -204,7 +184,7 @@ intel_update(const char *dev, const char *path) */ sum = 0; for (i = 0; i < (payload_size / sizeof(uint32_t)); i++) - sum += *((uint32_t *)fw_image + i); + sum += *((const uint32_t *)fw_image + i); if (sum != 0) { WARNX(2, "%s: update data checksum invalid", path); goto fail; @@ 
-217,9 +197,9 @@ intel_update(const char *dev, const char *path) have_ext_table = 0; if (ext_size > (signed)sizeof(*ext_header)) { - ext_header = - (intel_ext_header_t *)((char *)fw_image + payload_size); - ext_table = (intel_cpu_signature_t *)(ext_header + 1); + ext_header = (const intel_ext_header_t *) + ((const char *)fw_image + payload_size); + ext_table = (const intel_cpu_signature_t *)(ext_header + 1); /* * Check the extended table size. @@ -236,7 +216,7 @@ intel_update(const char *dev, const char *path) */ sum = 0; for (i = 0; i < (ext_table_size / sizeof(uint32_t)); i++) - sum += *((uint32_t *)ext_header + i); + sum += *((const uint32_t *)ext_header + i); if (sum != 0) { WARNX(2, "%s: extended signature table checksum invalid", @@ -273,7 +253,7 @@ matched: } fprintf(stderr, "%s: updating cpu %s from rev %#x to rev %#x... ", path, dev, revision, fw_header->revision); - args.data = fw_data; + args.data = __DECONST(void *, fw_data); args.size = data_size; error = ioctl(devfd, CPUCTL_UPDATE, &args); if (error < 0) { @@ -286,11 +266,5 @@ matched: fprintf(stderr, "done.\n"); fail: - if (fw_image != MAP_FAILED) - if (munmap(fw_image, st.st_size) != 0) - warn("munmap(%s)", path); - if (devfd >= 0) - close(devfd); - if (fd >= 0) - close(fd); + return; } diff --git a/usr.sbin/cpucontrol/via.c b/usr.sbin/cpucontrol/via.c index 354817edf851..b03790a0b4df 100644 --- a/usr.sbin/cpucontrol/via.c +++ b/usr.sbin/cpucontrol/via.c @@ -76,18 +76,18 @@ via_probe(int fd) } void -via_update(const char *dev, const char *path) +via_update(const struct ucode_update_params *params) { - int fd, devfd; - struct stat st; - uint32_t *fw_image; + int devfd; + const char *dev, *path; + const uint32_t *fw_image; uint32_t sum; unsigned int i; size_t payload_size; - via_fw_header_t *fw_header; + const via_fw_header_t *fw_header; uint32_t signature; int32_t revision; - void *fw_data; + const void *fw_data; size_t data_size, total_size; cpuctl_msr_args_t msrargs = { .msr = MSR_IA32_PLATFORM_ID, 
@@ -98,17 +98,14 @@ via_update(const char *dev, const char *path) cpuctl_update_args_t args; int error; + dev = params->dev_path; + path = params->fw_path; + devfd = params->devfd; + fw_image = params->fwimage; + assert(path); assert(dev); - fd = -1; - devfd = -1; - fw_image = MAP_FAILED; - devfd = open(dev, O_RDWR); - if (devfd < 0) { - WARN(0, "could not open %s for writing", dev); - return; - } error = ioctl(devfd, CPUCTL_CPUID, &idargs); if (error < 0) { WARN(0, "ioctl(%s)", dev); @@ -134,34 +131,13 @@ via_update(const char *dev, const char *path) WARNX(2, "found cpu type %#x family %#x model %#x stepping %#x.", (signature >> 12) & 0x03, (signature >> 8) & 0x0f, (signature >> 4) & 0x0f, (signature >> 0) & 0x0f); - /* - * Open firmware image. - */ - fd = open(path, O_RDONLY, 0); - if (fd < 0) { - WARN(0, "open(%s)", path); - goto fail; - } - error = fstat(fd, &st); - if (error != 0) { - WARN(0, "fstat(%s)", path); - goto fail; - } - if (st.st_size < 0 || (unsigned)st.st_size < sizeof(*fw_header)) { + + if (params->fwsize < sizeof(*fw_header)) { WARNX(2, "file too short: %s", path); goto fail; } - /* - * mmap the whole image. 
- */ - fw_image = (uint32_t *)mmap(NULL, st.st_size, PROT_READ, - MAP_PRIVATE, fd, 0); - if (fw_image == MAP_FAILED) { - WARN(0, "mmap(%s)", path); - goto fail; - } - fw_header = (via_fw_header_t *)fw_image; + fw_header = (const via_fw_header_t *)fw_image; if (fw_header->signature != VIA_HEADER_SIGNATURE || fw_header->loader_revision != VIA_LOADER_REVISION) { WARNX(2, "%s is not a valid via firmware: version mismatch", @@ -170,7 +146,7 @@ via_update(const char *dev, const char *path) } data_size = fw_header->data_size; total_size = fw_header->total_size; - if (total_size > (unsigned)st.st_size || st.st_size < 0) { + if (total_size > params->fwsize) { WARNX(2, "file too short: %s", path); goto fail; } @@ -181,7 +157,7 @@ via_update(const char *dev, const char *path) */ sum = 0; for (i = 0; i < (payload_size / sizeof(uint32_t)); i++) - sum += *((uint32_t *)fw_image + i); + sum += *((const uint32_t *)fw_image + i); if (sum != 0) { WARNX(2, "%s: update data checksum invalid", path); goto fail; @@ -202,25 +178,18 @@ via_update(const char *dev, const char *path) } fprintf(stderr, "%s: updating cpu %s from rev %#x to rev %#x... ", path, dev, revision, fw_header->revision); - args.data = fw_data; + args.data = __DECONST(void *, fw_data); args.size = data_size; error = ioctl(devfd, CPUCTL_UPDATE, &args); if (error < 0) { error = errno; - fprintf(stderr, "failed.\n"); + fprintf(stderr, "failed.\n"); errno = error; - WARN(0, "ioctl()"); - goto fail; + WARN(0, "ioctl()"); + goto fail; } fprintf(stderr, "done.\n"); fail: - if (fw_image != MAP_FAILED) - if (munmap(fw_image, st.st_size) != 0) - warn("munmap(%s)", path); - if (devfd >= 0) - close(devfd); - if (fd >= 0) - close(fd); return; } |
