diff options
234 files changed, 8273 insertions, 4169 deletions
diff --git a/.cirrus.yml b/.cirrus.yml index d6c4df7a9776..b03fac2b26b5 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -194,11 +194,13 @@ precommit_task: matrix: - name: amd64 smoke test using internal ci systems only_if: $CIRRUS_REPO_FULL_NAME != 'freebsd/freebsd-src' || $CIRRUS_BRANCH =~ 'pull/.*' + trigger_type: manual env: TARGET: amd64 TARGET_ARCH: amd64 - name: aarch64 smoke test using internal ci systems only_if: $CIRRUS_REPO_FULL_NAME != 'freebsd/freebsd-src' || $CIRRUS_BRANCH =~ 'pull/.*' + trigger_type: manual env: TARGET: arm64 TARGET_ARCH: aarch64 diff --git a/ObsoleteFiles.inc b/ObsoleteFiles.inc index dcda9a035b44..61f948a2c970 100644 --- a/ObsoleteFiles.inc +++ b/ObsoleteFiles.inc @@ -2481,7 +2481,7 @@ OLD_FILES+=usr/share/man/man4/ng_uni.4.gz OLD_FILES+=usr/share/man/man4/ngatmbase.4.gz # 20230308: machine-id merged into hostid_save -OLD_FILES+=etc/rc.d/machine-id +OLD_FILES+=etc/rc.d/machine_id # 20230306: remove tzsetwall(3) OLD_FILES+=usr/share/man/man3/tzsetwall.3.gz @@ -10,6 +10,9 @@ newline. Entries should be separated by a newline. Changes to this file should not be MFCed. +f1f230439fa4: + FreeBSD now implements the inotify(2) family of system calls. + 50e733f19b37, 171f66b0c2ca: These commits helped improve utilization of NFSv4.1/4.2 delegations. The changes are only used when the NFSv4 diff --git a/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/oclo.c b/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/oclo.c new file mode 100644 index 000000000000..8e6f7c726f24 --- /dev/null +++ b/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/oclo.c @@ -0,0 +1,1341 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2025 Oxide Computer Company + */ + +/* + * Verify the behavior of the various O_CLOFORK and O_CLOEXEC variants. In + * particular getting this via: + * + * - open(2): O_CLOFORK/O_CLOEXEC + * - fcntl(2): F_SETFD FD_CLOFORK/FD_CLOEXEC + * - fcntl(2): F_DUPFD_CLOFORK/F_DUPFD_CLOEXEC + * - fcntl(2): F_DUP2FD_CLOFORK/F_DUP2FD_CLOEXEC + * - dup2(3C) + * - dup3(3C): argument translation + * - pipe2(2) + * - socket(2): SOCK_CLOEXEC/SOCK_CLOFORK + * - accept(2): flags on the listen socket aren't inherited on accept + * - socketpair(3SOCKET) + * - accept4(2): SOCK_CLOEXEC/SOCK_CLOFORK + * - recvmsg(2): SCM_RIGHTS MSG_CMSG_CLOFORK/MSG_CMSG_CLOEXEC + * + * The test is designed such that we have an array of functions that are used to + * create file descriptors with different rules. This is found in the + * oclo_create array. Each file descriptor that is created is then registered + * with information about what is expected about it. A given creation function + * can create more than one file descriptor; however, our expectation is that + * every file descriptor is accounted for (ignoring stdin, stdout, and stderr). + * + * We pass a record of each file descriptor that was recorded to a verification + * program that will verify everything is correctly honored after an exec. Note + * that O_CLOFORK is cleared after exec. The original specification in POSIX has + * it being retained; however, this issue was raised after the spec was + * published as folks went to implement it and we have ended up following along + * with the divergence of other implementations. 
+ */ + +#include <sys/param.h> +#include <sys/sysctl.h> +#include <sys/stat.h> +#include <sys/wait.h> + +#include <netinet/in.h> +#include <sys/socket.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <limits.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +void *recallocarray(void *, size_t, size_t, size_t); + +#define strerrorname_np(e) (sys_errlist[e]) + +/* + * Get pathname to avoid reading /proc/curproc/exe + * + * Taken from procstat_getpathname_sysctl() + */ +static int +getpathname(pid_t pid, char *pathname, size_t maxlen) +{ + int error, name[4]; + size_t len; + + name[0] = CTL_KERN; + name[1] = KERN_PROC; + name[2] = KERN_PROC_PATHNAME; + name[3] = pid; + len = maxlen; + error = sysctl(name, nitems(name), pathname, &len, NULL, 0); + if (error != 0 && errno != ESRCH) + warn("sysctl: kern.proc.pathname: %d", pid); + if (len == 0) + pathname[0] = '\0'; + return (error); +} + +/* + * Verification program name. + */ +#define OCLO_VERIFY "ocloexec_verify" + +/* + * This structure represents a table of ways we expect to create file + * descriptors that should have the resulting flags set when done. The table is + * ordered and subsequent iterations are allowed to assume that the ones that + * have gone ahead of them have run and are therefore allowed to access them. + * The create function is expected to return the created fd. + */ +typedef struct clo_create clo_create_t; +struct clo_create { + const char *clo_desc; + int clo_flags; + void (*clo_func)(const clo_create_t *); +}; + +/* + * This is our run-time data. We expect all file descriptors to be registered by + * our calling functions through oclo_record(). 
+ */ +typedef struct clo_rtdata { + const clo_create_t *crt_data; + size_t crt_idx; + int crt_fd; + int crt_flags; + const char *crt_desc; +} clo_rtdata_t; + +static clo_rtdata_t *oclo_rtdata; +static size_t oclo_rtdata_nents = 0; +static size_t oclo_rtdata_next = 0; +static int oclo_nextfd = STDERR_FILENO + 1; + +static bool +oclo_flags_match(const clo_rtdata_t *rt, bool child) +{ + const char *pass = child ? "post-fork" : "pre-fork"; + bool fail = child && (rt->crt_flags & FD_CLOFORK) != 0; + int flags = fcntl(rt->crt_fd, F_GETFD, NULL); + + if (flags < 0) { + int e = errno; + + if (fail) { + if (e == EBADF) { + (void) printf("TEST PASSED: %s (%s): fd %d: " + "correctly closed\n", + rt->crt_data->clo_desc, pass, rt->crt_fd); + return (true); + } + + warn("TEST FAILED: %s (%s): fd %d: expected fcntl to " + "fail with EBADF, but found %s", + rt->crt_data->clo_desc, pass, rt->crt_fd, + strerrorname_np(e)); + return (false); + } + + warnx("TEST FAILED: %s (%s): fd %d: fcntl(F_GETFD) " + "unexpectedly failed", rt->crt_data->clo_desc, pass, + rt->crt_fd); + return (false); + } + + if (fail) { + warnx("TEST FAILED: %s (%s): fd %d: received flags %d, but " + "expected to fail based on flags %d", + rt->crt_data->clo_desc, pass, rt->crt_fd, flags, + rt->crt_fd); + return (false); + } + + if (flags != rt->crt_flags) { + warnx("TEST FAILED: %s (%s): fd %d: discovered flags 0x%x do " + "not match expected flags 0x%x", rt->crt_data->clo_desc, + pass, rt->crt_fd, flags, rt->crt_fd); + return (false); + } + + (void) printf("TEST PASSED: %s (%s): fd %d discovered flags match " + "(0x%x)\n", rt->crt_data->clo_desc, pass, rt->crt_fd, flags); + return (true); +} + + +static void +oclo_record(const clo_create_t *c, int fd, int exp_flags, const char *desc) +{ + if (oclo_rtdata_next == oclo_rtdata_nents) { + size_t newrt = oclo_rtdata_nents + 8; + clo_rtdata_t *rt; + rt = recallocarray(oclo_rtdata, oclo_rtdata_nents, newrt, + sizeof (clo_rtdata_t)); + if (rt == NULL) { + 
err(EXIT_FAILURE, "TEST_FAILED: internal error " + "expanding fd records to %zu entries", newrt); + } + + oclo_rtdata_nents = newrt; + oclo_rtdata = rt; + } + + if (fd != oclo_nextfd) { + errx(EXIT_FAILURE, "TEST FAILED: internal test error: expected " + "to record next fd %d, given %d", oclo_nextfd, fd); + } + + oclo_rtdata[oclo_rtdata_next].crt_data = c; + oclo_rtdata[oclo_rtdata_next].crt_fd = fd; + oclo_rtdata[oclo_rtdata_next].crt_flags = exp_flags; + oclo_rtdata[oclo_rtdata_next].crt_desc = desc; + + /* + * Matching errors at this phase are fatal as it means we screwed up the + * program pretty badly. + */ + if (!oclo_flags_match(&oclo_rtdata[oclo_rtdata_next], false)) { + exit(EXIT_FAILURE); + } + + oclo_rtdata_next++; + oclo_nextfd++; +} + +static int +oclo_file(const clo_create_t *c) +{ + int flags = O_RDWR, fd; + + if ((c->clo_flags & FD_CLOEXEC) != 0) + flags |= O_CLOEXEC; + if ((c->clo_flags & FD_CLOFORK) != 0) + flags |= O_CLOFORK; + fd = open("/dev/null", flags); + if (fd < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to open /dev/null", + c->clo_desc); + } + + return (fd); +} + +static void +oclo_open(const clo_create_t *c) +{ + oclo_record(c, oclo_file(c), c->clo_flags, NULL); +} + +static void +oclo_setfd_common(const clo_create_t *c, int targ_flags) +{ + int fd = oclo_file(c); + if (fcntl(fd, F_SETFD, targ_flags) < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: F_SETFD failed to set " + "flags to %d", c->clo_desc, targ_flags); + } + + oclo_record(c, fd, targ_flags, NULL); +} + +static void +oclo_setfd_none(const clo_create_t *c) +{ + oclo_setfd_common(c, 0); +} + +static void +oclo_setfd_exec(const clo_create_t *c) +{ + oclo_setfd_common(c, FD_CLOEXEC); +} + +static void +oclo_setfd_fork(const clo_create_t *c) +{ + oclo_setfd_common(c, FD_CLOFORK); +} + +static void +oclo_setfd_both(const clo_create_t *c) +{ + oclo_setfd_common(c, FD_CLOFORK | FD_CLOEXEC); +} + +/* + * Open an fd with flags in a certain form and then use one of the F_DUPFD or + * 
F_DUP2FD variants and ensure that flags are properly propagated as expected. + */ +static void +oclo_fdup_common(const clo_create_t *c, int targ_flags, int cmd) +{ + int dup, fd; + + fd = oclo_file(c); + oclo_record(c, fd, c->clo_flags, "base"); + switch (cmd) { + case F_DUPFD: + case F_DUPFD_CLOEXEC: + case F_DUPFD_CLOFORK: + dup = fcntl(fd, cmd, fd); + break; + case F_DUP2FD: + case F_DUP2FD_CLOEXEC: +#ifdef F_DUP2FD_CLOFORK + case F_DUP2FD_CLOFORK: +#endif + dup = fcntl(fd, cmd, fd + 1); + break; + case F_DUP3FD: + dup = fcntl(fd, cmd | (targ_flags << F_DUP3FD_SHIFT), fd + 1); + break; + default: + errx(EXIT_FAILURE, "TEST FAILURE: %s: internal error: " + "unexpected fcntl cmd: 0x%x", c->clo_desc, cmd); + } + + if (dup < 0) { + err(EXIT_FAILURE, "TEST FAILURE: %s: failed to dup fd with " + "fcntl command 0x%x", c->clo_desc, cmd); + } + + oclo_record(c, dup, targ_flags, "dup"); +} + +static void +oclo_fdupfd(const clo_create_t *c) +{ + oclo_fdup_common(c, 0, F_DUPFD); +} + +static void +oclo_fdupfd_fork(const clo_create_t *c) +{ + oclo_fdup_common(c, FD_CLOFORK, F_DUPFD_CLOFORK); +} + +static void +oclo_fdupfd_exec(const clo_create_t *c) +{ + oclo_fdup_common(c, FD_CLOEXEC, F_DUPFD_CLOEXEC); +} + +static void +oclo_fdup2fd(const clo_create_t *c) +{ + oclo_fdup_common(c, 0, F_DUP2FD); +} + +#ifdef F_DUP2FD_CLOFORK +static void +oclo_fdup2fd_fork(const clo_create_t *c) +{ + oclo_fdup_common(c, FD_CLOFORK, F_DUP2FD_CLOFORK); +} +#endif + +static void +oclo_fdup2fd_exec(const clo_create_t *c) +{ + oclo_fdup_common(c, FD_CLOEXEC, F_DUP2FD_CLOEXEC); +} + +static void +oclo_fdup3fd_none(const clo_create_t *c) +{ + oclo_fdup_common(c, 0, F_DUP3FD); +} + +static void +oclo_fdup3fd_exec(const clo_create_t *c) +{ + oclo_fdup_common(c, FD_CLOEXEC, F_DUP3FD); +} + +static void +oclo_fdup3fd_fork(const clo_create_t *c) +{ + oclo_fdup_common(c, FD_CLOFORK, F_DUP3FD); +} + +static void +oclo_fdup3fd_both(const clo_create_t *c) +{ + oclo_fdup_common(c, FD_CLOEXEC | FD_CLOFORK, 
F_DUP3FD); +} + +static void +oclo_dup_common(const clo_create_t *c, int targ_flags, bool v3) +{ + int dup, fd; + fd = oclo_file(c); + oclo_record(c, fd, c->clo_flags, "base"); + if (v3) { + int dflags = 0; + if ((targ_flags & FD_CLOEXEC) != 0) + dflags |= O_CLOEXEC; + if ((targ_flags & FD_CLOFORK) != 0) + dflags |= O_CLOFORK; + dup = dup3(fd, fd + 1, dflags); + } else { + dup = dup2(fd, fd + 1); + } + + oclo_record(c, dup, targ_flags, "dup"); +} + +static void +oclo_dup2(const clo_create_t *c) +{ + oclo_dup_common(c, 0, false); +} + +static void +oclo_dup3_none(const clo_create_t *c) +{ + oclo_dup_common(c, 0, true); +} + +static void +oclo_dup3_exec(const clo_create_t *c) +{ + oclo_dup_common(c, FD_CLOEXEC, true); +} + +static void +oclo_dup3_fork(const clo_create_t *c) +{ + oclo_dup_common(c, FD_CLOFORK, true); +} + +static void +oclo_dup3_both(const clo_create_t *c) +{ + oclo_dup_common(c, FD_CLOEXEC | FD_CLOFORK, true); +} + +static void +oclo_pipe(const clo_create_t *c) +{ + int flags = 0, fds[2]; + + if ((c->clo_flags & FD_CLOEXEC) != 0) + flags |= O_CLOEXEC; + if ((c->clo_flags & FD_CLOFORK) != 0) + flags |= O_CLOFORK; + + if (pipe2(fds, flags) < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: pipe2() with flags %d " + "failed", c->clo_desc, flags); + } + + oclo_record(c, fds[0], c->clo_flags, "pipe[0]"); + oclo_record(c, fds[1], c->clo_flags, "pipe[1]"); +} + +static void +oclo_socket(const clo_create_t *c) +{ + int type = SOCK_DGRAM, fd; + + if ((c->clo_flags & FD_CLOEXEC) != 0) + type |= SOCK_CLOEXEC; + if ((c->clo_flags & FD_CLOFORK) != 0) + type |= SOCK_CLOFORK; + fd = socket(PF_INET, type, 0); + if (fd < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to create socket " + "with flags: 0x%x\n", c->clo_desc, c->clo_flags); + } + + oclo_record(c, fd, c->clo_flags, NULL); +} + +static void +oclo_accept_common(const clo_create_t *c, int targ_flags, bool a4) +{ + int lsock, csock, asock; + int ltype = SOCK_STREAM, atype = 0; + struct sockaddr_in in; + 
socklen_t slen; + + if ((c->clo_flags & FD_CLOEXEC) != 0) + ltype |= SOCK_CLOEXEC; + if ((c->clo_flags & FD_CLOFORK) != 0) + ltype |= SOCK_CLOFORK; + + if ((targ_flags & FD_CLOEXEC) != 0) + atype |= SOCK_CLOEXEC; + if ((targ_flags & FD_CLOFORK) != 0) + atype |= SOCK_CLOFORK; + + lsock = socket(PF_INET, ltype, 0); + if (lsock < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to create listen " + "socket with flags: 0x%x\n", c->clo_desc, c->clo_flags); + } + + oclo_record(c, lsock, c->clo_flags, "listen"); + (void) memset(&in, 0, sizeof (in)); + in.sin_family = AF_INET; + in.sin_port = 0; + in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + if (bind(lsock, (struct sockaddr *)&in, sizeof (in)) != 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to bind socket", + c->clo_desc); + } + + slen = sizeof (struct sockaddr_in); + if (getsockname(lsock, (struct sockaddr *)&in, &slen) != 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to discover bound " + "socket address", c->clo_desc); + } + + if (listen(lsock, 5) < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to listen on socket", + c->clo_desc); + } + + csock = socket(PF_INET, SOCK_STREAM, 0); + if (csock < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to create client " + "socket", c->clo_desc); + } + oclo_record(c, csock, 0, "connect"); + + if (connect(csock, (struct sockaddr *)&in, sizeof (in)) != 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to connect to " + "server socket", c->clo_desc); + } + + if (a4) { + asock = accept4(lsock, NULL, NULL, atype); + } else { + asock = accept(lsock, NULL, NULL); + } + if (asock < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to accept client " + "connection", c->clo_desc); + } + oclo_record(c, asock, targ_flags, "accept"); +} + +static void +oclo_accept(const clo_create_t *c) +{ + oclo_accept_common(c, 0, false); +} + +static void +oclo_accept4_none(const clo_create_t *c) +{ + oclo_accept_common(c, 0, true); +} + +static void +oclo_accept4_fork(const clo_create_t 
*c) +{ + oclo_accept_common(c, FD_CLOFORK, true); +} + +static void +oclo_accept4_exec(const clo_create_t *c) +{ + oclo_accept_common(c, FD_CLOEXEC, true); +} + +static void +oclo_accept4_both(const clo_create_t *c) +{ + oclo_accept_common(c, FD_CLOEXEC | FD_CLOFORK, true); +} + +/* + * Go through the process of sending ourselves a file descriptor. + */ +static void +oclo_rights_common(const clo_create_t *c, int targ_flags) +{ + int pair[2], type = SOCK_DGRAM, sflags = 0; + int tosend = oclo_file(c), recvfd; + uint32_t data = 0x7777; + struct iovec iov; + struct msghdr msg; + struct cmsghdr *cm; + + if ((c->clo_flags & FD_CLOEXEC) != 0) + type |= SOCK_CLOEXEC; + if ((c->clo_flags & FD_CLOFORK) != 0) + type |= SOCK_CLOFORK; + + if (socketpair(PF_UNIX, type, 0, pair) < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to create socket " + "pair", c->clo_desc); + } + + oclo_record(c, tosend, c->clo_flags, "send fd"); + oclo_record(c, pair[0], c->clo_flags, "pair[0]"); + oclo_record(c, pair[1], c->clo_flags, "pair[1]"); + + iov.iov_base = (void *)&data; + iov.iov_len = sizeof (data); + + (void) memset(&msg, 0, sizeof (msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_controllen = CMSG_SPACE(sizeof (int)); + + msg.msg_control = calloc(1, msg.msg_controllen); + if (msg.msg_control == NULL) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to allocate %u " + "bytes for SCM_RIGHTS control message", c->clo_desc, + msg.msg_controllen); + } + + cm = CMSG_FIRSTHDR(&msg); + cm->cmsg_len = CMSG_LEN(sizeof (int)); + cm->cmsg_level = SOL_SOCKET; + cm->cmsg_type = SCM_RIGHTS; + (void) memcpy(CMSG_DATA(cm), &tosend, sizeof (tosend)); + + if ((targ_flags & FD_CLOEXEC) != 0) + sflags |= MSG_CMSG_CLOEXEC; + if ((targ_flags & FD_CLOFORK) != 0) + sflags |= MSG_CMSG_CLOFORK; + + if (sendmsg(pair[0], &msg, 0) < 0) { + err(EXIT_FAILURE, "TEST FAILED: %s: failed to send fd", + c->clo_desc); + } + + data = 0; + if (recvmsg(pair[1], &msg, sflags) < 0) { + err(EXIT_FAILURE, "TEST 
FAILED: %s: failed to get fd", + c->clo_desc); + } + + if (data != 0x7777) { + errx(EXIT_FAILURE, "TEST FAILED: %s: did not receive correct " + "data: expected 0x7777, found 0x%x", c->clo_desc, data); + } + + if (msg.msg_controllen < CMSG_SPACE(sizeof (int))) { + errx(EXIT_FAILURE, "TEST FAILED: %s: found insufficient " + "message control length: expected at least 0x%zx, found " + "0x%x", c->clo_desc, CMSG_SPACE(sizeof (int)), + msg.msg_controllen); + } + + cm = CMSG_FIRSTHDR(&msg); + if (cm->cmsg_level != SOL_SOCKET || cm->cmsg_type != SCM_RIGHTS) { + errx(EXIT_FAILURE, "TEST FAILED: %s: found surprising cmsg " + "0x%x/0x%x, expected 0x%x/0x%x", c->clo_desc, + cm->cmsg_level, cm->cmsg_type, SOL_SOCKET, SCM_RIGHTS); + } + + if (cm->cmsg_len != CMSG_LEN(sizeof (int))) { + errx(EXIT_FAILURE, "TEST FAILED: %s: found unexpected " + "SCM_RIGHTS length 0x%x: expected 0x%zx", c->clo_desc, + cm->cmsg_len, CMSG_LEN(sizeof (int))); + } + + (void) memcpy(&recvfd, CMSG_DATA(cm), sizeof (recvfd)); + oclo_record(c, recvfd, targ_flags, "SCM_RIGHTS"); +} + +static void +oclo_rights_none(const clo_create_t *c) +{ + oclo_rights_common(c, 0); +} + +static void +oclo_rights_exec(const clo_create_t *c) +{ + oclo_rights_common(c, FD_CLOEXEC); +} + +static void +oclo_rights_fork(const clo_create_t *c) +{ + oclo_rights_common(c, FD_CLOFORK); +} + +static void +oclo_rights_both(const clo_create_t *c) +{ + oclo_rights_common(c, FD_CLOEXEC | FD_CLOFORK); +} + +static const clo_create_t oclo_create[] = { { + .clo_desc = "open(2), no flags", + .clo_flags = 0, + .clo_func = oclo_open +}, { + .clo_desc = "open(2), O_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_open +}, { + .clo_desc = "open(2), O_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_open +}, { + .clo_desc = "open(2), O_CLOEXEC|O_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_open +}, { + .clo_desc = "fcntl(F_SETFD) no flags->no flags", + .clo_flags = 0, + .clo_func = oclo_setfd_none +}, { + 
.clo_desc = "fcntl(F_SETFD) O_CLOFORK|O_CLOEXEC->no flags", + .clo_flags = O_CLOFORK | O_CLOEXEC, + .clo_func = oclo_setfd_none +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOEXEC->no flags", + .clo_flags = O_CLOEXEC, + .clo_func = oclo_setfd_none +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOFORK->no flags", + .clo_flags = O_CLOFORK, + .clo_func = oclo_setfd_none +}, { + .clo_desc = "fcntl(F_SETFD) no flags->O_CLOEXEC", + .clo_flags = 0, + .clo_func = oclo_setfd_exec +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOFORK|O_CLOEXEC->O_CLOEXEC", + .clo_flags = O_CLOFORK | O_CLOEXEC, + .clo_func = oclo_setfd_exec +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOEXEC->O_CLOEXEC", + .clo_flags = O_CLOEXEC, + .clo_func = oclo_setfd_exec +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOFORK->O_CLOEXEC", + .clo_flags = O_CLOFORK, + .clo_func = oclo_setfd_exec +}, { + .clo_desc = "fcntl(F_SETFD) no flags->O_CLOFORK", + .clo_flags = 0, + .clo_func = oclo_setfd_fork +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOFORK|O_CLOEXEC->O_CLOFORK", + .clo_flags = O_CLOFORK | O_CLOEXEC, + .clo_func = oclo_setfd_fork +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOEXEC->O_CLOFORK", + .clo_flags = O_CLOEXEC, + .clo_func = oclo_setfd_fork +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOFORK->O_CLOFORK", + .clo_flags = O_CLOFORK, + .clo_func = oclo_setfd_fork +}, { + .clo_desc = "fcntl(F_SETFD) no flags->O_CLOFORK|O_CLOEXEC", + .clo_flags = 0, + .clo_func = oclo_setfd_both +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOFORK|O_CLOEXEC->O_CLOFORK|O_CLOEXEC", + .clo_flags = O_CLOFORK | O_CLOEXEC, + .clo_func = oclo_setfd_both +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOEXEC->O_CLOFORK|O_CLOEXEC", + .clo_flags = O_CLOEXEC, + .clo_func = oclo_setfd_both +}, { + .clo_desc = "fcntl(F_SETFD) O_CLOFORK->O_CLOFORK|O_CLOEXEC", + .clo_flags = O_CLOFORK, + .clo_func = oclo_setfd_both +}, { + .clo_desc = "fcntl(F_DUPFD) none->none", + .clo_flags = 0, + .clo_func = oclo_fdupfd +}, { + .clo_desc = "fcntl(F_DUPFD) FD_CLOEXEC->none", + .clo_flags = FD_CLOEXEC, + 
.clo_func = oclo_fdupfd +}, { + .clo_desc = "fcntl(F_DUPFD) FD_CLOFORK->none", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdupfd +}, { + .clo_desc = "fcntl(F_DUPFD) FD_CLOEXEC|FD_CLOFORK->none", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdupfd +}, { + .clo_desc = "fcntl(F_DUPFD_CLOFORK) none", + .clo_flags = 0, + .clo_func = oclo_fdupfd_fork +}, { + .clo_desc = "fcntl(F_DUPFD_CLOFORK) FD_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdupfd_fork +}, { + .clo_desc = "fcntl(F_DUPFD_CLOFORK) FD_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdupfd_fork +}, { + .clo_desc = "fcntl(F_DUPFD_CLOFORK) FD_CLOEXEC|FD_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdupfd_fork +}, { + .clo_desc = "fcntl(F_DUPFD_CLOEXEC) none", + .clo_flags = 0, + .clo_func = oclo_fdupfd_exec +}, { + .clo_desc = "fcntl(F_DUPFD_CLOEXEC) FD_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdupfd_exec +}, { + .clo_desc = "fcntl(F_DUPFD_CLOEXEC) FD_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdupfd_exec +}, { + .clo_desc = "fcntl(F_DUPFD_CLOEXEC) FD_CLOEXEC|FD_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdupfd_exec +}, { + .clo_desc = "fcntl(F_DUP2FD) none->none", + .clo_flags = 0, + .clo_func = oclo_fdup2fd +}, { + .clo_desc = "fcntl(F_DUP2FD) FD_CLOEXEC->none", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdup2fd +}, { + .clo_desc = "fcntl(F_DUP2FD) FD_CLOFORK->none", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdup2fd +}, { + .clo_desc = "fcntl(F_DUP2FD) FD_CLOEXEC|FD_CLOFORK->none", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdup2fd +}, { +#ifdef F_DUP2FD_CLOFORK + .clo_desc = "fcntl(F_DUP2FD_CLOFORK) none", + .clo_flags = 0, + .clo_func = oclo_fdup2fd_fork +}, { + .clo_desc = "fcntl(F_DUP2FD_CLOFORK) FD_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdup2fd_fork +}, { + .clo_desc = "fcntl(F_DUP2FD_CLOFORK) FD_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = 
oclo_fdup2fd_fork +}, { + .clo_desc = "fcntl(F_DUP2FD_CLOFORK) FD_CLOEXEC|FD_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdup2fd_fork +}, { +#endif + .clo_desc = "fcntl(F_DUP2FD_CLOEXEC) none", + .clo_flags = 0, + .clo_func = oclo_fdup2fd_exec +}, { + .clo_desc = "fcntl(F_DUP2FD_CLOEXEC) FD_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdup2fd_exec +}, { + .clo_desc = "fcntl(F_DUP2FD_CLOEXEC) FD_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdup2fd_exec +}, { + .clo_desc = "fcntl(F_DUP2FD_CLOEXEC) FD_CLOEXEC|FD_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdup2fd_exec +}, { + .clo_desc = "fcntl(F_DUP3FD) none->none", + .clo_flags = 0, + .clo_func = oclo_fdup3fd_none +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC->none", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdup3fd_none +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOFORK->none", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdup3fd_none +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC|FD_CLOFORK->none", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdup3fd_none +}, { + .clo_desc = "fcntl(F_DUP3FD) none->FD_CLOEXEC", + .clo_flags = 0, + .clo_func = oclo_fdup3fd_exec +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC->FD_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdup3fd_exec +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOFORK->FD_CLOEXEC", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdup3fd_exec +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC|FD_CLOFORK->FD_CLOEXEC", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdup3fd_exec +}, { + .clo_desc = "fcntl(F_DUP3FD) none->FD_CLOFORK|FD_CLOEXEC", + .clo_flags = 0, + .clo_func = oclo_fdup3fd_both +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC->FD_CLOFORK|FD_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdup3fd_both +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOFORK->FD_CLOFORK|FD_CLOEXEC", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdup3fd_both +}, { + 
.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC|FD_CLOFORK->" + "FD_CLOFORK|FD_CLOEXEC", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdup3fd_both +}, { + .clo_desc = "fcntl(F_DUP3FD) none->FD_CLOFORK", + .clo_flags = 0, + .clo_func = oclo_fdup3fd_fork +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC->FD_CLOFORK", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_fdup3fd_fork +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOFORK->FD_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_fdup3fd_fork +}, { + .clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC|FD_CLOFORK->FD_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_fdup3fd_fork +}, { + .clo_desc = "dup2() none->none", + .clo_flags = 0, + .clo_func = oclo_dup2 +}, { + .clo_desc = "dup2() FD_CLOEXEC->none", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_dup2 +}, { + .clo_desc = "dup2() FD_CLOFORK->none", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_dup2 +}, { + .clo_desc = "dup2() FD_CLOEXEC|FD_CLOFORK->none", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_dup2 +}, { + .clo_desc = "dup3() none->none", + .clo_flags = 0, + .clo_func = oclo_dup3_none +}, { + .clo_desc = "dup3() FD_CLOEXEC->none", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_dup3_none +}, { + .clo_desc = "dup3() FD_CLOFORK->none", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_dup3_none +}, { + .clo_desc = "dup3() FD_CLOEXEC|FD_CLOFORK->none", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_dup3_none +}, { + .clo_desc = "dup3() none->FD_CLOEXEC", + .clo_flags = 0, + .clo_func = oclo_dup3_exec +}, { + .clo_desc = "dup3() FD_CLOEXEC->FD_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_dup3_exec +}, { + .clo_desc = "dup3() FD_CLOFORK->FD_CLOEXEC", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_dup3_exec +}, { + .clo_desc = "dup3() FD_CLOEXEC|FD_CLOFORK->FD_CLOEXEC", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_dup3_exec +}, { + .clo_desc = "dup3() none->FD_CLOFORK|FD_CLOEXEC", + .clo_flags = 0, + 
.clo_func = oclo_dup3_both +}, { + .clo_desc = "dup3() FD_CLOEXEC->FD_CLOFORK|FD_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_dup3_both +}, { + .clo_desc = "dup3() FD_CLOFORK->FD_CLOFORK|FD_CLOEXEC", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_dup3_both +}, { + .clo_desc = "dup3() FD_CLOEXEC|FD_CLOFORK->FD_CLOFORK|FD_CLOEXEC", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_dup3_both +}, { + .clo_desc = "dup3() none->FD_CLOFORK", + .clo_flags = 0, + .clo_func = oclo_dup3_fork +}, { + .clo_desc = "dup3() FD_CLOEXEC->FD_CLOFORK", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_dup3_fork +}, { + .clo_desc = "dup3() FD_CLOFORK->FD_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_dup3_fork +}, { + .clo_desc = "dup3() FD_CLOEXEC|FD_CLOFORK->FD_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_dup3_fork +}, { + .clo_desc = "pipe(2), no flags", + .clo_flags = 0, + .clo_func = oclo_pipe +}, { + .clo_desc = "pipe(2), O_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_pipe +}, { + .clo_desc = "pipe(2), O_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_pipe +}, { + .clo_desc = "pipe(2), O_CLOEXEC|O_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_pipe +}, { + .clo_desc = "socket(2), no flags", + .clo_flags = 0, + .clo_func = oclo_socket +}, { + .clo_desc = "socket(2), O_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_socket +}, { + .clo_desc = "socket(2), O_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_socket +}, { + .clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_socket +}, { + .clo_desc = "socket(2), no flags->accept() none", + .clo_flags = 0, + .clo_func = oclo_accept +}, { + .clo_desc = "socket(2), O_CLOEXEC->accept() none", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_accept +}, { + .clo_desc = "socket(2), O_CLOFORK->accept() none", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_accept +}, { + .clo_desc = "socket(2), 
O_CLOEXEC|O_CLOFORK->accept() none", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_accept +}, { + .clo_desc = "socket(2), no flags->accept4() none", + .clo_flags = 0, + .clo_func = oclo_accept4_none +}, { + .clo_desc = "socket(2), O_CLOEXEC->accept4() none", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_accept4_none +}, { + .clo_desc = "socket(2), O_CLOFORK->accept4() none", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_accept4_none +}, { + .clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept4() none", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_accept4_none +}, { + .clo_desc = "socket(2), no flags->accept4() SOCK_CLOFORK|SOCK_CLOEXEC", + .clo_flags = 0, + .clo_func = oclo_accept4_both +}, { + .clo_desc = "socket(2), O_CLOEXEC->accept4() SOCK_CLOFORK|SOCK_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_accept4_both +}, { + .clo_desc = "socket(2), O_CLOFORK->accept4() SOCK_CLOFORK|SOCK_CLOEXEC", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_accept4_both +}, { + .clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept4() " + "SOCK_CLOFORK|SOCK_CLOEXEC", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_accept4_both +}, { + .clo_desc = "socket(2), no flags->accept4() SOCK_CLOFORK", + .clo_flags = 0, + .clo_func = oclo_accept4_fork +}, { + .clo_desc = "socket(2), O_CLOEXEC->accept4() SOCK_CLOFORK", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_accept4_fork +}, { + .clo_desc = "socket(2), O_CLOFORK->accept4() SOCK_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_accept4_fork +}, { + .clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept4() SOCK_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_accept4_fork +}, { + .clo_desc = "socket(2), no flags->accept4() SOCK_CLOEXEC", + .clo_flags = 0, + .clo_func = oclo_accept4_exec +}, { + .clo_desc = "socket(2), O_CLOEXEC->accept4() SOCK_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_accept4_exec +}, { + .clo_desc = "socket(2), O_CLOFORK->accept4() 
SOCK_CLOEXEC", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_accept4_exec +}, { + .clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept4() SOCK_CLOEXEC", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_accept4_exec +}, { + .clo_desc = "SCM_RIGHTS none->none", + .clo_flags = 0, + .clo_func = oclo_rights_none +}, { + .clo_desc = "SCM_RIGHTS FD_CLOFORK->none", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_rights_none +}, { + .clo_desc = "SCM_RIGHTS FD_CLOEXEC->none", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_rights_none +}, { + .clo_desc = "SCM_RIGHTS FD_CLOEXEC|FD_CLOFORK->none", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_rights_none +}, { + .clo_desc = "SCM_RIGHTS none->MSG_CMSG_CLOEXEC", + .clo_flags = 0, + .clo_func = oclo_rights_exec +}, { + .clo_desc = "SCM_RIGHTS FD_CLOFORK->MSG_CMSG_CLOEXEC", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_rights_exec +}, { + .clo_desc = "SCM_RIGHTS FD_CLOEXEC->MSG_CMSG_CLOEXEC", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_rights_exec +}, { + .clo_desc = "SCM_RIGHTS FD_CLOEXEC|FD_CLOFORK->MSG_CMSG_CLOEXEC", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_rights_exec +}, { + .clo_desc = "SCM_RIGHTS MSG_CMSG_CLOFORK->nMSG_CMSG_CLOFORK", + .clo_flags = 0, + .clo_func = oclo_rights_fork +}, { + .clo_desc = "SCM_RIGHTS FD_CLOFORK->MSG_CMSG_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_rights_fork +}, { + .clo_desc = "SCM_RIGHTS FD_CLOEXEC->MSG_CMSG_CLOFORK", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_rights_fork +}, { + .clo_desc = "SCM_RIGHTS FD_CLOEXEC|FD_CLOFORK->MSG_CMSG_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_rights_fork +}, { + .clo_desc = "SCM_RIGHTS none->MSG_CMSG_CLOEXEC|MSG_CMSG_CLOFORK", + .clo_flags = 0, + .clo_func = oclo_rights_both +}, { + .clo_desc = "SCM_RIGHTS FD_CLOFORK->MSG_CMSG_CLOEXEC|MSG_CMSG_CLOFORK", + .clo_flags = FD_CLOFORK, + .clo_func = oclo_rights_both +}, { + .clo_desc = "SCM_RIGHTS 
FD_CLOEXEC->MSG_CMSG_CLOEXEC|MSG_CMSG_CLOFORK", + .clo_flags = FD_CLOEXEC, + .clo_func = oclo_rights_both +}, { + .clo_desc = "SCM_RIGHTS FD_CLOEXEC|FD_CLOFORK->" + "MSG_CMSG_CLOEXEC|MSG_CMSG_CLOFORK", + .clo_flags = FD_CLOEXEC | FD_CLOFORK, + .clo_func = oclo_rights_both +} }; + +static bool +oclo_verify_fork(void) +{ + bool ret = true; + + for (size_t i = 0; i < oclo_rtdata_next; i++) { + if (!oclo_flags_match(&oclo_rtdata[i], true)) { + ret = false; + } + } + + return (ret); +} + +/* + * Here we proceed to re-open any fd that was closed due to O_CLOFORK again to + * make sure that the file descriptor makes it to our child verifier. + * Importantly, with the changes that cause O_CLOFORK to be cleared on exec, we + * should not see any file descriptors with the flag in the child. This allows + * us to confirm that this is what we expect. + * + * In addition, this serves as a test to make sure that our opening of the + * lowest fd is correct. While this doesn't actually use the same method as was + * done previously, this should get us most of the way there. + */ +static void +oclo_child_reopen(void) +{ + for (size_t i = 0; i < oclo_rtdata_next; i++) { + int fd; + int flags = O_RDWR | O_CLOFORK; + + if ((oclo_rtdata[i].crt_flags & FD_CLOFORK) == 0) + continue; + + if ((oclo_rtdata[i].crt_flags & FD_CLOEXEC) != 0) + flags |= O_CLOEXEC; + + fd = open("/dev/zero", flags); + if (fd < 0) { + err(EXIT_FAILURE, "TEST FAILED: failed to re-open fd " + "%d with flags %d", oclo_rtdata[i].crt_fd, flags); + } + + if (fd != oclo_rtdata[i].crt_fd) { + errx(EXIT_FAILURE, "TEST FAILED: re-opening fd %d " + "returned fd %d: test design issue or lowest fd " + "algorithm is broken", oclo_rtdata[i].crt_fd, fd); + } + } + + (void) printf("TEST PASSED: successfully reopened fds post-fork"); +} + +/* + * Look for the verification program in the same directory that this program is + * found in. Note, that isn't the same thing as the current working directory. 
+ */ +static void +oclo_exec(void) +{ + ssize_t ret; + char dir[PATH_MAX], file[PATH_MAX]; + char **argv; + + ret = getpathname(getpid(), dir, sizeof(dir)); + if (ret < 0) + err(EXIT_FAILURE, "TEST FAILED: failed to read executable path"); + + if (snprintf(file, sizeof (file), "%s/%s", dirname(dir), OCLO_VERIFY) >= + (int)sizeof (file)) { + errx(EXIT_FAILURE, "TEST FAILED: cannot assemble exec path " + "name: internal buffer overflow"); + } + + /* We need an extra for both the NULL terminator and the program name */ + argv = calloc(oclo_rtdata_next + 2, sizeof (char *)); + if (argv == NULL) { + err(EXIT_FAILURE, "TEST FAILED: failed to allocate exec " + "argument array"); + } + + argv[0] = file; + for (size_t i = 0; i < oclo_rtdata_next; i++) { + if (asprintf(&argv[i + 1], "0x%x", oclo_rtdata[i].crt_flags) == + -1) { + err(EXIT_FAILURE, "TEST FAILED: failed to assemble " + "exec argument %zu", i + 1); + } + } + + (void) execv(file, argv); + warn("TEST FAILED: failed to exec verifier %s", file); +} + +int +main(void) +{ + int ret = EXIT_SUCCESS; + siginfo_t cret; + + /* + * Before we do anything else close all FDs that aren't standard. We + * don't want anything the test suite environment may have left behind. + */ + (void) closefrom(STDERR_FILENO + 1); + + /* + * Treat failure during this set up phase as a hard failure. There's no + * reason to continue if we can't successfully create the FDs we expect. 
+ */ + for (size_t i = 0; i < nitems(oclo_create); i++) { + oclo_create[i].clo_func(&oclo_create[i]); + } + + pid_t child = fork(); + if (child == 0) { + if (!oclo_verify_fork()) { + ret = EXIT_FAILURE; + } + + oclo_child_reopen(); + + oclo_exec(); + ret = EXIT_FAILURE; + _exit(ret); + } + + if (waitid(P_PID, child, &cret, WEXITED) < 0) { + err(EXIT_FAILURE, "TEST FAILED: internal test failure waiting " + "for forked child to report"); + } + + if (cret.si_code != CLD_EXITED) { + warnx("TEST FAILED: child process did not successfully exit: " + "found si_code: %d", cret.si_code); + ret = EXIT_FAILURE; + } else if (cret.si_status != 0) { + warnx("TEST FAILED: child process did not exit with code 0: " + "found %d", cret.si_status); + ret = EXIT_FAILURE; + } + + if (ret == EXIT_SUCCESS) { + (void) printf("All tests passed successfully\n"); + } + + return (ret); +} diff --git a/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/oclo_errors.c b/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/oclo_errors.c new file mode 100644 index 000000000000..05b0c1a0839b --- /dev/null +++ b/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/oclo_errors.c @@ -0,0 +1,202 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2024 Oxide Computer Company + */ + +/* + * Verify that unsupported flags will properly generate errors across the + * functions that we know perform strict error checking. This includes: + * + * o fcntl(..., F_DUP3FD, ...) 
+ * o dup3() + * o pipe2() + * o socket() + * o accept4() + */ + +#include <netinet/in.h> +#include <sys/socket.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#define strerrorname_np(e) (sys_errlist[e]) + +static bool +oclo_check(const char *desc, const char *act, int ret, int e) +{ + if (ret >= 0) { + warnx("TEST FAILED: %s: fd was %s!", desc, act); + return (false); + } else if (e != EINVAL) { + /* errno may be stale; use the caller-saved value */ + warnx("TEST FAILED: %s: failed with %s, expected " + "EINVAL", desc, strerrorname_np(e)); + return (false); + } + + (void) printf("TEST PASSED: %s: correctly failed with EINVAL\n", + desc); + return (true); +} + +static bool +oclo_dup3(const char *desc, int flags) +{ + int fd = dup3(STDERR_FILENO, 23, flags); + return (oclo_check(desc, "duplicated", fd, errno)); +} + +static bool +oclo_dup3fd(const char *desc, int flags) +{ + int fd = fcntl(STDERR_FILENO, F_DUP3FD | (flags << F_DUP3FD_SHIFT), 23); + return (oclo_check(desc, "duplicated", fd, errno)); +} + + +static bool +oclo_pipe2(const char *desc, int flags) +{ + int fds[2], ret; + + ret = pipe2(fds, flags); + return (oclo_check(desc, "piped", ret, errno)); +} + +#if 0 +static bool +oclo_socket(const char *desc, int type) +{ + int fd = socket(PF_UNIX, SOCK_STREAM | type, 0); + return (oclo_check(desc, "created", fd, errno)); +} +#endif + +static bool +oclo_accept(const char *desc, int flags) +{ + int sock, fd, e; + struct sockaddr_in in; + + sock = socket(PF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); + if (sock < 0) { + warn("TEST FAILED: %s: failed to create listen socket", desc); + return (false); + } + + (void) memset(&in, 0, sizeof (in)); + in.sin_family = AF_INET; + in.sin_port = 0; + in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + if (bind(sock, (struct sockaddr *)&in, sizeof (in)) != 0) { + warn("TEST FAILED: %s: failed to bind socket",
desc); + (void) close(sock); + return (false); + } + + if (listen(sock, 5) < 0) { + warn("TEST FAILED: %s: failed to listen on socket", desc); + (void) close(sock); + return (false); + } + + + fd = accept4(sock, NULL, NULL, flags); + e = errno; + (void) close(sock); + return (oclo_check(desc, "accepted", fd, e)); +} + +int +main(void) +{ + int ret = EXIT_SUCCESS; + + closefrom(STDERR_FILENO + 1); + + if (!oclo_dup3("dup3(): O_RDWR", O_RDWR)) { + ret = EXIT_FAILURE; + } + + if (!oclo_dup3("dup3(): O_NONBLOCK|O_CLOXEC", O_NONBLOCK | O_CLOEXEC)) { + ret = EXIT_FAILURE; + } + + if (!oclo_dup3("dup3(): O_CLOFORK|O_WRONLY", O_CLOFORK | O_WRONLY)) { + ret = EXIT_FAILURE; + } + + if (!oclo_dup3fd("fcntl(FDUP3FD): 0x7777", 0x7777)) { + ret = EXIT_FAILURE; + } + + if (!oclo_dup3fd("fcntl(FDUP3FD): FD_CLOEXEC|FD_CLOFORK + 1", + (FD_CLOEXEC | FD_CLOFORK) + 1)) { + ret = EXIT_FAILURE; + } + + if (!oclo_dup3fd("fcntl(FDUP3FD): INT_MAX", INT_MAX)) { + ret = EXIT_FAILURE; + } + + + if (!oclo_pipe2("pipe2(): O_RDWR", O_RDWR)) { + ret = EXIT_FAILURE; + } + + if (!oclo_pipe2("pipe2(): O_SYNC|O_CLOXEC", O_SYNC | O_CLOEXEC)) { + ret = EXIT_FAILURE; + } + + if (!oclo_pipe2("pipe2(): O_CLOFORK|O_WRONLY", O_CLOFORK | O_WRONLY)) { + ret = EXIT_FAILURE; + } + + if (!oclo_pipe2("pipe2(): INT32_MAX", INT32_MAX)) { + ret = EXIT_FAILURE; + } + +#if 0 /* These tests are known to fail on FreeBSD */ + if (!oclo_socket("socket(): INT32_MAX", INT32_MAX)) { + ret = EXIT_FAILURE; + } + + if (!oclo_socket("socket(): 3 << 25", 3 << 25)) { + ret = EXIT_FAILURE; + } +#endif + + if (!oclo_accept("accept4(): INT32_MAX", INT32_MAX)) { + ret = EXIT_FAILURE; + } + + if (!oclo_accept("accept4(): 3 << 25", 3 << 25)) { + ret = EXIT_FAILURE; + } + + if (ret == EXIT_SUCCESS) { + (void) printf("All tests completed successfully\n"); + } + + return (ret); +} diff --git a/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/ocloexec_verify.c b/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/ocloexec_verify.c new file 
mode 100644 index 000000000000..e33c61f03d54 --- /dev/null +++ b/cddl/contrib/opensolaris/tests/os-tests/tests/oclo/ocloexec_verify.c @@ -0,0 +1,154 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2025 Oxide Computer Company + */ + +/* + * Verify that our file descriptors starting after stderr are correct based upon + * the series of passed in arguments from the 'oclo' program. Arguments are + * passed as a string that represents the flags that were originally verified + * pre-fork/exec via fcntl(F_GETFD). In addition, anything that was originally + * closed because it had FD_CLOFORK set was reopened with the same flags. This + * allows us to verify that the combinations worked and that FD_CLOFORK was + * properly cleared. + */ + +#include <sys/types.h> +#include <sys/user.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <libutil.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#define strerrorname_np(e) (sys_errlist[e]) + +static int +getmaxfd(void) +{ + struct kinfo_file *files; + int i, cnt, max; + + if ((files = kinfo_getfile(getpid(), &cnt)) == NULL) + err(1, "kinfo_getfile"); + + max = -1; + for (i = 0; i < cnt; i++) + if (files[i].kf_fd > max) + max = files[i].kf_fd; + + free(files); + return (max); +} + +/* + * Our flags may have FD_CLOFORK set in them (anything with FD_CLOEXEC Should + * not exist by definition). FD_CLOFORK is supposed to be cleared on exec. We + * still indicate which file descriptors FD_CLOFORK so we can check where it + * wasn't cleared. 
+ */ +static bool +verify_flags(int fd, int exp_flags) +{ + bool fail = (exp_flags & FD_CLOEXEC) != 0; + int flags = fcntl(fd, F_GETFD, NULL); + bool clofork = (exp_flags & FD_CLOFORK) != 0; + exp_flags &= ~FD_CLOFORK; + + if (flags < 0) { + int e = errno; + + if (fail) { + if (e == EBADF) { + (void) printf("TEST PASSED: post-exec fd %d: " + "flags 0x%x: correctly closed\n", fd, + exp_flags); + return (true); + } + + + warn("TEST FAILED: post-fork fd %d: expected fcntl to " + "fail with EBADF, but found %s", fd, + strerrorname_np(e)); + return (false); + } + + warnx("TEST FAILED: post-fork fd %d: fcntl(F_GETFD) " + "unexpectedly failed with %s, expected flags %d", fd, + strerrorname_np(e), exp_flags); + return (false); + } + + if (fail) { + warnx("TEST FAILED: post-fork fd %d: received flags %d, but " + "expected to fail based on flags %d", fd, flags, exp_flags); + return (false); + } + + if (clofork && (flags & FD_CLOFORK) != 0) { + warnx("TEST FAILED: post-fork fd %d (flags %d) retained " + "FD_CLOFORK, but it should have been cleared", fd, flags); + return (false); + } + + if (flags != exp_flags) { + warnx("TEST FAILED: post-exec fd %d: discovered flags 0x%x do " + "not match expected flags 0x%x", fd, flags, exp_flags); + return (false); + } + + (void) printf("TEST PASSED: post-exec fd %d: flags 0x%x: successfully " + "matched\n", fd, exp_flags); + return (true); +} + +int +main(int argc, char *argv[]) +{ + int maxfd; + int ret = EXIT_SUCCESS; + + /* + * We should have one argument for each fd we found, ignoring stdin, + * stdout, and stderr. argc will also have an additional entry for our + * program name, which we want to skip. Note, the last fd may not exist + * because it was marked for close, hence the use of '>' below. 
+ */ + maxfd = getmaxfd(); + if (maxfd - 3 > argc - 1) { + errx(EXIT_FAILURE, "TEST FAILED: found more fds %d than " + "arguments %d", maxfd - 3, argc - 1); + } + + for (int i = 1; i < argc; i++) { + char *endptr; + int targ_fd = i + STDERR_FILENO; + errno = 0; + long long val = strtoll(argv[i], &endptr, 0); + + if (errno != 0 || *endptr != '\0' || + (val < 0 || val > (FD_CLOEXEC | FD_CLOFORK))) { + errx(EXIT_FAILURE, "TEST FAILED: failed to parse " + "argument %d: %s", i, argv[i]); + } + + if (!verify_flags(targ_fd, (int)val)) + ret = EXIT_FAILURE; + } + + return (ret); +} diff --git a/etc/mtree/BSD.usr.dist b/etc/mtree/BSD.usr.dist index 97b555e50dc1..ffdd82ae9911 100644 --- a/etc/mtree/BSD.usr.dist +++ b/etc/mtree/BSD.usr.dist @@ -304,6 +304,8 @@ .. indent .. + inotify + .. ipfilter .. ipfw diff --git a/lib/clang/libclang/Makefile b/lib/clang/libclang/Makefile index dc9e0010e309..7eb2c99b25c8 100644 --- a/lib/clang/libclang/Makefile +++ b/lib/clang/libclang/Makefile @@ -841,6 +841,11 @@ SRCS_MIN+= Tooling/ArgumentsAdjusters.cpp SRCS_MIN+= Tooling/CommonOptionsParser.cpp SRCS_MIN+= Tooling/CompilationDatabase.cpp SRCS_MIN+= Tooling/Core/Replacement.cpp +SRCS_MIN+= Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +SRCS_MIN+= Tooling/DependencyScanning/DependencyScanningService.cpp +SRCS_MIN+= Tooling/DependencyScanning/DependencyScanningTool.cpp +SRCS_MIN+= Tooling/DependencyScanning/DependencyScanningWorker.cpp +SRCS_MIN+= Tooling/DependencyScanning/ModuleDepCollector.cpp SRCS_MIN+= Tooling/ExpandResponseFilesCompilationDatabase.cpp SRCS_MIN+= Tooling/FileMatchTrie.cpp SRCS_MIN+= Tooling/GuessTargetAndModeCompilationDatabase.cpp @@ -848,6 +853,7 @@ SRCS_MIN+= Tooling/Inclusions/HeaderIncludes.cpp SRCS_MIN+= Tooling/Inclusions/IncludeStyle.cpp SRCS_MIN+= Tooling/InterpolatingCompilationDatabase.cpp SRCS_MIN+= Tooling/JSONCompilationDatabase.cpp +SRCS_MIN+= Tooling/LocateToolCompilationDatabase.cpp SRCS_MIN+= Tooling/Refactoring.cpp SRCS_MIN+= 
Tooling/RefactoringCallbacks.cpp SRCS_MIN+= Tooling/Tooling.cpp diff --git a/lib/lib80211/regdomain.xml b/lib/lib80211/regdomain.xml index 9116e54c31cf..16b74445f429 100644 --- a/lib/lib80211/regdomain.xml +++ b/lib/lib80211/regdomain.xml @@ -494,6 +494,10 @@ <flags>IEEE80211_CHAN_PASSIVE</flags> <flags>IEEE80211_CHAN_DFS</flags> </band> + <band> + <freqband ref="A20_5745_5865"/> + <maxpower>13</maxpower> + </band> </netband> <netband mode="11ng"> <band> @@ -548,6 +552,14 @@ <flags>IEEE80211_CHAN_PASSIVE</flags> <flags>IEEE80211_CHAN_DFS</flags> </band> + <band> + <freqband ref="NA20_5745_5865"/> + <maxpower>13</maxpower> + </band> + <band> + <freqband ref="NA40_5745_5845"/> + <maxpower>13</maxpower> + </band> </netband> <netband mode="11ac"> <!-- 5150-5250/80, 200 mW, indoor --> @@ -645,7 +657,7 @@ <flags>IEEE80211_CHAN_DFS</flags> </band> <band> - <freqband ref="AC2_5745_5805_40"/> + <freqband ref="AC2_5745_5845_40"/> <maxpower>13</maxpower> <flags>IEEE80211_CHAN_HT40</flags> <flags>IEEE80211_CHAN_VHT40</flags> @@ -658,13 +670,6 @@ <flags>IEEE80211_CHAN_VHT80</flags> <flags>IEEE80211_CHAN_DFS</flags> </band> - <band> - <freqband ref="AC2_5745_5885_160"/> - <maxpower>13</maxpower> - <flags>IEEE80211_CHAN_HT40</flags> - <flags>IEEE80211_CHAN_VHT160</flags> - <flags>IEEE80211_CHAN_DFS</flags> - </band> </netband> </rd> @@ -2304,6 +2309,29 @@ <chanwidth>20</chanwidth> <chansep>20</chansep> <flags>IEEE80211_CHAN_A</flags> </freqband> +<freqband id="A20_5745_5865"> + <freqstart>5745</freqstart> + <freqend>5865</freqend> + <chanwidth>20</chanwidth> + <chansep>20</chansep> + <flags>IEEE80211_CHAN_A</flags> +</freqband> +<freqband id="NA20_5745_5865"> + <freqstart>5745</freqstart> + <freqend>5865</freqend> + <chanwidth>20</chanwidth> + <chansep>20</chansep> + <flags>IEEE80211_CHAN_A</flags> + <flags>IEEE80211_CHAN_HT20</flags> +</freqband> +<freqband id="NA40_5745_5845"> + <freqstart>5745</freqstart> + <freqend>5845</freqend> + <chanwidth>40</chanwidth> + 
<chansep>20</chansep> + <flags>IEEE80211_CHAN_A</flags> + <flags>IEEE80211_CHAN_HT40</flags> +</freqband> <freqband id="F1_5660_5700"> <freqstart>5660</freqstart> <freqend>5700</freqend> <chanwidth>20</chanwidth> <chansep>20</chansep> diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index ad13aaa65621..4d064d18d36e 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -89,6 +89,7 @@ SRCS+= \ glob.c \ glob-compat11.c \ initgroups.c \ + inotify.c \ isatty.c \ isinf.c \ isnan.c \ diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index 8faecf4b3048..50dbf3425964 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -464,6 +464,9 @@ FBSD_1.8 { fdscandir_b; fts_open_b; glob_b; + inotify_add_watch; + inotify_init; + inotify_init1; psiginfo; rtld_get_var; rtld_set_var; diff --git a/lib/libc/gen/dup3.3 b/lib/libc/gen/dup3.3 index f2798930797b..338a9ae74c64 100644 --- a/lib/libc/gen/dup3.3 +++ b/lib/libc/gen/dup3.3 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 16, 2013 +.Dd May 17, 2025 .Dt DUP3 3 .Os .Sh NAME @@ -47,6 +47,11 @@ The close-on-exec flag on the new file descriptor is determined by the bit in .Fa flags . .Pp +The close-on-fork flag on the new file descriptor is determined by the +.Dv O_CLOFORK +bit in +.Fa flags . +.Pp If .Fa oldd \*(Ne @@ -91,7 +96,9 @@ argument. The .Fa flags argument has bits set other than -.Dv O_CLOEXEC . +.Dv O_CLOEXEC +or +.Dv O_CLOFORK . .El .Sh SEE ALSO .Xr accept 2 , @@ -112,3 +119,7 @@ The .Fn dup3 function appeared in .Fx 10.0 . +The +.Dv O_CLOFORK +flag appeared in +.Fx 15.0 . 
diff --git a/lib/libc/gen/dup3.c b/lib/libc/gen/dup3.c index fca1e99fb47b..1401c1f5b607 100644 --- a/lib/libc/gen/dup3.c +++ b/lib/libc/gen/dup3.c @@ -39,21 +39,22 @@ int __dup3(int, int, int); int __dup3(int oldfd, int newfd, int flags) { - int how; + int fdflags; if (oldfd == newfd) { errno = EINVAL; return (-1); } - if (flags & ~O_CLOEXEC) { + if ((flags & ~(O_CLOEXEC | O_CLOFORK)) != 0) { errno = EINVAL; return (-1); } - how = (flags & O_CLOEXEC) ? F_DUP2FD_CLOEXEC : F_DUP2FD; + fdflags = ((flags & O_CLOEXEC) != 0 ? FD_CLOEXEC : 0) | + ((flags & O_CLOFORK) != 0 ? FD_CLOFORK : 0); - return (_fcntl(oldfd, how, newfd)); + return (_fcntl(oldfd, F_DUP3FD | (fdflags << F_DUP3FD_SHIFT), newfd)); } __weak_reference(__dup3, dup3); diff --git a/lib/libc/gen/inotify.c b/lib/libc/gen/inotify.c new file mode 100644 index 000000000000..7ce53aaccd58 --- /dev/null +++ b/lib/libc/gen/inotify.c @@ -0,0 +1,48 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +#include "namespace.h" +#include <sys/fcntl.h> +#include <sys/inotify.h> +#include <sys/specialfd.h> +#include "un-namespace.h" +#include "libc_private.h" + +/* + * Provide compatibility with libinotify, which uses different values for these + * flags. 
+ */ +#define IN_NONBLOCK_OLD 0x80000 +#define IN_CLOEXEC_OLD 0x00800 + +int +inotify_add_watch(int fd, const char *pathname, uint32_t mask) +{ + return (inotify_add_watch_at(fd, AT_FDCWD, pathname, mask)); +} + +int +inotify_init1(int flags) +{ + struct specialfd_inotify args; + + if ((flags & IN_NONBLOCK_OLD) != 0) { + flags &= ~IN_NONBLOCK_OLD; + flags |= IN_NONBLOCK; + } + if ((flags & IN_CLOEXEC_OLD) != 0) { + flags &= ~IN_CLOEXEC_OLD; + flags |= IN_CLOEXEC; + } + args.flags = flags; + return (__sys___specialfd(SPECIALFD_INOTIFY, &args, sizeof(args))); +} + +int +inotify_init(void) +{ + return (inotify_init1(0)); +} diff --git a/lib/libifconfig/libifconfig.h b/lib/libifconfig/libifconfig.h index 8d5ca01b0ce6..fc835485a51e 100644 --- a/lib/libifconfig/libifconfig.h +++ b/lib/libifconfig/libifconfig.h @@ -29,6 +29,7 @@ #include <sys/types.h> #include <net/if.h> +#include <net/if_bridgevar.h> /* for ifbvlan_set_t */ #include <netinet/in.h> #include <netinet/ip_carp.h> @@ -64,6 +65,7 @@ struct lagg_reqport; struct ifconfig_bridge_status { struct ifbropreq *params; /**< current operational parameters */ struct ifbreq *members; /**< list of bridge members */ + ifbvlan_set_t *member_vlans; /**< bridge member vlan sets */ size_t members_count; /**< how many member interfaces */ uint32_t cache_size; /**< size of address cache */ uint32_t cache_lifetime; /**< address cache entry lifetime */ diff --git a/lib/libifconfig/libifconfig_bridge.c b/lib/libifconfig/libifconfig_bridge.c index 2a9bbc35858b..b4a920f488c5 100644 --- a/lib/libifconfig/libifconfig_bridge.c +++ b/lib/libifconfig/libifconfig_bridge.c @@ -66,40 +66,37 @@ ifconfig_bridge_get_bridge_status(ifconfig_handle_t *h, { struct ifbifconf members; struct ifbrparam cache_param; - struct _ifconfig_bridge_status *bridge; - char *buf; + struct _ifconfig_bridge_status *bridge = NULL; + char *buf = NULL; + members.ifbic_buf = NULL; *bridgep = NULL; bridge = calloc(1, sizeof(struct _ifconfig_bridge_status)); if (bridge 
== NULL) { h->error.errtype = OTHER; h->error.errcode = ENOMEM; - return (-1); + goto err; } bridge->inner.params = &bridge->params; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGCACHE, &cache_param, sizeof(cache_param), false) != 0) { - free(bridge); - return (-1); + goto err; } bridge->inner.cache_size = cache_param.ifbrp_csize; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGTO, &cache_param, sizeof(cache_param), false) != 0) { - free(bridge); - return (-1); + goto err; } bridge->inner.cache_lifetime = cache_param.ifbrp_ctime; if (ifconfig_bridge_ioctlwrap(h, name, BRDGPARAM, &bridge->params, sizeof(bridge->params), false) != 0) { - free(bridge); - return (-1); + goto err; } - members.ifbic_buf = NULL; for (size_t len = 8192; (buf = realloc(members.ifbic_buf, len)) != NULL; len *= 2) { @@ -107,27 +104,52 @@ ifconfig_bridge_get_bridge_status(ifconfig_handle_t *h, members.ifbic_len = len; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGIFS, &members, sizeof(members), false) != 0) { - free(buf); - free(bridge); - return (-1); + goto err; } if ((members.ifbic_len + sizeof(*members.ifbic_req)) < len) break; } if (buf == NULL) { - free(members.ifbic_buf); - free(bridge); h->error.errtype = OTHER; h->error.errcode = ENOMEM; - return (-1); + goto err; } bridge->inner.members = members.ifbic_req; bridge->inner.members_count = members.ifbic_len / sizeof(*members.ifbic_req); + bridge->inner.member_vlans = calloc(bridge->inner.members_count, + sizeof(ifbvlan_set_t)); + if (bridge->inner.member_vlans == NULL) { + h->error.errtype = OTHER; + h->error.errcode = ENOMEM; + goto err; + } + for (size_t i = 0; i < bridge->inner.members_count; ++i) { + struct ifbif_vlan_req vreq; + memset(&vreq, 0, sizeof(vreq)); + strlcpy(vreq.bv_ifname, bridge->inner.members[i].ifbr_ifsname, + sizeof(vreq.bv_ifname)); + + if (ifconfig_bridge_ioctlwrap(h, name, BRDGGIFVLANSET, &vreq, + sizeof(vreq), false) != 0) { + goto err; + } + + __BIT_COPY(BRVLAN_SETSIZE, &vreq.bv_set, + &bridge->inner.member_vlans[i]); 
+ } + *bridgep = &bridge->inner; return (0); + +err: + free(members.ifbic_buf); + if (bridge) + free(bridge->inner.member_vlans); + free(bridge); + return (-1); } void diff --git a/lib/libopenbsd/Makefile b/lib/libopenbsd/Makefile index 675ed476c51d..dca1c08b0aed 100644 --- a/lib/libopenbsd/Makefile +++ b/lib/libopenbsd/Makefile @@ -2,7 +2,8 @@ PACKAGE=lib${LIB} LIB= openbsd SRCS= imsg-buffer.c \ imsg.c \ - ohash.c + ohash.c \ + recallocarray.c .if !defined(BOOTSTRAPPING) # Skip getdtablecount.c when bootstrapping since it doesn't compile for Linux # and is not used by any of the bootstrap tools diff --git a/lib/libopenbsd/recallocarray.c b/lib/libopenbsd/recallocarray.c new file mode 100644 index 000000000000..11e1fda744c7 --- /dev/null +++ b/lib/libopenbsd/recallocarray.c @@ -0,0 +1,82 @@ +/* $OpenBSD: recallocarray.c,v 1.1 2017/03/06 18:44:21 otto Exp $ */ +/* + * Copyright (c) 2008, 2017 Otto Moerbeek <otto@drijf.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <errno.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +/* + * This is sqrt(SIZE_MAX+1), as s1*s2 <= SIZE_MAX + * if both s1 < MUL_NO_OVERFLOW and s2 < MUL_NO_OVERFLOW + */ +#define MUL_NO_OVERFLOW ((size_t)1 << (sizeof(size_t) * 4)) + +void *recallocarray(void *, size_t, size_t, size_t); + +void * +recallocarray(void *ptr, size_t oldnmemb, size_t newnmemb, size_t size) +{ + size_t oldsize, newsize; + void *newptr; + + if (ptr == NULL) + return calloc(newnmemb, size); + + if ((newnmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + newnmemb > 0 && SIZE_MAX / newnmemb < size) { + errno = ENOMEM; + return NULL; + } + newsize = newnmemb * size; + + if ((oldnmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && + oldnmemb > 0 && SIZE_MAX / oldnmemb < size) { + errno = EINVAL; + return NULL; + } + oldsize = oldnmemb * size; + + /* + * Don't bother too much if we're shrinking just a bit, + * we do not shrink for series of small steps, oh well. 
+ */ + if (newsize <= oldsize) { + size_t d = oldsize - newsize; + + if (d < oldsize / 2 && d < (size_t)getpagesize()) { + memset((char *)ptr + newsize, 0, d); + return ptr; + } + } + + newptr = malloc(newsize); + if (newptr == NULL) + return NULL; + + if (newsize > oldsize) { + memcpy(newptr, ptr, oldsize); + memset((char *)newptr + oldsize, 0, newsize - oldsize); + } else + memcpy(newptr, ptr, newsize); + + explicit_bzero(ptr, oldsize); + free(ptr); + + return newptr; +} diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c index 29f464ef6414..eb8137f6c76f 100644 --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -625,6 +625,10 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap type = PS_FST_TYPE_EVENTFD; data = file.f_data; break; + case DTYPE_INOTIFY: + type = PS_FST_TYPE_INOTIFY; + data = file.f_data; + break; default: continue; } @@ -717,6 +721,7 @@ kinfo_type2fst(int kftype) { KF_TYPE_SOCKET, PS_FST_TYPE_SOCKET }, { KF_TYPE_VNODE, PS_FST_TYPE_VNODE }, { KF_TYPE_EVENTFD, PS_FST_TYPE_EVENTFD }, + { KF_TYPE_INOTIFY, PS_FST_TYPE_INOTIFY }, { KF_TYPE_UNKNOWN, PS_FST_TYPE_UNKNOWN } }; #define NKFTYPES (sizeof(kftypes2fst) / sizeof(*kftypes2fst)) diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h index 0e9a4214414c..548747f90171 100644 --- a/lib/libprocstat/libprocstat.h +++ b/lib/libprocstat/libprocstat.h @@ -71,6 +71,7 @@ #define PS_FST_TYPE_PROCDESC 13 #define PS_FST_TYPE_DEV 14 #define PS_FST_TYPE_EVENTFD 15 +#define PS_FST_TYPE_INOTIFY 16 /* * Special descriptor numbers. 
diff --git a/lib/libsys/Makefile.sys b/lib/libsys/Makefile.sys index 491c765e9416..3eb4bf85153d 100644 --- a/lib/libsys/Makefile.sys +++ b/lib/libsys/Makefile.sys @@ -224,6 +224,7 @@ MAN+= abort2.2 \ getsockopt.2 \ gettimeofday.2 \ getuid.2 \ + inotify.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ @@ -448,6 +449,11 @@ MLINKS+=getrlimit.2 setrlimit.2 MLINKS+=getsockopt.2 setsockopt.2 MLINKS+=gettimeofday.2 settimeofday.2 MLINKS+=getuid.2 geteuid.2 +MLINKS+=inotify.2 inotify_init.2 \ + inotify.2 inotify_init1.2 \ + inotify.2 inotify_add_watch.2 \ + inotify.2 inotify_add_watch_at.2 \ + inotify.2 inotify_rm_watch.2 MLINKS+=intro.2 errno.2 MLINKS+=jail.2 jail_attach.2 \ jail.2 jail_get.2 \ diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map index 7fac1ed6160d..45e0160100af 100644 --- a/lib/libsys/Symbol.sys.map +++ b/lib/libsys/Symbol.sys.map @@ -381,6 +381,8 @@ FBSD_1.8 { exterrctl; fchroot; getrlimitusage; + inotify_add_watch_at; + inotify_rm_watch; kcmp; setcred; }; diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h index e2a8f2253814..1799906eb885 100644 --- a/lib/libsys/_libsys.h +++ b/lib/libsys/_libsys.h @@ -466,6 +466,8 @@ typedef int (__sys_getrlimitusage_t)(u_int, int, rlim_t *); typedef int (__sys_fchroot_t)(int); typedef int (__sys_setcred_t)(u_int, const struct setcred *, size_t); typedef int (__sys_exterrctl_t)(u_int, u_int, void *); +typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t); +typedef int (__sys_inotify_rm_watch_t)(int, int); void __sys_exit(int rval); int __sys_fork(void); @@ -868,6 +870,8 @@ int __sys_getrlimitusage(u_int which, int flags, rlim_t * res); int __sys_fchroot(int fd); int __sys_setcred(u_int flags, const struct setcred * wcred, size_t size); int __sys_exterrctl(u_int op, u_int flags, void * ptr); +int __sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask); +int __sys_inotify_rm_watch(int fd, int wd); __END_DECLS #endif /* __LIBSYS_H_ */ diff --git a/lib/libsys/accept.2 
b/lib/libsys/accept.2 index 53926b3153d2..2da2af066a5b 100644 --- a/lib/libsys/accept.2 +++ b/lib/libsys/accept.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd October 9, 2014 +.Dd May 17, 2025 .Dt ACCEPT 2 .Os .Sh NAME @@ -85,6 +85,13 @@ and the close-on-exec flag on the new file descriptor can be set via the flag in the .Fa flags argument. +Similarly, the +.Dv O_CLOFORK +property can be set via the +.Dv SOCK_CLOFORK +flag in the +.Fa flags +argument. .Pp If no pending connections are present on the queue, and the original socket @@ -234,3 +241,8 @@ The .Fn accept4 system call appeared in .Fx 10.0 . +.Pp +The +.Dv SOCK_CLOFORK +flag appeared in +.Fx 15.0 . diff --git a/lib/libsys/closefrom.2 b/lib/libsys/closefrom.2 index aaa4c55607ac..1885a6fdeaa8 100644 --- a/lib/libsys/closefrom.2 +++ b/lib/libsys/closefrom.2 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd March 3, 2022 +.Dd May 17, 2025 .Dt CLOSEFROM 2 .Os .Sh NAME @@ -59,6 +59,8 @@ Supported .Bl -tag -width ".Dv CLOSE_RANGE_CLOEXEC" .It Dv CLOSE_RANGE_CLOEXEC Set the close-on-exec flag on descriptors in the range instead of closing them. +.It Dv CLOSE_RANGE_CLOFORK +Set the close-on-fork flag on descriptors in the range instead of closing them. .El .Sh RETURN VALUES Upon successful completion, @@ -90,3 +92,8 @@ The .Fn closefrom function first appeared in .Fx 8.0 . +.Pp +The +.Dv CLOSE_RANGE_CLOFORK +flag appeared in +.Fx 15.0 . diff --git a/lib/libsys/execve.2 b/lib/libsys/execve.2 index 5a35980e9555..dc85b9321e48 100644 --- a/lib/libsys/execve.2 +++ b/lib/libsys/execve.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 26, 2022 +.Dd July 02, 2025 .Dt EXECVE 2 .Os .Sh NAME @@ -127,7 +127,10 @@ flag is set (see and .Xr fcntl 2 ) . Descriptors that remain open are unaffected by -.Fn execve . 
+.Fn execve , +except those with the close-on-fork flag +.Dv FD_CLOFORK +which is cleared from all file descriptors. If any of the standard descriptors (0, 1, and/or 2) are closed at the time .Fn execve diff --git a/lib/libsys/fcntl.2 b/lib/libsys/fcntl.2 index 604de43e5e8c..3cf8adc29f88 100644 --- a/lib/libsys/fcntl.2 +++ b/lib/libsys/fcntl.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd June 5, 2025 +.Dd June 24, 2025 .Dt FCNTL 2 .Os .Sh NAME @@ -81,6 +81,13 @@ to remain open across .Xr execve 2 system calls. .It +The close-on-fork flag +.Dv FD_CLOFORK +associated with the new file descriptor is cleared, so the file descriptor is +to remain open across +.Xr fork 2 +system calls. +.It The .Dv FD_RESOLVE_BENEATH flag, described below, will be set if it was set on the original @@ -95,6 +102,15 @@ flag associated with the new file descriptor is set, so the file descriptor is closed when .Xr execve 2 system call executes. +.It Dv F_DUPFD_CLOFORK +Like +.Dv F_DUPFD , +but the +.Dv FD_CLOFORK +flag associated with the new file descriptor is set, so the file descriptor +is closed when +.Xr fork 2 +system call executes. .It Dv F_DUP2FD It is functionally equivalent to .Bd -literal -offset indent @@ -117,6 +133,11 @@ Use .Fn dup2 instead of .Dv F_DUP2FD . +.It Dv F_DUP3FD +Used to implement the +.Fn dup3 +call. +Do not use it. .It Dv F_GETFD Get the flags associated with the file descriptor .Fa fd . @@ -128,6 +149,10 @@ The file will be closed upon execution of .Fa ( arg is ignored). Otherwise, the file descriptor will remain open. +.It Dv FD_CLOFORK +The file will be closed upon execution of the +.Fn fork +family of system calls. .It Dv FD_RESOLVE_BENEATH All path name lookups relative to that file descriptor will behave as if the lookup had @@ -153,7 +178,8 @@ descriptor to also have the flag set. Set flags associated with .Fa fd . 
The available flags are -.Dv FD_CLOEXEC +.Dv FD_CLOEXEC , +.Dv FD_CLOFORK and .Dv FD_RESOLVE_BENEATH . The @@ -785,8 +811,10 @@ for the reasons as stated in .Sh STANDARDS The .Dv F_DUP2FD -constant is non portable. -It is provided for compatibility with AIX and Solaris. +and +.Dv F_DUP3FD +constants are not portable. +They are provided for compatibility with AIX and Solaris. .Pp Per .St -susv4 , @@ -811,3 +839,10 @@ The .Dv F_DUP2FD constant first appeared in .Fx 7.1 . +.Pp +The +.Dv F_DUPFD_CLOFORK +and +.Dv F_DUP3FD +flags appeared in +.Fx 15.0 . diff --git a/lib/libsys/fork.2 b/lib/libsys/fork.2 index 7d548a42890d..e59b208a9ff5 100644 --- a/lib/libsys/fork.2 +++ b/lib/libsys/fork.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 5, 2021 +.Dd May 17, 2024 .Dt FORK 2 .Os .Sh NAME @@ -68,6 +68,16 @@ by the parent. This descriptor copying is also used by the shell to establish standard input and output for newly created processes as well as to set up pipes. +Any file descriptors that were marked with the close-on-fork flag, +.Dv FD_CLOFORK +.Po see +.Fn fcntl 2 +and +.Dv O_CLOFORK +in +.Fn open 2 +.Pc , +will not be present in the child process, but remain open in the parent. .It The child process' resource utilizations are set to 0; see diff --git a/lib/libsys/inotify.2 b/lib/libsys/inotify.2 new file mode 100644 index 000000000000..f94509d6f59e --- /dev/null +++ b/lib/libsys/inotify.2 @@ -0,0 +1,379 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Klara, Inc. 
+.\" +.Dd May 19, 2025 +.Dt INOTIFY 2 +.Os +.Sh NAME +.Nm inotify_init , +.Nm inotify_init1 , +.Nm inotify_add_watch , +.Nm inotify_add_watch_at , +.Nm inotify_rm_watch +.Nd monitor file system events +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/inotify.h +.Ft int +.Fo inotify_init +.Fc +.Ft int +.Fo inotify_init1 +.Fa "int flags" +.Fc +.Ft int +.Fo inotify_add_watch +.Fa "int fd" +.Fa "const char *pathname" +.Fa "uint32_t mask" +.Fc +.Ft int +.Fo inotify_add_watch_at +.Fa "int fd" +.Fa "int dfd" +.Fa "const char *pathname" +.Fa "uint32_t mask" +.Fc +.Ft int +.Fo inotify_rm_watch +.Fa "int fd" +.Fa "uint32_t wd" +.Fc +.Bd -literal +struct inotify_event { + int wd; /* Watch descriptor */ + uint32_t mask; /* Event and flags */ + uint32_t cookie; /* Unique ID which links rename events */ + uint32_t len; /* Name field size, including nul bytes */ + char name[0]; /* Filename (nul-terminated) */ +}; +.Ed +.Sh DESCRIPTION +The inotify system calls provide an interface to monitor file system events. +They aim to be compatible with the Linux inotify interface. +The provided functionality is similar to the +.Dv EVFILT_VNODE +filter of the +.Xr kevent 2 +system call, but further allows monitoring of a directory without needing to +open each object in that directory. +This avoids races and reduces the number of file descriptors needed to monitor +a large file hierarchy. +.Pp +inotify allows one or more file system objects, generally files or directories, +to be watched for events, such as file open or close. +Watched objects are associated with a file descriptor returned +by +.Fn inotify_init +or +.Fn inotify_init1 . +When an event occurs, a record describing the event becomes available for +reading from the inotify file descriptor. +Each inotify descriptor thus refers to a queue of events waiting to be read. +inotify descriptors are inherited across +.Xr fork 2 +calls and may be passed to other processes via +.Xr unix 4 +sockets. 
+.Pp +The +.Fn inotify_init1 +system call accepts two flags. +The +.Dv IN_NONBLOCK +flag causes the inotify descriptor to be opened in non-blocking mode, such that +.Xr read 2 +calls will not block if no records are available to consume, and will instead +return +.Er EWOULDBLOCK . +The +.Dv IN_CLOEXEC +flag causes the inotify descriptor to be closed automatically when +.Xr execve 2 +is called. +.Pp +To watch a file or directory, the +.Fn inotify_add_watch +or +.Fn inotify_add_watch_at +system calls must be used. +They take a path and a mask of events to watch for, and return a +.Dq watch descriptor , +a non-negative integer which uniquely identifies the watched object within the +inotify descriptor. +.Pp +The +.Fn inotify_rm_watch +system call removes a watch from an inotify descriptor. +.Pp +When watching a directory, objects within the directory are monitored for events +as well as the directory itself. +A record describing an inotify event consists of a +.Dq struct inotify_event +followed by the name of the object in the directory being watched. +If the watched object itself generates an event, no name is present. +Extra nul bytes may follow the file name in order to provide alignment for a +subsequent record. +.Pp +The following events are defined: +.Bl -tag -width IN_CLOSE_NOWRITE +.It Dv IN_ACCESS +A file's contents were accessed, e.g., by +.Xr read 2 , +.Xr copy_file_range 2 , +.Xr sendfile 2 , +or +.Xr getdirentries 2 . +.It Dv IN_ATTRIB +A file's metadata was changed, e.g., by +.Xr chmod 2 +or +.Xr unlink 2 . +.It Dv IN_CLOSE_WRITE +A file that was previously opened for writing was closed. +.It Dv IN_CLOSE_NOWRITE +A file that was previously opened read-only was closed. +.It Dv IN_CREATE +A file within a watched directory was created, e.g., by +.Xr open 2 , +.Xr mkdir 2 , +.Xr symlink 2 , +.Xr mknod 2 , +or +.Xr bind 2 . +.It Dv IN_DELETE +A file or directory within a watched directory was removed. 
+.It Dv IN_DELETE_SELF +The watched file or directory itself was deleted. +This event is generated only when the link count of the file drops +to zero. +.It Dv IN_MODIFY +A file's contents were modified, e.g., by +.Xr write 2 +or +.Xr copy_file_range 2 . +.It Dv IN_MOVE_SELF +The watched file or directory itself was renamed. +.It Dv IN_MOVED_FROM +A file or directory was moved from a watched directory. +.It Dv IN_MOVED_TO +A file or directory was moved into a watched directory. +A +.Xr rename 2 +call thus may generate two events, one for the old name and one for the new +name. +These are linked together by the +.Ar cookie +field in the inotify record, which can be compared to link the two records +to the same event. +.It Dv IN_OPEN +A file was opened. +.El +.Pp +Some additional flags may be set in inotify event records: +.Bl -tag -width IN_Q_OVERFLOW +.It Dv IN_IGNORED +When a watch is removed from a file, for example because it was created with the +.Dv IN_ONESHOT +flag, the file was deleted, or the watch was explicitly removed with +.Xr inotify_rm_watch 2 , +an event with this mask is generated to indicate that the watch will not +generate any more events. +Once this event is generated, the watch is automatically removed, and in +particular should not be removed manually with +.Xr inotify_rm_watch 2 . +.It Dv IN_ISDIR +When the subject of an event is a directory, this flag is set in the +.Ar mask . +.It Dv IN_Q_OVERFLOW +One or more events were dropped, for example because of a kernel memory allocation +failure or because the event queue size hit a limit. +.It Dv IN_UNMOUNT +The filesystem containing the watched object was unmounted. +.El +.Pp +A number of flags may also be specified in the +.Ar mask +given to +.Fn inotify_add_watch +and +.Fn inotify_add_watch_at : +.Bl -tag -width IN_DONT_FOLLOW +.It Dv IN_DONT_FOLLOW +If +.Ar pathname +is a symbolic link, do not follow it. +.It Dv IN_EXCL_UNLINK +This currently has no effect, see the +.Sx BUGS +section. 
+.It Dv IN_MASK_ADD +When adding a watch to an object, and that object is already watched by the +same inotify descriptor, by default the mask of the existing watch is +overwritten. +When +.Dv IN_MASK_ADD +is specified, the mask of the existing watch is instead logically ORed with +the new mask. +.It Dv IN_MASK_CREATE +When +.Fn inotify_add_watch +is used to add a watch to an object, +.Dv IN_MASK_CREATE +is specified, and that object is already watched by the same inotify descriptor, +return an error instead of updating the existing watch. +.It Dv IN_ONESHOT +Monitor the object for a single event, after which the watch is automatically +removed. +As part of removal, a +.Dv IN_IGNORED +event is generated. +.It Dv IN_ONLYDIR +When creating a watch, fail with +.Er ENOTDIR +if the path does not refer to a directory. +.El +.Sh SYSCTL VARIABLES +The following variables are available as both +.Xr sysctl 8 +variables and +.Xr loader 8 +tunables: +.Bl -tag -width 15 +.It Va vfs.inotify.max_events +The maximum number of inotify records that can be queued for a single +inotify descriptor. +Records in excess of this limit are discarded, and a single event with +mask equal to +.Dv IN_Q_OVERFLOW +will be present in the queue. +.It Va vfs.inotify.max_user_instances +The maximum number of inotify descriptors that can be created by a single +user. +.It Va vfs.inotify.max_user_watches +The maximum number of inotify watches per user. +.El +.Sh EXAMPLES +See the example program in +.Pa /usr/share/examples/inotify/inotify.c . +.Sh ERRORS +The +.Fn inotify_init +and +.Fn inotify_init1 +functions will fail if: +.Bl -tag -width Er +.It Bq Er ENFILE +The system limit on the total number of open files has been reached. +.It Bq Er EMFILE +A per-process limit on the number of open files has been reached. +.It Bq Er EMFILE +The system limit on the number of inotify descriptors has been reached. +.It Bq Er EINVAL +An unrecognized flag was passed to +.Fn inotify_init1 . 
+.El +.Pp +The +.Fn inotify_add_watch +and +.Fn inotify_add_watch_at +system calls will fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Ar fd +parameter is not a valid file descriptor. +.It Bq Er EINVAL +The +.Ar fd +parameter is not an inotify descriptor. +.It Bq Er EINVAL +The +.Ar mask +parameter does not specify an event, or +the +.Dv IN_MASK_CREATE +and +.Dv IN_MASK_ADD +flags are both set, or an unrecognized flag was passed. +.It Bq Er ENOTDIR +The +.Ar pathname +parameter refers to a file that is not a directory, and the +.Dv IN_ONLYDIR +flag was specified. +.It Bq Er ENOSPC +The per-user limit on the total number of inotify watches has been reached. +.It Bq Er ECAPMODE +The process is in capability mode and +.Fn inotify_add_watch +was called, or +.Fn inotify_add_watch_at +was called with +.Dv AT_FDCWD +as the directory file descriptor +.Ar dfd . +.It Bq Er ENOTCAPABLE +The process is in capability mode and +.Ar pathname +contains a +.Dq .. +component leading to a directory outside the directory hierarchy specified +by +.Ar dfd . +.El +.Pp +The +.Fn inotify_rm_watch +system call will fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Ar fd +parameter is not a valid file descriptor. +.It Bq Er EINVAL +The +.Ar fd +parameter is not an inotify descriptor. +.It Bq Er EINVAL +The +.Ar wd +parameter is not a valid watch descriptor. +.El +.Sh SEE ALSO +.Xr kevent 2 , +.Xr capsicum 4 +.Sh STANDARDS +The +.Nm +interface originates from Linux and is non-standard. +This implementation aims to be compatible with that of Linux and is based +on the documentation available at +.Pa https://man7.org/linux/man-pages/man7/inotify.7.html . +.Sh HISTORY +The inotify system calls first appeared in +.Fx 15.0 . +.Sh BUGS +If a file in a watched directory has multiple hard links, +an access via any hard link for that file will generate an event, even +if the accessed link belongs to an unwatched directory. 
+This is not the case for the Linux implementation, where only accesses +via the hard link in the watched directory will generate an event. +.Pp +If a watched directory contains multiple hard links of a file, an event +on one of the hard links will generate an inotify record for each link +in the directory. +.Pp +When a file is unlinked, no more events will be generated for that file, +even if it continues to be accessed. +By default, the Linux implementation will continue to generate events in +this case. +Thus, the +.Fx +implementation behaves as though +.Dv IN_EXCL_UNLINK +is always set. diff --git a/lib/libsys/open.2 b/lib/libsys/open.2 index 84c4f02fce8a..a0e905a8f375 100644 --- a/lib/libsys/open.2 +++ b/lib/libsys/open.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 3, 2025 +.Dd May 17, 2025 .Dt OPEN 2 .Os .Sh NAME @@ -195,6 +195,9 @@ error if file is not a directory .It Dv O_CLOEXEC automatically close file on .Xr execve 2 +.It Dv O_CLOFORK +automatically close file on any child process created with +.Fn fork 2 .It Dv O_VERIFY verify the contents of the file with .Xr mac_veriexec 4 @@ -360,6 +363,27 @@ may be used to set .Dv FD_CLOEXEC flag for the newly returned file descriptor. .Pp +.Dv O_CLOFORK +may be used to set +.Dv FD_CLOFORK +flag for the newly returned file descriptor. +The file will be closed on any child process created with +.Fn fork 2 , +.Fn vfork 2 +or +.Fn rfork 2 +with the +.Dv RFFDG +flag, remaining open in the parent. +Both the +.Dv O_CLOEXEC +and +.Dv O_CLOFORK +flags can be modified with the +.Dv F_SETFD +.Fn fcntl 2 +command. +.Pp .Dv O_VERIFY may be used to indicate to the kernel that the contents of the file should be verified before allowing the open to proceed. @@ -846,6 +870,9 @@ function was introduced in appeared in 13.0. .Dv O_NAMEDATTR appeared in 15.0. +.Dv O_CLOFORK +appeared in +.Fx 15.0 . 
.Sh BUGS The .Fa mode diff --git a/lib/libsys/pathconf.2 b/lib/libsys/pathconf.2 index 4c562b9c2c9a..79ac8310000d 100644 --- a/lib/libsys/pathconf.2 +++ b/lib/libsys/pathconf.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd May 3, 2025 +.Dd July 5, 2025 .Dt PATHCONF 2 .Os .Sh NAME @@ -179,6 +179,14 @@ otherwise 0. Return 1 if named attributes are enabled for the file system, otherwise 0. .It Li _PC_HAS_NAMEDATTR Return 1 if one or more named attributes exist for the file, otherwise 0. +.It Li _PC_HAS_HIDDENSYSTEM +Return 1 if both +.Dv UF_HIDDEN +and +.Dv UF_SYSTEM +flags can be set by +.Xr chflags 2 , +otherwise 0. .El .Sh RETURN VALUES If the call to @@ -255,6 +263,7 @@ An I/O error occurred while reading from or writing to the file system. Corrupted data was detected while reading from the file system. .El .Sh SEE ALSO +.Xr chflags 2 , .Xr lseek 2 , .Xr sysctl 3 .Sh HISTORY diff --git a/lib/libsys/pipe.2 b/lib/libsys/pipe.2 index 9531c9717395..37d6eba420de 100644 --- a/lib/libsys/pipe.2 +++ b/lib/libsys/pipe.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd December 1, 2017 +.Dd May 17, 2025 .Dt PIPE 2 .Os .Sh NAME @@ -64,6 +64,8 @@ list, defined in .Bl -tag -width ".Dv O_NONBLOCK" .It Dv O_CLOEXEC Set the close-on-exec flag for the new file descriptors. +.It Dv O_CLOFORK +Set the close-on-fork flag for the new file descriptors. .It Dv O_NONBLOCK Set the non-blocking flag for the ends of the pipe. .El @@ -173,3 +175,8 @@ function became a wrapper around .Fn pipe2 in .Fx 11.0 . +.Pp +The +.Dv O_CLOFORK +flag appeared in +.Fx 15.0 . diff --git a/lib/libsys/recv.2 b/lib/libsys/recv.2 index f3ee60b75663..b78cd70b8a1d 100644 --- a/lib/libsys/recv.2 +++ b/lib/libsys/recv.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd July 30, 2022 +.Dd May 17, 2025 .Dt RECV 2 .Os .Sh NAME @@ -164,6 +164,7 @@ one or more of the values: .It Dv MSG_WAITALL Ta wait for full request or error .It Dv MSG_DONTWAIT Ta do not block .It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec +.It Dv MSG_CMSG_CLOFORK Ta set received fds close-on-fork .It Dv MSG_WAITFORONE Ta do not block after receiving the first message (only for .Fn recvmmsg diff --git a/lib/libsys/socket.2 b/lib/libsys/socket.2 index a383cbcc4d80..b211611c6354 100644 --- a/lib/libsys/socket.2 +++ b/lib/libsys/socket.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 15, 2023 +.Dd May 17, 2025 .Dt SOCKET 2 .Os .Sh NAME @@ -121,6 +121,7 @@ argument: .Pp .Bd -literal -offset indent -compact SOCK_CLOEXEC Set close-on-exec on the new descriptor, +SOCK_CLOFORK Set close-on-fork on the new descriptor, SOCK_NONBLOCK Set non-blocking mode on the new socket .Ed .Pp @@ -331,7 +332,10 @@ argument of .Fn socket . The .Dv SOCK_CLOEXEC -flag is expected to conform to the next revision of the +and +.Dv SOCK_CLOFORK +flags are expected to conform to +.St -p1003.1-2024 . .Tn POSIX standard. The @@ -347,3 +351,8 @@ The .Fn socket system call appeared in .Bx 4.2 . +.Pp +The +.Dv SOCK_CLOFORK +flag appeared in +.Fx 15.0 . diff --git a/lib/libsys/socketpair.2 b/lib/libsys/socketpair.2 index 5874a0791f4d..60dec74f9cc2 100644 --- a/lib/libsys/socketpair.2 +++ b/lib/libsys/socketpair.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd February 10, 2018 +.Dd May 17, 2025 .Dt SOCKETPAIR 2 .Os .Sh NAME @@ -56,7 +56,8 @@ and The two sockets are indistinguishable. 
.Pp The -.Dv SOCK_CLOEXEC +.Dv SOCK_CLOEXEC , +.Dv SOCK_CLOFORK and .Dv SOCK_NONBLOCK flags in the diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map index 51be88203c17..69fce2ea7c63 100644 --- a/lib/libsys/syscalls.map +++ b/lib/libsys/syscalls.map @@ -809,4 +809,8 @@ FBSDprivate_1.0 { __sys_setcred; _exterrctl; __sys_exterrctl; + _inotify_add_watch_at; + __sys_inotify_add_watch_at; + _inotify_rm_watch; + __sys_inotify_rm_watch; }; diff --git a/lib/libsysdecode/Makefile b/lib/libsysdecode/Makefile index b01877bb8bb8..ca020552a6e9 100644 --- a/lib/libsysdecode/Makefile +++ b/lib/libsysdecode/Makefile @@ -84,6 +84,7 @@ MLINKS+=sysdecode_mask.3 sysdecode_accessmode.3 \ sysdecode_mask.3 sysdecode_fileflags.3 \ sysdecode_mask.3 sysdecode_filemode.3 \ sysdecode_mask.3 sysdecode_flock_operation.3 \ + sysdecode_mask.3 sysdecode_inotifyflags.3 \ sysdecode_mask.3 sysdecode_mlockall_flags.3 \ sysdecode_mask.3 sysdecode_mmap_flags.3 \ sysdecode_mask.3 sysdecode_mmap_prot.3 \ diff --git a/lib/libsysdecode/flags.c b/lib/libsysdecode/flags.c index 32829d35dbe0..f8e26e6a9dae 100644 --- a/lib/libsysdecode/flags.c +++ b/lib/libsysdecode/flags.c @@ -23,7 +23,6 @@ * SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> #define L2CAP_SOCKET_CHECKED #include <sys/types.h> @@ -31,6 +30,7 @@ #include <sys/capsicum.h> #include <sys/event.h> #include <sys/extattr.h> +#include <sys/inotify.h> #include <sys/linker.h> #include <sys/mman.h> #include <sys/mount.h> @@ -196,7 +196,7 @@ sysdecode_vmprot(FILE *fp, int type, int *rem) } static struct name_table sockflags[] = { - X(SOCK_CLOEXEC) X(SOCK_NONBLOCK) XEND + X(SOCK_CLOEXEC) X(SOCK_CLOFORK) X(SOCK_NONBLOCK) XEND }; bool @@ -206,16 +206,17 @@ sysdecode_socket_type(FILE *fp, int type, int *rem) uintmax_t val; bool printed; - str = lookup_value(socktype, type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)); + str = lookup_value(socktype, + type & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK)); if (str != NULL) { fputs(str, fp); *rem = 0; printed = true; } else { - *rem = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK); + *rem = type & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK); printed = false; } - val = type & (SOCK_CLOEXEC | SOCK_NONBLOCK); + val = type & (SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK); print_mask_part(fp, sockflags, &val, &printed); return (printed); } @@ -351,6 +352,13 @@ sysdecode_getrusage_who(int who) return (lookup_value(rusage, who)); } +bool +sysdecode_inotifyflags(FILE *fp, int flag, int *rem) +{ + + return (print_mask_int(fp, inotifyflags, flag, rem)); +} + static struct name_table kevent_user_ffctrl[] = { X(NOTE_FFNOP) X(NOTE_FFAND) X(NOTE_FFOR) X(NOTE_FFCOPY) XEND @@ -556,7 +564,7 @@ sysdecode_nfssvc_flags(int flags) } static struct name_table pipe2flags[] = { - X(O_CLOEXEC) X(O_NONBLOCK) XEND + X(O_CLOEXEC) X(O_CLOFORK) X(O_NONBLOCK) XEND }; bool @@ -866,7 +874,7 @@ sysdecode_fcntl_cmd(int cmd) } static struct name_table fcntl_fd_arg[] = { - X(FD_CLOEXEC) X(0) XEND + X(FD_CLOEXEC) X(FD_CLOFORK) X(0) XEND }; bool diff --git a/lib/libsysdecode/mktables b/lib/libsysdecode/mktables index 5d7be2aad3c8..6b4f79402660 100644 --- a/lib/libsysdecode/mktables +++ b/lib/libsysdecode/mktables @@ -98,6 +98,7 @@ 
gen_table "extattrns" "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/ gen_table "fadvisebehav" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h" gen_table "openflags" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h" "O_RDONLY|O_RDWR|O_WRONLY" gen_table "flockops" "LOCK_[A-Z]+[[:space:]]+0x[0-9]+" "sys/fcntl.h" +gen_table "inotifyflags" "IN_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/inotify.h" "IN_CLOEXEC|IN_NONBLOCK" gen_table "kldsymcmd" "KLDSYM_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h" gen_table "kldunloadfflags" "LINKER_UNLOAD_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h" gen_table "lio_listiomodes" "LIO_(NO)?WAIT[[:space:]]+[0-9]+" "aio.h" diff --git a/lib/libsysdecode/sysdecode.h b/lib/libsysdecode/sysdecode.h index 8dc0bbea6f0d..c95d7f71379b 100644 --- a/lib/libsysdecode/sysdecode.h +++ b/lib/libsysdecode/sysdecode.h @@ -61,6 +61,7 @@ const char *sysdecode_getfsstat_mode(int _mode); const char *sysdecode_getrusage_who(int _who); const char *sysdecode_idtype(int _idtype); const char *sysdecode_ioctlname(unsigned long _val); +bool sysdecode_inotifyflags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_ipproto(int _protocol); void sysdecode_kevent_fflags(FILE *_fp, short _filter, int _fflags, int _base); diff --git a/lib/libsysdecode/sysdecode_fcntl_arg.3 b/lib/libsysdecode/sysdecode_fcntl_arg.3 index ee3a030a79e4..d5648ce0adc3 100644 --- a/lib/libsysdecode/sysdecode_fcntl_arg.3 +++ b/lib/libsysdecode/sysdecode_fcntl_arg.3 @@ -54,7 +54,8 @@ are determined by .It Sy Command Ta Fa arg Sy Type Ta Sy Output Format .It .It Dv F_SETFD Ta Vt int Ta -.Dq FD_CLOEXEC +.Dq FD_CLOEXEC , +.Dq FD_CLOFORK or the value of .Fa arg in the indicated diff --git a/libexec/flua/linit_flua.c b/libexec/flua/linit_flua.c index 8eaa4af1ffca..b466b7872158 100644 --- a/libexec/flua/linit_flua.c +++ b/libexec/flua/linit_flua.c @@ -57,18 +57,11 @@ static const luaL_Reg loadedlibs[] = { #endif /* FreeBSD Extensions */ {"lfs", luaopen_lfs}, - {"posix.fnmatch", luaopen_posix_fnmatch}, - 
{"posix.libgen", luaopen_posix_libgen}, - {"posix.stdlib", luaopen_posix_stdlib}, - {"posix.sys.stat", luaopen_posix_sys_stat}, - {"posix.sys.utsname", luaopen_posix_sys_utsname}, - {"posix.sys.wait", luaopen_posix_sys_wait}, - {"posix.unistd", luaopen_posix_unistd}, + {"posix", luaopen_posix}, {"fbsd", luaopen_fbsd}, {NULL, NULL} }; - LUALIB_API void luaL_openlibs (lua_State *L) { const luaL_Reg *lib; /* "require" functions from 'loadedlibs' and set results to global table */ @@ -77,4 +70,3 @@ LUALIB_API void luaL_openlibs (lua_State *L) { lua_pop(L, 1); /* remove lib */ } } - diff --git a/libexec/flua/modules/lposix.c b/libexec/flua/modules/lposix.c index 816d4bc688d2..75cdd345aeaa 100644 --- a/libexec/flua/modules/lposix.c +++ b/libexec/flua/modules/lposix.c @@ -88,18 +88,23 @@ static int lua_chown(lua_State *L) { const char *path; - uid_t owner = (uid_t) -1; - gid_t group = (gid_t) -1; + uid_t owner = (uid_t)-1; + gid_t group = (gid_t)-1; + int error; enforce_max_args(L, 3); path = luaL_checkstring(L, 1); if (lua_isinteger(L, 2)) - owner = (uid_t) lua_tointeger(L, 2); + owner = (uid_t)lua_tointeger(L, 2); else if (lua_isstring(L, 2)) { - struct passwd *p = getpwnam(lua_tostring(L, 2)); - if (p != NULL) - owner = p->pw_uid; + char buf[4096]; + struct passwd passwd, *pwd; + + error = getpwnam_r(lua_tostring(L, 2), &passwd, + buf, sizeof(buf), &pwd); + if (error == 0) + owner = pwd->pw_uid; else return (luaL_argerror(L, 2, lua_pushfstring(L, "unknown user %s", @@ -112,11 +117,15 @@ lua_chown(lua_State *L) } if (lua_isinteger(L, 3)) - group = (gid_t) lua_tointeger(L, 3); + group = (gid_t)lua_tointeger(L, 3); else if (lua_isstring(L, 3)) { - struct group *g = getgrnam(lua_tostring(L, 3)); - if (g != NULL) - group = g->gr_gid; + char buf[4096]; + struct group gr, *grp; + + error = getgrnam_r(lua_tostring(L, 3), &gr, buf, sizeof(buf), + &grp); + if (error == 0) + group = grp->gr_gid; else return (luaL_argerror(L, 3, lua_pushfstring(L, "unknown group %s", @@ -581,21 
+590,21 @@ static const struct luaL_Reg unistdlib[] = { #undef REG_SIMPLE #undef REG_DEF -int +static int luaopen_posix_libgen(lua_State *L) { luaL_newlib(L, libgenlib); return (1); } -int +static int luaopen_posix_stdlib(lua_State *L) { luaL_newlib(L, stdliblib); return (1); } -int +static int luaopen_posix_fnmatch(lua_State *L) { luaL_newlib(L, fnmatchlib); @@ -613,14 +622,21 @@ luaopen_posix_fnmatch(lua_State *L) return 1; } -int +static int luaopen_posix_sys_stat(lua_State *L) { luaL_newlib(L, sys_statlib); return (1); } -int +static int +luaopen_posix_sys_utsname(lua_State *L) +{ + luaL_newlib(L, sys_utsnamelib); + return 1; +} + +static int luaopen_posix_sys_wait(lua_State *L) { luaL_newlib(L, sys_waitlib); @@ -646,16 +662,38 @@ luaopen_posix_sys_wait(lua_State *L) return (1); } -int -luaopen_posix_sys_utsname(lua_State *L) +static int +luaopen_posix_unistd(lua_State *L) { - luaL_newlib(L, sys_utsnamelib); - return 1; + luaL_newlib(L, unistdlib); + return (1); } int -luaopen_posix_unistd(lua_State *L) +luaopen_posix(lua_State *L) { - luaL_newlib(L, unistdlib); + lua_newtable(L); /* posix */ + + luaL_requiref(L, "posix.fnmatch", luaopen_posix_fnmatch, 0); + lua_setfield(L, -2, "fnmatch"); + + luaL_requiref(L, "posix.libgen", luaopen_posix_libgen, 0); + lua_setfield(L, -2, "libgen"); + + luaL_requiref(L, "posix.stdlib", luaopen_posix_stdlib, 0); + lua_setfield(L, -2, "stdlib"); + + lua_newtable(L); /* posix.sys */ + luaL_requiref(L, "posix.sys.stat", luaopen_posix_sys_stat, 0); + lua_setfield(L, -2, "stat"); + luaL_requiref(L, "posix.sys.utsname", luaopen_posix_sys_utsname, 0); + lua_setfield(L, -2, "utsname"); + luaL_requiref(L, "posix.sys.wait", luaopen_posix_sys_wait, 0); + lua_setfield(L, -2, "wait"); + lua_setfield(L, -2, "sys"); + + luaL_requiref(L, "posix.unistd", luaopen_posix_unistd, 0); + lua_setfield(L, -2, "unistd"); + return (1); } diff --git a/libexec/flua/modules/lposix.h b/libexec/flua/modules/lposix.h index da7079056826..1aa33f042571 100644 --- 
a/libexec/flua/modules/lposix.h +++ b/libexec/flua/modules/lposix.h @@ -7,10 +7,4 @@ #include <lua.h> -int luaopen_posix_fnmatch(lua_State *L); -int luaopen_posix_libgen(lua_State *L); -int luaopen_posix_stdlib(lua_State *L); -int luaopen_posix_sys_stat(lua_State *L); -int luaopen_posix_sys_utsname(lua_State *L); -int luaopen_posix_sys_wait(lua_State *L); -int luaopen_posix_unistd(lua_State *L); +int luaopen_posix(lua_State *L); diff --git a/libexec/nuageinit/nuage.lua b/libexec/nuageinit/nuage.lua index 11958e8b5cc2..493ae11d6ca7 100644 --- a/libexec/nuageinit/nuage.lua +++ b/libexec/nuageinit/nuage.lua @@ -56,6 +56,21 @@ local function errmsg(str, prepend) os.exit(1) end +local function chmod(path, mode) + local mode = tonumber(mode, 8) + local _, err, msg = sys_stat.chmod(path, mode) + if err then + errmsg("chmod(" .. path .. ", " .. mode .. ") failed: " .. msg) + end +end + +local function chown(path, owner, group) + local _, err, msg = unistd.chown(path, owner, group) + if err then + errmsg("chown(" .. path .. ", " .. owner .. ", " .. group .. ") failed: " .. msg) + end +end + local function dirname(oldpath) if not oldpath then return nil @@ -252,12 +267,12 @@ local function addsshkey(homedir, key) f:write(key .. 
"\n") f:close() if chownak then - sys_stat.chmod(ak_path, 384) - unistd.chown(ak_path, dirattrs.uid, dirattrs.gid) + chmod(ak_path, "0600") + chown(ak_path, dirattrs.uid, dirattrs.gid) end if chowndotssh then - sys_stat.chmod(dotssh_path, 448) - unistd.chown(dotssh_path, dirattrs.uid, dirattrs.gid) + chmod(dotssh_path, "0700") + chown(dotssh_path, dirattrs.uid, dirattrs.gid) end end @@ -296,10 +311,10 @@ local function addsudo(pwd) end f:close() if chmodsudoers then - sys_stat.chmod(sudoers, 416) + chmod(sudoers, "0640") end if chmodsudoersd then - sys_stat.chmod(sudoers, 480) + chmod(sudoers, "0740") end end @@ -521,16 +536,14 @@ local function addfile(file, defer) end f:close() if file.permissions then - -- convert from octal to decimal - local perm = tonumber(file.permissions, 8) - sys_stat.chmod(filepath, perm) + chmod(filepath, file.permissions) end if file.owner then local owner, group = string.match(file.owner, "([^:]+):([^:]+)") if not owner then owner = file.owner end - unistd.chown(filepath, owner, group) + chown(filepath, owner, group) end return true end @@ -538,6 +551,8 @@ end local n = { warn = warnmsg, err = errmsg, + chmod = chmod, + chown = chown, dirname = dirname, mkdir_p = mkdir_p, sethostname = sethostname, diff --git a/libexec/nuageinit/nuageinit b/libexec/nuageinit/nuageinit index 84133d4373c5..0fcdc7274db3 100755 --- a/libexec/nuageinit/nuageinit +++ b/libexec/nuageinit/nuageinit @@ -7,7 +7,6 @@ local nuage = require("nuage") local ucl = require("ucl") local yaml = require("lyaml") -local sys_stat = require("posix.sys.stat") if #arg ~= 2 then nuage.err("Usage: " .. arg[0] .. " <cloud-init-directory> (<config-2> | <nocloud>)", false) @@ -157,7 +156,7 @@ local function ssh_keys(obj) sshkey:close() end if keytype == "private" then - sys_stat.chmod(path, 384) + nuage.chmod(path, "0600") end end end @@ -281,7 +280,7 @@ local function runcmd(obj) end if f ~= nil then f:close() - sys_stat.chmod(root .. 
"/var/cache/nuageinit/runcmds", 493) + nuage.chmod(root .. "/var/cache/nuageinit/runcmds", "0755") end end @@ -503,5 +502,5 @@ if line == "#cloud-config" then end elseif line:sub(1, 2) == "#!" then -- delay for execution at rc.local time -- - sys_stat.chmod(root .. "/var/cache/nuageinit/user_data", 493) + nuage.chmod(root .. "/var/cache/nuageinit/user_data", "0755") end diff --git a/sbin/ifconfig/ifbridge.c b/sbin/ifconfig/ifbridge.c index 2d0af1255a73..ce5d2f4894fa 100644 --- a/sbin/ifconfig/ifbridge.c +++ b/sbin/ifconfig/ifbridge.c @@ -147,6 +147,36 @@ bridge_addresses(if_ctx *ctx, const char *prefix) } static void +print_vlans(ifbvlan_set_t *vlans) +{ + unsigned printed = 0; + + for (unsigned vlan = DOT1Q_VID_MIN; vlan <= DOT1Q_VID_MAX;) { + unsigned last; + + if (!BRVLAN_TEST(vlans, vlan)) { + ++vlan; + continue; + } + + last = vlan; + while (last < DOT1Q_VID_MAX && BRVLAN_TEST(vlans, last + 1)) + ++last; + + if (printed == 0) + printf(" tagged "); + else + printf(","); + + printf("%u", vlan); + if (last != vlan) + printf("-%u", last); + ++printed; + vlan = last + 1; + } +} + +static void bridge_status(if_ctx *ctx) { struct ifconfig_bridge_status *bridge; @@ -211,6 +241,9 @@ bridge_status(if_ctx *ctx) else printf(" <unknown state %d>", state); } + if (member->ifbr_untagged != 0) + printf(" untagged %u", (unsigned)member->ifbr_untagged); + print_vlans(&bridge->member_vlans[i]); printf("\n"); } @@ -577,6 +610,45 @@ setbridge_ifpathcost(if_ctx *ctx, const char *ifn, const char *cost) } static void +setbridge_untagged(if_ctx *ctx, const char *ifn, const char *vlanid) +{ + struct ifbreq req; + u_long val; + + memset(&req, 0, sizeof(req)); + + if (get_val(vlanid, &val) < 0) + errx(1, "invalid VLAN identifier: %s", vlanid); + + /* + * Reject vlan 0, since it's not a valid vlan identifier and has a + * special meaning in the kernel interface. 
+ */ + if (val == 0) + errx(1, "invalid VLAN identifier: %lu", val); + + strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); + req.ifbr_untagged = val; + + if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFUNTAGGED %s", vlanid); +} + +static void +unsetbridge_untagged(if_ctx *ctx, const char *ifn, int dummy __unused) +{ + struct ifbreq req; + + memset(&req, 0, sizeof(req)); + + strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); + req.ifbr_untagged = 0; + + if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFUNTAGGED"); +} + +static void setbridge_ifmaxaddr(if_ctx *ctx, const char *ifn, const char *arg) { struct ifbreq req; @@ -612,17 +684,118 @@ setbridge_timeout(if_ctx *ctx, const char *arg, int dummy __unused) static void setbridge_private(if_ctx *ctx, const char *val, int dummy __unused) { - do_bridgeflag(ctx, val, IFBIF_PRIVATE, 1); } static void unsetbridge_private(if_ctx *ctx, const char *val, int dummy __unused) { - do_bridgeflag(ctx, val, IFBIF_PRIVATE, 0); } +static void +setbridge_vlanfilter(if_ctx *ctx, const char *val, int dummy __unused) +{ + do_bridgeflag(ctx, val, IFBIF_VLANFILTER, 1); +} + +static void +unsetbridge_vlanfilter(if_ctx *ctx, const char *val, int dummy __unused) +{ + do_bridgeflag(ctx, val, IFBIF_VLANFILTER, 0); +} + +static int +parse_vlans(ifbvlan_set_t *set, const char *str) +{ + char *s, *buf, *token; /* strsep() advances s; free buf */ + + /* "none" means the empty vlan set */ + if (strcmp(str, "none") == 0) { + __BIT_ZERO(BRVLAN_SETSIZE, set); + return (0); + } + + /* "all" means all vlans, except for 0 and 4095 which are reserved */ + if (strcmp(str, "all") == 0) { + __BIT_FILL(BRVLAN_SETSIZE, set); + BRVLAN_CLR(set, DOT1Q_VID_NULL); + BRVLAN_CLR(set, DOT1Q_VID_RSVD_IMPL); + return (0); + } + + if ((s = buf = strdup(str)) == NULL) + return (-1); + + while ((token = strsep(&s, ",")) != NULL) { + unsigned long first, last; + char *p, *lastp; + + if ((lastp = strchr(token, '-')) != NULL) + *lastp++ = '\0'; + + first
= last = strtoul(token, &p, 10); + if (*p != '\0') + goto err; + if (first < DOT1Q_VID_MIN || first > DOT1Q_VID_MAX) + goto err; + + if (lastp) { + last = strtoul(lastp, &p, 10); + if (*p != '\0') + goto err; + if (last < DOT1Q_VID_MIN || last > DOT1Q_VID_MAX || + last < first) + goto err; + } + + for (unsigned vlan = first; vlan <= last; ++vlan) + BRVLAN_SET(set, vlan); + } + + free(buf); + return (0); + +err: + free(buf); + return (-1); +} + +static void +set_bridge_vlanset(if_ctx *ctx, const char *ifn, const char *vlans, int op) +{ + struct ifbif_vlan_req req; + + memset(&req, 0, sizeof(req)); + + if (parse_vlans(&req.bv_set, vlans) != 0) + errx(1, "invalid vlan set: %s", vlans); + + strlcpy(req.bv_ifname, ifn, sizeof(req.bv_ifname)); + req.bv_op = op; + + if (do_cmd(ctx, BRDGSIFVLANSET, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFVLANSET %s", vlans); +} + +static void +setbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_SET); +} + +static void +addbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_ADD); +} + +static void +delbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_DEL); +} + static struct cmd bridge_cmds[] = { DEF_CMD_ARG("addm", setbridge_add), DEF_CMD_ARG("deletem", setbridge_delete), @@ -659,6 +832,13 @@ static struct cmd bridge_cmds[] = { DEF_CMD_ARG2("ifpriority", setbridge_ifpriority), DEF_CMD_ARG2("ifpathcost", setbridge_ifpathcost), DEF_CMD_ARG2("ifmaxaddr", setbridge_ifmaxaddr), + DEF_CMD_ARG("vlanfilter", setbridge_vlanfilter), + DEF_CMD_ARG("-vlanfilter", unsetbridge_vlanfilter), + DEF_CMD_ARG2("untagged", setbridge_untagged), + DEF_CMD_ARG("-untagged", unsetbridge_untagged), + DEF_CMD_ARG2("tagged", setbridge_tagged), + DEF_CMD_ARG2("+tagged", addbridge_tagged), + DEF_CMD_ARG2("-tagged", delbridge_tagged), DEF_CMD_ARG("timeout",
setbridge_timeout), DEF_CMD_ARG("private", setbridge_private), DEF_CMD_ARG("-private", unsetbridge_private), diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index e3f094a336fb..3fb8b5f02b76 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 24, 2025 +.Dd July 5, 2025 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2696,6 +2696,57 @@ source addresses are dropped until an existing host cache entry expires or is removed. Set to 0 to disable. .El +.Ss Bridge VLAN Filtering Parameters +The behaviour of these options is described in the +.Dq VLAN SUPPORT +section of +.Xr bridge 4 . +.Bl -tag -width indent +.It Cm vlanfilter Ar interface +Enable VLAN filtering on an interface. +.It Cm -vlanfilter Ar interface +Disable VLAN filtering on an interface. +.It Cm untagged Ar interface Ar vlan-id +Set the untagged VLAN identifier for an interface. +.Pp +Setting +.Cm untagged +will automatically enable VLAN filtering on the interface. +.It Cm -untagged Ar interface +Clear the untagged VLAN identifier for an interface. +.It Cm tagged Ar interface Ar vlan-list +Set the interface's VLAN access list to the provided list of VLANs. +The list should be a comma-separated list of one or more VLAN IDs +or ranges formatted as +.Ar first-last , +the value +.Dq none +meaning the empty set, +or the value +.Dq all +meaning all VLANs (1-4094). +.Pp +Setting +.Cm tagged +will automatically enable VLAN filtering on the interface. +.It Cm +tagged Ar interface Ar vlan-list +Add the provided list of VLAN IDs to the interface's VLAN access list. +The list should be formatted as described for +.Cm tagged . +.Pp +Setting +.Cm +tagged +will automatically enable VLAN filtering on the interface. +.It Cm -tagged Ar interface Ar vlan-list +Remove the provided list of VLAN IDs from the interface's VLAN access +list.
+The list should be formatted as described for +.Cm tagged . +.Pp +Setting +.Cm -tagged +will automatically enable VLAN filtering on the interface. +.El .Ss Link Aggregation and Link Failover Parameters The following parameters are specific to lagg interfaces: .Bl -tag -width indent diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y index c59204d3d5a4..af1fb95398f8 100644 --- a/sbin/pfctl/parse.y +++ b/sbin/pfctl/parse.y @@ -921,7 +921,27 @@ varset : STRING '=' varstring { } ; -anchorname : STRING { $$ = $1; } +anchorname : STRING { + if ($1[0] == '\0') { + free($1); + yyerror("anchor name must not be empty"); + YYERROR; + } + if (strlen(pf->anchor->path) + 1 + + strlen($1) >= PATH_MAX) { + free($1); + yyerror("anchor name is longer than %u", + PATH_MAX - 1); + YYERROR; + } + if ($1[0] == '_' || strstr($1, "/_") != NULL) { + free($1); + yyerror("anchor names beginning with '_' " + "are reserved for internal use"); + YYERROR; + } + $$ = $1; + } | /* empty */ { $$ = NULL; } ; @@ -938,6 +958,8 @@ pfa_anchor : '{' struct pfctl_ruleset *rs; /* stepping into a brace anchor */ + if (pf->asd >= PFCTL_ANCHOR_STACK_DEPTH) + errx(1, "pfa_anchor: anchors too deep"); pf->asd++; pf->bn++; @@ -974,13 +996,6 @@ anchorrule : ANCHOR anchorname dir quick interface af proto fromto YYERROR; } - if ($2 && ($2[0] == '_' || strstr($2, "/_") != NULL)) { - free($2); - yyerror("anchor names beginning with '_' " - "are reserved for internal use"); - YYERROR; - } - pfctl_init_rule(&r); if (pf->astack[pf->asd + 1]) { @@ -1162,14 +1177,11 @@ anchorrule : ANCHOR anchorname dir quick interface af proto fromto } ; -loadrule : LOAD ANCHOR string FROM string { +loadrule : LOAD ANCHOR anchorname FROM string { struct loadanchors *loadanchor; - if (strlen(pf->anchor->path) + 1 + - strlen($3) >= MAXPATHLEN) { - yyerror("anchorname %s too long, max %u\n", - $3, MAXPATHLEN - 1); - free($3); + if ($3 == NULL) { + yyerror("anchor name is missing"); YYERROR; } loadanchor = calloc(1, sizeof(struct 
loadanchors)); @@ -1251,6 +1263,8 @@ etherpfa_anchor : '{' struct pfctl_eth_ruleset *rs; /* steping into a brace anchor */ + if (pf->asd >= PFCTL_ANCHOR_STACK_DEPTH) + errx(1, "pfa_anchor: anchors too deep"); pf->asd++; pf->bn++; @@ -5424,6 +5438,12 @@ process_tabledef(char *name, struct table_opts *opts, int popts) if (pf->opts & PF_OPT_VERBOSE) print_tabledef(name, opts->flags, opts->init_addr, &opts->init_nodes); + if (!(pf->opts & PF_OPT_NOACTION) || + (pf->opts & PF_OPT_DUMMYACTION)) + warn_duplicate_tables(name, pf->anchor->path); + else if (pf->opts & PF_OPT_VERBOSE) + fprintf(stderr, "%s:%d: skipping duplicate table checks" + " for <%s>\n", file->name, yylval.lineno, name); if (!(pf->opts & PF_OPT_NOACTION) && pfctl_define_table(name, opts->flags, opts->init_addr, pf->anchor->path, &ab, pf->anchor->ruleset.tticket)) { @@ -6918,7 +6938,7 @@ top: if (c == '-' || isdigit(c)) { do { *p++ = c; - if ((unsigned)(p-buf) >= sizeof(buf)) { + if ((size_t)(p-buf) >= sizeof(buf)) { yyerror("string too long"); return (findeol()); } @@ -6957,7 +6977,7 @@ nodigits: if (isalnum(c) || c == ':' || c == '_') { do { *p++ = c; - if ((unsigned)(p-buf) >= sizeof(buf)) { + if ((size_t)(p-buf) >= sizeof(buf)) { yyerror("string too long"); return (findeol()); } diff --git a/sbin/pfctl/pfctl.8 b/sbin/pfctl/pfctl.8 index c7fad58262dc..28efff896956 100644 --- a/sbin/pfctl/pfctl.8 +++ b/sbin/pfctl/pfctl.8 @@ -24,7 +24,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd June 30, 2025 +.Dd July 2, 2025 .Dt PFCTL 8 .Os .Sh NAME @@ -223,6 +223,11 @@ Flush the filter information (statistics that are not bound to rules). Flush the tables. .It Fl F Cm osfp Flush the passive operating system fingerprints. +.It Fl F Cm Reset +Reset limits, timeouts and other options back to default settings. +See the OPTIONS section in +.Xr pf.conf 5 +for details. .It Fl F Cm all Flush all of the above. 
.El @@ -401,7 +406,11 @@ Only print errors and warnings. Load only the filter rules present in the rule file. Other rules and options are ignored. .It Fl r -Perform reverse DNS lookups on states when displaying them. +Perform reverse DNS lookups on states and tables when displaying them. +.Fl N +and +.Fl r +are mutually exclusive. .It Fl s Ar modifier Show the filter parameters specified by .Ar modifier diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c index 8c6497b4d1ee..defba3b56c44 100644 --- a/sbin/pfctl/pfctl.c +++ b/sbin/pfctl/pfctl.c @@ -77,8 +77,9 @@ void pfctl_flush_nat(int, int, char *); int pfctl_clear_altq(int, int); void pfctl_clear_src_nodes(int, int); void pfctl_clear_iface_states(int, const char *, int); -void pfctl_addrprefix(char *, struct pf_addr *); -void pfctl_kill_src_nodes(int, const char *, int); +struct addrinfo * + pfctl_addrprefix(char *, struct pf_addr *, int); +void pfctl_kill_src_nodes(int, int); void pfctl_net_kill_states(int, const char *, int); void pfctl_gateway_kill_states(int, const char *, int); void pfctl_label_kill_states(int, const char *, int); @@ -122,6 +123,7 @@ int pfctl_load_ruleset(struct pfctl *, char *, struct pfctl_ruleset *, int, int); int pfctl_load_rule(struct pfctl *, char *, struct pfctl_rule *, int); const char *pfctl_lookup_option(char *, const char * const *); +void pfctl_reset(int, int); static struct pfctl_anchor_global pf_anchors; struct pfctl_anchor pf_main_anchor; @@ -230,7 +232,7 @@ static const struct { static const char * const clearopt_list[] = { "nat", "queue", "rules", "Sources", "states", "info", "Tables", "osfp", "all", - "ethernet", NULL + "ethernet", "Reset", NULL }; static const char * const showopt_list[] = { @@ -539,35 +541,36 @@ pfctl_clear_iface_states(int dev, const char *iface, int opts) fprintf(stderr, "%d states cleared\n", killed); } -void -pfctl_addrprefix(char *addr, struct pf_addr *mask) +struct addrinfo * +pfctl_addrprefix(char *addr, struct pf_addr *mask, int numeric) { char 
*p; const char *errstr; int prefix, ret_ga, q, r; struct addrinfo hints, *res; - if ((p = strchr(addr, '/')) == NULL) - return; - - *p++ = '\0'; - prefix = strtonum(p, 0, 128, &errstr); - if (errstr) - errx(1, "prefix is %s: %s", errstr, p); - bzero(&hints, sizeof(hints)); - /* prefix only with numeric addresses */ - hints.ai_flags |= AI_NUMERICHOST; + hints.ai_socktype = SOCK_DGRAM; /* dummy */ + if (numeric) + hints.ai_flags = AI_NUMERICHOST; + + if ((p = strchr(addr, '/')) != NULL) { + *p++ = '\0'; + /* prefix only with numeric addresses */ + hints.ai_flags |= AI_NUMERICHOST; + } if ((ret_ga = getaddrinfo(addr, NULL, &hints, &res))) { errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); /* NOTREACHED */ } - if (res->ai_family == AF_INET && prefix > 32) - errx(1, "prefix too long for AF_INET"); - else if (res->ai_family == AF_INET6 && prefix > 128) - errx(1, "prefix too long for AF_INET6"); + if (p == NULL) + return (res); + + prefix = strtonum(p, 0, res->ai_family == AF_INET6 ? 128 : 32, &errstr); + if (errstr) + errx(1, "prefix is %s: %s", errstr, p); q = prefix >> 3; r = prefix & 7; @@ -586,17 +589,17 @@ pfctl_addrprefix(char *addr, struct pf_addr *mask) (0xff00 >> r) & 0xff; break; } - freeaddrinfo(res); + + return (res); } void -pfctl_kill_src_nodes(int dev, const char *iface, int opts) +pfctl_kill_src_nodes(int dev, int opts) { struct pfioc_src_node_kill psnk; struct addrinfo *res[2], *resp[2]; struct sockaddr last_src, last_dst; int killed, sources, dests; - int ret_ga; killed = sources = dests = 0; @@ -606,12 +609,9 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts) memset(&last_src, 0xff, sizeof(last_src)); memset(&last_dst, 0xff, sizeof(last_dst)); - pfctl_addrprefix(src_node_kill[0], &psnk.psnk_src.addr.v.a.mask); + res[0] = pfctl_addrprefix(src_node_kill[0], + &psnk.psnk_src.addr.v.a.mask, (opts & PF_OPT_NODNS)); - if ((ret_ga = getaddrinfo(src_node_kill[0], NULL, NULL, &res[0]))) { - errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); - /* 
NOTREACHED */ - } for (resp[0] = res[0]; resp[0]; resp[0] = resp[0]->ai_next) { if (resp[0]->ai_addr == NULL) continue; @@ -623,29 +623,16 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts) psnk.psnk_af = resp[0]->ai_family; sources++; - if (psnk.psnk_af == AF_INET) - psnk.psnk_src.addr.v.a.addr.v4 = - ((struct sockaddr_in *)resp[0]->ai_addr)->sin_addr; - else if (psnk.psnk_af == AF_INET6) - psnk.psnk_src.addr.v.a.addr.v6 = - ((struct sockaddr_in6 *)resp[0]->ai_addr)-> - sin6_addr; - else - errx(1, "Unknown address family %d", psnk.psnk_af); + copy_satopfaddr(&psnk.psnk_src.addr.v.a.addr, resp[0]->ai_addr); if (src_node_killers > 1) { dests = 0; memset(&psnk.psnk_dst.addr.v.a.mask, 0xff, sizeof(psnk.psnk_dst.addr.v.a.mask)); memset(&last_dst, 0xff, sizeof(last_dst)); - pfctl_addrprefix(src_node_kill[1], - &psnk.psnk_dst.addr.v.a.mask); - if ((ret_ga = getaddrinfo(src_node_kill[1], NULL, NULL, - &res[1]))) { - errx(1, "getaddrinfo: %s", - gai_strerror(ret_ga)); - /* NOTREACHED */ - } + res[1] = pfctl_addrprefix(src_node_kill[1], + &psnk.psnk_dst.addr.v.a.mask, + (opts & PF_OPT_NODNS)); for (resp[1] = res[1]; resp[1]; resp[1] = resp[1]->ai_next) { if (resp[1]->ai_addr == NULL) @@ -660,18 +647,8 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts) dests++; - if (psnk.psnk_af == AF_INET) - psnk.psnk_dst.addr.v.a.addr.v4 = - ((struct sockaddr_in *)resp[1]-> - ai_addr)->sin_addr; - else if (psnk.psnk_af == AF_INET6) - psnk.psnk_dst.addr.v.a.addr.v6 = - ((struct sockaddr_in6 *)resp[1]-> - ai_addr)->sin6_addr; - else - errx(1, "Unknown address family %d", - psnk.psnk_af); - + copy_satopfaddr(&psnk.psnk_dst.addr.v.a.addr, + resp[1]->ai_addr); if (ioctl(dev, DIOCKILLSRCNODES, &psnk)) err(1, "DIOCKILLSRCNODES"); killed += psnk.psnk_killed; @@ -699,7 +676,7 @@ pfctl_net_kill_states(int dev, const char *iface, int opts) struct sockaddr last_src, last_dst; unsigned int newkilled; int killed, sources, dests; - int ret_ga, ret; + int ret; killed = sources =
dests = 0; @@ -718,15 +695,12 @@ pfctl_net_kill_states(int dev, const char *iface, int opts) state_killers = 1; } - pfctl_addrprefix(state_kill[0], &kill.src.addr.v.a.mask); + res[0] = pfctl_addrprefix(state_kill[0], + &kill.src.addr.v.a.mask, (opts & PF_OPT_NODNS)); if (opts & PF_OPT_KILLMATCH) kill.kill_match = true; - if ((ret_ga = getaddrinfo(state_kill[0], NULL, NULL, &res[0]))) { - errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); - /* NOTREACHED */ - } for (resp[0] = res[0]; resp[0]; resp[0] = resp[0]->ai_next) { if (resp[0]->ai_addr == NULL) continue; @@ -738,29 +712,16 @@ pfctl_net_kill_states(int dev, const char *iface, int opts) kill.af = resp[0]->ai_family; sources++; - if (kill.af == AF_INET) - kill.src.addr.v.a.addr.v4 = - ((struct sockaddr_in *)resp[0]->ai_addr)->sin_addr; - else if (kill.af == AF_INET6) - kill.src.addr.v.a.addr.v6 = - ((struct sockaddr_in6 *)resp[0]->ai_addr)-> - sin6_addr; - else - errx(1, "Unknown address family %d", kill.af); + copy_satopfaddr(&kill.src.addr.v.a.addr, resp[0]->ai_addr); if (state_killers > 1) { dests = 0; memset(&kill.dst.addr.v.a.mask, 0xff, sizeof(kill.dst.addr.v.a.mask)); memset(&last_dst, 0xff, sizeof(last_dst)); - pfctl_addrprefix(state_kill[1], - &kill.dst.addr.v.a.mask); - if ((ret_ga = getaddrinfo(state_kill[1], NULL, NULL, - &res[1]))) { - errx(1, "getaddrinfo: %s", - gai_strerror(ret_ga)); - /* NOTREACHED */ - } + res[1] = pfctl_addrprefix(state_kill[1], + &kill.dst.addr.v.a.mask, + (opts & PF_OPT_NODNS)); for (resp[1] = res[1]; resp[1]; resp[1] = resp[1]->ai_next) { if (resp[1]->ai_addr == NULL) @@ -775,17 +736,8 @@ pfctl_net_kill_states(int dev, const char *iface, int opts) dests++; - if (kill.af == AF_INET) - kill.dst.addr.v.a.addr.v4 = - ((struct sockaddr_in *)resp[1]-> - ai_addr)->sin_addr; - else if (kill.af == AF_INET6) - kill.dst.addr.v.a.addr.v6 = - ((struct sockaddr_in6 *)resp[1]-> - ai_addr)->sin6_addr; - else - errx(1, "Unknown address family %d", - kill.af); + 
copy_satopfaddr(&kill.dst.addr.v.a.addr, + resp[1]->ai_addr); if ((ret = pfctl_kill_states_h(pfh, &kill, &newkilled)) != 0) errc(1, ret, "DIOCKILLSTATES"); @@ -814,7 +766,6 @@ pfctl_gateway_kill_states(int dev, const char *iface, int opts) struct sockaddr last_src; unsigned int newkilled; int killed = 0; - int ret_ga; if (state_killers != 2 || (strlen(state_kill[1]) == 0)) { warnx("no gateway specified"); @@ -832,12 +783,9 @@ pfctl_gateway_kill_states(int dev, const char *iface, int opts) if (opts & PF_OPT_KILLMATCH) kill.kill_match = true; - pfctl_addrprefix(state_kill[1], &kill.rt_addr.addr.v.a.mask); + res = pfctl_addrprefix(state_kill[1], &kill.rt_addr.addr.v.a.mask, + (opts & PF_OPT_NODNS)); - if ((ret_ga = getaddrinfo(state_kill[1], NULL, NULL, &res))) { - errx(1, "getaddrinfo: %s", gai_strerror(ret_ga)); - /* NOTREACHED */ - } for (resp = res; resp; resp = resp->ai_next) { if (resp->ai_addr == NULL) continue; @@ -848,16 +796,8 @@ pfctl_gateway_kill_states(int dev, const char *iface, int opts) kill.af = resp->ai_family; - if (kill.af == AF_INET) - kill.rt_addr.addr.v.a.addr.v4 = - ((struct sockaddr_in *)resp->ai_addr)->sin_addr; - else if (kill.af == AF_INET6) - kill.rt_addr.addr.v.a.addr.v6 = - ((struct sockaddr_in6 *)resp->ai_addr)-> - sin6_addr; - else - errx(1, "Unknown address family %d", kill.af); - + copy_satopfaddr(&kill.rt_addr.addr.v.a.addr, + resp->ai_addr); if (pfctl_kill_states_h(pfh, &kill, &newkilled)) err(1, "DIOCKILLSTATES"); killed += newkilled; @@ -1002,8 +942,6 @@ pfctl_parse_host(char *str, struct pf_rule_addr *addr) { char *s = NULL, *sbs, *sbe; struct addrinfo hints, *ai; - struct sockaddr_in *sin4; - struct sockaddr_in6 *sin6; s = strdup(str); if (!s) @@ -1026,19 +964,10 @@ pfctl_parse_host(char *str, struct pf_rule_addr *addr) if (getaddrinfo(s, sbs, &hints, &ai) != 0) goto error; - switch (ai->ai_family) { - case AF_INET: - sin4 = (struct sockaddr_in *)ai->ai_addr; - addr->addr.v.a.addr.v4 = sin4->sin_addr; - addr->port[0] =
sin4->sin_port; - break; - - case AF_INET6: - sin6 = (struct sockaddr_in6 *)ai->ai_addr; - addr->addr.v.a.addr.v6 = sin6->sin6_addr; - addr->port[0] = sin6->sin6_port; - break; - } + copy_satopfaddr(&addr->addr.v.a.addr, ai->ai_addr); + addr->port[0] = ai->ai_family == AF_INET6 ? + ((struct sockaddr_in6 *)ai->ai_addr)->sin6_port : + ((struct sockaddr_in *)ai->ai_addr)->sin_port; freeaddrinfo(ai); free(s); @@ -1703,7 +1632,7 @@ pfctl_show_states(int dev, const char *iface, int opts) struct pfctl_state_filter filter = {}; if (iface != NULL) - strncpy(filter.ifname, iface, IFNAMSIZ); + strlcpy(filter.ifname, iface, IFNAMSIZ); arg.opts = opts; arg.dotitle = opts & PF_OPT_SHOWALL; @@ -2115,7 +2044,6 @@ pfctl_load_ruleset(struct pfctl *pf, char *path, struct pfctl_ruleset *rs, } } else if (pf->opts & PF_OPT_VERBOSE) printf("\n"); - } if (pf->optimize && rs_num == PF_RULESET_FILTER) @@ -2958,7 +2886,7 @@ pfctl_show_anchors(int dev, int opts, char *anchorname) errc(1, ret, "DIOCGETRULESET"); if (!strcmp(pr.name, PF_RESERVED_ANCHOR)) continue; - sub[0] = 0; + sub[0] = '\0'; if (pr.path[0]) { strlcat(sub, pr.path, sizeof(sub)); strlcat(sub, "/", sizeof(sub)); @@ -3020,6 +2948,45 @@ pfctl_lookup_option(char *cmd, const char * const *list) return (NULL); } +void +pfctl_reset(int dev, int opts) +{ + struct pfctl pf; + struct pfr_buffer t; + int i; + + pf.dev = dev; + pf.h = pfh; + pfctl_init_options(&pf); + + /* Force reset upon pfctl_load_options() */ + pf.debug_set = 1; + pf.reass_set = 1; + pf.syncookieswat_set = 1; + pf.ifname = strdup("none"); + if (pf.ifname == NULL) + err(1, "%s: strdup", __func__); + pf.ifname_set = 1; + + memset(&t, 0, sizeof(t)); + t.pfrb_type = PFRB_TRANS; + if (pfctl_trans(dev, &t, DIOCXBEGIN, 0)) + err(1, "%s: DIOCXBEGIN", __func__); + + for (i = 0; pf_limits[i].name; i++) + pf.limit_set[pf_limits[i].index] = 1; + + for (i = 0; pf_timeouts[i].name; i++) + pf.timeout_set[pf_timeouts[i].timeout] = 1; + + pfctl_load_options(&pf); + + if 
(pfctl_trans(dev, &t, DIOCXCOMMIT, 0)) + err(1, "%s: DIOCXCOMMIT", __func__); + + pfctl_clear_interface_flags(dev, opts); +} + int main(int argc, char *argv[]) { @@ -3175,6 +3142,12 @@ main(int argc, char *argv[]) } } + if ((opts & PF_OPT_NODNS) && (opts & PF_OPT_USEDNS)) + errx(1, "-N and -r are mutually exclusive"); + + if ((tblcmdopt == NULL) ^ (tableopt == NULL)) + usage(); + if (tblcmdopt != NULL) { argc -= optind; argv += optind; @@ -3196,6 +3169,8 @@ main(int argc, char *argv[]) if (anchoropt != NULL) { int len = strlen(anchoropt); + if (anchoropt[0] == '\0') + errx(1, "anchor name must not be empty"); if (mode == O_RDONLY && showopt == NULL && tblcmdopt == NULL) { warnx("anchors apply to -f, -F, -s, and -T only"); usage(); @@ -3304,12 +3279,14 @@ main(int argc, char *argv[]) 0); pfctl_show_nat(dev, path, opts, anchorname, 0, 0); - pfctl_show_rules(dev, path, opts, 0, anchorname, 0, 0); + pfctl_show_rules(dev, path, opts, PFCTL_SHOW_RULES, + anchorname, 0, 0); pfctl_show_altq(dev, ifaceopt, opts, 0); pfctl_show_states(dev, ifaceopt, opts); pfctl_show_src_nodes(dev, opts); pfctl_show_status(dev, opts); - pfctl_show_rules(dev, path, opts, 1, anchorname, 0, 0); + pfctl_show_rules(dev, path, opts, PFCTL_SHOW_LABELS, + anchorname, 0, 0); pfctl_show_timeouts(dev, opts); pfctl_show_limits(dev, opts); pfctl_show_tables(anchorname, opts); @@ -3362,6 +3339,11 @@ main(int argc, char *argv[]) pfctl_clear_stats(pfh, opts); break; case 'a': + if (ifaceopt) { + warnx("don't specify an interface with -Fall"); + usage(); + /* NOTREACHED */ + } pfctl_flush_eth_rules(dev, opts, anchorname); pfctl_flush_rules(dev, opts, anchorname); pfctl_flush_nat(dev, opts, anchorname); @@ -3372,7 +3354,7 @@ main(int argc, char *argv[]) pfctl_clear_src_nodes(dev, opts); pfctl_clear_stats(pfh, opts); pfctl_clear_fingerprints(dev, opts); - pfctl_clear_interface_flags(dev, opts); + pfctl_reset(dev, opts); } break; case 'o': @@ -3381,6 +3363,9 @@ main(int argc, char *argv[]) case 'T': 
pfctl_do_clear_tables(anchorname, opts); break; + case 'R': + pfctl_reset(dev, opts); + break; } } if (state_killers) { @@ -3397,10 +3382,10 @@ main(int argc, char *argv[]) } if (src_node_killers) - pfctl_kill_src_nodes(dev, ifaceopt, opts); + pfctl_kill_src_nodes(dev, opts); if (tblcmdopt != NULL) { - error = pfctl_command_tables(argc, argv, tableopt, + error = pfctl_table(argc, argv, tableopt, tblcmdopt, rulesopt, anchorname, opts); rulesopt = NULL; } @@ -3432,9 +3417,6 @@ main(int argc, char *argv[]) if (pfctl_rules(dev, rulesopt, opts, optimize, anchorname, NULL)) error = 1; - else if (!(opts & PF_OPT_NOACTION) && - (loadopt & PFCTL_FLAG_TABLE)) - warn_namespace_collision(NULL); } if (opts & PF_OPT_ENABLE) diff --git a/sbin/pfctl/pfctl.h b/sbin/pfctl/pfctl.h index 5abd5ddcdf8f..d8196c129187 100644 --- a/sbin/pfctl/pfctl.h +++ b/sbin/pfctl/pfctl.h @@ -83,10 +83,10 @@ int pfi_clr_istats(const char *, int *, int); void pfctl_print_title(char *); void pfctl_do_clear_tables(const char *, int); void pfctl_show_tables(const char *, int); -int pfctl_command_tables(int, char *[], char *, const char *, char *, +int pfctl_table(int, char *[], char *, const char *, char *, const char *, int); int pfctl_show_altq(int, const char *, int, int); -void warn_namespace_collision(const char *); +void warn_duplicate_tables(const char *, const char *); void pfctl_show_ifaces(const char *, int); void pfctl_show_creators(int); FILE *pfctl_fopen(const char *, const char *); diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c index 1db98c6103d4..26a213c3ffd9 100644 --- a/sbin/pfctl/pfctl_parser.c +++ b/sbin/pfctl/pfctl_parser.c @@ -66,7 +66,6 @@ #include "pfctl_parser.h" #include "pfctl.h" -void copy_satopfaddr(struct pf_addr *, struct sockaddr *); void print_op (u_int8_t, const char *, const char *); void print_port (u_int8_t, u_int16_t, u_int16_t, const char *, int); void print_ugid (u_int8_t, unsigned, unsigned, const char *, unsigned); @@ -1795,7 +1794,7 @@ host(const 
char *s, int opts) char *p, *ps; const char *errstr; - if ((p = strrchr(s, '/')) != NULL) { + if ((p = strchr(s, '/')) != NULL) { mask = strtonum(p+1, 0, 128, &errstr); if (errstr) { fprintf(stderr, "netmask is %s: %s\n", errstr, p); diff --git a/sbin/pfctl/pfctl_parser.h b/sbin/pfctl/pfctl_parser.h index 91c0f655e008..b91d37c791ae 100644 --- a/sbin/pfctl/pfctl_parser.h +++ b/sbin/pfctl/pfctl_parser.h @@ -276,6 +276,8 @@ struct pf_opt_rule { TAILQ_HEAD(pf_opt_queue, pf_opt_rule); +void copy_satopfaddr(struct pf_addr *, struct sockaddr *); + int pfctl_rules(int, char *, int, int, char *, struct pfr_buffer *); int pfctl_optimize_ruleset(struct pfctl *, struct pfctl_ruleset *); diff --git a/sbin/pfctl/pfctl_table.c b/sbin/pfctl/pfctl_table.c index abe22a3258de..0842b042df41 100644 --- a/sbin/pfctl/pfctl_table.c +++ b/sbin/pfctl/pfctl_table.c @@ -55,8 +55,6 @@ #include "pfctl.h" extern void usage(void); -static int pfctl_table(int, char *[], char *, const char *, char *, - const char *, int); static void print_table(const struct pfr_table *, int, int); static int print_tstats(const struct pfr_tstats *, int); static int load_addr(struct pfr_buffer *, int, char *[], char *, int, int); @@ -87,6 +85,8 @@ static const char *istats_text[2][2][2] = { } while (0) #define CREATE_TABLE do { \ + warn_duplicate_tables(table.pfrt_name, \ + table.pfrt_anchor); \ table.pfrt_flags |= PFR_TFLAG_PERSIST; \ if ((!(opts & PF_OPT_NOACTION) || \ (opts & PF_OPT_DUMMYACTION)) && \ @@ -96,7 +96,6 @@ static const char *istats_text[2][2][2] = { goto _error; \ } \ if (nadd) { \ - warn_namespace_collision(table.pfrt_name); \ xprintf(opts, "%d table created", nadd); \ if (opts & PF_OPT_NOACTION) \ return (0); \ @@ -119,15 +118,6 @@ pfctl_show_tables(const char *anchor, int opts) } int -pfctl_command_tables(int argc, char *argv[], char *tname, - const char *command, char *file, const char *anchor, int opts) -{ - if (tname == NULL || command == NULL) - usage(); - return pfctl_table(argc, argv, tname, 
command, file, anchor, opts); -} - -int pfctl_table(int argc, char *argv[], char *tname, const char *command, char *file, const char *anchor, int opts) { @@ -587,12 +577,10 @@ pfctl_define_table(char *name, int flags, int addrs, const char *anchor, } void -warn_namespace_collision(const char *filter) +warn_duplicate_tables(const char *tablename, const char *anchorname) { struct pfr_buffer b; struct pfr_table *t; - const char *name = NULL, *lastcoll; - int coll = 0; bzero(&b, sizeof(b)); b.pfrb_type = PFRB_TABLES; @@ -608,22 +596,13 @@ warn_namespace_collision(const char *filter) PFRB_FOREACH(t, &b) { if (!(t->pfrt_flags & PFR_TFLAG_ACTIVE)) continue; - if (filter != NULL && strcmp(filter, t->pfrt_name)) + if (!strcmp(anchorname, t->pfrt_anchor)) continue; - if (!t->pfrt_anchor[0]) - name = t->pfrt_name; - else if (name != NULL && !strcmp(name, t->pfrt_name)) { - coll++; - lastcoll = name; - name = NULL; - } + if (!strcmp(tablename, t->pfrt_name)) + warnx("warning: table <%s> already defined" + " in anchor \"%s\"", tablename, + t->pfrt_anchor[0] ? t->pfrt_anchor : "/"); } - if (coll == 1) - warnx("warning: namespace collision with <%s> global table.", - lastcoll); - else if (coll > 1) - warnx("warning: namespace collisions with %d global tables.", - coll); pfr_buf_clear(&b); } diff --git a/sbin/routed/routed.8 b/sbin/routed/routed.8 index 8cf12d7b60e1..334c828b943e 100644 --- a/sbin/routed/routed.8 +++ b/sbin/routed/routed.8 @@ -27,13 +27,20 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd June 27, 2022 +.Dd May 20, 2025 .Dt ROUTED 8 .Os .Sh NAME .Nm routed , .Nm rdisc .Nd network RIP and router discovery routing daemon +.Sh DEPRECATION NOTICE +The +.Nm routed +and +.Nm rdisc +utilities are deprecated and will be removed in +.Fx 16.0 . 
.Sh SYNOPSIS .Nm .Op Fl isqdghmpAtv diff --git a/sbin/routed/rtquery/rtquery.8 b/sbin/routed/rtquery/rtquery.8 index de5e1fc7cf96..ff46a3414dcf 100644 --- a/sbin/routed/rtquery/rtquery.8 +++ b/sbin/routed/rtquery/rtquery.8 @@ -1,11 +1,16 @@ .\" $Revision: 2.27 $ .\" -.Dd June 1, 1996 +.Dd May 20, 2025 .Dt RTQUERY 8 .Os .Sh NAME .Nm rtquery .Nd query routing daemons for their routing tables +.Sh DEPRECATION NOTICE +The +.Nm +utility is deprecated and will be removed in +.Fx 16.0 . .Sh SYNOPSIS .Nm .Op Fl np1 diff --git a/share/examples/Makefile b/share/examples/Makefile index 560fdae6de5b..f0c050a36306 100644 --- a/share/examples/Makefile +++ b/share/examples/Makefile @@ -15,6 +15,7 @@ LDIRS= BSD_daemon \ find_interface \ flua \ indent \ + inotify \ ipfw \ jails \ kld \ @@ -97,6 +98,10 @@ SE_FLUA= libjail.lua SE_DIRS+= indent SE_INDENT= indent.pro +SE_DIRS+= inotify +SE_INOTIFY= inotify.c \ + Makefile + .if ${MK_IPFILTER} != "no" SUBDIR+= ipfilter .endif diff --git a/share/examples/inotify/Makefile b/share/examples/inotify/Makefile new file mode 100644 index 000000000000..c54c629c58d7 --- /dev/null +++ b/share/examples/inotify/Makefile @@ -0,0 +1,6 @@ +PROG= inotify +MAN= + +LIBADD= xo + +.include <bsd.prog.mk> diff --git a/share/examples/inotify/inotify.c b/share/examples/inotify/inotify.c new file mode 100644 index 000000000000..ea63ccd1f337 --- /dev/null +++ b/share/examples/inotify/inotify.c @@ -0,0 +1,172 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +/* + * A simple program to demonstrate inotify. Given one or more paths, it watches + * all events on those paths and prints them to standard output. 
+ */ + +#include <sys/types.h> +#include <sys/event.h> +#include <sys/inotify.h> + +#include <assert.h> +#include <err.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <libxo/xo.h> + +static void +usage(void) +{ + xo_errx(1, "usage: inotify <path1> [<path2> ...]"); +} + +static const char * +ev2str(uint32_t event) +{ + switch (event & IN_ALL_EVENTS) { + case IN_ACCESS: + return ("IN_ACCESS"); + case IN_ATTRIB: + return ("IN_ATTRIB"); + case IN_CLOSE_WRITE: + return ("IN_CLOSE_WRITE"); + case IN_CLOSE_NOWRITE: + return ("IN_CLOSE_NOWRITE"); + case IN_CREATE: + return ("IN_CREATE"); + case IN_DELETE: + return ("IN_DELETE"); + case IN_DELETE_SELF: + return ("IN_DELETE_SELF"); + case IN_MODIFY: + return ("IN_MODIFY"); + case IN_MOVE_SELF: + return ("IN_MOVE_SELF"); + case IN_MOVED_FROM: + return ("IN_MOVED_FROM"); + case IN_MOVED_TO: + return ("IN_MOVED_TO"); + case IN_OPEN: + return ("IN_OPEN"); + default: + switch (event) { + case IN_IGNORED: + return ("IN_IGNORED"); + case IN_Q_OVERFLOW: + return ("IN_Q_OVERFLOW"); + case IN_UNMOUNT: + return ("IN_UNMOUNT"); + } + warnx("unknown event %#x", event); + assert(0); + } +} + +static void +set_handler(int kq, int sig) +{ + struct kevent kev; + + (void)signal(sig, SIG_IGN); + EV_SET(&kev, sig, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); + if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) + xo_err(1, "kevent"); +} + +int +main(int argc, char **argv) +{ + struct inotify_event *iev, *iev1; + struct kevent kev; + size_t ievsz; + int ifd, kq; + + argc = xo_parse_args(argc, argv); + if (argc < 2) + usage(); + argc--; + argv++; + + ifd = inotify_init1(IN_NONBLOCK); + if (ifd < 0) + xo_err(1, "inotify"); + for (int i = 0; i < argc; i++) { + int wd; + + wd = inotify_add_watch(ifd, argv[i], IN_ALL_EVENTS); + if (wd < 0) + xo_err(1, "inotify_add_watch(%s)", argv[i]); + } + + xo_set_version("1"); + xo_open_list("events"); + + kq = kqueue(); + if (kq < 0) + xo_err(1, "kqueue"); + 
+ /* + * Handle signals in the event loop so that we can close the xo list. + */ + set_handler(kq, SIGINT); + set_handler(kq, SIGTERM); + set_handler(kq, SIGHUP); + set_handler(kq, SIGQUIT); + + /* + * Monitor the inotify descriptor for events. + */ + EV_SET(&kev, ifd, EVFILT_READ, EV_ADD, 0, 0, NULL); + if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) + xo_err(1, "kevent"); + + ievsz = sizeof(*iev) + NAME_MAX + 1; + iev = malloc(ievsz); + if (iev == NULL) + err(1, "malloc"); + + for (;;) { + ssize_t n; + const char *ev; + + if (kevent(kq, NULL, 0, &kev, 1, NULL) < 0) + xo_err(1, "kevent"); + if (kev.filter == EVFILT_SIGNAL) + break; + + n = read(ifd, iev, ievsz); + if (n < 0) + xo_err(1, "read"); + assert(n >= (ssize_t)sizeof(*iev)); + + for (iev1 = iev; n > 0;) { + assert(n >= (ssize_t)sizeof(*iev1)); + + ev = ev2str(iev1->mask); + xo_open_instance("event"); + xo_emit("{:wd/%3d} {:event/%16s} {:name/%s}\n", + iev1->wd, ev, iev1->len > 0 ? iev1->name : ""); + xo_close_instance("event"); + + n -= sizeof(*iev1) + iev1->len; + iev1 = (struct inotify_event *)(void *) + ((char *)iev1 + sizeof(*iev1) + iev1->len); + } + (void)xo_flush(); + } + + xo_close_list("events"); + + if (xo_finish() < 0) + xo_err(1, "stdout"); + exit(0); +} diff --git a/share/man/man4/bridge.4 b/share/man/man4/bridge.4 index 7ce734ae87eb..2dff393ebc29 100644 --- a/share/man/man4/bridge.4 +++ b/share/man/man4/bridge.4 @@ -36,7 +36,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd May 28, 2025 +.Dd July 5, 2025 .Dt IF_BRIDGE 4 .Os .Sh NAME @@ -271,6 +271,54 @@ by setting the .Va net.link.bridge.log_stp node using .Xr sysctl 8 . +.Sh VLAN SUPPORT +The +.Nm +driver has full support for virtual LANs (VLANs). +The bridge implements independent VLAN learning, i.e. MAC addresses are +learned on a per-VLAN basis, and the same MAC address may be learned on +multiple interfaces on different VLANs. 
+Incoming frames with an 802.1Q tag will be assigned to the appropriate +VLAN. +.Pp +Traffic sent to or from the host is not assigned to a VLAN by default. +To allow the host to communicate on a VLAN, configure a +.Xr vlan 4 +interface on the bridge and (if necessary) assign IP addresses there. +.Pp +By default no access control is enabled, so any interface may +participate in any VLAN. +.Pp +VLAN filtering may be enabled on an interface using the +.Xr ifconfig 8 +.Cm vlanfilter +option. +When VLAN filtering is enabled, an interface may only send and receive +frames based on its configured VLAN access list. +.Pp +The interface's untagged VLAN ID may be configured using the +.Xr ifconfig 8 +.Cm untagged +option. +If an untagged VLAN ID is configured, incoming frames will be assigned +to that VLAN, and the interface may receive outgoing untagged frames +in that VLAN. +.Pp +The tagged VLAN access list may be configured using the +.Cm tagged , +.Cm +tagged +and +.Cm -tagged +options to +.Xr ifconfig 8 . +An interface may send and receive tagged frames for any VLAN in its +access list. +.Pp +The bridge will automatically insert or remove 802.1q tags as needed, +based on the interface configuration, when forwarding frames between +interfaces. +This tag processing is only done for interfaces with VLAN filtering +enabled. .Sh PACKET FILTERING Packet filtering can be used with any firewall package that hooks in via the .Xr pfil 9 @@ -538,6 +586,7 @@ ifconfig bridge0 addm fxp0 addm gif0 up .Xr ipfw 4 , .Xr netmap 4 , .Xr pf 4 , +.Xr vlan 4 , .Xr ifconfig 8 .Sh HISTORY The diff --git a/share/man/man4/rights.4 b/share/man/man4/rights.4 index 0c24f6b45f88..8f5f6ad9c2d2 100644 --- a/share/man/man4/rights.4 +++ b/share/man/man4/rights.4 @@ -30,7 +30,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd May 1, 2024 +.Dd May 22, 2025 .Dt RIGHTS 4 .Os .Sh NAME @@ -319,6 +319,14 @@ Permit .It Dv CAP_GETSOCKOPT Permit .Xr getsockopt 2 . +.It Dv CAP_INOTIFY_ADD +Permit +.Xr inotify_add_watch 2 +and +.Xr inotify_add_watch_at 2 . +.It Dv CAP_INOTIFY_RM +Permit +.Xr inotify_rm_watch 2 . .It Dv CAP_IOCTL Permit .Xr ioctl 2 . diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5 index 3c9706063a65..fe848b030484 100644 --- a/share/man/man5/pf.conf.5 +++ b/share/man/man5/pf.conf.5 @@ -27,7 +27,7 @@ .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd June 26, 2025 +.Dd July 2, 2025 .Dt PF.CONF 5 .Os .Sh NAME @@ -542,6 +542,9 @@ an ICMP UNREACHABLE is returned for blocked UDP packets, and all other packets are silently dropped. .El .Pp +The default value is +.Cm drop . +.Pp For example: .Bd -literal -offset indent set block-policy return @@ -666,6 +669,8 @@ but can be overridden via this option. Setting this option may leave a small period of time where the fingerprints referenced by the currently active ruleset are inconsistent until the new ruleset finishes loading. +The default location for fingerprints is +.Pa /etc/pf.os . .Pp For example: .Pp diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index aecdde416578..63e9f471f1f1 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,5 +1,5 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. -.Dd June 20, 2025 +.Dd July 5, 2025 .Dt SRC.CONF 5 .Os .Sh NAME @@ -1570,6 +1570,8 @@ utility. Build .Xr rpcbind 8 with warmstart support. +.It Va WITH_RUN_TESTS +Run tests as part of the build. 
.It Va WITHOUT_SCTP_SUPPORT Disable support in the kernel for the .Xr sctp 4 diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index badaee1479f7..f709a4818dd5 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -434,6 +434,7 @@ MAN= accept_filter.9 \ VOP_GETEXTATTR.9 \ VOP_GETPAGES.9 \ VOP_INACTIVE.9 \ + VOP_INOTIFY.9 \ VOP_IOCTL.9 \ VOP_LINK.9 \ VOP_LISTEXTATTR.9 \ @@ -2460,6 +2461,7 @@ MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \ MLINKS+=VOP_FSYNC.9 VOP_FDATASYNC.9 MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9 MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9 +MLINKS+=VOP_INOTIFY.9 VOP_INOTIFY_ADD_WATCH.9 MLINKS+=VOP_LOCK.9 vn_lock.9 \ VOP_LOCK.9 VOP_ISLOCKED.9 \ VOP_LOCK.9 VOP_UNLOCK.9 diff --git a/share/man/man9/VOP_INOTIFY.9 b/share/man/man9/VOP_INOTIFY.9 new file mode 100644 index 000000000000..43b644682153 --- /dev/null +++ b/share/man/man9/VOP_INOTIFY.9 @@ -0,0 +1,60 @@ +.\"- +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Klara, Inc. +.\" +.Dd May 27, 2025 +.Dt VOP_INOTIFY 9 +.Os +.Sh NAME +.Nm VOP_INOTIFY +.Nd vnode inotify interface +.Sh SYNOPSIS +.In sys/param.h +.In sys/vnode.h +.Ft int +.Fo VOP_INOTIFY +.Fa struct vnode *vp +.Fa struct vnode *dvp +.Fa struct componentname *cnp +.Fa int event +.Fa uint32_t cookie +.Fc +.Ft int +.Fo VOP_INOTIFY_ADD_WATCH +.Fa struct vnode *vp +.Fa struct inotify_softc *sc +.Fa uint32_t mask +.Fa uint32_t *wdp +.Fa struct thread *td +.Fc +.Sh DESCRIPTION +The +.Fn VOP_INOTIFY +operation notifies the +.Xr inotify 2 +subsystem of a file system event on a vnode. +The +.Fa dvp +and +.Fa cnp +arguments are optional and are only used to obtain a file name for the event. +If they are omitted, the inotify subsystem will use the file name cache to +find a name for the vnode, but this is more expensive. +.Pp +The +.Fn VOP_INOTIFY_ADD_WATCH +operation is for internal use by the inotify subsystem to add a watch to a +vnode. 
+.Sh LOCKS +The +.Fn VOP_INOTIFY +operation does not assume any particular vnode lock state. +The +.Fn VOP_INOTIFY_ADD_WATCH +operation should be called with the vnode locked. +.Sh RETURN VALUES +Zero is returned on success, otherwise an error code is returned. +.Sh SEE ALSO +.Xr inotify 2 , +.Xr vnode 9 diff --git a/sys/amd64/amd64/efirt_machdep.c b/sys/amd64/amd64/efirt_machdep.c index 81a28ebe97ee..fe5d60c978dd 100644 --- a/sys/amd64/amd64/efirt_machdep.c +++ b/sys/amd64/amd64/efirt_machdep.c @@ -56,6 +56,13 @@ #include <vm/vm_pager.h> #include <vm/vm_radix.h> +/* The EFI regions we're allowed to map. */ +#define EFI_ALLOWED_TYPES_MASK ( \ + 1u << EFI_MD_TYPE_BS_CODE | 1u << EFI_MD_TYPE_BS_DATA | \ + 1u << EFI_MD_TYPE_RT_CODE | 1u << EFI_MD_TYPE_RT_DATA | \ + 1u << EFI_MD_TYPE_FIRMWARE \ +) + static pml5_entry_t *efi_pml5; static pml4_entry_t *efi_pml4; static vm_object_t obj_1t1_pt; @@ -181,6 +188,7 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) vm_offset_t va; uint64_t idx; int bits, i, mode; + bool map_pz = true; obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 + NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG), @@ -198,9 +206,16 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) pmap_pinit_pml4(efi_pmltop_page); } + if ((efi_map_regs & ~EFI_ALLOWED_TYPES_MASK) != 0) { + printf("Ignoring the following runtime EFI regions: %#x\n", + efi_map_regs & ~EFI_ALLOWED_TYPES_MASK); + efi_map_regs &= EFI_ALLOWED_TYPES_MASK; + } + for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, descsz)) { - if ((p->md_attr & EFI_MD_ATTR_RT) == 0) + if ((p->md_attr & EFI_MD_ATTR_RT) == 0 && + !EFI_MAP_BOOTTYPE_ALLOWED(p->md_type)) continue; if (p->md_virt != 0 && p->md_virt != p->md_phys) { if (bootverbose) @@ -256,6 +271,22 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) } } VM_OBJECT_WUNLOCK(obj_1t1_pt); + if (p->md_phys == 0) + map_pz = false; + } + + /* + * Some BIOSes tend to access phys 0 during efirt 
calls, + * so map it if we haven't yet. + */ + if (map_pz) { + VM_OBJECT_WLOCK(obj_1t1_pt); + pte = efi_1t1_pte(0); + /* Assume Write-Back */ + bits = pmap_cache_bits(kernel_pmap, VM_MEMATTR_WRITE_BACK, + false) | X86_PG_RW | X86_PG_V; + pte_store(pte, bits); + VM_OBJECT_WUNLOCK(obj_1t1_pt); } return (true); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 032a134bbd4b..f46462b39fa3 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -188,6 +188,12 @@ struct init_ops init_ops = { */ vm_paddr_t efi_systbl_phys; +/* + * Bitmap of extra EFI memory region types that should be preserved and mapped + * during runtime services calls. + */ +uint32_t efi_map_regs; + /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 @@ -645,7 +651,7 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, * NB: physmap_idx points to the next free slot. */ insert_idx = physmap_idx; - for (i = 0; i <= physmap_idx; i += 2) { + for (i = 0; i < physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; @@ -659,7 +665,7 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, } /* See if we can prepend to the next entry. */ - if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { + if (insert_idx < physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } @@ -670,8 +676,6 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, return (1); } - physmap_idx += 2; - *physmap_idxp = physmap_idx; if (physmap_idx == PHYS_AVAIL_ENTRIES) { printf( "Too many segments in the physical address map, giving up\n"); @@ -682,11 +686,14 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, * Move the last 'N' entries down to make room for the new * entry if needed. 
*/ - for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { + for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } + physmap_idx += 2; + *physmap_idxp = physmap_idx; + /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; @@ -757,6 +764,7 @@ add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); + TUNABLE_INT_FETCH("machdep.efirt.regs", &efi_map_regs); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { @@ -794,10 +802,13 @@ add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, } switch (p->md_type) { - case EFI_MD_TYPE_CODE: - case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: + if (EFI_MAP_BOOTTYPE_ALLOWED(p->md_type)) + continue; + /* FALLTHROUGH */ + case EFI_MD_TYPE_CODE: + case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. 
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 2ab8c3b17e22..9c985df13ddf 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1301,8 +1301,10 @@ static int pmap_change_props_locked(vm_offset_t va, vm_size_t size, static bool pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static bool pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); +static bool pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, + vm_offset_t va, struct rwlock **lockp, vm_page_t mpte); static bool pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, - vm_offset_t va); + vm_offset_t va, vm_page_t m); static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, @@ -1334,7 +1336,7 @@ static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - struct spglist *free, struct rwlock **lockp); + bool remove_pt, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); @@ -5999,7 +6001,7 @@ pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, SLIST_INIT(&free); sva = trunc_2mpage(va); - pmap_remove_pde(pmap, pde, sva, &free, lockp); + pmap_remove_pde(pmap, pde, sva, true, &free, lockp); if ((oldpde & pmap_global_bit(pmap)) == 0) pmap_invalidate_pde_page(pmap, sva, oldpde); vm_page_free_pages_toq(&free, true); @@ -6011,11 +6013,17 @@ static bool pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp) { + return (pmap_demote_pde_mpte(pmap, pde, va, lockp, NULL)); +} + +static bool 
+pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, + struct rwlock **lockp, vm_page_t mpte) +{ pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V; vm_paddr_t mptepa; - vm_page_t mpte; int PG_PTE_CACHE; bool in_kernel; @@ -6028,61 +6036,65 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, PG_PKU_MASK = pmap_pku_mask_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - in_kernel = va >= VM_MAXUSER_ADDRESS; oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); - - /* - * Invalidate the 2MB page mapping and return "failure" if the - * mapping was never accessed. - */ - if ((oldpde & PG_A) == 0) { - KASSERT((oldpde & PG_W) == 0, - ("pmap_demote_pde: a wired mapping is missing PG_A")); - pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp); - return (false); - } - - mpte = pmap_remove_pt_page(pmap, va); + KASSERT((oldpde & PG_MANAGED) == 0 || lockp != NULL, + ("pmap_demote_pde: lockp for a managed mapping is NULL")); + in_kernel = va >= VM_MAXUSER_ADDRESS; if (mpte == NULL) { - KASSERT((oldpde & PG_W) == 0, - ("pmap_demote_pde: page table page for a wired mapping" - " is missing")); - - /* - * If the page table page is missing and the mapping - * is for a kernel address, the mapping must belong to - * the direct map. Page table pages are preallocated - * for every other part of the kernel address space, - * so the direct map region is the only part of the - * kernel address space that must be handled here. - */ - KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && - va < DMAP_MAX_ADDRESS), - ("pmap_demote_pde: No saved mpte for va %#lx", va)); - /* - * If the 2MB page mapping belongs to the direct map - * region of the kernel's address space, then the page - * allocation request specifies the highest possible - * priority (VM_ALLOC_INTERRUPT). Otherwise, the - * priority is normal. 
+ * Invalidate the 2MB page mapping and return "failure" if the + * mapping was never accessed. */ - mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va), - (in_kernel ? VM_ALLOC_INTERRUPT : 0) | VM_ALLOC_WIRED); - - /* - * If the allocation of the new page table page fails, - * invalidate the 2MB page mapping and return "failure". - */ - if (mpte == NULL) { + if ((oldpde & PG_A) == 0) { + KASSERT((oldpde & PG_W) == 0, + ("pmap_demote_pde: a wired mapping is missing PG_A")); pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp); return (false); } - if (!in_kernel) - mpte->ref_count = NPTEPG; + mpte = pmap_remove_pt_page(pmap, va); + if (mpte == NULL) { + KASSERT((oldpde & PG_W) == 0, + ("pmap_demote_pde: page table page for a wired mapping is missing")); + + /* + * If the page table page is missing and the mapping + * is for a kernel address, the mapping must belong to + * the direct map. Page table pages are preallocated + * for every other part of the kernel address space, + * so the direct map region is the only part of the + * kernel address space that must be handled here. + */ + KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && + va < DMAP_MAX_ADDRESS), + ("pmap_demote_pde: No saved mpte for va %#lx", va)); + + /* + * If the 2MB page mapping belongs to the direct map + * region of the kernel's address space, then the page + * allocation request specifies the highest possible + * priority (VM_ALLOC_INTERRUPT). Otherwise, the + * priority is normal. + */ + mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va), + (in_kernel ? VM_ALLOC_INTERRUPT : 0) | + VM_ALLOC_WIRED); + + /* + * If the allocation of the new page table page fails, + * invalidate the 2MB page mapping and return "failure". 
+ */ + if (mpte == NULL) { + pmap_demote_pde_abort(pmap, va, pde, oldpde, + lockp); + return (false); + } + + if (!in_kernel) + mpte->ref_count = NPTEPG; + } } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); @@ -6153,7 +6165,8 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_kernel_pde: Remove a kernel superpage mapping. */ static void -pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, + bool remove_pt) { pd_entry_t newpde; vm_paddr_t mptepa; @@ -6161,7 +6174,10 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mpte = pmap_remove_pt_page(pmap, va); + if (remove_pt) + mpte = pmap_remove_pt_page(pmap, va); + else + mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); @@ -6193,7 +6209,7 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) * pmap_remove_pde: do the things to unmap a superpage in a process */ static int -pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; @@ -6234,7 +6250,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, } } if (pmap == kernel_pmap) { - pmap_remove_kernel_pde(pmap, pdq, sva); + pmap_remove_kernel_pde(pmap, pdq, sva, remove_pt); } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { @@ -6476,7 +6492,8 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; - pmap_remove_pde(pmap, pde, sva, &free, &lock); + pmap_remove_pde(pmap, pde, sva, true, &free, + &lock); continue; } else if (!pmap_demote_pde_locked(pmap, 
pde, sva, &lock)) { @@ -7552,13 +7569,36 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, /* * The reference to the PD page that was acquired by * pmap_alloc_pde() ensures that it won't be freed. - * However, if the PDE resulted from a promotion, then + * However, if the PDE resulted from a promotion, and + * the mapping is not from kernel_pmap, then * a reserved PT page could be freed. */ - (void)pmap_remove_pde(pmap, pde, va, &free, lockp); + (void)pmap_remove_pde(pmap, pde, va, + pmap != kernel_pmap, &free, lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { + if (va >= VM_MAXUSER_ADDRESS) { + /* + * Try to save the ptp in the trie + * before any changes to mappings are + * made. Abort on failure. + */ + mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); + if (pmap_insert_pt_page(pmap, mt, false, false)) { + if (pdpg != NULL) + pdpg->ref_count--; + CTR1(KTR_PMAP, + "pmap_enter_pde: cannot ins kern ptp va %#lx", + va); + return (KERN_RESOURCE_SHORTAGE); + } + /* + * Both pmap_remove_pde() and + * pmap_remove_ptes() will zero-fill + * the kernel page table page. + */ + } pmap_delayed_invl_start(); if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free, lockp)) @@ -7572,14 +7612,6 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, } else { KASSERT(SLIST_EMPTY(&free), ("pmap_enter_pde: freed kernel page table page")); - - /* - * Both pmap_remove_pde() and pmap_remove_ptes() will - * leave the kernel page table page zero filled. - */ - mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt, false, false)) - panic("pmap_enter_pde: trie insert failed"); } } @@ -9547,7 +9579,7 @@ pmap_unmapdev(void *p, vm_size_t size) * Tries to demote a 1GB page mapping. 
*/ static bool -pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) +pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; @@ -9564,12 +9596,19 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) oldpdpe = *pdpe; KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); - pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT, - VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT); - if (pdpg == NULL) { - CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" - " in pmap %p", va, pmap); - return (false); + if (m == NULL) { + pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT, + VM_ALLOC_WIRED); + if (pdpg == NULL) { + CTR2(KTR_PMAP, + "pmap_demote_pdpe: failure for va %#lx in pmap %p", + va, pmap); + return (false); + } + } else { + pdpg = m; + pdpg->pindex = va >> PDPSHIFT; + pmap_pt_page_count_adj(pmap, 1); } pdpgpa = VM_PAGE_TO_PHYS(pdpg); firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa); @@ -9779,7 +9818,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, tmpva += NBPDP; continue; } - if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) + if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva, NULL)) return (ENOMEM); } pde = pmap_pdpe_to_pde(pdpe, tmpva); @@ -9937,11 +9976,13 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, } /* - * Demotes any mapping within the direct map region that covers more than the - * specified range of physical addresses. This range's size must be a power - * of two and its starting address must be a multiple of its size. Since the - * demotion does not change any attributes of the mapping, a TLB invalidation - * is not mandatory. The caller may, however, request a TLB invalidation. + * Demotes any mapping within the direct map region that covers more + * than the specified range of physical addresses. 
This range's size + * must be a power of two and its starting address must be a multiple + * of its size, which means that any pdp from the mapping is fully + * covered by the range if len > NBPDP. Since the demotion does not + * change any attributes of the mapping, a TLB invalidation is not + * mandatory. The caller may, however, request a TLB invalidation. */ void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) @@ -9949,38 +9990,67 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) pdp_entry_t *pdpe; pd_entry_t *pde; vm_offset_t va; - bool changed; + vm_page_t m, mpte; + bool changed, rv __diagused; if (len == 0) return; KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2")); KASSERT((base & (len - 1)) == 0, ("pmap_demote_DMAP: base is not a multiple of len")); + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "pmap_demote_DMAP"); + if (len < NBPDP && base < dmaplimit) { va = PHYS_TO_DMAP(base); changed = false; + + /* + * Assume that it is fine to sleep there. + * The only existing caller of pmap_demote_DMAP() is the + * x86_mr_split_dmap() function. 
+ */ + m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK); + if (len < NBPDR) { + mpte = vm_page_alloc_noobj(VM_ALLOC_WIRED | + VM_ALLOC_WAITOK); + } else + mpte = NULL; + PMAP_LOCK(kernel_pmap); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDPE"); if ((*pdpe & PG_PS) != 0) { - if (!pmap_demote_pdpe(kernel_pmap, pdpe, va)) - panic("pmap_demote_DMAP: PDPE failed"); + rv = pmap_demote_pdpe(kernel_pmap, pdpe, va, m); + KASSERT(rv, ("pmap_demote_DMAP: PDPE failed")); changed = true; + m = NULL; } if (len < NBPDR) { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDE"); if ((*pde & PG_PS) != 0) { - if (!pmap_demote_pde(kernel_pmap, pde, va)) - panic("pmap_demote_DMAP: PDE failed"); + mpte->pindex = pmap_pde_pindex(va); + pmap_pt_page_count_adj(kernel_pmap, 1); + rv = pmap_demote_pde_mpte(kernel_pmap, pde, va, + NULL, mpte); + KASSERT(rv, ("pmap_demote_DMAP: PDE failed")); changed = true; + mpte = NULL; } } if (changed && invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); + if (m != NULL) { + vm_page_unwire_noq(m); + vm_page_free(m); + } + if (mpte != NULL) { + vm_page_unwire_noq(mpte); + vm_page_free(mpte); + } } } diff --git a/sys/amd64/include/efi.h b/sys/amd64/include/efi.h index b47c4aa27ac7..439f2f0b317d 100644 --- a/sys/amd64/include/efi.h +++ b/sys/amd64/include/efi.h @@ -53,6 +53,10 @@ #define EFI_TIME_OWNED() mtx_assert(&atrtc_time_lock, MA_OWNED) #define EFI_RT_HANDLE_FAULTS_DEFAULT 1 + +#define EFI_MAP_BOOTTYPE_ALLOWED(type) (((efi_map_regs >> (type)) & 1) != 0) + +extern uint32_t efi_map_regs; #endif struct efirt_callinfo { diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h index 15e1dfc1a444..f1d9c96a78d7 100644 --- a/sys/amd64/linux/linux_proto.h +++ b/sys/amd64/linux/linux_proto.h @@ -914,10 +914,13 @@ struct linux_inotify_init_args { syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - 
syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c index 8413d2723551..62b50cf68a32 100644 --- a/sys/amd64/linux/linux_sysent.c +++ b/sys/amd64/linux/linux_sysent.c @@ -268,8 +268,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 251 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 252 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 253 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = 
SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 256 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 257 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 258 = linux_mkdirat */ diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c index 20322f7a8660..1dc4de019080 100644 --- a/sys/amd64/linux/linux_systrace_args.c +++ b/sys/amd64/linux/linux_systrace_args.c @@ -1918,12 +1918,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 254: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 255: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -5860,9 +5867,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 254: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 255: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 256: @@ -8353,8 +8383,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 253: /* 
linux_inotify_add_watch */ case 254: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 255: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 256: /* linux_openat */ diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master index fd08c9b0279d..5e1394751ef6 100644 --- a/sys/amd64/linux/syscalls.master +++ b/sys/amd64/linux/syscalls.master @@ -1476,10 +1476,17 @@ int linux_inotify_init(void); } 254 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 255 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } 256 AUE_NULL STD { int linux_migrate_pages(void); diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h index ab0edd99df42..57a303271f1c 100644 --- a/sys/amd64/linux32/linux32_proto.h +++ b/sys/amd64/linux32/linux32_proto.h @@ -983,10 +983,13 @@ struct linux_inotify_init_args { syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; @@ -1184,7 +1187,7 @@ struct linux_pipe2_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_init1_args { - syscallarg_t dummy; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_preadv_args { char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)]; diff --git 
a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c index add9844254ce..1bc8841badf3 100644 --- a/sys/amd64/linux32/linux32_sysent.c +++ b/sys/amd64/linux32/linux32_sysent.c @@ -307,8 +307,8 @@ struct sysent linux32_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = 
SY_THR_STATIC }, /* 296 = linux_mkdirat */ @@ -347,7 +347,7 @@ struct sysent linux32_sysent[] = { { .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */ { .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ + { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ { .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */ { .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */ { .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */ diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c index 7793124e6935..cbd1641c2a34 100644 --- a/sys/amd64/linux32/linux32_systrace_args.c +++ b/sys/amd64/linux32/linux32_systrace_args.c @@ -2036,12 +2036,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 292: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = 
(intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 293: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -2379,7 +2386,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_init1 */ case 332: { - *n_args = 0; + struct linux_inotify_init1_args *p = params; + iarg[a++] = p->flags; /* l_int */ + *n_args = 1; break; } /* linux_preadv */ @@ -6536,9 +6545,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 292: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 293: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 294: @@ -7116,6 +7148,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + switch (ndx) { + case 0: + p = "l_int"; + break; + default: + break; + }; break; /* linux_preadv */ case 333: @@ -9809,8 +9848,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 291: /* linux_inotify_add_watch */ case 292: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 294: /* linux_openat */ @@ -9982,6 +10027,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_preadv */ case 333: if (ndx == 0 || ndx == 1) diff --git a/sys/amd64/linux32/syscalls.master 
b/sys/amd64/linux32/syscalls.master index 92d5f09c423f..7bd522a598e8 100644 --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -1589,10 +1589,17 @@ int linux_inotify_init(void); } 292 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 293 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } ; Linux 2.6.16: 294 AUE_NULL STD { @@ -1860,7 +1867,9 @@ ); } 332 AUE_NULL STD { - int linux_inotify_init1(void); + int linux_inotify_init1( + l_int flags + ); } ; Linux 2.6.30: 333 AUE_NULL STD { diff --git a/sys/arm/allwinner/aw_gpio.c b/sys/arm/allwinner/aw_gpio.c index 18b47bab12d9..2061e38a155f 100644 --- a/sys/arm/allwinner/aw_gpio.c +++ b/sys/arm/allwinner/aw_gpio.c @@ -1154,10 +1154,6 @@ aw_gpio_attach(device_t dev) aw_gpio_register_isrcs(sc); intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) - goto fail; - /* * Register as a pinctrl device */ @@ -1166,6 +1162,10 @@ aw_gpio_attach(device_t dev) fdt_pinctrl_register(dev, "allwinner,pins"); fdt_pinctrl_configure_tree(dev); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) + goto fail; + config_intrhook_oneshot(aw_gpio_enable_bank_supply, sc); return (0); diff --git a/sys/arm/allwinner/aw_rtc.c b/sys/arm/allwinner/aw_rtc.c index 9938601f17ce..4af57ab879e8 100644 --- a/sys/arm/allwinner/aw_rtc.c +++ b/sys/arm/allwinner/aw_rtc.c @@ -134,6 +134,7 @@ static struct ofw_compat_data compat_data[] = { { "allwinner,sun7i-a20-rtc", (uintptr_t) &a20_conf }, { "allwinner,sun6i-a31-rtc", (uintptr_t) &a31_conf }, { "allwinner,sun8i-h3-rtc", (uintptr_t) &h3_conf }, + { "allwinner,sun20i-d1-rtc", (uintptr_t) &h3_conf }, { "allwinner,sun50i-h5-rtc", (uintptr_t) &h3_conf }, { "allwinner,sun50i-h6-rtc", (uintptr_t) &h3_conf }, { NULL, 0 } @@ -147,11 
+148,13 @@ struct aw_rtc_softc { static struct clk_fixed_def aw_rtc_osc32k = { .clkdef.id = 0, + .clkdef.name = "osc32k", .freq = 32768, }; static struct clk_fixed_def aw_rtc_iosc = { .clkdef.id = 2, + .clkdef.name = "iosc", }; static void aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev); @@ -250,23 +253,33 @@ aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev) { int nclocks; node = ofw_bus_get_node(dev); - nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames); - /* No clocks to export */ - if (nclocks <= 0) - return; - if (nclocks != 3) { - device_printf(dev, "Having only %d clocks instead of 3, aborting\n", nclocks); + /* Nothing to do. */ + if (!OF_hasprop(node, "clocks")) return; + + /* + * If the device tree gives us specific output names for the clocks, + * use them. + */ + nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames); + if (nclocks > 0) { + if (nclocks != 3) { + device_printf(dev, + "Found %d clocks names instead of 3, aborting\n", + nclocks); + return; + } + + aw_rtc_osc32k.clkdef.name = clknames[0]; + aw_rtc_iosc.clkdef.name = clknames[2]; } clkdom = clkdom_create(dev); - aw_rtc_osc32k.clkdef.name = clknames[0]; if (clknode_fixed_register(clkdom, &aw_rtc_osc32k) != 0) device_printf(dev, "Cannot register osc32k clock\n"); - aw_rtc_iosc.clkdef.name = clknames[2]; aw_rtc_iosc.freq = sc->conf->iosc_freq; if (clknode_fixed_register(clkdom, &aw_rtc_iosc) != 0) device_printf(dev, "Cannot register iosc clock\n"); diff --git a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c index e4fc57b79ba5..48d1d2af5abc 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c @@ -837,12 +837,12 @@ bcm_gpio_attach(device_t dev) } sc->sc_gpio_npins = i; bcm_gpio_sysctl_init(sc); - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) - goto fail; fdt_pinctrl_register(dev, "brcm,pins"); 
fdt_pinctrl_configure_tree(dev); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) + goto fail; return (0); diff --git a/sys/arm/mv/mvebu_gpio.c b/sys/arm/mv/mvebu_gpio.c index 681cf20f7f9f..7acdfff539dc 100644 --- a/sys/arm/mv/mvebu_gpio.c +++ b/sys/arm/mv/mvebu_gpio.c @@ -810,7 +810,6 @@ mvebu_gpio_attach(device_t dev) return (ENXIO); } - bus_attach_children(dev); return (0); } diff --git a/sys/arm/nvidia/as3722_gpio.c b/sys/arm/nvidia/as3722_gpio.c index 073d057884c9..f7b3d4d43bab 100644 --- a/sys/arm/nvidia/as3722_gpio.c +++ b/sys/arm/nvidia/as3722_gpio.c @@ -544,7 +544,7 @@ as3722_gpio_attach(struct as3722_softc *sc, phandle_t node) sc->gpio_pins = malloc(sizeof(struct as3722_gpio_pin *) * sc->gpio_npins, M_AS3722_GPIO, M_WAITOK | M_ZERO); - sc->gpio_busdev = gpiobus_attach_bus(sc->dev); + sc->gpio_busdev = gpiobus_add_bus(sc->dev); if (sc->gpio_busdev == NULL) return (ENXIO); for (i = 0; i < sc->gpio_npins; i++) { diff --git a/sys/arm/nvidia/tegra_gpio.c b/sys/arm/nvidia/tegra_gpio.c index 16e1ef94d6a9..e37fd69a121e 100644 --- a/sys/arm/nvidia/tegra_gpio.c +++ b/sys/arm/nvidia/tegra_gpio.c @@ -824,7 +824,6 @@ tegra_gpio_attach(device_t dev) return (ENXIO); } - bus_attach_children(dev); return (0); } diff --git a/sys/arm64/apple/apple_pinctrl.c b/sys/arm64/apple/apple_pinctrl.c index ec2dd5907024..ebaaccea1d99 100644 --- a/sys/arm64/apple/apple_pinctrl.c +++ b/sys/arm64/apple/apple_pinctrl.c @@ -161,22 +161,22 @@ apple_pinctrl_attach(device_t dev) goto error; } + fdt_pinctrl_register(dev, "pinmux"); + fdt_pinctrl_configure_tree(dev); + + if (OF_hasprop(node, "interrupt-controller")) { + sc->sc_irqs = mallocarray(sc->sc_ngpios, + sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK); + intr_pic_register(dev, + OF_xref_from_node(ofw_bus_get_node(dev))); + } + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "failed to attach gpiobus\n"); goto error; } - fdt_pinctrl_register(dev, "pinmux"); - 
fdt_pinctrl_configure_tree(dev); - - if (!OF_hasprop(node, "interrupt-controller")) - return (0); - - sc->sc_irqs = mallocarray(sc->sc_ngpios, - sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK); - intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); - return (0); error: mtx_destroy(&sc->sc_mtx); diff --git a/sys/arm64/linux/linux_proto.h b/sys/arm64/linux/linux_proto.h index ae3d8569df58..82f57f77ffae 100644 --- a/sys/arm64/linux/linux_proto.h +++ b/sys/arm64/linux/linux_proto.h @@ -141,10 +141,13 @@ struct linux_inotify_init1_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_ioctl_args { char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)]; diff --git a/sys/arm64/linux/linux_sysent.c b/sys/arm64/linux/linux_sysent.c index 722ada465730..e54a76cfd55e 100644 --- a/sys/arm64/linux/linux_sysent.c +++ b/sys/arm64/linux/linux_sysent.c @@ -41,8 +41,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 24 = linux_dup3 */ { .sy_narg = AS(linux_fcntl_args), .sy_call = (sy_call_t *)linux_fcntl, .sy_auevent = AUE_FCNTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 25 = linux_fcntl */ { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 26 = linux_inotify_init1 */ - { .sy_narg = 
0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */ { .sy_narg = AS(linux_ioctl_args), .sy_call = (sy_call_t *)linux_ioctl, .sy_auevent = AUE_IOCTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 29 = linux_ioctl */ { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 30 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 31 = linux_ioprio_get */ diff --git a/sys/arm64/linux/linux_systrace_args.c b/sys/arm64/linux/linux_systrace_args.c index 54e4dd82355d..1b946a9406a5 100644 --- a/sys/arm64/linux/linux_systrace_args.c +++ b/sys/arm64/linux/linux_systrace_args.c @@ -210,12 +210,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 27: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 28: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* 
uint32_t */ + *n_args = 2; break; } /* linux_ioctl */ @@ -2780,9 +2787,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 27: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 28: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_ioctl */ case 29: @@ -6455,8 +6485,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 27: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 28: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_ioctl */ case 29: if (ndx == 0 || ndx == 1) diff --git a/sys/arm64/linux/syscalls.master b/sys/arm64/linux/syscalls.master index 79c04c398e00..2babdcaf03bf 100644 --- a/sys/arm64/linux/syscalls.master +++ b/sys/arm64/linux/syscalls.master @@ -170,10 +170,17 @@ ); } 27 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 28 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } 29 AUE_IOCTL STD { int linux_ioctl( diff --git a/sys/arm64/nvidia/tegra210/max77620_gpio.c b/sys/arm64/nvidia/tegra210/max77620_gpio.c index 8dcf98099dac..5d91e23324c7 100644 --- a/sys/arm64/nvidia/tegra210/max77620_gpio.c +++ b/sys/arm64/nvidia/tegra210/max77620_gpio.c @@ -672,7 +672,7 @@ max77620_gpio_attach(struct max77620_softc *sc, phandle_t node) sx_init(&sc->gpio_lock, "MAX77620 GPIO lock"); - sc->gpio_busdev = gpiobus_attach_bus(sc->dev); + sc->gpio_busdev = gpiobus_add_bus(sc->dev); if (sc->gpio_busdev == NULL) return (ENXIO); diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c index 
a86392f16624..847bc7394dd0 100644 --- a/sys/arm64/rockchip/rk_gpio.c +++ b/sys/arm64/rockchip/rk_gpio.c @@ -362,12 +362,6 @@ rk_gpio_attach(device_t dev) return (ENXIO); } - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) { - rk_gpio_detach(dev); - return (ENXIO); - } - /* Set the cached value to unknown */ for (i = 0; i < RK_GPIO_MAX_PINS; i++) sc->pin_cached[i].is_gpio = 2; @@ -377,6 +371,12 @@ rk_gpio_attach(device_t dev) sc->swporta_ddr = rk_gpio_read_4(sc, RK_GPIO_SWPORTA_DDR); RK_GPIO_UNLOCK(sc); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) { + rk_gpio_detach(dev); + return (ENXIO); + } + return (0); } diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h index 0f110d5f9ddd..9381396f247c 100644 --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -663,6 +663,7 @@ #define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ #define AUE_TIMERFD 43270 /* FreeBSD/Linux. */ #define AUE_SETCRED 43271 /* FreeBSD-specific. */ +#define AUE_INOTIFY 43272 /* FreeBSD/Linux. 
*/ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index eaa086188b5f..8d2748098c00 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -511,4 +511,6 @@ #define FREEBSD32_SYS_fchroot 590 #define FREEBSD32_SYS_freebsd32_setcred 591 #define FREEBSD32_SYS_exterrctl 592 -#define FREEBSD32_SYS_MAXSYSCALL 593 +#define FREEBSD32_SYS_inotify_add_watch_at 593 +#define FREEBSD32_SYS_inotify_rm_watch 594 +#define FREEBSD32_SYS_MAXSYSCALL 595 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 989f32a5c6f0..bda373268cc5 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -598,4 +598,6 @@ const char *freebsd32_syscallnames[] = { "fchroot", /* 590 = fchroot */ "freebsd32_setcred", /* 591 = freebsd32_setcred */ "exterrctl", /* 592 = exterrctl */ + "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ + "inotify_rm_watch", /* 594 = inotify_rm_watch */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 476fe2ac3f80..ef0aff8bf852 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -659,5 +659,7 @@ struct sysent freebsd32_sysent[] = { { .sy_narg = AS(getrlimitusage_args), .sy_call = (sy_call_t *)sys_getrlimitusage, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 589 = getrlimitusage */ { .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */ { .sy_narg = AS(freebsd32_setcred_args), .sy_call = (sy_call_t *)freebsd32_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = freebsd32_setcred */ - { .sy_narg = 
AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ + { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c index cf08938cd5de..37564a737a62 100644 --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3395,6 +3395,24 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } + /* inotify_add_watch_at */ + case 593: { + struct inotify_add_watch_at_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->dfd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 4; + break; + } + /* inotify_rm_watch */ + case 594: { + struct inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->wd; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9172,6 +9190,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* inotify_add_watch_at */ + case 593: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const char *"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* inotify_rm_watch */ + case 594: + switch (ndx) 
{ + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11070,6 +11120,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* inotify_add_watch_at */ + case 593: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* inotify_rm_watch */ + case 594: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/compat/linux/linux_dummy.c b/sys/compat/linux/linux_dummy.c index 35d6debe0da9..19cd55849f65 100644 --- a/sys/compat/linux/linux_dummy.c +++ b/sys/compat/linux/linux_dummy.c @@ -74,9 +74,6 @@ DUMMY(kexec_load); DUMMY(add_key); DUMMY(request_key); DUMMY(keyctl); -/* Linux 2.6.13: */ -DUMMY(inotify_add_watch); -DUMMY(inotify_rm_watch); /* Linux 2.6.16: */ DUMMY(migrate_pages); DUMMY(unshare); @@ -87,7 +84,6 @@ DUMMY(vmsplice); DUMMY(move_pages); /* Linux 2.6.27: */ DUMMY(signalfd4); -DUMMY(inotify_init1); /* Linux 2.6.31: */ DUMMY(perf_event_open); /* Linux 2.6.36: */ diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index 246bc26d85d4..86834a7ecea8 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -32,11 +32,13 @@ #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filedesc.h> +#include <sys/inotify.h> #include <sys/lock.h> #include <sys/mman.h> #include <sys/selinfo.h> #include <sys/pipe.h> #include <sys/proc.h> +#include <sys/specialfd.h> #include <sys/stat.h> #include <sys/sx.h> #include <sys/syscallsubr.h> @@ -1877,3 +1879,122 @@ linux_writev(struct thread *td, struct linux_writev_args *args) freeuio(auio); return (linux_enobufs2eagain(td, args->fd, error)); } + +static int +linux_inotify_init_flags(int l_flags) +{ + int bsd_flags; + + if ((l_flags & ~(LINUX_IN_CLOEXEC | LINUX_IN_NONBLOCK)) != 0) + linux_msg(NULL, "inotify_init1 unsupported flags 0x%x", + l_flags); + + bsd_flags = 0; + if ((l_flags & LINUX_IN_CLOEXEC) != 0) + 
bsd_flags |= O_CLOEXEC; + if ((l_flags & LINUX_IN_NONBLOCK) != 0) + bsd_flags |= O_NONBLOCK; + return (bsd_flags); +} + +static int +inotify_init_common(struct thread *td, int flags) +{ + struct specialfd_inotify si; + + si.flags = linux_inotify_init_flags(flags); + return (kern_specialfd(td, SPECIALFD_INOTIFY, &si)); +} + +#if defined(__i386__) || defined(__amd64__) +int +linux_inotify_init(struct thread *td, struct linux_inotify_init_args *args) +{ + return (inotify_init_common(td, 0)); +} +#endif + +int +linux_inotify_init1(struct thread *td, struct linux_inotify_init1_args *args) +{ + return (inotify_init_common(td, args->flags)); +} + +/* + * The native implementation uses the same values for inotify events as + * libinotify, which gives us binary compatibility with Linux. This simplifies + * the shim implementation a lot, as otherwise we would have to handle read(2) + * calls on inotify descriptors and translate events to Linux's ABI. + */ +_Static_assert(LINUX_IN_ACCESS == IN_ACCESS, + "IN_ACCESS mismatch"); +_Static_assert(LINUX_IN_MODIFY == IN_MODIFY, + "IN_MODIFY mismatch"); +_Static_assert(LINUX_IN_ATTRIB == IN_ATTRIB, + "IN_ATTRIB mismatch"); +_Static_assert(LINUX_IN_CLOSE_WRITE == IN_CLOSE_WRITE, + "IN_CLOSE_WRITE mismatch"); +_Static_assert(LINUX_IN_CLOSE_NOWRITE == IN_CLOSE_NOWRITE, + "IN_CLOSE_NOWRITE mismatch"); +_Static_assert(LINUX_IN_OPEN == IN_OPEN, + "IN_OPEN mismatch"); +_Static_assert(LINUX_IN_MOVED_FROM == IN_MOVED_FROM, + "IN_MOVED_FROM mismatch"); +_Static_assert(LINUX_IN_MOVED_TO == IN_MOVED_TO, + "IN_MOVED_TO mismatch"); +_Static_assert(LINUX_IN_CREATE == IN_CREATE, + "IN_CREATE mismatch"); +_Static_assert(LINUX_IN_DELETE == IN_DELETE, + "IN_DELETE mismatch"); +_Static_assert(LINUX_IN_DELETE_SELF == IN_DELETE_SELF, + "IN_DELETE_SELF mismatch"); +_Static_assert(LINUX_IN_MOVE_SELF == IN_MOVE_SELF, + "IN_MOVE_SELF mismatch"); + +_Static_assert(LINUX_IN_UNMOUNT == IN_UNMOUNT, + "IN_UNMOUNT mismatch"); +_Static_assert(LINUX_IN_Q_OVERFLOW == 
IN_Q_OVERFLOW, + "IN_Q_OVERFLOW mismatch"); +_Static_assert(LINUX_IN_IGNORED == IN_IGNORED, + "IN_IGNORED mismatch"); + +_Static_assert(LINUX_IN_ISDIR == IN_ISDIR, + "IN_ISDIR mismatch"); +_Static_assert(LINUX_IN_ONLYDIR == IN_ONLYDIR, + "IN_ONLYDIR mismatch"); +_Static_assert(LINUX_IN_DONT_FOLLOW == IN_DONT_FOLLOW, + "IN_DONT_FOLLOW mismatch"); +_Static_assert(LINUX_IN_MASK_CREATE == IN_MASK_CREATE, + "IN_MASK_CREATE mismatch"); +_Static_assert(LINUX_IN_MASK_ADD == IN_MASK_ADD, + "IN_MASK_ADD mismatch"); +_Static_assert(LINUX_IN_ONESHOT == IN_ONESHOT, + "IN_ONESHOT mismatch"); +_Static_assert(LINUX_IN_EXCL_UNLINK == IN_EXCL_UNLINK, + "IN_EXCL_UNLINK mismatch"); + +static int +linux_inotify_watch_flags(int l_flags) +{ + if ((l_flags & ~(LINUX_IN_ALL_EVENTS | LINUX_IN_ALL_FLAGS)) != 0) { + linux_msg(NULL, "inotify_add_watch unsupported flags 0x%x", + l_flags); + } + + return (l_flags); +} + +int +linux_inotify_add_watch(struct thread *td, + struct linux_inotify_add_watch_args *args) +{ + return (kern_inotify_add_watch(args->fd, AT_FDCWD, args->pathname, + linux_inotify_watch_flags(args->mask), td)); +} + +int +linux_inotify_rm_watch(struct thread *td, + struct linux_inotify_rm_watch_args *args) +{ + return (kern_inotify_rm_watch(args->fd, args->wd, td)); +} diff --git a/sys/compat/linux/linux_file.h b/sys/compat/linux/linux_file.h index 2e56942b0f40..7448dc597230 100644 --- a/sys/compat/linux/linux_file.h +++ b/sys/compat/linux/linux_file.h @@ -189,6 +189,38 @@ #define LINUX_HUGETLB_FLAG_ENCODE_2GB (31 << LINUX_HUGETLB_FLAG_ENCODE_SHIFT) #define LINUX_HUGETLB_FLAG_ENCODE_16GB (34U << LINUX_HUGETLB_FLAG_ENCODE_SHIFT) +/* inotify flags */ +#define LINUX_IN_ACCESS 0x00000001 +#define LINUX_IN_MODIFY 0x00000002 +#define LINUX_IN_ATTRIB 0x00000004 +#define LINUX_IN_CLOSE_WRITE 0x00000008 +#define LINUX_IN_CLOSE_NOWRITE 0x00000010 +#define LINUX_IN_OPEN 0x00000020 +#define LINUX_IN_MOVED_FROM 0x00000040 +#define LINUX_IN_MOVED_TO 0x00000080 +#define LINUX_IN_CREATE 
0x00000100 +#define LINUX_IN_DELETE 0x00000200 +#define LINUX_IN_DELETE_SELF 0x00000400 +#define LINUX_IN_MOVE_SELF 0x00000800 + +#define LINUX_IN_UNMOUNT 0x00002000 +#define LINUX_IN_Q_OVERFLOW 0x00004000 +#define LINUX_IN_IGNORED 0x00008000 + +#define LINUX_IN_ONLYDIR 0x01000000 +#define LINUX_IN_DONT_FOLLOW 0x02000000 +#define LINUX_IN_EXCL_UNLINK 0x04000000 +#define LINUX_IN_MASK_CREATE 0x10000000 +#define LINUX_IN_MASK_ADD 0x20000000 +#define LINUX_IN_ISDIR 0x40000000 +#define LINUX_IN_ONESHOT 0x80000000 + +#define LINUX_IN_ALL_EVENTS 0x00000fff +#define LINUX_IN_ALL_FLAGS 0xf700e000 + +#define LINUX_IN_NONBLOCK 0x00000800 +#define LINUX_IN_CLOEXEC 0x00080000 + #if defined(_KERNEL) struct l_file_handle { l_uint handle_bytes; diff --git a/sys/conf/files b/sys/conf/files index f6d473b1431b..866901ba4c51 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3173,8 +3173,6 @@ dev/sound/midi/midi.c optional sound dev/sound/midi/mpu401.c optional sound dev/sound/midi/mpu_if.m optional sound dev/sound/midi/mpufoi_if.m optional sound -dev/sound/midi/sequencer.c optional sound -dev/sound/midi/synth_if.m optional sound dev/spibus/acpi_spibus.c optional acpi spibus dev/spibus/ofw_spibus.c optional fdt spibus dev/spibus/spibus.c optional spibus \ @@ -3992,6 +3990,7 @@ kern/vfs_export.c standard kern/vfs_extattr.c standard kern/vfs_hash.c standard kern/vfs_init.c standard +kern/vfs_inotify.c standard kern/vfs_lookup.c standard kern/vfs_mount.c standard kern/vfs_mountroot.c standard diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c index 2987af634866..f9468e0deda0 100644 --- a/sys/dev/gpio/acpi_gpiobus.c +++ b/sys/dev/gpio/acpi_gpiobus.c @@ -36,6 +36,7 @@ #include <dev/gpio/gpiobusvar.h> #include <dev/gpio/acpi_gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include "gpiobus_if.h" diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index 2e2618805e7b..ab7f13177969 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -39,6 
+39,7 @@ #include <sys/sbuf.h> #include <dev/gpio/gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include "gpiobus_if.h" @@ -213,20 +214,40 @@ gpio_pin_is_active(gpio_pin_t pin, bool *active) return (0); } +/* + * Note that this function should only + * be used in cases where a pre-existing + * gpiobus_pin structure exists. In most + * cases, the gpio_pin_get_by_* functions + * suffice. + */ +int +gpio_pin_acquire(gpio_pin_t gpio) +{ + device_t busdev; + + KASSERT(gpio != NULL, ("GPIO pin is NULL.")); + KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL.")); + + busdev = GPIO_GET_BUS(gpio->dev); + if (busdev == NULL) + return (ENXIO); + + return (gpiobus_acquire_pin(busdev, gpio->pin)); +} + void gpio_pin_release(gpio_pin_t gpio) { device_t busdev; - if (gpio == NULL) - return; - + KASSERT(gpio != NULL, ("GPIO pin is NULL.")); KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL.")); busdev = GPIO_GET_BUS(gpio->dev); - if (busdev != NULL) - gpiobus_release_pin(busdev, gpio->pin); + KASSERT(busdev != NULL, ("gpiobus dev is NULL.")); + gpiobus_release_pin(busdev, gpio->pin); free(gpio, M_DEVBUF); } @@ -293,7 +314,7 @@ gpiobus_print_pins(struct gpiobus_ivar *devi, struct sbuf *sb) } device_t -gpiobus_attach_bus(device_t dev) +gpiobus_add_bus(device_t dev) { device_t busdev; @@ -307,8 +328,24 @@ gpiobus_attach_bus(device_t dev) #ifdef FDT ofw_gpiobus_register_provider(dev); #endif - bus_attach_children(dev); + return (busdev); +} + +/* + * Attach a gpiobus child. + * Note that the controller is expected + * to be fully initialized at this point. + */ +device_t +gpiobus_attach_bus(device_t dev) +{ + device_t busdev; + busdev = gpiobus_add_bus(dev); + if (busdev == NULL) + return (NULL); + + bus_attach_children(dev); return (busdev); } @@ -385,14 +422,13 @@ gpiobus_acquire_pin(device_t bus, uint32_t pin) sc = device_get_softc(bus); /* Consistency check. 
*/ if (pin >= sc->sc_npins) { - device_printf(bus, - "invalid pin %d, max: %d\n", pin, sc->sc_npins - 1); - return (-1); + panic("%s: invalid pin %d, max: %d", + device_get_nameunit(bus), pin, sc->sc_npins - 1); } /* Mark pin as mapped and give warning if it's already mapped. */ if (sc->sc_pins[pin].mapped) { device_printf(bus, "warning: pin %d is already mapped\n", pin); - return (-1); + return (EBUSY); } sc->sc_pins[pin].mapped = 1; @@ -400,7 +436,7 @@ gpiobus_acquire_pin(device_t bus, uint32_t pin) } /* Release mapped pin */ -int +void gpiobus_release_pin(device_t bus, uint32_t pin) { struct gpiobus_softc *sc; @@ -408,19 +444,15 @@ gpiobus_release_pin(device_t bus, uint32_t pin) sc = device_get_softc(bus); /* Consistency check. */ if (pin >= sc->sc_npins) { - device_printf(bus, - "invalid pin %d, max=%d\n", - pin, sc->sc_npins - 1); - return (-1); + panic("%s: invalid pin %d, max: %d", + device_get_nameunit(bus), pin, sc->sc_npins - 1); } - if (!sc->sc_pins[pin].mapped) { - device_printf(bus, "pin %d is not mapped\n", pin); - return (-1); - } - sc->sc_pins[pin].mapped = 0; + if (!sc->sc_pins[pin].mapped) + panic("%s: pin %d is not mapped", device_get_nameunit(bus), + pin); - return (0); + sc->sc_pins[pin].mapped = 0; } static int @@ -435,8 +467,7 @@ gpiobus_acquire_child_pins(device_t dev, device_t child) device_printf(child, "cannot acquire pin %d\n", devi->pins[i]); while (--i >= 0) { - (void)gpiobus_release_pin(dev, - devi->pins[i]); + gpiobus_release_pin(dev, devi->pins[i]); } gpiobus_free_ivars(devi); return (EBUSY); diff --git a/sys/dev/sound/midi/sequencer.h b/sys/dev/gpio/gpiobus_internal.h index 22ea0ae6c1b6..de3f57663132 100644 --- a/sys/dev/sound/midi/sequencer.h +++ b/sys/dev/gpio/gpiobus_internal.h @@ -1,8 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2003 Mathew Kanner - * Copyright (c) 1999 Seigo Tanimura + * Copyright (c) 2009 Oleksandr Tymoshenko <gonzo@freebsd.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -25,65 +24,24 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * */ -/* - * Include file for the midi sequence driver. - */ - -#ifndef _SEQUENCER_H_ -#define _SEQUENCER_H_ - -#define NSEQ_MAX 16 +#ifndef __GPIOBUS_INTERNAL_H__ +#define __GPIOBUS_INTERNAL_H__ /* - * many variables should be reduced to a range. Here define a macro + * Functions shared between gpiobus and other bus classes that derive from it; + * these should not be called directly by other drivers. */ - -#define RANGE(var, low, high) (var) = \ -((var)<(low)?(low) : (var)>(high)?(high) : (var)) - -#ifdef _KERNEL - -void seq_timer(void *arg); - -SYSCTL_DECL(_hw_midi_seq); - -extern int seq_debug; - -#define SEQ_DEBUG(y, x) \ - do { \ - if (seq_debug >= y) { \ - (x); \ - } \ - } while (0) - -SYSCTL_DECL(_hw_midi); - -#endif /* _KERNEL */ - -#define SYNTHPROP_MIDI 1 -#define SYNTHPROP_SYNTH 2 -#define SYNTHPROP_RX 4 -#define SYNTHPROP_TX 8 - -struct _midi_cmdtab { - int cmd; - char *name; -}; -typedef struct _midi_cmdtab midi_cmdtab; -extern midi_cmdtab cmdtab_seqevent[]; -extern midi_cmdtab cmdtab_seqioctl[]; -extern midi_cmdtab cmdtab_timer[]; -extern midi_cmdtab cmdtab_seqcv[]; -extern midi_cmdtab cmdtab_seqccmn[]; - -char *midi_cmdname(int cmd, midi_cmdtab * tab); - -enum { - MORE, - TIMERARMED, - QUEUEFULL -}; - +int gpiobus_attach(device_t); +int gpiobus_detach(device_t); +int gpiobus_init_softc(device_t); +int gpiobus_alloc_ivars(struct gpiobus_ivar *); +void gpiobus_free_ivars(struct gpiobus_ivar *); +int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); +int gpiobus_acquire_pin(device_t, uint32_t); +void gpiobus_release_pin(device_t, uint32_t); + +extern driver_t gpiobus_driver; #endif diff --git a/sys/dev/gpio/gpiobusvar.h b/sys/dev/gpio/gpiobusvar.h index 74783e112f89..7f504236a774 100644 --- 
a/sys/dev/gpio/gpiobusvar.h +++ b/sys/dev/gpio/gpiobusvar.h @@ -156,6 +156,8 @@ int gpio_pin_get_by_bus_pinnum(device_t _bus, uint32_t _pinnum, gpio_pin_t *_gp) /* Acquire a pin by child and index (used by direct children of gpiobus). */ int gpio_pin_get_by_child_index(device_t _child, uint32_t _idx, gpio_pin_t *_gp); +/* Acquire a pin from an existing gpio_pin_t. */ +int gpio_pin_acquire(gpio_pin_t gpio); /* Release a pin acquired via any gpio_pin_get_xxx() function. */ void gpio_pin_release(gpio_pin_t gpio); @@ -167,22 +169,9 @@ int gpio_pin_setflags(gpio_pin_t pin, uint32_t flags); struct resource *gpio_alloc_intr_resource(device_t consumer_dev, int *rid, u_int alloc_flags, gpio_pin_t pin, uint32_t intr_mode); -/* - * Functions shared between gpiobus and other bus classes that derive from it; - * these should not be called directly by other drivers. - */ int gpio_check_flags(uint32_t, uint32_t); +device_t gpiobus_add_bus(device_t); device_t gpiobus_attach_bus(device_t); int gpiobus_detach_bus(device_t); -int gpiobus_attach(device_t); -int gpiobus_detach(device_t); -int gpiobus_init_softc(device_t); -int gpiobus_alloc_ivars(struct gpiobus_ivar *); -void gpiobus_free_ivars(struct gpiobus_ivar *); -int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); -int gpiobus_acquire_pin(device_t, uint32_t); -int gpiobus_release_pin(device_t, uint32_t); - -extern driver_t gpiobus_driver; #endif /* __GPIOBUS_H__ */ diff --git a/sys/dev/gpio/gpiopps.c b/sys/dev/gpio/gpiopps.c index bb8afa5e062c..82620a50a798 100644 --- a/sys/dev/gpio/gpiopps.c +++ b/sys/dev/gpio/gpiopps.c @@ -160,7 +160,7 @@ gpiopps_detach(device_t dev) if (sc->ires != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->irid, sc->ires); if (sc->gpin != NULL) - gpiobus_release_pin(GPIO_GET_BUS(sc->gpin->dev), sc->gpin->pin); + gpio_pin_release(sc->gpin); return (0); } diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c index 32dc5b55e698..fc5fb03d6824 100644 --- a/sys/dev/gpio/ofw_gpiobus.c 
+++ b/sys/dev/gpio/ofw_gpiobus.c @@ -36,6 +36,7 @@ #include <sys/module.h> #include <dev/gpio/gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include <dev/ofw/ofw_bus.h> #include "gpiobus_if.h" diff --git a/sys/dev/gpio/pl061.c b/sys/dev/gpio/pl061.c index cc39790322b6..87d4310a6396 100644 --- a/sys/dev/gpio/pl061.c +++ b/sys/dev/gpio/pl061.c @@ -487,14 +487,21 @@ pl061_attach(device_t dev) } } + mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN); + + if (sc->sc_xref != 0 && !intr_pic_register(dev, sc->sc_xref)) { + device_printf(dev, "couldn't register PIC\n"); + PL061_LOCK_DESTROY(sc); + goto free_isrc; + } + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "couldn't attach gpio bus\n"); + PL061_LOCK_DESTROY(sc); goto free_isrc; } - mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN); - return (0); free_isrc: @@ -503,6 +510,7 @@ free_isrc: * for (irq = 0; irq < PL061_NUM_GPIO; irq++) * intr_isrc_deregister(PIC_INTR_ISRC(sc, irq)); */ + bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_irq_hdlr); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid, sc->sc_irq_res); free_pic: diff --git a/sys/dev/gpio/pl061.h b/sys/dev/gpio/pl061.h index 809a1168493d..d9fe23e502b1 100644 --- a/sys/dev/gpio/pl061.h +++ b/sys/dev/gpio/pl061.h @@ -46,6 +46,7 @@ struct pl061_softc { struct resource *sc_mem_res; struct resource *sc_irq_res; void *sc_irq_hdlr; + intptr_t sc_xref; int sc_mem_rid; int sc_irq_rid; struct pl061_pin_irqsrc sc_isrcs[PL061_NUM_GPIO]; diff --git a/sys/dev/gpio/pl061_acpi.c b/sys/dev/gpio/pl061_acpi.c index f5885025083e..8e9921261e4e 100644 --- a/sys/dev/gpio/pl061_acpi.c +++ b/sys/dev/gpio/pl061_acpi.c @@ -67,19 +67,12 @@ pl061_acpi_probe(device_t dev) static int pl061_acpi_attach(device_t dev) { - int error; + struct pl061_softc *sc; - error = pl061_attach(dev); - if (error != 0) - return (error); + sc = device_get_softc(dev); + sc->sc_xref = ACPI_GPIO_XREF; - if 
(!intr_pic_register(dev, ACPI_GPIO_XREF)) { - device_printf(dev, "couldn't register PIC\n"); - pl061_detach(dev); - error = ENXIO; - } - - return (error); + return (pl061_attach(dev)); } static device_method_t pl061_acpi_methods[] = { diff --git a/sys/dev/gpio/pl061_fdt.c b/sys/dev/gpio/pl061_fdt.c index aa22298b43c6..681b3ccdfdeb 100644 --- a/sys/dev/gpio/pl061_fdt.c +++ b/sys/dev/gpio/pl061_fdt.c @@ -61,19 +61,12 @@ pl061_fdt_probe(device_t dev) static int pl061_fdt_attach(device_t dev) { - int error; + struct pl061_softc *sc; - error = pl061_attach(dev); - if (error != 0) - return (error); + sc = device_get_softc(dev); + sc->sc_xref = OF_xref_from_node(ofw_bus_get_node(dev)); - if (!intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev)))) { - device_printf(dev, "couldn't register PIC\n"); - pl061_detach(dev); - error = ENXIO; - } - - return (error); + return (pl061_attach(dev)); } static device_method_t pl061_fdt_methods[] = { diff --git a/sys/dev/gpio/qoriq_gpio.c b/sys/dev/gpio/qoriq_gpio.c index 25dfccede29f..8b44cd256c79 100644 --- a/sys/dev/gpio/qoriq_gpio.c +++ b/sys/dev/gpio/qoriq_gpio.c @@ -369,11 +369,6 @@ qoriq_gpio_attach(device_t dev) for (i = 0; i <= MAXPIN; i++) sc->sc_pins[i].gp_caps = DEFAULT_CAPS; - sc->busdev = gpiobus_attach_bus(dev); - if (sc->busdev == NULL) { - qoriq_gpio_detach(dev); - return (ENOMEM); - } /* * Enable the GPIO Input Buffer for all GPIOs. 
* This is safe on devices without a GPIBE register, because those @@ -384,6 +379,12 @@ qoriq_gpio_attach(device_t dev) OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + sc->busdev = gpiobus_attach_bus(dev); + if (sc->busdev == NULL) { + qoriq_gpio_detach(dev); + return (ENOMEM); + } + return (0); } diff --git a/sys/dev/iicbus/gpio/tca64xx.c b/sys/dev/iicbus/gpio/tca64xx.c index 3b3bca9936f1..cd011ae9be75 100644 --- a/sys/dev/iicbus/gpio/tca64xx.c +++ b/sys/dev/iicbus/gpio/tca64xx.c @@ -261,14 +261,13 @@ tca64xx_attach(device_t dev) sc->addr = iicbus_get_addr(dev); mtx_init(&sc->mtx, "tca64xx gpio", "gpio", MTX_DEF); + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "Could not create busdev child\n"); return (ENXIO); } - OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - #ifdef DEBUG switch (sc->chip) { case TCA6416_TYPE: diff --git a/sys/dev/mem/memutil.c b/sys/dev/mem/memutil.c index cf9714d6ec8f..20ce337df0ab 100644 --- a/sys/dev/mem/memutil.c +++ b/sys/dev/mem/memutil.c @@ -26,15 +26,14 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include <sys/param.h> +#include <sys/systm.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/memrange.h> -#include <sys/rwlock.h> -#include <sys/systm.h> +#include <sys/sx.h> -static struct rwlock mr_lock; +static struct sx mr_lock; /* * Implementation-neutral, kernel-callable functions for manipulating @@ -46,7 +45,7 @@ mem_range_init(void) if (mem_range_softc.mr_op == NULL) return; - rw_init(&mr_lock, "memrange"); + sx_init(&mr_lock, "memrange"); mem_range_softc.mr_op->init(&mem_range_softc); } @@ -56,7 +55,7 @@ mem_range_destroy(void) if (mem_range_softc.mr_op == NULL) return; - rw_destroy(&mr_lock); + sx_destroy(&mr_lock); } int @@ -67,12 +66,12 @@ mem_range_attr_get(struct mem_range_desc *mrd, int *arg) if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); nd = *arg; - rw_rlock(&mr_lock); + sx_slock(&mr_lock); if (nd == 0) *arg = mem_range_softc.mr_ndesc; else bcopy(mem_range_softc.mr_desc, mrd, nd * sizeof(*mrd)); - rw_runlock(&mr_lock); + sx_sunlock(&mr_lock); return (0); } @@ -83,8 +82,8 @@ mem_range_attr_set(struct mem_range_desc *mrd, int *arg) if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); - rw_wlock(&mr_lock); + sx_xlock(&mr_lock); ret = mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg); - rw_wunlock(&mr_lock); + sx_xunlock(&mr_lock); return (ret); } diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index ee37bda36496..395310b115fb 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -131,30 +131,14 @@ static struct harvest_context { /* The context of the kernel thread processing harvested entropy */ struct proc *hc_kthread_proc; /* - * Lockless ring buffer holding entropy events - * If ring.in == ring.out, - * the buffer is empty. - * If ring.in != ring.out, - * the buffer contains harvested entropy. - * If (ring.in + 1) == ring.out (mod RANDOM_RING_MAX), - * the buffer is full. 
- * - * NOTE: ring.in points to the last added element, - * and ring.out points to the last consumed element. - * - * The ring.in variable needs locking as there are multiple - * sources to the ring. Only the sources may change ring.in, - * but the consumer may examine it. - * - * The ring.out variable does not need locking as there is - * only one consumer. Only the consumer may change ring.out, - * but the sources may examine it. + * A pair of buffers for queued events. New events are added to the + * active queue while the kthread processes the other one in parallel. */ - struct entropy_ring { + struct entropy_buffer { struct harvest_event ring[RANDOM_RING_MAX]; - volatile u_int in; - volatile u_int out; - } hc_entropy_ring; + u_int pos; + } hc_entropy_buf[2]; + u_int hc_active_buf; struct fast_entropy_accumulator { volatile u_int pos; uint32_t buf[RANDOM_ACCUM_MAX]; @@ -183,37 +167,41 @@ random_harvestq_fast_process_event(struct harvest_event *event) static void random_kthread(void) { - u_int maxloop, ring_out, i; + struct harvest_context *hc; - /* - * Locking is not needed as this is the only place we modify ring.out, and - * we only examine ring.in without changing it. Both of these are volatile, - * and this is a unique thread. - */ + hc = &harvest_context; for (random_kthread_control = 1; random_kthread_control;) { - /* Deal with events, if any. Restrict the number we do in one go. */ - maxloop = RANDOM_RING_MAX; - while (harvest_context.hc_entropy_ring.out != harvest_context.hc_entropy_ring.in) { - ring_out = (harvest_context.hc_entropy_ring.out + 1)%RANDOM_RING_MAX; - random_harvestq_fast_process_event(harvest_context.hc_entropy_ring.ring + ring_out); - harvest_context.hc_entropy_ring.out = ring_out; - if (!--maxloop) - break; - } + struct entropy_buffer *buf; + u_int entries; + + /* Deal with queued events. 
*/ + RANDOM_HARVEST_LOCK(); + buf = &hc->hc_entropy_buf[hc->hc_active_buf]; + entries = buf->pos; + buf->pos = 0; + hc->hc_active_buf = (hc->hc_active_buf + 1) % + nitems(hc->hc_entropy_buf); + RANDOM_HARVEST_UNLOCK(); + for (u_int i = 0; i < entries; i++) + random_harvestq_fast_process_event(&buf->ring[i]); + + /* Poll sources of noise. */ random_sources_feed(); + /* XXX: FIX!! Increase the high-performance data rate? Need some measurements first. */ - for (i = 0; i < RANDOM_ACCUM_MAX; i++) { - if (harvest_context.hc_entropy_fast_accumulator.buf[i]) { - random_harvest_direct(harvest_context.hc_entropy_fast_accumulator.buf + i, sizeof(harvest_context.hc_entropy_fast_accumulator.buf[0]), RANDOM_UMA); - harvest_context.hc_entropy_fast_accumulator.buf[i] = 0; + for (u_int i = 0; i < RANDOM_ACCUM_MAX; i++) { + if (hc->hc_entropy_fast_accumulator.buf[i]) { + random_harvest_direct(&hc->hc_entropy_fast_accumulator.buf[i], + sizeof(hc->hc_entropy_fast_accumulator.buf[0]), RANDOM_UMA); + hc->hc_entropy_fast_accumulator.buf[i] = 0; } } /* XXX: FIX!! This is a *great* place to pass hardware/live entropy to random(9) */ - tsleep_sbt(&harvest_context.hc_kthread_proc, 0, "-", + tsleep_sbt(&hc->hc_kthread_proc, 0, "-", SBT_1S/RANDOM_KTHREAD_HZ, 0, C_PREL(1)); } random_kthread_control = -1; - wakeup(&harvest_context.hc_kthread_proc); + wakeup(&hc->hc_kthread_proc); kproc_exit(0); /* NOTREACHED */ } @@ -435,7 +423,7 @@ random_harvestq_init(void *unused __unused) hc_source_mask = almost_everything_mask; RANDOM_HARVEST_INIT_LOCK(); - harvest_context.hc_entropy_ring.in = harvest_context.hc_entropy_ring.out = 0; + harvest_context.hc_active_buf = 0; } SYSINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_init, NULL); @@ -540,9 +528,9 @@ SYSUNINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_d * This is supposed to be fast; do not do anything slow in here! * It is also illegal (and morally reprehensible) to insert any * high-rate data here. 
"High-rate" is defined as a data source - * that will usually cause lots of failures of the "Lockless read" - * check a few lines below. This includes the "always-on" sources - * like the Intel "rdrand" or the VIA Nehamiah "xstore" sources. + * that is likely to fill up the buffer in much less than 100ms. + * This includes the "always-on" sources like the Intel "rdrand" + * or the VIA Nehamiah "xstore" sources. */ /* XXXRW: get_cyclecount() is cheap on most modern hardware, where cycle * counters are built in, but on older hardware it will do a real time clock @@ -551,28 +539,29 @@ SYSUNINIT(random_device_h_init, SI_SUB_RANDOM, SI_ORDER_THIRD, random_harvestq_d void random_harvest_queue_(const void *entropy, u_int size, enum random_entropy_source origin) { + struct harvest_context *hc; + struct entropy_buffer *buf; struct harvest_event *event; - u_int ring_in; - KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, ("%s: origin %d invalid\n", __func__, origin)); + KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, + ("%s: origin %d invalid", __func__, origin)); + + hc = &harvest_context; RANDOM_HARVEST_LOCK(); - ring_in = (harvest_context.hc_entropy_ring.in + 1)%RANDOM_RING_MAX; - if (ring_in != harvest_context.hc_entropy_ring.out) { - /* The ring is not full */ - event = harvest_context.hc_entropy_ring.ring + ring_in; + buf = &hc->hc_entropy_buf[hc->hc_active_buf]; + if (buf->pos < RANDOM_RING_MAX) { + event = &buf->ring[buf->pos++]; event->he_somecounter = random_get_cyclecount(); event->he_source = origin; - event->he_destination = harvest_context.hc_destination[origin]++; + event->he_destination = hc->hc_destination[origin]++; if (size <= sizeof(event->he_entropy)) { event->he_size = size; memcpy(event->he_entropy, entropy, size); - } - else { + } else { /* Big event, so squash it */ event->he_size = sizeof(event->he_entropy[0]); event->he_entropy[0] = jenkins_hash(entropy, size, (uint32_t)(uintptr_t)event); } - harvest_context.hc_entropy_ring.in = 
ring_in; } RANDOM_HARVEST_UNLOCK(); } diff --git a/sys/dev/regulator/regulator_fixed.c b/sys/dev/regulator/regulator_fixed.c index 0a76da7140a0..55cdb5e4aeae 100644 --- a/sys/dev/regulator/regulator_fixed.c +++ b/sys/dev/regulator/regulator_fixed.c @@ -100,12 +100,8 @@ static struct gpio_entry * regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin) { struct gpio_entry *entry, *tmp; - device_t busdev; int rv; - busdev = GPIO_GET_BUS(gpio_pin->dev); - if (busdev == NULL) - return (NULL); entry = malloc(sizeof(struct gpio_entry), M_FIXEDREGULATOR, M_WAITOK | M_ZERO); @@ -122,8 +118,8 @@ regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin) } /* Reserve pin. */ - /* XXX Can we call gpiobus_acquire_pin() with gpio_list_mtx held? */ - rv = gpiobus_acquire_pin(busdev, gpio_pin->pin); + /* XXX Can we call gpio_pin_acquire() with gpio_list_mtx held? */ + rv = gpio_pin_acquire(gpio_pin); if (rv != 0) { mtx_unlock(&gpio_list_mtx); free(entry, M_FIXEDREGULATOR); diff --git a/sys/dev/sound/midi/midi.c b/sys/dev/sound/midi/midi.c index fbfb69de2913..6753f864ba9c 100644 --- a/sys/dev/sound/midi/midi.c +++ b/sys/dev/sound/midi/midi.c @@ -30,12 +30,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ - /* - * Parts of this file started out as NetBSD: midi.c 1.31 - * They are mostly gone. Still the most obvious will be the state - * machine midi_in - */ - #include <sys/param.h> #include <sys/systm.h> #include <sys/queue.h> @@ -66,7 +60,6 @@ #include "mpu_if.h" #include <dev/sound/midi/midiq.h> -#include "synth_if.h" MALLOC_DEFINE(M_MIDI, "midi buffers", "Midi data allocation area"); #ifndef KOBJMETHOD_END @@ -79,17 +72,6 @@ enum midi_states { MIDI_IN_START, MIDI_IN_SYSEX, MIDI_IN_DATA }; -/* - * The MPU interface current has init() uninit() inqsize() outqsize() - * callback() : fiddle with the tx|rx status. 
- */ - -#include "mpu_if.h" - -/* - * /dev/rmidi Structure definitions - */ - #define MIDI_NAMELEN 16 struct snd_midi { KOBJ_FIELDS; @@ -115,95 +97,13 @@ struct snd_midi { * complete command packets. */ struct proc *async; struct cdev *dev; - struct synth_midi *synth; - int synth_flags; TAILQ_ENTRY(snd_midi) link; }; -struct synth_midi { - KOBJ_FIELDS; - struct snd_midi *m; -}; - -static synth_open_t midisynth_open; -static synth_close_t midisynth_close; -static synth_writeraw_t midisynth_writeraw; -static synth_killnote_t midisynth_killnote; -static synth_startnote_t midisynth_startnote; -static synth_setinstr_t midisynth_setinstr; -static synth_alloc_t midisynth_alloc; -static synth_controller_t midisynth_controller; -static synth_bender_t midisynth_bender; - -static kobj_method_t midisynth_methods[] = { - KOBJMETHOD(synth_open, midisynth_open), - KOBJMETHOD(synth_close, midisynth_close), - KOBJMETHOD(synth_writeraw, midisynth_writeraw), - KOBJMETHOD(synth_setinstr, midisynth_setinstr), - KOBJMETHOD(synth_startnote, midisynth_startnote), - KOBJMETHOD(synth_killnote, midisynth_killnote), - KOBJMETHOD(synth_alloc, midisynth_alloc), - KOBJMETHOD(synth_controller, midisynth_controller), - KOBJMETHOD(synth_bender, midisynth_bender), - KOBJMETHOD_END -}; - -DEFINE_CLASS(midisynth, midisynth_methods, 0); - -/* - * Module Exports & Interface - * - * struct midi_chan *midi_init(MPU_CLASS cls, int unit, int chan, - * void *cookie) - * int midi_uninit(struct snd_midi *) - * - * 0 == no error - * EBUSY or other error - * - * int midi_in(struct snd_midi *, char *buf, int count) - * int midi_out(struct snd_midi *, char *buf, int count) - * - * midi_{in,out} return actual size transfered - * - */ - -/* - * midi_devs tailq, holder of all rmidi instances protected by midistat_lock - */ - TAILQ_HEAD(, snd_midi) midi_devs; -/* - * /dev/midistat variables and declarations, protected by midistat_lock - */ - struct sx mstat_lock; -static int midistat_isopen = 0; -static struct sbuf 
midistat_sbuf; -static struct cdev *midistat_dev; - -/* - * /dev/midistat dev_t declarations - */ - -static d_open_t midistat_open; -static d_close_t midistat_close; -static d_read_t midistat_read; - -static struct cdevsw midistat_cdevsw = { - .d_version = D_VERSION, - .d_open = midistat_open, - .d_close = midistat_close, - .d_read = midistat_read, - .d_name = "midistat", -}; - -/* - * /dev/rmidi dev_t declarations, struct variable access is protected by - * locks contained within the structure. - */ - static d_open_t midi_open; static d_close_t midi_close; static d_ioctl_t midi_ioctl; @@ -222,41 +122,18 @@ static struct cdevsw midi_cdevsw = { .d_name = "rmidi", }; -/* - * Prototypes of library functions - */ - static int midi_destroy(struct snd_midi *, int); -static int midistat_prepare(struct sbuf * s); static int midi_load(void); static int midi_unload(void); -/* - * Misc declr. - */ SYSCTL_NODE(_hw, OID_AUTO, midi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Midi driver"); -static SYSCTL_NODE(_hw_midi, OID_AUTO, stat, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, - "Status device"); int midi_debug; /* XXX: should this be moved into debug.midi? */ SYSCTL_INT(_hw_midi, OID_AUTO, debug, CTLFLAG_RW, &midi_debug, 0, ""); -int midi_dumpraw; -SYSCTL_INT(_hw_midi, OID_AUTO, dumpraw, CTLFLAG_RW, &midi_dumpraw, 0, ""); - -int midi_instroff; -SYSCTL_INT(_hw_midi, OID_AUTO, instroff, CTLFLAG_RW, &midi_instroff, 0, ""); - -int midistat_verbose; -SYSCTL_INT(_hw_midi_stat, OID_AUTO, verbose, CTLFLAG_RW, - &midistat_verbose, 0, ""); - #define MIDI_DEBUG(l,a) if(midi_debug>=l) a -/* - * CODE START - */ void midistat_lock(void) @@ -285,9 +162,6 @@ midistat_lockassert(void) * what unit number is used. * * It is an error to call midi_init with an already used unit/channel combo. 
- * - * Returns NULL on error - * */ struct snd_midi * midi_init(kobj_class_t cls, int unit, int channel, void *cookie) @@ -326,9 +200,6 @@ midi_init(kobj_class_t cls, int unit, int channel, void *cookie) MIDI_DEBUG(1, printf("midiinit #2: unit %d/%d.\n", unit, channel)); m = malloc(sizeof(*m), M_MIDI, M_WAITOK | M_ZERO); - m->synth = malloc(sizeof(*m->synth), M_MIDI, M_WAITOK | M_ZERO); - kobj_init((kobj_t)m->synth, &midisynth_class); - m->synth->m = m; kobj_init((kobj_t)m, cls); inqsize = MPU_INQSIZE(m, cookie); outqsize = MPU_OUTQSIZE(m, cookie); @@ -393,7 +264,6 @@ err2: if (MIDIQ_BUF(m->outq)) free(MIDIQ_BUF(m->outq), M_MIDI); err1: - free(m->synth, M_MIDI); free(m, M_MIDI); err0: midistat_unlock(); @@ -405,9 +275,7 @@ err0: * midi_uninit does not call MIDI_UNINIT, as since this is the implementors * entry point. midi_uninit if fact, does not send any methods. A call to * midi_uninit is a defacto promise that you won't manipulate ch anymore - * */ - int midi_uninit(struct snd_midi *m) { @@ -440,13 +308,6 @@ exit: return err; } -/* - * midi_in: process all data until the queue is full, then discards the rest. - * Since midi_in is a state machine, data discards can cause it to get out of - * whack. Process as much as possible. It calls, wakeup, selnotify and - * psignal at most once. - */ - #ifdef notdef static int midi_lengths[] = {2, 2, 2, 2, 1, 1, 2, 0}; @@ -460,6 +321,12 @@ static int midi_lengths[] = {2, 2, 2, 2, 1, 1, 2, 0}; #define MIDI_SYSEX_START 0xF0 #define MIDI_SYSEX_END 0xF7 +/* + * midi_in: process all data until the queue is full, then discards the rest. + * Since midi_in is a state machine, data discards can cause it to get out of + * whack. Process as much as possible. It calls, wakeup, selnotify and + * psignal at most once. 
+ */ int midi_in(struct snd_midi *m, uint8_t *buf, int size) { @@ -627,9 +494,6 @@ midi_out(struct snd_midi *m, uint8_t *buf, int size) return used; } -/* - * /dev/rmidi#.# device access functions - */ int midi_open(struct cdev *i_dev, int flags, int mode, struct thread *td) { @@ -934,434 +798,6 @@ midi_poll(struct cdev *i_dev, int events, struct thread *td) } /* - * /dev/midistat device functions - * - */ -static int -midistat_open(struct cdev *i_dev, int flags, int mode, struct thread *td) -{ - int error; - - MIDI_DEBUG(1, printf("midistat_open\n")); - - midistat_lock(); - if (midistat_isopen) { - midistat_unlock(); - return EBUSY; - } - midistat_isopen = 1; - sbuf_new(&midistat_sbuf, NULL, 4096, SBUF_AUTOEXTEND); - error = (midistat_prepare(&midistat_sbuf) > 0) ? 0 : ENOMEM; - if (error) - midistat_isopen = 0; - midistat_unlock(); - return error; -} - -static int -midistat_close(struct cdev *i_dev, int flags, int mode, struct thread *td) -{ - MIDI_DEBUG(1, printf("midistat_close\n")); - midistat_lock(); - if (!midistat_isopen) { - midistat_unlock(); - return EBADF; - } - sbuf_delete(&midistat_sbuf); - midistat_isopen = 0; - midistat_unlock(); - return 0; -} - -static int -midistat_read(struct cdev *i_dev, struct uio *uio, int flag) -{ - long l; - int err; - - MIDI_DEBUG(4, printf("midistat_read\n")); - midistat_lock(); - if (!midistat_isopen) { - midistat_unlock(); - return EBADF; - } - if (uio->uio_offset < 0 || uio->uio_offset > sbuf_len(&midistat_sbuf)) { - midistat_unlock(); - return EINVAL; - } - err = 0; - l = lmin(uio->uio_resid, sbuf_len(&midistat_sbuf) - uio->uio_offset); - if (l > 0) { - err = uiomove(sbuf_data(&midistat_sbuf) + uio->uio_offset, l, - uio); - } - midistat_unlock(); - return err; -} - -/* - * Module library functions - */ - -static int -midistat_prepare(struct sbuf *s) -{ - struct snd_midi *m; - - midistat_lockassert(); - - sbuf_printf(s, "FreeBSD Midi Driver (midi2)\n"); - if (TAILQ_EMPTY(&midi_devs)) { - sbuf_printf(s, "No devices 
installed.\n"); - sbuf_finish(s); - return sbuf_len(s); - } - sbuf_printf(s, "Installed devices:\n"); - - TAILQ_FOREACH(m, &midi_devs, link) { - mtx_lock(&m->lock); - sbuf_printf(s, "%s [%d/%d:%s]", m->name, m->unit, m->channel, - MPU_PROVIDER(m, m->cookie)); - sbuf_printf(s, "%s", MPU_DESCR(m, m->cookie, midistat_verbose)); - sbuf_printf(s, "\n"); - mtx_unlock(&m->lock); - } - - sbuf_finish(s); - return sbuf_len(s); -} - -#ifdef notdef -/* - * Convert IOCTL command to string for debugging - */ - -static char * -midi_cmdname(int cmd) -{ - static struct { - int cmd; - char *name; - } *tab, cmdtab_midiioctl[] = { -#define A(x) {x, ## x} - /* - * Once we have some real IOCTLs define, the following will - * be relavant. - * - * A(SNDCTL_MIDI_PRETIME), A(SNDCTL_MIDI_MPUMODE), - * A(SNDCTL_MIDI_MPUCMD), A(SNDCTL_SYNTH_INFO), - * A(SNDCTL_MIDI_INFO), A(SNDCTL_SYNTH_MEMAVL), - * A(SNDCTL_FM_LOAD_INSTR), A(SNDCTL_FM_4OP_ENABLE), - * A(MIOSPASSTHRU), A(MIOGPASSTHRU), A(AIONWRITE), - * A(AIOGSIZE), A(AIOSSIZE), A(AIOGFMT), A(AIOSFMT), - * A(AIOGMIX), A(AIOSMIX), A(AIOSTOP), A(AIOSYNC), - * A(AIOGCAP), - */ -#undef A - { - -1, "unknown" - }, - }; - - for (tab = cmdtab_midiioctl; tab->cmd != cmd && tab->cmd != -1; tab++); - return tab->name; -} - -#endif /* notdef */ - -/* - * midisynth - */ - -int -midisynth_open(void *n, void *arg, int flags) -{ - struct snd_midi *m = ((struct synth_midi *)n)->m; - int retval; - - MIDI_DEBUG(1, printf("midisynth_open %s %s\n", - flags & FREAD ? "M_RX" : "", flags & FWRITE ? 
"M_TX" : "")); - - if (m == NULL) - return ENXIO; - - mtx_lock(&m->lock); - mtx_lock(&m->qlock); - - retval = 0; - - if (flags & FREAD) { - if (MIDIQ_SIZE(m->inq) == 0) - retval = ENXIO; - else if (m->flags & M_RX) - retval = EBUSY; - if (retval) - goto err; - } - if (flags & FWRITE) { - if (MIDIQ_SIZE(m->outq) == 0) - retval = ENXIO; - else if (m->flags & M_TX) - retval = EBUSY; - if (retval) - goto err; - } - m->busy++; - - /* - * TODO: Consider m->async = 0; - */ - - if (flags & FREAD) { - m->flags |= M_RX | M_RXEN; - /* - * Only clear the inq, the outq might still have data to drain - * from a previous session - */ - MIDIQ_CLEAR(m->inq); - m->rchan = 0; - } - - if (flags & FWRITE) { - m->flags |= M_TX; - m->wchan = 0; - } - m->synth_flags = flags & (FREAD | FWRITE); - - MPU_CALLBACK(m, m->cookie, m->flags); - -err: mtx_unlock(&m->qlock); - mtx_unlock(&m->lock); - MIDI_DEBUG(2, printf("midisynth_open: return %d.\n", retval)); - return retval; -} - -int -midisynth_close(void *n) -{ - struct snd_midi *m = ((struct synth_midi *)n)->m; - int retval; - int oldflags; - - MIDI_DEBUG(1, printf("midisynth_close %s %s\n", - m->synth_flags & FREAD ? "M_RX" : "", - m->synth_flags & FWRITE ? "M_TX" : "")); - - if (m == NULL) - return ENXIO; - - mtx_lock(&m->lock); - mtx_lock(&m->qlock); - - if ((m->synth_flags & FREAD && !(m->flags & M_RX)) || - (m->synth_flags & FWRITE && !(m->flags & M_TX))) { - retval = ENXIO; - goto err; - } - m->busy--; - - oldflags = m->flags; - - if (m->synth_flags & FREAD) - m->flags &= ~(M_RX | M_RXEN); - if (m->synth_flags & FWRITE) - m->flags &= ~M_TX; - - if ((m->flags & (M_TXEN | M_RXEN)) != (oldflags & (M_RXEN | M_TXEN))) - MPU_CALLBACK(m, m->cookie, m->flags); - - MIDI_DEBUG(1, printf("midi_close: closed, busy = %d.\n", m->busy)); - - mtx_unlock(&m->qlock); - mtx_unlock(&m->lock); - retval = 0; -err: return retval; -} - -/* - * Always blocking. 
- */ - -int -midisynth_writeraw(void *n, uint8_t *buf, size_t len) -{ - struct snd_midi *m = ((struct synth_midi *)n)->m; - int retval; - int used; - int i; - - MIDI_DEBUG(4, printf("midisynth_writeraw\n")); - - retval = 0; - - if (m == NULL) - return ENXIO; - - mtx_lock(&m->lock); - mtx_lock(&m->qlock); - - if (!(m->flags & M_TX)) - goto err1; - - if (midi_dumpraw) - printf("midi dump: "); - - while (len > 0) { - while (MIDIQ_AVAIL(m->outq) == 0) { - if (!(m->flags & M_TXEN)) { - m->flags |= M_TXEN; - MPU_CALLBACK(m, m->cookie, m->flags); - } - mtx_unlock(&m->lock); - m->wchan = 1; - MIDI_DEBUG(3, printf("midisynth_writeraw msleep\n")); - retval = msleep(&m->wchan, &m->qlock, - PCATCH | PDROP, "midi TX", 0); - /* - * We slept, maybe things have changed since last - * dying check - */ - if (retval == EINTR) - goto err0; - - if (retval) - goto err0; - mtx_lock(&m->lock); - mtx_lock(&m->qlock); - m->wchan = 0; - if (!m->busy) - goto err1; - } - - /* - * We are certain than data can be placed on the queue - */ - - used = MIN(MIDIQ_AVAIL(m->outq), len); - used = MIN(used, MIDI_WSIZE); - MIDI_DEBUG(5, - printf("midi_synth: resid %zu len %jd avail %jd\n", - len, (intmax_t)MIDIQ_LEN(m->outq), - (intmax_t)MIDIQ_AVAIL(m->outq))); - - if (midi_dumpraw) - for (i = 0; i < used; i++) - printf("%x ", buf[i]); - - MIDIQ_ENQ(m->outq, buf, used); - len -= used; - - /* - * Inform the bottom half that data can be written - */ - if (!(m->flags & M_TXEN)) { - m->flags |= M_TXEN; - MPU_CALLBACK(m, m->cookie, m->flags); - } - } - /* - * If we Made it here then transfer is good - */ - if (midi_dumpraw) - printf("\n"); - - retval = 0; -err1: mtx_unlock(&m->qlock); - mtx_unlock(&m->lock); -err0: return retval; -} - -static int -midisynth_killnote(void *n, uint8_t chn, uint8_t note, uint8_t vel) -{ - u_char c[3]; - - if (note > 127 || chn > 15) - return (EINVAL); - - if (vel > 127) - vel = 127; - - if (vel == 64) { - c[0] = 0x90 | (chn & 0x0f); /* Note on. 
*/ - c[1] = (u_char)note; - c[2] = 0; - } else { - c[0] = 0x80 | (chn & 0x0f); /* Note off. */ - c[1] = (u_char)note; - c[2] = (u_char)vel; - } - - return midisynth_writeraw(n, c, 3); -} - -static int -midisynth_setinstr(void *n, uint8_t chn, uint16_t instr) -{ - u_char c[2]; - - if (instr > 127 || chn > 15) - return EINVAL; - - c[0] = 0xc0 | (chn & 0x0f); /* Progamme change. */ - c[1] = instr + midi_instroff; - - return midisynth_writeraw(n, c, 2); -} - -static int -midisynth_startnote(void *n, uint8_t chn, uint8_t note, uint8_t vel) -{ - u_char c[3]; - - if (note > 127 || chn > 15) - return EINVAL; - - if (vel > 127) - vel = 127; - - c[0] = 0x90 | (chn & 0x0f); /* Note on. */ - c[1] = (u_char)note; - c[2] = (u_char)vel; - - return midisynth_writeraw(n, c, 3); -} -static int -midisynth_alloc(void *n, uint8_t chan, uint8_t note) -{ - return chan; -} - -static int -midisynth_controller(void *n, uint8_t chn, uint8_t ctrlnum, uint16_t val) -{ - u_char c[3]; - - if (ctrlnum > 127 || chn > 15) - return EINVAL; - - c[0] = 0xb0 | (chn & 0x0f); /* Control Message. */ - c[1] = ctrlnum; - c[2] = val; - return midisynth_writeraw(n, c, 3); -} - -static int -midisynth_bender(void *n, uint8_t chn, uint16_t val) -{ - u_char c[3]; - - if (val > 16383 || chn > 15) - return EINVAL; - - c[0] = 0xe0 | (chn & 0x0f); /* Pitch bend. */ - c[1] = (u_char)val & 0x7f; - c[2] = (u_char)(val >> 7) & 0x7f; - - return midisynth_writeraw(n, c, 3); -} - -/* * Single point of midi destructions. 
*/ static int @@ -1381,24 +817,16 @@ midi_destroy(struct snd_midi *m, int midiuninit) free(MIDIQ_BUF(m->outq), M_MIDI); mtx_destroy(&m->qlock); mtx_destroy(&m->lock); - free(m->synth, M_MIDI); free(m, M_MIDI); return 0; } -/* - * Load and unload functions, creates the /dev/midistat device - */ - static int midi_load(void) { sx_init(&mstat_lock, "midistat lock"); TAILQ_INIT(&midi_devs); - midistat_dev = make_dev(&midistat_cdevsw, MIDI_DEV_MIDICTL, UID_ROOT, - GID_WHEEL, 0666, "midistat"); - return 0; } @@ -1411,9 +839,6 @@ midi_unload(void) MIDI_DEBUG(1, printf("midi_unload()\n")); retval = EBUSY; midistat_lock(); - if (midistat_isopen) - goto exit0; - TAILQ_FOREACH_SAFE(m, &midi_devs, link, tmp) { mtx_lock(&m->lock); if (m->busy) @@ -1421,28 +846,21 @@ midi_unload(void) else retval = midi_destroy(m, 1); if (retval) - goto exit1; + goto exit; } midistat_unlock(); - destroy_dev(midistat_dev); - /* - * Made it here then unload is complete - */ sx_destroy(&mstat_lock); return 0; -exit1: +exit: mtx_unlock(&m->lock); -exit0: midistat_unlock(); if (retval) MIDI_DEBUG(2, printf("midi_unload: failed\n")); return retval; } -extern int seq_modevent(module_t mod, int type, void *data); - static int midi_modevent(module_t mod, int type, void *data) { @@ -1453,14 +871,10 @@ midi_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: retval = midi_load(); - if (retval == 0) - retval = seq_modevent(mod, type, data); break; case MOD_UNLOAD: retval = midi_unload(); - if (retval == 0) - retval = seq_modevent(mod, type, data); break; default: @@ -1470,73 +884,5 @@ midi_modevent(module_t mod, int type, void *data) return retval; } -kobj_t -midimapper_addseq(void *arg1, int *unit, void **cookie) -{ - unit = NULL; - - return (kobj_t)arg1; -} - -int -midimapper_open_locked(void *arg1, void **cookie) -{ - int retval = 0; - struct snd_midi *m; - - midistat_lockassert(); - TAILQ_FOREACH(m, &midi_devs, link) { - retval++; - } - - return retval; -} - -int 
-midimapper_open(void *arg1, void **cookie) -{ - int retval; - - midistat_lock(); - retval = midimapper_open_locked(arg1, cookie); - midistat_unlock(); - - return retval; -} - -int -midimapper_close(void *arg1, void *cookie) -{ - return 0; -} - -kobj_t -midimapper_fetch_synth_locked(void *arg, void *cookie, int unit) -{ - struct snd_midi *m; - int retval = 0; - - midistat_lockassert(); - TAILQ_FOREACH(m, &midi_devs, link) { - if (unit == retval) - return (kobj_t)m->synth; - retval++; - } - - return NULL; -} - -kobj_t -midimapper_fetch_synth(void *arg, void *cookie, int unit) -{ - kobj_t synth; - - midistat_lock(); - synth = midimapper_fetch_synth_locked(arg, cookie, unit); - midistat_unlock(); - - return synth; -} - DEV_MODULE(midi, midi_modevent, NULL); MODULE_VERSION(midi, 1); diff --git a/sys/dev/sound/midi/midi.h b/sys/dev/sound/midi/midi.h index 2254fab690e9..286e84264ef3 100644 --- a/sys/dev/sound/midi/midi.h +++ b/sys/dev/sound/midi/midi.h @@ -51,11 +51,4 @@ int midi_uninit(struct snd_midi *_m); int midi_out(struct snd_midi *_m, uint8_t *_buf, int _size); int midi_in(struct snd_midi *_m, uint8_t *_buf, int _size); -kobj_t midimapper_addseq(void *arg1, int *unit, void **cookie); -int midimapper_open_locked(void *arg1, void **cookie); -int midimapper_open(void *arg1, void **cookie); -int midimapper_close(void *arg1, void *cookie); -kobj_t midimapper_fetch_synth_locked(void *arg, void *cookie, int unit); -kobj_t midimapper_fetch_synth(void *arg, void *cookie, int unit); - #endif diff --git a/sys/dev/sound/midi/mpu401.c b/sys/dev/sound/midi/mpu401.c index 2be285bc0040..224ebb1b01f4 100644 --- a/sys/dev/sound/midi/mpu401.c +++ b/sys/dev/sound/midi/mpu401.c @@ -88,8 +88,6 @@ static int mpu401_minqsize(struct snd_midi *, void *); static int mpu401_moutqsize(struct snd_midi *, void *); static void mpu401_mcallback(struct snd_midi *, void *, int); static void mpu401_mcallbackp(struct snd_midi *, void *, int); -static const char *mpu401_mdescr(struct snd_midi *, void 
*, int); -static const char *mpu401_mprovider(struct snd_midi *, void *); static kobj_method_t mpu401_methods[] = { KOBJMETHOD(mpu_init, mpu401_minit), @@ -98,8 +96,6 @@ static kobj_method_t mpu401_methods[] = { KOBJMETHOD(mpu_outqsize, mpu401_moutqsize), KOBJMETHOD(mpu_callback, mpu401_mcallback), KOBJMETHOD(mpu_callbackp, mpu401_mcallbackp), - KOBJMETHOD(mpu_descr, mpu401_mdescr), - KOBJMETHOD(mpu_provider, mpu401_mprovider), KOBJMETHOD_END }; @@ -122,24 +118,12 @@ mpu401_intr(struct mpu401 *m) int i; int s; -/* - printf("mpu401_intr\n"); -*/ #define RXRDY(m) ( (STATUS(m) & MPU_INPUTBUSY) == 0) #define TXRDY(m) ( (STATUS(m) & MPU_OUTPUTBUSY) == 0) -#if 0 -#define D(x,l) printf("mpu401_intr %d %x %s %s\n",l, x, x&MPU_INPUTBUSY?"RX":"", x&MPU_OUTPUTBUSY?"TX":"") -#else -#define D(x,l) -#endif i = 0; s = STATUS(m); - D(s, 1); while ((s & MPU_INPUTBUSY) == 0 && i < MPU_INTR_BUF) { b[i] = READ(m); -/* - printf("mpu401_intr in i %d d %d\n", i, b[i]); -*/ i++; s = STATUS(m); } @@ -148,15 +132,9 @@ mpu401_intr(struct mpu401 *m) i = 0; while (!(s & MPU_OUTPUTBUSY) && i < MPU_INTR_BUF) { if (midi_out(m->mid, b, 1)) { -/* - printf("mpu401_intr out i %d d %d\n", i, b[0]); -*/ WRITE(m, *b); } else { -/* - printf("mpu401_intr write: no output\n"); -*/ return 0; } i++; @@ -262,13 +240,7 @@ static void mpu401_mcallback(struct snd_midi *sm, void *arg, int flags) { struct mpu401 *m = arg; -#if 0 - printf("mpu401_callback %s %s %s %s\n", - flags & M_RX ? "M_RX" : "", - flags & M_TX ? "M_TX" : "", - flags & M_RXEN ? "M_RXEN" : "", - flags & M_TXEN ? 
"M_TXEN" : ""); -#endif + if (flags & M_TXEN && m->si) { callout_reset(&m->timer, 1, mpu401_timeout, m); } @@ -278,19 +250,5 @@ mpu401_mcallback(struct snd_midi *sm, void *arg, int flags) static void mpu401_mcallbackp(struct snd_midi *sm, void *arg, int flags) { -/* printf("mpu401_callbackp\n"); */ mpu401_mcallback(sm, arg, flags); } - -static const char * -mpu401_mdescr(struct snd_midi *sm, void *arg, int verbosity) -{ - - return "descr mpu401"; -} - -static const char * -mpu401_mprovider(struct snd_midi *m, void *arg) -{ - return "provider mpu401"; -} diff --git a/sys/dev/sound/midi/mpu_if.m b/sys/dev/sound/midi/mpu_if.m index b7cb586c5dd0..835d887f703a 100644 --- a/sys/dev/sound/midi/mpu_if.m +++ b/sys/dev/sound/midi/mpu_if.m @@ -56,17 +56,6 @@ METHOD void callback { int _flags; }; -METHOD const char * provider { - struct snd_midi *_kobj; - void *_cookie; -}; - -METHOD const char * descr { - struct snd_midi *_kobj; - void *_cookie; - int _verbosity; -}; - METHOD int uninit { struct snd_midi *_kobj; void *_cookie; diff --git a/sys/dev/sound/midi/sequencer.c b/sys/dev/sound/midi/sequencer.c deleted file mode 100644 index 03b71688175c..000000000000 --- a/sys/dev/sound/midi/sequencer.c +++ /dev/null @@ -1,2107 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2003 Mathew Kanner - * Copyright (c) 1993 Hannu Savolainen - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * The sequencer personality manager. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/ioccom.h> - -#include <sys/filio.h> -#include <sys/lock.h> -#include <sys/sockio.h> -#include <sys/fcntl.h> -#include <sys/proc.h> -#include <sys/sysctl.h> - -#include <sys/kernel.h> /* for DATA_SET */ - -#include <sys/module.h> -#include <sys/conf.h> -#include <sys/file.h> -#include <sys/uio.h> -#include <sys/syslog.h> -#include <sys/errno.h> -#include <sys/malloc.h> -#include <sys/bus.h> -#include <machine/resource.h> -#include <machine/bus.h> -#include <machine/clock.h> /* for DELAY */ -#include <sys/soundcard.h> -#include <sys/rman.h> -#include <sys/mman.h> -#include <sys/poll.h> -#include <sys/mutex.h> -#include <sys/condvar.h> -#include <sys/kthread.h> -#include <sys/unistd.h> -#include <sys/selinfo.h> -#include <sys/sx.h> - -#ifdef HAVE_KERNEL_OPTION_HEADERS -#include "opt_snd.h" -#endif - -#include <dev/sound/midi/midi.h> -#include <dev/sound/midi/midiq.h> -#include "synth_if.h" - -#include <dev/sound/midi/sequencer.h> - -#define TMR_TIMERBASE 13 - -#define SND_DEV_SEQ 1 /* Sequencer output /dev/sequencer (FM - * synthesizer and MIDI output) */ -#define SND_DEV_MUSIC 8 
/* /dev/music, level 2 interface */ - -/* Length of a sequencer event. */ -#define EV_SZ 8 -#define IEV_SZ 8 - -/* Lookup modes */ -#define LOOKUP_EXIST (0) -#define LOOKUP_OPEN (1) -#define LOOKUP_CLOSE (2) - -#define MIDIDEV(y) (dev2unit(y) & 0x0f) - -/* These are the entries to the sequencer driver. */ -static d_open_t mseq_open; -static d_close_t mseq_close; -static d_ioctl_t mseq_ioctl; -static d_read_t mseq_read; -static d_write_t mseq_write; -static d_poll_t mseq_poll; - -static struct cdevsw seq_cdevsw = { - .d_version = D_VERSION, - .d_open = mseq_open, - .d_close = mseq_close, - .d_read = mseq_read, - .d_write = mseq_write, - .d_ioctl = mseq_ioctl, - .d_poll = mseq_poll, - .d_name = "sequencer", -}; - -struct seq_softc { - KOBJ_FIELDS; - - struct mtx seq_lock, q_lock; - struct cv empty_cv, reset_cv, in_cv, out_cv, state_cv, th_cv; - - MIDIQ_HEAD(, u_char) in_q, out_q; - - u_long flags; - /* Flags (protected by flag_mtx of mididev_info) */ - int fflags; /* Access mode */ - int music; - - int out_water; /* Sequence output threshould */ - snd_sync_parm sync_parm; /* AIOSYNC parameter set */ - struct thread *sync_thread; /* AIOSYNCing thread */ - struct selinfo in_sel, out_sel; - int midi_number; - struct cdev *seqdev, *musicdev; - int unit; - int maxunits; - kobj_t *midis; - int *midi_flags; - kobj_t mapper; - void *mapper_cookie; - struct timeval timerstop, timersub; - int timerbase, tempo; - int timerrun; - int done; - int playing; - int recording; - int busy; - int pre_event_timeout; - int waiting; -}; - -/* - * Module specific stuff, including how many sequecers - * we currently own. - */ - -SYSCTL_NODE(_hw_midi, OID_AUTO, seq, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, - "Midi sequencer"); - -int seq_debug; -/* XXX: should this be moved into debug.midi? 
*/ -SYSCTL_INT(_hw_midi_seq, OID_AUTO, debug, CTLFLAG_RW, &seq_debug, 0, ""); - -midi_cmdtab cmdtab_seqevent[] = { - {SEQ_NOTEOFF, "SEQ_NOTEOFF"}, - {SEQ_NOTEON, "SEQ_NOTEON"}, - {SEQ_WAIT, "SEQ_WAIT"}, - {SEQ_PGMCHANGE, "SEQ_PGMCHANGE"}, - {SEQ_SYNCTIMER, "SEQ_SYNCTIMER"}, - {SEQ_MIDIPUTC, "SEQ_MIDIPUTC"}, - {SEQ_DRUMON, "SEQ_DRUMON"}, - {SEQ_DRUMOFF, "SEQ_DRUMOFF"}, - {SEQ_ECHO, "SEQ_ECHO"}, - {SEQ_AFTERTOUCH, "SEQ_AFTERTOUCH"}, - {SEQ_CONTROLLER, "SEQ_CONTROLLER"}, - {SEQ_BALANCE, "SEQ_BALANCE"}, - {SEQ_VOLMODE, "SEQ_VOLMODE"}, - {SEQ_FULLSIZE, "SEQ_FULLSIZE"}, - {SEQ_PRIVATE, "SEQ_PRIVATE"}, - {SEQ_EXTENDED, "SEQ_EXTENDED"}, - {EV_SEQ_LOCAL, "EV_SEQ_LOCAL"}, - {EV_TIMING, "EV_TIMING"}, - {EV_CHN_COMMON, "EV_CHN_COMMON"}, - {EV_CHN_VOICE, "EV_CHN_VOICE"}, - {EV_SYSEX, "EV_SYSEX"}, - {-1, NULL}, -}; - -midi_cmdtab cmdtab_seqioctl[] = { - {SNDCTL_SEQ_RESET, "SNDCTL_SEQ_RESET"}, - {SNDCTL_SEQ_SYNC, "SNDCTL_SEQ_SYNC"}, - {SNDCTL_SYNTH_INFO, "SNDCTL_SYNTH_INFO"}, - {SNDCTL_SEQ_CTRLRATE, "SNDCTL_SEQ_CTRLRATE"}, - {SNDCTL_SEQ_GETOUTCOUNT, "SNDCTL_SEQ_GETOUTCOUNT"}, - {SNDCTL_SEQ_GETINCOUNT, "SNDCTL_SEQ_GETINCOUNT"}, - {SNDCTL_SEQ_PERCMODE, "SNDCTL_SEQ_PERCMODE"}, - {SNDCTL_FM_LOAD_INSTR, "SNDCTL_FM_LOAD_INSTR"}, - {SNDCTL_SEQ_TESTMIDI, "SNDCTL_SEQ_TESTMIDI"}, - {SNDCTL_SEQ_RESETSAMPLES, "SNDCTL_SEQ_RESETSAMPLES"}, - {SNDCTL_SEQ_NRSYNTHS, "SNDCTL_SEQ_NRSYNTHS"}, - {SNDCTL_SEQ_NRMIDIS, "SNDCTL_SEQ_NRMIDIS"}, - {SNDCTL_SEQ_GETTIME, "SNDCTL_SEQ_GETTIME"}, - {SNDCTL_MIDI_INFO, "SNDCTL_MIDI_INFO"}, - {SNDCTL_SEQ_THRESHOLD, "SNDCTL_SEQ_THRESHOLD"}, - {SNDCTL_SYNTH_MEMAVL, "SNDCTL_SYNTH_MEMAVL"}, - {SNDCTL_FM_4OP_ENABLE, "SNDCTL_FM_4OP_ENABLE"}, - {SNDCTL_PMGR_ACCESS, "SNDCTL_PMGR_ACCESS"}, - {SNDCTL_SEQ_PANIC, "SNDCTL_SEQ_PANIC"}, - {SNDCTL_SEQ_OUTOFBAND, "SNDCTL_SEQ_OUTOFBAND"}, - {SNDCTL_TMR_TIMEBASE, "SNDCTL_TMR_TIMEBASE"}, - {SNDCTL_TMR_START, "SNDCTL_TMR_START"}, - {SNDCTL_TMR_STOP, "SNDCTL_TMR_STOP"}, - {SNDCTL_TMR_CONTINUE, "SNDCTL_TMR_CONTINUE"}, - {SNDCTL_TMR_TEMPO, 
"SNDCTL_TMR_TEMPO"}, - {SNDCTL_TMR_SOURCE, "SNDCTL_TMR_SOURCE"}, - {SNDCTL_TMR_METRONOME, "SNDCTL_TMR_METRONOME"}, - {SNDCTL_TMR_SELECT, "SNDCTL_TMR_SELECT"}, - {SNDCTL_MIDI_PRETIME, "SNDCTL_MIDI_PRETIME"}, - {AIONWRITE, "AIONWRITE"}, - {AIOGSIZE, "AIOGSIZE"}, - {AIOSSIZE, "AIOSSIZE"}, - {AIOGFMT, "AIOGFMT"}, - {AIOSFMT, "AIOSFMT"}, - {AIOGMIX, "AIOGMIX"}, - {AIOSMIX, "AIOSMIX"}, - {AIOSTOP, "AIOSTOP"}, - {AIOSYNC, "AIOSYNC"}, - {AIOGCAP, "AIOGCAP"}, - {-1, NULL}, -}; - -midi_cmdtab cmdtab_timer[] = { - {TMR_WAIT_REL, "TMR_WAIT_REL"}, - {TMR_WAIT_ABS, "TMR_WAIT_ABS"}, - {TMR_STOP, "TMR_STOP"}, - {TMR_START, "TMR_START"}, - {TMR_CONTINUE, "TMR_CONTINUE"}, - {TMR_TEMPO, "TMR_TEMPO"}, - {TMR_ECHO, "TMR_ECHO"}, - {TMR_CLOCK, "TMR_CLOCK"}, - {TMR_SPP, "TMR_SPP"}, - {TMR_TIMESIG, "TMR_TIMESIG"}, - {-1, NULL}, -}; - -midi_cmdtab cmdtab_seqcv[] = { - {MIDI_NOTEOFF, "MIDI_NOTEOFF"}, - {MIDI_NOTEON, "MIDI_NOTEON"}, - {MIDI_KEY_PRESSURE, "MIDI_KEY_PRESSURE"}, - {-1, NULL}, -}; - -midi_cmdtab cmdtab_seqccmn[] = { - {MIDI_CTL_CHANGE, "MIDI_CTL_CHANGE"}, - {MIDI_PGM_CHANGE, "MIDI_PGM_CHANGE"}, - {MIDI_CHN_PRESSURE, "MIDI_CHN_PRESSURE"}, - {MIDI_PITCH_BEND, "MIDI_PITCH_BEND"}, - {MIDI_SYSTEM_PREFIX, "MIDI_SYSTEM_PREFIX"}, - {-1, NULL}, -}; - -#ifndef KOBJMETHOD_END -#define KOBJMETHOD_END { NULL, NULL } -#endif - -/* - * static const char *mpu401_mprovider(kobj_t obj, struct mpu401 *m); - */ - -static kobj_method_t seq_methods[] = { - /* KOBJMETHOD(mpu_provider,mpu401_mprovider), */ - KOBJMETHOD_END -}; - -DEFINE_CLASS(sequencer, seq_methods, 0); - -/* The followings are the local function. 
*/ -static int seq_convertold(u_char *event, u_char *out); - -/* - * static void seq_midiinput(struct seq_softc * scp, void *md); - */ -static void seq_reset(struct seq_softc *scp); -static int seq_sync(struct seq_softc *scp); - -static int seq_processevent(struct seq_softc *scp, u_char *event); - -static int seq_timing(struct seq_softc *scp, u_char *event); -static int seq_local(struct seq_softc *scp, u_char *event); - -static int seq_chnvoice(struct seq_softc *scp, kobj_t md, u_char *event); -static int seq_chncommon(struct seq_softc *scp, kobj_t md, u_char *event); -static int seq_sysex(struct seq_softc *scp, kobj_t md, u_char *event); - -static int seq_fetch_mid(struct seq_softc *scp, int unit, kobj_t *md); -void seq_copytoinput(struct seq_softc *scp, u_char *event, int len); -int seq_modevent(module_t mod, int type, void *data); -struct seq_softc *seqs[10]; -static struct mtx seqinfo_mtx; -static u_long nseq = 0; - -static void timer_start(struct seq_softc *t); -static void timer_stop(struct seq_softc *t); -static void timer_setvals(struct seq_softc *t, int tempo, int timerbase); -static void timer_wait(struct seq_softc *t, int ticks, int wait_abs); -static int timer_now(struct seq_softc *t); - -static void -timer_start(struct seq_softc *t) -{ - t->timerrun = 1; - getmicrotime(&t->timersub); -} - -static void -timer_continue(struct seq_softc *t) -{ - struct timeval now; - - if (t->timerrun == 1) - return; - t->timerrun = 1; - getmicrotime(&now); - timevalsub(&now, &t->timerstop); - timevaladd(&t->timersub, &now); -} - -static void -timer_stop(struct seq_softc *t) -{ - t->timerrun = 0; - getmicrotime(&t->timerstop); -} - -static void -timer_setvals(struct seq_softc *t, int tempo, int timerbase) -{ - t->tempo = tempo; - t->timerbase = timerbase; -} - -static void -timer_wait(struct seq_softc *t, int ticks, int wait_abs) -{ - struct timeval now, when; - int ret; - unsigned long long i; - - while (t->timerrun == 0) { - SEQ_DEBUG(2, printf("Timer wait when timer 
isn't running\n")); - /* - * The old sequencer used timeouts that only increased - * the timer when the timer was running. - * Hence the sequencer would stick (?) if the - * timer was disabled. - */ - cv_wait(&t->reset_cv, &t->seq_lock); - if (t->playing == 0) - return; - } - - i = ticks * 60ull * 1000000ull / (t->tempo * t->timerbase); - - when.tv_sec = i / 1000000; - when.tv_usec = i % 1000000; - -#if 0 - printf("timer_wait tempo %d timerbase %d ticks %d abs %d u_sec %llu\n", - t->tempo, t->timerbase, ticks, wait_abs, i); -#endif - - if (wait_abs != 0) { - getmicrotime(&now); - timevalsub(&now, &t->timersub); - timevalsub(&when, &now); - } - if (when.tv_sec < 0 || when.tv_usec < 0) { - SEQ_DEBUG(3, - printf("seq_timer error negative time %lds.%06lds\n", - (long)when.tv_sec, (long)when.tv_usec)); - return; - } - i = when.tv_sec * 1000000ull; - i += when.tv_usec; - i *= hz; - i /= 1000000ull; -#if 0 - printf("seq_timer usec %llu ticks %llu\n", - when.tv_sec * 1000000ull + when.tv_usec, i); -#endif - t->waiting = 1; - ret = cv_timedwait(&t->reset_cv, &t->seq_lock, i + 1); - t->waiting = 0; - - if (ret != EWOULDBLOCK) - SEQ_DEBUG(3, printf("seq_timer didn't timeout\n")); - -} - -static int -timer_now(struct seq_softc *t) -{ - struct timeval now; - unsigned long long i; - int ret; - - if (t->timerrun == 0) - now = t->timerstop; - else - getmicrotime(&now); - - timevalsub(&now, &t->timersub); - - i = now.tv_sec * 1000000ull; - i += now.tv_usec; - i *= t->timerbase; -/* i /= t->tempo; */ - i /= 1000000ull; - - ret = i; - /* - * printf("timer_now: %llu %d\n", i, ret); - */ - - return ret; -} - -static void -seq_eventthread(void *arg) -{ - struct seq_softc *scp = arg; - u_char event[EV_SZ]; - - mtx_lock(&scp->seq_lock); - SEQ_DEBUG(2, printf("seq_eventthread started\n")); - while (scp->done == 0) { -restart: - while (scp->playing == 0) { - cv_wait(&scp->state_cv, &scp->seq_lock); - if (scp->done) - goto done; - } - - while (MIDIQ_EMPTY(scp->out_q)) { - 
cv_broadcast(&scp->empty_cv); - cv_wait(&scp->out_cv, &scp->seq_lock); - if (scp->playing == 0) - goto restart; - if (scp->done) - goto done; - } - - MIDIQ_DEQ(scp->out_q, event, EV_SZ); - - if (MIDIQ_AVAIL(scp->out_q) < scp->out_water) { - cv_broadcast(&scp->out_cv); - selwakeup(&scp->out_sel); - } - seq_processevent(scp, event); - } - -done: - cv_broadcast(&scp->th_cv); - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(2, printf("seq_eventthread finished\n")); - kproc_exit(0); -} - -/* - * seq_processevent: This maybe called by the event thread or the IOCTL - * handler for queued and out of band events respectively. - */ -static int -seq_processevent(struct seq_softc *scp, u_char *event) -{ - int ret; - kobj_t m; - - ret = 0; - - if (event[0] == EV_SEQ_LOCAL) - ret = seq_local(scp, event); - else if (event[0] == EV_TIMING) - ret = seq_timing(scp, event); - else if (event[0] != EV_CHN_VOICE && - event[0] != EV_CHN_COMMON && - event[0] != EV_SYSEX && - event[0] != SEQ_MIDIPUTC) { - ret = 1; - SEQ_DEBUG(2, printf("seq_processevent not known %d\n", - event[0])); - } else if (seq_fetch_mid(scp, event[1], &m) != 0) { - ret = 1; - SEQ_DEBUG(2, printf("seq_processevent midi unit not found %d\n", - event[1])); - } else - switch (event[0]) { - case EV_CHN_VOICE: - ret = seq_chnvoice(scp, m, event); - break; - case EV_CHN_COMMON: - ret = seq_chncommon(scp, m, event); - break; - case EV_SYSEX: - ret = seq_sysex(scp, m, event); - break; - case SEQ_MIDIPUTC: - mtx_unlock(&scp->seq_lock); - ret = SYNTH_WRITERAW(m, &event[2], 1); - mtx_lock(&scp->seq_lock); - break; - } - return ret; -} - -static int -seq_addunit(void) -{ - struct seq_softc *scp; - int ret; - u_char *buf; - - gone_in(15, "Warning! MIDI sequencer to be removed soon: no longer " - "needed or used\n"); - - /* Allocate the softc. 
*/ - ret = ENOMEM; - scp = malloc(sizeof(*scp), M_DEVBUF, M_NOWAIT | M_ZERO); - if (scp == NULL) { - SEQ_DEBUG(1, printf("seq_addunit: softc allocation failed.\n")); - goto err; - } - kobj_init((kobj_t)scp, &sequencer_class); - - buf = malloc(sizeof(*buf) * EV_SZ * 1024, M_TEMP, M_NOWAIT | M_ZERO); - if (buf == NULL) - goto err; - MIDIQ_INIT(scp->in_q, buf, EV_SZ * 1024); - buf = malloc(sizeof(*buf) * EV_SZ * 1024, M_TEMP, M_NOWAIT | M_ZERO); - if (buf == NULL) - goto err; - MIDIQ_INIT(scp->out_q, buf, EV_SZ * 1024); - ret = EINVAL; - - scp->midis = malloc(sizeof(kobj_t) * 32, M_TEMP, M_NOWAIT | M_ZERO); - scp->midi_flags = malloc(sizeof(*scp->midi_flags) * 32, M_TEMP, - M_NOWAIT | M_ZERO); - - if (scp->midis == NULL || scp->midi_flags == NULL) - goto err; - - scp->flags = 0; - - mtx_init(&scp->seq_lock, "seqflq", NULL, 0); - cv_init(&scp->state_cv, "seqstate"); - cv_init(&scp->empty_cv, "seqempty"); - cv_init(&scp->reset_cv, "seqtimer"); - cv_init(&scp->out_cv, "seqqout"); - cv_init(&scp->in_cv, "seqqin"); - cv_init(&scp->th_cv, "seqstart"); - - /* - * Init the damn timer - */ - - scp->mapper = midimapper_addseq(scp, &scp->unit, &scp->mapper_cookie); - if (scp->mapper == NULL) - goto err; - - scp->seqdev = make_dev(&seq_cdevsw, SND_DEV_SEQ, UID_ROOT, GID_WHEEL, - 0666, "sequencer%d", scp->unit); - - scp->musicdev = make_dev(&seq_cdevsw, SND_DEV_MUSIC, UID_ROOT, - GID_WHEEL, 0666, "music%d", scp->unit); - - if (scp->seqdev == NULL || scp->musicdev == NULL) - goto err; - /* - * TODO: Add to list of sequencers this module provides - */ - - ret = - kproc_create - (seq_eventthread, scp, NULL, RFHIGHPID, 0, - "sequencer %02d", scp->unit); - - if (ret) - goto err; - - scp->seqdev->si_drv1 = scp->musicdev->si_drv1 = scp; - - SEQ_DEBUG(2, printf("sequencer %d created scp %p\n", scp->unit, scp)); - - ret = 0; - - mtx_lock(&seqinfo_mtx); - seqs[nseq++] = scp; - mtx_unlock(&seqinfo_mtx); - - goto ok; - -err: - if (scp != NULL) { - if (scp->seqdev != NULL) - 
destroy_dev(scp->seqdev); - if (scp->musicdev != NULL) - destroy_dev(scp->musicdev); - /* - * TODO: Destroy mutex and cv - */ - if (scp->midis != NULL) - free(scp->midis, M_TEMP); - if (scp->midi_flags != NULL) - free(scp->midi_flags, M_TEMP); - if (scp->out_q.b) - free(scp->out_q.b, M_TEMP); - if (scp->in_q.b) - free(scp->in_q.b, M_TEMP); - free(scp, M_DEVBUF); - } -ok: - return ret; -} - -static int -seq_delunit(int unit) -{ - struct seq_softc *scp = seqs[unit]; - int i; - - //SEQ_DEBUG(4, printf("seq_delunit: %d\n", unit)); - SEQ_DEBUG(1, printf("seq_delunit: 1 \n")); - mtx_lock(&scp->seq_lock); - - scp->playing = 0; - scp->done = 1; - cv_broadcast(&scp->out_cv); - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->reset_cv); - SEQ_DEBUG(1, printf("seq_delunit: 2 \n")); - cv_wait(&scp->th_cv, &scp->seq_lock); - SEQ_DEBUG(1, printf("seq_delunit: 3.0 \n")); - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(1, printf("seq_delunit: 3.1 \n")); - - cv_destroy(&scp->state_cv); - SEQ_DEBUG(1, printf("seq_delunit: 4 \n")); - cv_destroy(&scp->empty_cv); - SEQ_DEBUG(1, printf("seq_delunit: 5 \n")); - cv_destroy(&scp->reset_cv); - SEQ_DEBUG(1, printf("seq_delunit: 6 \n")); - cv_destroy(&scp->out_cv); - SEQ_DEBUG(1, printf("seq_delunit: 7 \n")); - cv_destroy(&scp->in_cv); - SEQ_DEBUG(1, printf("seq_delunit: 8 \n")); - cv_destroy(&scp->th_cv); - - SEQ_DEBUG(1, printf("seq_delunit: 10 \n")); - if (scp->seqdev) - destroy_dev(scp->seqdev); - SEQ_DEBUG(1, printf("seq_delunit: 11 \n")); - if (scp->musicdev) - destroy_dev(scp->musicdev); - SEQ_DEBUG(1, printf("seq_delunit: 12 \n")); - scp->seqdev = scp->musicdev = NULL; - if (scp->midis != NULL) - free(scp->midis, M_TEMP); - SEQ_DEBUG(1, printf("seq_delunit: 13 \n")); - if (scp->midi_flags != NULL) - free(scp->midi_flags, M_TEMP); - SEQ_DEBUG(1, printf("seq_delunit: 14 \n")); - free(scp->out_q.b, M_TEMP); - SEQ_DEBUG(1, printf("seq_delunit: 15 \n")); - free(scp->in_q.b, M_TEMP); - - SEQ_DEBUG(1, printf("seq_delunit: 16 \n")); - - 
mtx_destroy(&scp->seq_lock); - SEQ_DEBUG(1, printf("seq_delunit: 17 \n")); - free(scp, M_DEVBUF); - - mtx_lock(&seqinfo_mtx); - for (i = unit; i < (nseq - 1); i++) - seqs[i] = seqs[i + 1]; - nseq--; - mtx_unlock(&seqinfo_mtx); - - return 0; -} - -int -seq_modevent(module_t mod, int type, void *data) -{ - int retval, r; - - retval = 0; - - switch (type) { - case MOD_LOAD: - mtx_init(&seqinfo_mtx, "seqmod", NULL, 0); - retval = seq_addunit(); - break; - - case MOD_UNLOAD: - while (nseq) { - r = seq_delunit(nseq - 1); - if (r) { - retval = r; - break; - } - } - if (nseq == 0) { - retval = 0; - mtx_destroy(&seqinfo_mtx); - } - break; - - default: - break; - } - - return retval; -} - -static int -seq_fetch_mid(struct seq_softc *scp, int unit, kobj_t *md) -{ - - if (unit >= scp->midi_number || unit < 0) - return EINVAL; - - *md = scp->midis[unit]; - - return 0; -} - -int -mseq_open(struct cdev *i_dev, int flags, int mode, struct thread *td) -{ - struct seq_softc *scp = i_dev->si_drv1; - int i; - - gone_in(15, "Warning! MIDI sequencer to be removed soon: no longer " - "needed or used\n"); - - if (scp == NULL) - return ENXIO; - - SEQ_DEBUG(3, printf("seq_open: scp %p unit %d, flags 0x%x.\n", - scp, scp->unit, flags)); - - /* - * Mark this device busy. 
- */ - - midistat_lock(); - mtx_lock(&scp->seq_lock); - if (scp->busy) { - mtx_unlock(&scp->seq_lock); - midistat_unlock(); - SEQ_DEBUG(2, printf("seq_open: unit %d is busy.\n", scp->unit)); - return EBUSY; - } - scp->fflags = flags; - /* - if ((scp->fflags & O_NONBLOCK) != 0) - scp->flags |= SEQ_F_NBIO; - */ - scp->music = MIDIDEV(i_dev) == SND_DEV_MUSIC; - - /* - * Enumerate the available midi devices - */ - scp->midi_number = 0; - scp->maxunits = midimapper_open_locked(scp->mapper, &scp->mapper_cookie); - - if (scp->maxunits == 0) - SEQ_DEBUG(2, printf("seq_open: no midi devices\n")); - - for (i = 0; i < scp->maxunits; i++) { - scp->midis[scp->midi_number] = - midimapper_fetch_synth_locked(scp->mapper, - scp->mapper_cookie, i); - if (scp->midis[scp->midi_number]) { - if (SYNTH_OPEN(scp->midis[scp->midi_number], scp, - scp->fflags) != 0) - scp->midis[scp->midi_number] = NULL; - else { - scp->midi_flags[scp->midi_number] = - SYNTH_QUERY(scp->midis[scp->midi_number]); - scp->midi_number++; - } - } - } - midistat_unlock(); - - timer_setvals(scp, 60, 100); - - timer_start(scp); - timer_stop(scp); - /* - * actually, if we're in rdonly mode, we should start the timer - */ - /* - * TODO: Handle recording now - */ - - scp->out_water = MIDIQ_SIZE(scp->out_q) / 2; - - scp->busy = 1; - mtx_unlock(&scp->seq_lock); - - SEQ_DEBUG(2, printf("seq_open: opened, mode %s.\n", - scp->music ? 
"music" : "sequencer")); - SEQ_DEBUG(2, - printf("Sequencer %d %p opened maxunits %d midi_number %d:\n", - scp->unit, scp, scp->maxunits, scp->midi_number)); - for (i = 0; i < scp->midi_number; i++) - SEQ_DEBUG(3, printf(" midi %d %p\n", i, scp->midis[i])); - - return 0; -} - -/* - * mseq_close - */ -int -mseq_close(struct cdev *i_dev, int flags, int mode, struct thread *td) -{ - int i; - struct seq_softc *scp = i_dev->si_drv1; - int ret; - - if (scp == NULL) - return ENXIO; - - SEQ_DEBUG(2, printf("seq_close: unit %d.\n", scp->unit)); - - mtx_lock(&scp->seq_lock); - - ret = ENXIO; - if (scp->busy == 0) - goto err; - - seq_reset(scp); - seq_sync(scp); - - for (i = 0; i < scp->midi_number; i++) - if (scp->midis[i]) - SYNTH_CLOSE(scp->midis[i]); - - midimapper_close(scp->mapper, scp->mapper_cookie); - - timer_stop(scp); - - scp->busy = 0; - ret = 0; - -err: - SEQ_DEBUG(3, printf("seq_close: closed ret = %d.\n", ret)); - mtx_unlock(&scp->seq_lock); - return ret; -} - -int -mseq_read(struct cdev *i_dev, struct uio *uio, int ioflag) -{ - int retval, used; - struct seq_softc *scp = i_dev->si_drv1; - -#define SEQ_RSIZE 32 - u_char buf[SEQ_RSIZE]; - - if (scp == NULL) - return ENXIO; - - SEQ_DEBUG(7, printf("mseq_read: unit %d, resid %zd.\n", - scp->unit, uio->uio_resid)); - - mtx_lock(&scp->seq_lock); - if ((scp->fflags & FREAD) == 0) { - SEQ_DEBUG(2, printf("mseq_read: unit %d is not for reading.\n", - scp->unit)); - retval = EIO; - goto err1; - } - /* - * Begin recording. - */ - /* - * if ((scp->flags & SEQ_F_READING) == 0) - */ - /* - * TODO, start recording if not alread - */ - - /* - * I think the semantics are to return as soon - * as possible. - * Second thought, it doesn't seem like midimoutain - * expects that at all. 
- * TODO: Look up in some sort of spec - */ - - while (uio->uio_resid > 0) { - while (MIDIQ_EMPTY(scp->in_q)) { - retval = EWOULDBLOCK; - /* - * I wish I knew which one to care about - */ - - if (scp->fflags & O_NONBLOCK) - goto err1; - if (ioflag & O_NONBLOCK) - goto err1; - - retval = cv_wait_sig(&scp->in_cv, &scp->seq_lock); - if (retval != 0) - goto err1; - } - - used = MIN(MIDIQ_LEN(scp->in_q), uio->uio_resid); - used = MIN(used, SEQ_RSIZE); - - SEQ_DEBUG(8, printf("midiread: uiomove cc=%d\n", used)); - MIDIQ_DEQ(scp->in_q, buf, used); - mtx_unlock(&scp->seq_lock); - retval = uiomove(buf, used, uio); - mtx_lock(&scp->seq_lock); - if (retval) - goto err1; - } - - retval = 0; -err1: - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(6, printf("mseq_read: ret %d, resid %zd.\n", - retval, uio->uio_resid)); - - return retval; -} - -int -mseq_write(struct cdev *i_dev, struct uio *uio, int ioflag) -{ - u_char event[EV_SZ], newevent[EV_SZ], ev_code; - struct seq_softc *scp = i_dev->si_drv1; - int retval; - int used; - - SEQ_DEBUG(7, printf("seq_write: unit %d, resid %zd.\n", - scp->unit, uio->uio_resid)); - - if (scp == NULL) - return ENXIO; - - mtx_lock(&scp->seq_lock); - - if ((scp->fflags & FWRITE) == 0) { - SEQ_DEBUG(2, printf("seq_write: unit %d is not for writing.\n", - scp->unit)); - retval = EIO; - goto err0; - } - while (uio->uio_resid > 0) { - while (MIDIQ_AVAIL(scp->out_q) == 0) { - retval = EWOULDBLOCK; - if (scp->fflags & O_NONBLOCK) - goto err0; - if (ioflag & O_NONBLOCK) - goto err0; - SEQ_DEBUG(8, printf("seq_write cvwait\n")); - - scp->playing = 1; - cv_broadcast(&scp->out_cv); - cv_broadcast(&scp->state_cv); - - retval = cv_wait_sig(&scp->out_cv, &scp->seq_lock); - /* - * We slept, maybe things have changed since last - * dying check - */ - if (retval != 0) - goto err0; -#if 0 - /* - * Useless test - */ - if (scp != i_dev->si_drv1) - retval = ENXIO; -#endif - } - - used = MIN(uio->uio_resid, 4); - - SEQ_DEBUG(8, printf("seqout: resid %zd len %jd avail %jd\n", 
- uio->uio_resid, (intmax_t)MIDIQ_LEN(scp->out_q), - (intmax_t)MIDIQ_AVAIL(scp->out_q))); - - if (used != 4) { - retval = ENXIO; - goto err0; - } - mtx_unlock(&scp->seq_lock); - retval = uiomove(event, used, uio); - mtx_lock(&scp->seq_lock); - if (retval) - goto err0; - - ev_code = event[0]; - SEQ_DEBUG(8, printf("seq_write: unit %d, event %s.\n", - scp->unit, midi_cmdname(ev_code, cmdtab_seqevent))); - - /* Have a look at the event code. */ - if (ev_code == SEQ_FULLSIZE) { - /* - * TODO: restore code for SEQ_FULLSIZE - */ -#if 0 - /* - * A long event, these are the patches/samples for a - * synthesizer. - */ - midiunit = *(u_short *)&event[2]; - mtx_lock(&sd->seq_lock); - ret = lookup_mididev(scp, midiunit, LOOKUP_OPEN, &md); - mtx_unlock(&sd->seq_lock); - if (ret != 0) - return (ret); - - SEQ_DEBUG(printf("seq_write: loading a patch to the unit %d.\n", midiunit)); - - ret = md->synth.loadpatch(md, *(short *)&event[0], buf, - p + 4, count, 0); - return (ret); -#else - /* - * For now, just flush the darn buffer - */ - SEQ_DEBUG(2, - printf("seq_write: SEQ_FULLSIZE flusing buffer.\n")); - while (uio->uio_resid > 0) { - mtx_unlock(&scp->seq_lock); - retval = uiomove(event, MIN(EV_SZ, uio->uio_resid), uio); - mtx_lock(&scp->seq_lock); - if (retval) - goto err0; - } - retval = 0; - goto err0; -#endif - } - retval = EINVAL; - if (ev_code >= 128) { - int error; - - /* - * Some sort of an extended event. The size is eight - * bytes. scoop extra info. - */ - if (scp->music && ev_code == SEQ_EXTENDED) { - SEQ_DEBUG(2, printf("seq_write: invalid level two event %x.\n", ev_code)); - goto err0; - } - mtx_unlock(&scp->seq_lock); - if (uio->uio_resid < 4) - error = EINVAL; - else - error = uiomove((caddr_t)&event[4], 4, uio); - mtx_lock(&scp->seq_lock); - if (error) { - SEQ_DEBUG(2, - printf("seq_write: user memory mangled?\n")); - goto err0; - } - } else { - /* - * Size four event. 
- */ - if (scp->music) { - SEQ_DEBUG(2, printf("seq_write: four byte event in music mode.\n")); - goto err0; - } - } - if (ev_code == SEQ_MIDIPUTC) { - /* - * TODO: event[2] is unit number to receive char. - * Range check it. - */ - } - if (scp->music) { -#ifdef not_ever_ever - if (event[0] == EV_TIMING && - (event[1] == TMR_START || event[1] == TMR_STOP)) { - /* - * For now, try to make midimoutain work by - * forcing these events to be processed - * immediately. - */ - seq_processevent(scp, event); - } else - MIDIQ_ENQ(scp->out_q, event, EV_SZ); -#else - MIDIQ_ENQ(scp->out_q, event, EV_SZ); -#endif - } else { - if (seq_convertold(event, newevent) > 0) - MIDIQ_ENQ(scp->out_q, newevent, EV_SZ); -#if 0 - else - goto err0; -#endif - } - } - - scp->playing = 1; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - - retval = 0; - -err0: - SEQ_DEBUG(6, - printf("seq_write done: leftover buffer length %zd retval %d\n", - uio->uio_resid, retval)); - mtx_unlock(&scp->seq_lock); - return retval; -} - -int -mseq_ioctl(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, - struct thread *td) -{ - int midiunit, ret, tmp; - struct seq_softc *scp = i_dev->si_drv1; - struct synth_info *synthinfo; - struct midi_info *midiinfo; - u_char event[EV_SZ]; - u_char newevent[EV_SZ]; - - kobj_t md; - - /* - * struct snd_size *sndsize; - */ - - if (scp == NULL) - return ENXIO; - - SEQ_DEBUG(6, printf("seq_ioctl: unit %d, cmd %s.\n", - scp->unit, midi_cmdname(cmd, cmdtab_seqioctl))); - - ret = 0; - - switch (cmd) { - case SNDCTL_SEQ_GETTIME: - /* - * ioctl needed by libtse - */ - mtx_lock(&scp->seq_lock); - *(int *)arg = timer_now(scp); - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(6, printf("seq_ioctl: gettime %d.\n", *(int *)arg)); - ret = 0; - break; - case SNDCTL_TMR_METRONOME: - /* fallthrough */ - case SNDCTL_TMR_SOURCE: - /* - * Not implemented - */ - ret = 0; - break; - case SNDCTL_TMR_TEMPO: - event[1] = TMR_TEMPO; - event[4] = *(int *)arg & 0xFF; - event[5] = (*(int *)arg 
>> 8) & 0xFF; - event[6] = (*(int *)arg >> 16) & 0xFF; - event[7] = (*(int *)arg >> 24) & 0xFF; - goto timerevent; - case SNDCTL_TMR_TIMEBASE: - event[1] = TMR_TIMERBASE; - event[4] = *(int *)arg & 0xFF; - event[5] = (*(int *)arg >> 8) & 0xFF; - event[6] = (*(int *)arg >> 16) & 0xFF; - event[7] = (*(int *)arg >> 24) & 0xFF; - goto timerevent; - case SNDCTL_TMR_START: - event[1] = TMR_START; - goto timerevent; - case SNDCTL_TMR_STOP: - event[1] = TMR_STOP; - goto timerevent; - case SNDCTL_TMR_CONTINUE: - event[1] = TMR_CONTINUE; -timerevent: - event[0] = EV_TIMING; - mtx_lock(&scp->seq_lock); - if (!scp->music) { - ret = EINVAL; - mtx_unlock(&scp->seq_lock); - break; - } - seq_processevent(scp, event); - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_TMR_SELECT: - SEQ_DEBUG(2, - printf("seq_ioctl: SNDCTL_TMR_SELECT not supported\n")); - ret = EINVAL; - break; - case SNDCTL_SEQ_SYNC: - if (mode == O_RDONLY) { - ret = 0; - break; - } - mtx_lock(&scp->seq_lock); - ret = seq_sync(scp); - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_SEQ_PANIC: - /* fallthrough */ - case SNDCTL_SEQ_RESET: - /* - * SNDCTL_SEQ_PANIC == SNDCTL_SEQ_RESET - */ - mtx_lock(&scp->seq_lock); - seq_reset(scp); - mtx_unlock(&scp->seq_lock); - ret = 0; - break; - case SNDCTL_SEQ_TESTMIDI: - mtx_lock(&scp->seq_lock); - /* - * TODO: SNDCTL_SEQ_TESTMIDI now means "can I write to the - * device?". 
- */ - mtx_unlock(&scp->seq_lock); - break; -#if 0 - case SNDCTL_SEQ_GETINCOUNT: - if (mode == O_WRONLY) - *(int *)arg = 0; - else { - mtx_lock(&scp->seq_lock); - *(int *)arg = scp->in_q.rl; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(printf("seq_ioctl: incount %d.\n", - *(int *)arg)); - } - ret = 0; - break; - case SNDCTL_SEQ_GETOUTCOUNT: - if (mode == O_RDONLY) - *(int *)arg = 0; - else { - mtx_lock(&scp->seq_lock); - *(int *)arg = scp->out_q.fl; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(printf("seq_ioctl: outcount %d.\n", - *(int *)arg)); - } - ret = 0; - break; -#endif - case SNDCTL_SEQ_CTRLRATE: - if (*(int *)arg != 0) { - ret = EINVAL; - break; - } - mtx_lock(&scp->seq_lock); - *(int *)arg = scp->timerbase; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: ctrlrate %d.\n", *(int *)arg)); - ret = 0; - break; - /* - * TODO: ioctl SNDCTL_SEQ_RESETSAMPLES - */ -#if 0 - case SNDCTL_SEQ_RESETSAMPLES: - mtx_lock(&scp->seq_lock); - ret = lookup_mididev(scp, *(int *)arg, LOOKUP_OPEN, &md); - mtx_unlock(&scp->seq_lock); - if (ret != 0) - break; - ret = midi_ioctl(MIDIMKDEV(major(i_dev), *(int *)arg, - SND_DEV_MIDIN), cmd, arg, mode, td); - break; -#endif - case SNDCTL_SEQ_NRSYNTHS: - mtx_lock(&scp->seq_lock); - *(int *)arg = scp->midi_number; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: synths %d.\n", *(int *)arg)); - ret = 0; - break; - case SNDCTL_SEQ_NRMIDIS: - mtx_lock(&scp->seq_lock); - if (scp->music) - *(int *)arg = 0; - else { - /* - * TODO: count the numbder of devices that can WRITERAW - */ - *(int *)arg = scp->midi_number; - } - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: midis %d.\n", *(int *)arg)); - ret = 0; - break; - /* - * TODO: ioctl SNDCTL_SYNTH_MEMAVL - */ -#if 0 - case SNDCTL_SYNTH_MEMAVL: - mtx_lock(&scp->seq_lock); - ret = lookup_mididev(scp, *(int *)arg, LOOKUP_OPEN, &md); - mtx_unlock(&scp->seq_lock); - if (ret != 0) - break; - ret = midi_ioctl(MIDIMKDEV(major(i_dev), *(int *)arg, - 
SND_DEV_MIDIN), cmd, arg, mode, td); - break; -#endif - case SNDCTL_SEQ_OUTOFBAND: - for (ret = 0; ret < EV_SZ; ret++) - event[ret] = (u_char)arg[0]; - - mtx_lock(&scp->seq_lock); - if (scp->music) - ret = seq_processevent(scp, event); - else { - if (seq_convertold(event, newevent) > 0) - ret = seq_processevent(scp, newevent); - else - ret = EINVAL; - } - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_SYNTH_INFO: - synthinfo = (struct synth_info *)arg; - midiunit = synthinfo->device; - mtx_lock(&scp->seq_lock); - if (seq_fetch_mid(scp, midiunit, &md) == 0) { - bzero(synthinfo, sizeof(*synthinfo)); - synthinfo->name[0] = 'f'; - synthinfo->name[1] = 'a'; - synthinfo->name[2] = 'k'; - synthinfo->name[3] = 'e'; - synthinfo->name[4] = 's'; - synthinfo->name[5] = 'y'; - synthinfo->name[6] = 'n'; - synthinfo->name[7] = 't'; - synthinfo->name[8] = 'h'; - synthinfo->device = midiunit; - synthinfo->synth_type = SYNTH_TYPE_MIDI; - synthinfo->capabilities = scp->midi_flags[midiunit]; - ret = 0; - } else - ret = EINVAL; - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_MIDI_INFO: - midiinfo = (struct midi_info *)arg; - midiunit = midiinfo->device; - mtx_lock(&scp->seq_lock); - if (seq_fetch_mid(scp, midiunit, &md) == 0) { - bzero(midiinfo, sizeof(*midiinfo)); - midiinfo->name[0] = 'f'; - midiinfo->name[1] = 'a'; - midiinfo->name[2] = 'k'; - midiinfo->name[3] = 'e'; - midiinfo->name[4] = 'm'; - midiinfo->name[5] = 'i'; - midiinfo->name[6] = 'd'; - midiinfo->name[7] = 'i'; - midiinfo->device = midiunit; - midiinfo->capabilities = scp->midi_flags[midiunit]; - /* - * TODO: What devtype? 
- */ - midiinfo->dev_type = 0x01; - ret = 0; - } else - ret = EINVAL; - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_SEQ_THRESHOLD: - mtx_lock(&scp->seq_lock); - RANGE(*(int *)arg, 1, MIDIQ_SIZE(scp->out_q) - 1); - scp->out_water = *(int *)arg; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: water %d.\n", *(int *)arg)); - ret = 0; - break; - case SNDCTL_MIDI_PRETIME: - tmp = *(int *)arg; - if (tmp < 0) - tmp = 0; - mtx_lock(&scp->seq_lock); - scp->pre_event_timeout = (hz * tmp) / 10; - *(int *)arg = scp->pre_event_timeout; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: pretime %d.\n", *(int *)arg)); - ret = 0; - break; - case SNDCTL_FM_4OP_ENABLE: - case SNDCTL_PMGR_IFACE: - case SNDCTL_PMGR_ACCESS: - /* - * Patch manager and fm are ded, ded, ded. - */ - /* fallthrough */ - default: - /* - * TODO: Consider ioctl default case. - * Old code used to - * if ((scp->fflags & O_ACCMODE) == FREAD) { - * ret = EIO; - * break; - * } - * Then pass on the ioctl to device 0 - */ - SEQ_DEBUG(2, - printf("seq_ioctl: unsupported IOCTL %ld.\n", cmd)); - ret = EINVAL; - break; - } - - return ret; -} - -int -mseq_poll(struct cdev *i_dev, int events, struct thread *td) -{ - int ret, lim; - struct seq_softc *scp = i_dev->si_drv1; - - SEQ_DEBUG(3, printf("seq_poll: unit %d.\n", scp->unit)); - SEQ_DEBUG(1, printf("seq_poll: unit %d.\n", scp->unit)); - - mtx_lock(&scp->seq_lock); - - ret = 0; - - /* Look up the appropriate queue and select it. */ - if ((events & (POLLOUT | POLLWRNORM)) != 0) { - /* Start playing. */ - scp->playing = 1; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - - lim = scp->out_water; - - if (MIDIQ_AVAIL(scp->out_q) < lim) - /* No enough space, record select. */ - selrecord(td, &scp->out_sel); - else - /* We can write now. */ - ret |= events & (POLLOUT | POLLWRNORM); - } - if ((events & (POLLIN | POLLRDNORM)) != 0) { - /* TODO: Start recording. */ - - /* Find out the boundary. 
*/ - lim = 1; - if (MIDIQ_LEN(scp->in_q) < lim) - /* No data ready, record select. */ - selrecord(td, &scp->in_sel); - else - /* We can read now. */ - ret |= events & (POLLIN | POLLRDNORM); - } - mtx_unlock(&scp->seq_lock); - - return (ret); -} - -#if 0 -static void -sein_qtr(void *p, void /* mididev_info */ *md) -{ - struct seq_softc *scp; - - scp = (struct seq_softc *)p; - - mtx_lock(&scp->seq_lock); - - /* Restart playing if we have the data to output. */ - if (scp->queueout_pending) - seq_callback(scp, SEQ_CB_START | SEQ_CB_WR); - /* Check the midi device if we are reading. */ - if ((scp->flags & SEQ_F_READING) != 0) - seq_midiinput(scp, md); - - mtx_unlock(&scp->seq_lock); -} - -#endif -/* - * seq_convertold - * Was the old playevent. Use this to convert and old - * style /dev/sequencer event to a /dev/music event - */ -static int -seq_convertold(u_char *event, u_char *out) -{ - int used; - u_char dev, chn, note, vel; - - out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = - out[7] = 0; - - dev = 0; - chn = event[1]; - note = event[2]; - vel = event[3]; - - used = 0; - -restart: - /* - * TODO: Debug statement - */ - switch (event[0]) { - case EV_TIMING: - case EV_CHN_VOICE: - case EV_CHN_COMMON: - case EV_SYSEX: - case EV_SEQ_LOCAL: - out[0] = event[0]; - out[1] = event[1]; - out[2] = event[2]; - out[3] = event[3]; - out[4] = event[4]; - out[5] = event[5]; - out[6] = event[6]; - out[7] = event[7]; - used += 8; - break; - case SEQ_NOTEOFF: - out[0] = EV_CHN_VOICE; - out[1] = dev; - out[2] = MIDI_NOTEOFF; - out[3] = chn; - out[4] = note; - out[5] = 255; - used += 4; - break; - - case SEQ_NOTEON: - out[0] = EV_CHN_VOICE; - out[1] = dev; - out[2] = MIDI_NOTEON; - out[3] = chn; - out[4] = note; - out[5] = vel; - used += 4; - break; - - /* - * wait delay = (event[2] << 16) + (event[3] << 8) + event[4] - */ - - case SEQ_PGMCHANGE: - out[0] = EV_CHN_COMMON; - out[1] = dev; - out[2] = MIDI_PGM_CHANGE; - out[3] = chn; - out[4] = note; - out[5] = vel; - used 
+= 4; - break; -/* - out[0] = EV_TIMING; - out[1] = dev; - out[2] = MIDI_PGM_CHANGE; - out[3] = chn; - out[4] = note; - out[5] = vel; - SEQ_DEBUG(4,printf("seq_playevent: synctimer\n")); - break; -*/ - - case SEQ_MIDIPUTC: - SEQ_DEBUG(4, - printf("seq_playevent: put data 0x%02x, unit %d.\n", - event[1], event[2])); - /* - * Pass through to the midi device. - * device = event[2] - * data = event[1] - */ - out[0] = SEQ_MIDIPUTC; - out[1] = dev; - out[2] = chn; - used += 4; - break; -#ifdef notyet - case SEQ_ECHO: - /* - * This isn't handled here yet because I don't know if I can - * just use four bytes events. There might be consequences - * in the _read routing - */ - if (seq_copytoinput(scp, event, 4) == EAGAIN) { - ret = QUEUEFULL; - break; - } - ret = MORE; - break; -#endif - case SEQ_EXTENDED: - switch (event[1]) { - case SEQ_NOTEOFF: - case SEQ_NOTEON: - case SEQ_PGMCHANGE: - event++; - used = 4; - goto restart; - break; - case SEQ_AFTERTOUCH: - /* - * SYNTH_AFTERTOUCH(md, event[3], event[4]) - */ - case SEQ_BALANCE: - /* - * SYNTH_PANNING(md, event[3], (char)event[4]) - */ - case SEQ_CONTROLLER: - /* - * SYNTH_CONTROLLER(md, event[3], event[4], *(short *)&event[5]) - */ - case SEQ_VOLMODE: - /* - * SYNTH_VOLUMEMETHOD(md, event[3]) - */ - default: - SEQ_DEBUG(2, - printf("seq_convertold: SEQ_EXTENDED type %d" - "not handled\n", event[1])); - break; - } - break; - case SEQ_WAIT: - out[0] = EV_TIMING; - out[1] = TMR_WAIT_REL; - out[4] = event[2]; - out[5] = event[3]; - out[6] = event[4]; - - SEQ_DEBUG(5, printf("SEQ_WAIT %d", - event[2] + (event[3] << 8) + (event[4] << 24))); - - used += 4; - break; - - case SEQ_ECHO: - case SEQ_SYNCTIMER: - case SEQ_PRIVATE: - default: - SEQ_DEBUG(2, - printf("seq_convertold: event type %d not handled %d %d %d\n", - event[0], event[1], event[2], event[3])); - break; - } - return used; -} - -/* - * Writting to the sequencer buffer never blocks and drops - * input which cannot be queued - */ -void -seq_copytoinput(struct seq_softc 
*scp, u_char *event, int len) -{ - - mtx_assert(&scp->seq_lock, MA_OWNED); - - if (MIDIQ_AVAIL(scp->in_q) < len) { - /* - * ENOROOM? EINPUTDROPPED? ETOUGHLUCK? - */ - SEQ_DEBUG(2, printf("seq_copytoinput: queue full\n")); - } else { - MIDIQ_ENQ(scp->in_q, event, len); - selwakeup(&scp->in_sel); - cv_broadcast(&scp->in_cv); - } - -} - -static int -seq_chnvoice(struct seq_softc *scp, kobj_t md, u_char *event) -{ - int ret, voice; - u_char cmd, chn, note, parm; - - ret = 0; - cmd = event[2]; - chn = event[3]; - note = event[4]; - parm = event[5]; - - mtx_assert(&scp->seq_lock, MA_OWNED); - - SEQ_DEBUG(5, printf("seq_chnvoice: unit %d, dev %d, cmd %s," - " chn %d, note %d, parm %d.\n", scp->unit, event[1], - midi_cmdname(cmd, cmdtab_seqcv), chn, note, parm)); - - voice = SYNTH_ALLOC(md, chn, note); - - mtx_unlock(&scp->seq_lock); - - switch (cmd) { - case MIDI_NOTEON: - if (note < 128 || note == 255) { -#if 0 - if (scp->music && chn == 9) { - /* - * This channel is a percussion. The note - * number is the patch number. - */ - /* - mtx_unlock(&scp->seq_lock); - if (SYNTH_SETINSTR(md, voice, 128 + note) - == EAGAIN) { - mtx_lock(&scp->seq_lock); - return (QUEUEFULL); - } - mtx_lock(&scp->seq_lock); - */ - note = 60; /* Middle C. 
*/ - } -#endif - if (scp->music) { - /* - mtx_unlock(&scp->seq_lock); - if (SYNTH_SETUPVOICE(md, voice, chn) - == EAGAIN) { - mtx_lock(&scp->seq_lock); - return (QUEUEFULL); - } - mtx_lock(&scp->seq_lock); - */ - } - SYNTH_STARTNOTE(md, voice, note, parm); - } - break; - case MIDI_NOTEOFF: - SYNTH_KILLNOTE(md, voice, note, parm); - break; - case MIDI_KEY_PRESSURE: - SYNTH_AFTERTOUCH(md, voice, parm); - break; - default: - ret = 1; - SEQ_DEBUG(2, printf("seq_chnvoice event type %d not handled\n", - event[1])); - break; - } - - mtx_lock(&scp->seq_lock); - return ret; -} - -static int -seq_chncommon(struct seq_softc *scp, kobj_t md, u_char *event) -{ - int ret; - u_short w14; - u_char cmd, chn, p1; - - ret = 0; - cmd = event[2]; - chn = event[3]; - p1 = event[4]; - w14 = *(u_short *)&event[6]; - - SEQ_DEBUG(5, printf("seq_chncommon: unit %d, dev %d, cmd %s, chn %d," - " p1 %d, w14 %d.\n", scp->unit, event[1], - midi_cmdname(cmd, cmdtab_seqccmn), chn, p1, w14)); - mtx_unlock(&scp->seq_lock); - switch (cmd) { - case MIDI_PGM_CHANGE: - SEQ_DEBUG(4, printf("seq_chncommon pgmchn chn %d pg %d\n", - chn, p1)); - SYNTH_SETINSTR(md, chn, p1); - break; - case MIDI_CTL_CHANGE: - SEQ_DEBUG(4, printf("seq_chncommon ctlch chn %d pg %d %d\n", - chn, p1, w14)); - SYNTH_CONTROLLER(md, chn, p1, w14); - break; - case MIDI_PITCH_BEND: - if (scp->music) { - /* - * TODO: MIDI_PITCH_BEND - */ -#if 0 - mtx_lock(&md->synth.vc_mtx); - md->synth.chn_info[chn].bender_value = w14; - if (md->midiunit >= 0) { - /* - * Handle all of the notes playing on this - * channel. 
- */ - key = ((int)chn << 8); - for (i = 0; i < md->synth.alloc.max_voice; i++) - if ((md->synth.alloc.map[i] & 0xff00) == key) { - mtx_unlock(&md->synth.vc_mtx); - mtx_unlock(&scp->seq_lock); - if (md->synth.bender(md, i, w14) == EAGAIN) { - mtx_lock(&scp->seq_lock); - return (QUEUEFULL); - } - mtx_lock(&scp->seq_lock); - } - } else { - mtx_unlock(&md->synth.vc_mtx); - mtx_unlock(&scp->seq_lock); - if (md->synth.bender(md, chn, w14) == EAGAIN) { - mtx_lock(&scp->seq_lock); - return (QUEUEFULL); - } - mtx_lock(&scp->seq_lock); - } -#endif - } else - SYNTH_BENDER(md, chn, w14); - break; - default: - ret = 1; - SEQ_DEBUG(2, - printf("seq_chncommon event type %d not handled.\n", - event[1])); - break; - } - mtx_lock(&scp->seq_lock); - return ret; -} - -static int -seq_timing(struct seq_softc *scp, u_char *event) -{ - int param; - int ret; - - ret = 0; - param = event[4] + (event[5] << 8) + - (event[6] << 16) + (event[7] << 24); - - SEQ_DEBUG(5, printf("seq_timing: unit %d, cmd %d, param %d.\n", - scp->unit, event[1], param)); - switch (event[1]) { - case TMR_WAIT_REL: - timer_wait(scp, param, 0); - break; - case TMR_WAIT_ABS: - timer_wait(scp, param, 1); - break; - case TMR_START: - timer_start(scp); - cv_broadcast(&scp->reset_cv); - break; - case TMR_STOP: - timer_stop(scp); - /* - * The following cv_broadcast isn't needed since we only - * wait for 0->1 transitions. 
It probably won't hurt - */ - cv_broadcast(&scp->reset_cv); - break; - case TMR_CONTINUE: - timer_continue(scp); - cv_broadcast(&scp->reset_cv); - break; - case TMR_TEMPO: - if (param < 8) - param = 8; - if (param > 360) - param = 360; - SEQ_DEBUG(4, printf("Timer set tempo %d\n", param)); - timer_setvals(scp, param, scp->timerbase); - break; - case TMR_TIMERBASE: - if (param < 1) - param = 1; - if (param > 1000) - param = 1000; - SEQ_DEBUG(4, printf("Timer set timerbase %d\n", param)); - timer_setvals(scp, scp->tempo, param); - break; - case TMR_ECHO: - /* - * TODO: Consider making 4-byte events for /dev/sequencer - * PRO: Maybe needed by legacy apps - * CON: soundcard.h has been warning for a while many years - * to expect 8 byte events. - */ -#if 0 - if (scp->music) - seq_copytoinput(scp, event, 8); - else { - param = (param << 8 | SEQ_ECHO); - seq_copytoinput(scp, (u_char *)¶m, 4); - } -#else - seq_copytoinput(scp, event, 8); -#endif - break; - default: - SEQ_DEBUG(2, printf("seq_timing event type %d not handled.\n", - event[1])); - ret = 1; - break; - } - return ret; -} - -static int -seq_local(struct seq_softc *scp, u_char *event) -{ - int ret; - - ret = 0; - mtx_assert(&scp->seq_lock, MA_OWNED); - - SEQ_DEBUG(5, printf("seq_local: unit %d, cmd %d\n", scp->unit, - event[1])); - switch (event[1]) { - default: - SEQ_DEBUG(1, printf("seq_local event type %d not handled\n", - event[1])); - ret = 1; - break; - } - return ret; -} - -static int -seq_sysex(struct seq_softc *scp, kobj_t md, u_char *event) -{ - int i, l; - - mtx_assert(&scp->seq_lock, MA_OWNED); - SEQ_DEBUG(5, printf("seq_sysex: unit %d device %d\n", scp->unit, - event[1])); - l = 0; - for (i = 0; i < 6 && event[i + 2] != 0xff; i++) - l = i + 1; - if (l > 0) { - mtx_unlock(&scp->seq_lock); - if (SYNTH_SENDSYSEX(md, &event[2], l) == EAGAIN) { - mtx_lock(&scp->seq_lock); - return 1; - } - mtx_lock(&scp->seq_lock); - } - return 0; -} - -/* - * Reset no longer closes the raw devices nor seq_sync's - * 
Callers are IOCTL and seq_close - */ -static void -seq_reset(struct seq_softc *scp) -{ - int chn, i; - kobj_t m; - - mtx_assert(&scp->seq_lock, MA_OWNED); - - SEQ_DEBUG(5, printf("seq_reset: unit %d.\n", scp->unit)); - - /* - * Stop reading and writing. - */ - - /* scp->recording = 0; */ - scp->playing = 0; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - cv_broadcast(&scp->reset_cv); - - /* - * For now, don't reset the timers. - */ - MIDIQ_CLEAR(scp->in_q); - MIDIQ_CLEAR(scp->out_q); - - for (i = 0; i < scp->midi_number; i++) { - m = scp->midis[i]; - mtx_unlock(&scp->seq_lock); - SYNTH_RESET(m); - for (chn = 0; chn < 16; chn++) { - SYNTH_CONTROLLER(m, chn, 123, 0); - SYNTH_CONTROLLER(m, chn, 121, 0); - SYNTH_BENDER(m, chn, 1 << 13); - } - mtx_lock(&scp->seq_lock); - } -} - -/* - * seq_sync - * *really* flush the output queue - * flush the event queue, then flush the synthsisers. - * Callers are IOCTL and close - */ - -#define SEQ_SYNC_TIMEOUT 8 -static int -seq_sync(struct seq_softc *scp) -{ - int i, rl, sync[16], done; - - mtx_assert(&scp->seq_lock, MA_OWNED); - - SEQ_DEBUG(4, printf("seq_sync: unit %d.\n", scp->unit)); - - /* - * Wait until output queue is empty. Check every so often to see if - * the queue is moving along. If it isn't just abort. - */ - while (!MIDIQ_EMPTY(scp->out_q)) { - if (!scp->playing) { - scp->playing = 1; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - } - rl = MIDIQ_LEN(scp->out_q); - - i = cv_timedwait_sig(&scp->out_cv, - &scp->seq_lock, SEQ_SYNC_TIMEOUT * hz); - - if (i == EINTR || i == ERESTART) { - if (i == EINTR) { - /* - * XXX: I don't know why we stop playing - */ - scp->playing = 0; - cv_broadcast(&scp->out_cv); - } - return i; - } - if (i == EWOULDBLOCK && rl == MIDIQ_LEN(scp->out_q) && - scp->waiting == 0) { - /* - * A queue seems to be stuck up. Give up and clear - * queues. 
- */ - MIDIQ_CLEAR(scp->out_q); - scp->playing = 0; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - cv_broadcast(&scp->reset_cv); - - /* - * TODO: Consider if the raw devices need to be flushed - */ - - SEQ_DEBUG(1, printf("seq_sync queue stuck, aborting\n")); - - return i; - } - } - - scp->playing = 0; - /* - * Since syncing a midi device might block, unlock scp->seq_lock. - */ - - mtx_unlock(&scp->seq_lock); - for (i = 0; i < scp->midi_number; i++) - sync[i] = 1; - - do { - done = 1; - for (i = 0; i < scp->midi_number; i++) - if (sync[i]) { - if (SYNTH_INSYNC(scp->midis[i]) == 0) - sync[i] = 0; - else - done = 0; - } - if (!done) - DELAY(5000); - - } while (!done); - - mtx_lock(&scp->seq_lock); - return 0; -} - -char * -midi_cmdname(int cmd, midi_cmdtab *tab) -{ - while (tab->name != NULL) { - if (cmd == tab->cmd) - return (tab->name); - tab++; - } - - return ("unknown"); -} diff --git a/sys/dev/sound/midi/synth_if.m b/sys/dev/sound/midi/synth_if.m deleted file mode 100644 index a763b3422bc6..000000000000 --- a/sys/dev/sound/midi/synth_if.m +++ /dev/null @@ -1,312 +0,0 @@ -#- -# Copyright (c) 2003 Mathew Kanner -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. -# -# - -INTERFACE synth; - -#include <sys/systm.h> - -CODE { - -synth_killnote_t nokillnote; -synth_startnote_t nostartnote; -synth_setinstr_t nosetinstr; -synth_hwcontrol_t nohwcontrol; -synth_aftertouch_t noaftertouch; -synth_panning_t nopanning; -synth_controller_t nocontroller; -synth_volumemethod_t novolumemethod; -synth_bender_t nobender; -synth_setupvoice_t nosetupvoice; -synth_sendsysex_t nosendsysex; -synth_allocvoice_t noallocvoice; -synth_writeraw_t nowriteraw; -synth_reset_t noreset; -synth_shortname_t noshortname; -synth_open_t noopen; -synth_close_t noclose; -synth_query_t noquery; -synth_insync_t noinsync; -synth_alloc_t noalloc; - - int - nokillnote(void *_kobj, uint8_t _chn, uint8_t _note, uint8_t _vel) - { - printf("nokillnote\n"); - return 0; - } - - int - noopen(void *_kobj, void *_arg, int mode) - { - printf("noopen\n"); - return 0; - } - - int - noquery(void *_kboj) - { - printf("noquery\n"); - return 0; - } - - int - nostartnote(void *_kb, uint8_t _voice, uint8_t _note, uint8_t _parm) - { - printf("nostartnote\n"); - return 0; - } - - int - nosetinstr(void *_kb, uint8_t _chn, uint16_t _patchno) - { - printf("nosetinstr\n"); - return 0; - } - - int - nohwcontrol(void *_kb, uint8_t *_event) - { - printf("nohwcontrol\n"); - return 0; - } - - int - noaftertouch ( void /* X */ * _kobj, uint8_t _x1, uint8_t _x2) - { - printf("noaftertouch\n"); - return 0; - } - - int - nopanning ( void /* X */ * _kobj, uint8_t _x1, uint8_t _x2) - { 
- printf("nopanning\n"); - return 0; - } - - int - nocontroller ( void /* X */ * _kobj, uint8_t _x1, uint8_t _x2, uint16_t _x3) - { - printf("nocontroller\n"); - return 0; - } - - int - novolumemethod ( - void /* X */ * _kobj, - uint8_t _x1) - { - printf("novolumemethod\n"); - return 0; - } - - int - nobender ( void /* X */ * _kobj, uint8_t _voice, uint16_t _bend) - { - printf("nobender\n"); - return 0; - } - - int - nosetupvoice ( void /* X */ * _kobj, uint8_t _voice, uint8_t _chn) - { - - printf("nosetupvoice\n"); - return 0; - } - - int - nosendsysex ( void /* X */ * _kobj, void * _buf, size_t _len) - { - printf("nosendsysex\n"); - return 0; - } - - int - noallocvoice ( void /* X */ * _kobj, uint8_t _chn, uint8_t _note, void *_x) - { - printf("noallocvoice\n"); - return 0; - } - - int - nowriteraw ( void /* X */ * _kobjt, uint8_t * _buf, size_t _len) - { - printf("nowriteraw\n"); - return 1; - } - - int - noreset ( void /* X */ * _kobjt) - { - - printf("noreset\n"); - return 0; - } - - char * - noshortname (void /* X */ * _kobjt) - { - printf("noshortname\n"); - return "noshortname"; - } - - int - noclose ( void /* X */ * _kobjt) - { - - printf("noclose\n"); - return 0; - } - - int - noinsync (void /* X */ * _kobjt) - { - - printf("noinsync\n"); - return 0; - } - - int - noalloc ( void /* x */ * _kbojt, uint8_t _chn, uint8_t _note) - { - printf("noalloc\n"); - return 0; - } -} - -METHOD int killnote { - void /* X */ *_kobj; - uint8_t _chan; - uint8_t _note; - uint8_t _vel; -} DEFAULT nokillnote; - -METHOD int startnote { - void /* X */ *_kobj; - uint8_t _voice; - uint8_t _note; - uint8_t _parm; -} DEFAULT nostartnote; - -METHOD int setinstr { - void /* X */ *_kobj; - uint8_t _chn; - uint16_t _patchno; -} DEFAULT nosetinstr; - -METHOD int hwcontrol { - void /* X */ *_kobj; - uint8_t *_event; -} DEFAULT nohwcontrol; - -METHOD int aftertouch { - void /* X */ *_kobj; - uint8_t _x1; - uint8_t _x2; -} DEFAULT noaftertouch; - -METHOD int panning { - void /* X */ 
*_kobj; - uint8_t _x1; - uint8_t _x2; -} DEFAULT nopanning; - -METHOD int controller { - void /* X */ *_kobj; - uint8_t _x1; - uint8_t _x2; - uint16_t _x3; -} DEFAULT nocontroller; - -METHOD int volumemethod { - void /* X */ *_kobj; - uint8_t _x1; -} DEFAULT novolumemethod; - -METHOD int bender { - void /* X */ *_kobj; - uint8_t _voice; - uint16_t _bend; -} DEFAULT nobender; - -METHOD int setupvoice { - void /* X */ *_kobj; - uint8_t _voice; - uint8_t _chn; -} DEFAULT nosetupvoice; - -METHOD int sendsysex { - void /* X */ *_kobj; - void *_buf; - size_t _len; -} DEFAULT nosendsysex; - -METHOD int allocvoice { - void /* X */ *_kobj; - uint8_t _chn; - uint8_t _note; - void *_x; -} DEFAULT noallocvoice; - -METHOD int writeraw { - void /* X */ *_kobjt; - uint8_t *_buf; - size_t _len; -} DEFAULT nowriteraw; - -METHOD int reset { - void /* X */ *_kobjt; -} DEFAULT noreset; - -METHOD char * shortname { - void /* X */ *_kobjt; -} DEFAULT noshortname; - -METHOD int open { - void /* X */ *_kobjt; - void *_sythn; - int _mode; -} DEFAULT noopen; - -METHOD int close { - void /* X */ *_kobjt; -} DEFAULT noclose; - -METHOD int query { - void /* X */ *_kobjt; -} DEFAULT noquery; - -METHOD int insync { - void /* X */ *_kobjt; -} DEFAULT noinsync; - -METHOD int alloc { - void /* x */ *_kbojt; - uint8_t _chn; - uint8_t _note; -} DEFAULT noalloc; diff --git a/sys/dev/sound/pcm/mixer.c b/sys/dev/sound/pcm/mixer.c index 092af3298f0e..f281dff36248 100644 --- a/sys/dev/sound/pcm/mixer.c +++ b/sys/dev/sound/pcm/mixer.c @@ -750,8 +750,8 @@ mixer_init(device_t dev, kobj_class_t cls, void *devinfo) mixer_setrecsrc(m, 0); /* Set default input. 
*/ - pdev = make_dev(&mixer_cdevsw, SND_DEV_CTL, UID_ROOT, GID_WHEEL, 0666, - "mixer%d", unit); + pdev = make_dev(&mixer_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "mixer%d", + unit); pdev->si_drv1 = m; snddev->mixer_dev = pdev; diff --git a/sys/dev/sound/pcm/sndstat.c b/sys/dev/sound/pcm/sndstat.c index 509a35c5a038..51d0fb3bb686 100644 --- a/sys/dev/sound/pcm/sndstat.c +++ b/sys/dev/sound/pcm/sndstat.c @@ -52,7 +52,6 @@ #define SS_TYPE_PCM 1 #define SS_TYPE_MIDI 2 -#define SS_TYPE_SEQUENCER 3 static d_open_t sndstat_open; static void sndstat_close(void *); @@ -1165,8 +1164,6 @@ sndstat_register(device_t dev, char *str) type = SS_TYPE_PCM; else if (!strcmp(devtype, "midi")) type = SS_TYPE_MIDI; - else if (!strcmp(devtype, "sequencer")) - type = SS_TYPE_SEQUENCER; else return (EINVAL); @@ -1441,8 +1438,8 @@ static void sndstat_sysinit(void *p) { sx_init(&sndstat_lock, "sndstat lock"); - sndstat_dev = make_dev(&sndstat_cdevsw, SND_DEV_STATUS, - UID_ROOT, GID_WHEEL, 0644, "sndstat"); + sndstat_dev = make_dev(&sndstat_cdevsw, 0, UID_ROOT, GID_WHEEL, 0644, + "sndstat"); } SYSINIT(sndstat_sysinit, SI_SUB_DRIVERS, SI_ORDER_FIRST, sndstat_sysinit, NULL); diff --git a/sys/dev/sound/pcm/sound.h b/sys/dev/sound/pcm/sound.h index 315452e294d1..6bd435d0ea25 100644 --- a/sys/dev/sound/pcm/sound.h +++ b/sys/dev/sound/pcm/sound.h @@ -148,14 +148,6 @@ struct snd_mixer; #define RANGE(var, low, high) (var) = \ (((var)<(low))? (low) : ((var)>(high))? 
(high) : (var)) -enum { - SND_DEV_CTL = 0, /* Control port /dev/mixer */ - SND_DEV_SEQ, /* Sequencer /dev/sequencer */ - SND_DEV_MIDIN, /* Raw midi access */ - SND_DEV_DSP, /* Digitized voice /dev/dsp */ - SND_DEV_STATUS, /* /dev/sndstat */ -}; - #define DSP_DEFAULT_SPEED 8000 extern int snd_unit; diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c index e799a5ce05f6..8ab6d35a2685 100644 --- a/sys/fs/msdosfs/msdosfs_lookup.c +++ b/sys/fs/msdosfs/msdosfs_lookup.c @@ -845,7 +845,6 @@ doscheckpath(struct denode *source, struct denode *target, daddr_t *wait_scn) *wait_scn = 0; pmp = target->de_pmp; - lockmgr_assert(&pmp->pm_checkpath_lock, KA_XLOCKED); KASSERT(pmp == source->de_pmp, ("doscheckpath: source and target on different filesystems")); diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index adcffe45df82..4431d36c8a8e 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -575,7 +575,6 @@ mountmsdosfs(struct vnode *odevvp, struct mount *mp) pmp->pm_bo = bo; lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0); - lockinit(&pmp->pm_checkpath_lock, 0, "msdoscp", 0, 0); TASK_INIT(&pmp->pm_rw2ro_task, 0, msdosfs_remount_ro, pmp); @@ -871,7 +870,6 @@ error_exit: } if (pmp != NULL) { lockdestroy(&pmp->pm_fatlock); - lockdestroy(&pmp->pm_checkpath_lock); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; @@ -971,7 +969,6 @@ msdosfs_unmount(struct mount *mp, int mntflags) dev_rel(pmp->pm_dev); free(pmp->pm_inusemap, M_MSDOSFSFAT); lockdestroy(&pmp->pm_fatlock); - lockdestroy(&pmp->pm_checkpath_lock); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; return (error); diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 6417b7dac16b..5db61c8951f6 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -945,7 +945,7 @@ msdosfs_rename(struct vop_rename_args *ap) struct denode *fdip, *fip, *tdip, *tip, 
*nip; u_char toname[12], oldname[11]; u_long to_diroffset; - bool checkpath_locked, doingdirectory, newparent; + bool doingdirectory, newparent; int error; u_long cn, pcl, blkoff; daddr_t bn, wait_scn, scn; @@ -986,8 +986,6 @@ msdosfs_rename(struct vop_rename_args *ap) if (tvp != NULL && tvp != tdvp) VOP_UNLOCK(tvp); - checkpath_locked = false; - relock: doingdirectory = newparent = false; @@ -1108,12 +1106,8 @@ relock: if (doingdirectory && newparent) { if (error != 0) /* write access check above */ goto unlock; - lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL); - checkpath_locked = true; error = doscheckpath(fip, tdip, &wait_scn); if (wait_scn != 0) { - lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); @@ -1276,8 +1270,6 @@ relock: cache_purge(fvp); unlock: - if (checkpath_locked) - lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); vput(fdvp); vput(fvp); if (tvp != NULL) { @@ -1289,7 +1281,6 @@ unlock: vput(tdvp); return (error); releout: - MPASS(!checkpath_locked); vrele(tdvp); if (tvp != NULL) vrele(tvp); @@ -1951,6 +1942,9 @@ msdosfs_pathconf(struct vop_pathconf_args *ap) case _PC_NO_TRUNC: *ap->a_retval = 0; return (0); + case _PC_HAS_HIDDENSYSTEM: + *ap->a_retval = 1; + return (0); default: return (vop_stdpathconf(ap)); } diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h index fcaac544a74d..04e6b75bea2a 100644 --- a/sys/fs/msdosfs/msdosfsmount.h +++ b/sys/fs/msdosfs/msdosfsmount.h @@ -118,7 +118,6 @@ struct msdosfsmount { void *pm_u2d; /* Unicode->DOS iconv handle */ void *pm_d2u; /* DOS->Local iconv handle */ struct lock pm_fatlock; /* lockmgr protecting allocations */ - struct lock pm_checkpath_lock; /* protects doscheckpath result */ struct task pm_rw2ro_task; /* context for emergency remount ro */ }; diff --git a/sys/fs/nfs/nfs_commonsubs.c b/sys/fs/nfs/nfs_commonsubs.c index f46b0d282861..4c498e96a3c0 100644 --- 
a/sys/fs/nfs/nfs_commonsubs.c +++ b/sys/fs/nfs/nfs_commonsubs.c @@ -630,6 +630,10 @@ nfscl_fillsattr(struct nfsrv_descript *nd, struct vattr *vap, NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNERGROUP); if ((flags & NFSSATTR_FULL) && vap->va_size != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SIZE); + if ((flags & NFSSATTR_FULL) && vap->va_flags != VNOVAL) { + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM); + } if (vap->va_atime.tv_sec != VNOVAL) NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMEACCESSSET); if (vap->va_mtime.tv_sec != VNOVAL) @@ -1314,6 +1318,7 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, u_int32_t freenum = 0, tuint; u_int64_t uquad = 0, thyp, thyp2; uint16_t tui16; + long has_pathconf; #ifdef QUOTA struct dqblk dqb; uid_t savuid; @@ -1421,6 +1426,16 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACL); NFSCLRBIT_ATTRBIT(&checkattrbits, NFSATTRBIT_ACLSUPPORT); } + /* Some filesystems do not support uf_hidden */ + if (vp == NULL || VOP_PATHCONF(vp, + _PC_HAS_HIDDENSYSTEM, &has_pathconf) != 0) + has_pathconf = 0; + if (has_pathconf == 0) { + NFSCLRBIT_ATTRBIT(&checkattrbits, + NFSATTRBIT_HIDDEN); + NFSCLRBIT_ATTRBIT(&checkattrbits, + NFSATTRBIT_SYSTEM); + } if (!NFSEQUAL_ATTRBIT(&retattrbits, &checkattrbits) || retnotsup) *retcmpp = NFSERR_NOTSAME; @@ -1521,15 +1536,13 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); if (compare) { if (!(*retcmpp)) { - long has_named_attr; - if (vp == NULL || VOP_PATHCONF(vp, - _PC_HAS_NAMEDATTR, &has_named_attr) + _PC_HAS_NAMEDATTR, &has_pathconf) != 0) - has_named_attr = 0; - if ((has_named_attr != 0 && + has_pathconf = 0; + if ((has_pathconf != 0 && *tl != newnfs_true) || - (has_named_attr == 0 && + (has_pathconf == 0 && *tl != newnfs_false)) *retcmpp = NFSERR_NOTSAME; } @@ -1792,9 +1805,17 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, 
free(cp2, M_NFSSTRING); break; case NFSATTRBIT_HIDDEN: - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - if (compare && !(*retcmpp)) - *retcmpp = NFSERR_ATTRNOTSUPP; + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (compare) { + if (!(*retcmpp) && ((*tl == newnfs_true && + (nap->na_flags & UF_HIDDEN) == 0) || + (*tl == newnfs_false && + (nap->na_flags & UF_HIDDEN) != 0))) + *retcmpp = NFSERR_NOTSAME; + } else if (nap != NULL) { + if (*tl == newnfs_true) + nap->na_flags |= UF_HIDDEN; + } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HOMOGENEOUS: @@ -2166,9 +2187,17 @@ nfsv4_loadattr(struct nfsrv_descript *nd, vnode_t vp, attrsum += NFSX_HYPER; break; case NFSATTRBIT_SYSTEM: - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - if (compare && !(*retcmpp)) - *retcmpp = NFSERR_ATTRNOTSUPP; + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (compare) { + if (!(*retcmpp) && ((*tl == newnfs_true && + (nap->na_flags & UF_SYSTEM) == 0) || + (*tl == newnfs_false && + (nap->na_flags & UF_SYSTEM) != 0))) + *retcmpp = NFSERR_NOTSAME; + } else if (nap != NULL) { + if (*tl == newnfs_true) + nap->na_flags |= UF_SYSTEM; + } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESS: @@ -2634,7 +2663,7 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, size_t atsiz; bool xattrsupp; short irflag; - long has_named_attr; + long has_pathconf; #ifdef QUOTA struct dqblk dqb; uid_t savuid; @@ -2751,6 +2780,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACLSUPPORT); NFSCLRBIT_ATTRBIT(&attrbits,NFSATTRBIT_ACL); } + if (cred == NULL || p == NULL || vp == NULL || + VOP_PATHCONF(vp, _PC_HAS_HIDDENSYSTEM, + &has_pathconf) != 0) + has_pathconf = 0; + if (has_pathconf == 0) { + NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN); + NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM); + } retnum += nfsrv_putattrbit(nd, &attrbits); break; case NFSATTRBIT_TYPE: @@ -2791,10 +2828,10 @@ nfsv4_fillattr(struct 
nfsrv_descript *nd, struct mount *mp, vnode_t vp, break; case NFSATTRBIT_NAMEDATTR: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); - if (VOP_PATHCONF(vp, _PC_HAS_NAMEDATTR, &has_named_attr) - != 0) - has_named_attr = 0; - if (has_named_attr != 0) + if (VOP_PATHCONF(vp, _PC_HAS_NAMEDATTR, + &has_pathconf) != 0) + has_pathconf = 0; + if (has_pathconf != 0) *tl = newnfs_true; else *tl = newnfs_false; @@ -2899,6 +2936,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, *tl = 0; retnum += 2 * NFSX_UNSIGNED; break; + case NFSATTRBIT_HIDDEN: + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + if ((vap->va_flags & UF_HIDDEN) != 0) + *tl = newnfs_true; + else + *tl = newnfs_false; + retnum += NFSX_UNSIGNED; + break; case NFSATTRBIT_HOMOGENEOUS: NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED); if (fsinf.fs_properties & NFSV3FSINFO_HOMOGENEOUS) @@ -3088,6 +3133,14 @@ nfsv4_fillattr(struct nfsrv_descript *nd, struct mount *mp, vnode_t vp, txdr_hyper(vap->va_bytes, tl); retnum += NFSX_HYPER; break; + case NFSATTRBIT_SYSTEM: + NFSM_BUILD(tl, uint32_t *, NFSX_UNSIGNED); + if ((vap->va_flags & UF_SYSTEM) != 0) + *tl = newnfs_true; + else + *tl = newnfs_false; + retnum += NFSX_UNSIGNED; + break; case NFSATTRBIT_TIMEACCESS: NFSM_BUILD(tl, u_int32_t *, NFSX_V4TIME); txdr_nfsv4time(&vap->va_atime, tl); diff --git a/sys/fs/nfs/nfsproto.h b/sys/fs/nfs/nfsproto.h index eff53e1a384e..cb5a80e8df73 100644 --- a/sys/fs/nfs/nfsproto.h +++ b/sys/fs/nfs/nfsproto.h @@ -1142,6 +1142,7 @@ struct nfsv3_sattr { NFSATTRBM_FILESFREE | \ NFSATTRBM_FILESTOTAL | \ NFSATTRBM_FSLOCATIONS | \ + NFSATTRBM_HIDDEN | \ NFSATTRBM_HOMOGENEOUS | \ NFSATTRBM_MAXFILESIZE | \ NFSATTRBM_MAXLINK | \ @@ -1163,6 +1164,7 @@ struct nfsv3_sattr { NFSATTRBM_SPACEFREE | \ NFSATTRBM_SPACETOTAL | \ NFSATTRBM_SPACEUSED | \ + NFSATTRBM_SYSTEM | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEDELTA | \ @@ -1210,11 +1212,13 @@ struct nfsv3_sattr { */ #define NFSATTRBIT_SETABLE0 \ (NFSATTRBM_SIZE | \ + 
NFSATTRBM_HIDDEN | \ NFSATTRBM_ACL) #define NFSATTRBIT_SETABLE1 \ (NFSATTRBM_MODE | \ NFSATTRBM_OWNER | \ NFSATTRBM_OWNERGROUP | \ + NFSATTRBM_SYSTEM | \ NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEACCESSSET | \ NFSATTRBM_TIMEMODIFYSET) @@ -1254,6 +1258,7 @@ struct nfsv3_sattr { NFSATTRBM_SIZE | \ NFSATTRBM_FSID | \ NFSATTRBM_FILEID | \ + NFSATTRBM_HIDDEN | \ NFSATTRBM_MAXREAD) /* @@ -1266,6 +1271,7 @@ struct nfsv3_sattr { NFSATTRBM_OWNERGROUP | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEUSED | \ + NFSATTRBM_SYSTEM | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEMETADATA | \ @@ -1288,6 +1294,7 @@ struct nfsv3_sattr { NFSATTRBM_SIZE | \ NFSATTRBM_FSID | \ NFSATTRBM_FILEID | \ + NFSATTRBM_HIDDEN | \ NFSATTRBM_MAXREAD) /* @@ -1298,6 +1305,7 @@ struct nfsv3_sattr { NFSATTRBM_NUMLINKS | \ NFSATTRBM_RAWDEV | \ NFSATTRBM_SPACEUSED | \ + NFSATTRBM_SYSTEM | \ NFSATTRBM_TIMEACCESS | \ NFSATTRBM_TIMECREATE | \ NFSATTRBM_TIMEMETADATA | \ diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index c07da6f9275f..e0e66baca44d 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -4158,6 +4158,13 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep, if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, NFSATTRBIT_TIMECREATE)) NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_TIMECREATE); + if (!NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, + NFSATTRBIT_HIDDEN) || + !NFSISSET_ATTRBIT(&dnp->n_vattr.na_suppattr, + NFSATTRBIT_SYSTEM)) { + NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN); + NFSCLRBIT_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM); + } } /* diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 0049d7edca33..fbfcdafaa06b 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -1074,15 +1074,23 @@ nfs_setattr(struct vop_setattr_args *ap) int error = 0; u_quad_t tsize; struct timespec ts; + struct nfsmount *nmp; #ifndef nolint tsize = (u_quad_t)0; #endif /* - 
* Setting of flags and marking of atimes are not supported. + * Only setting of UF_HIDDEN and UF_SYSTEM are supported and + * only for NFSv4 servers that support them. */ - if (vap->va_flags != VNOVAL) + nmp = VFSTONFS(vp->v_mount); + if (vap->va_flags != VNOVAL && (!NFSHASNFSV4(nmp) || + (vap->va_flags & ~(UF_HIDDEN | UF_SYSTEM)) != 0 || + ((vap->va_flags & UF_HIDDEN) != 0 && + !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_HIDDEN)) || + ((vap->va_flags & UF_SYSTEM) != 0 && + !NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, NFSATTRBIT_SYSTEM)))) return (EOPNOTSUPP); /* @@ -1092,7 +1100,8 @@ nfs_setattr(struct vop_setattr_args *ap) vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_birthtime.tv_sec != VNOVAL || - vap->va_mode != (mode_t)VNOVAL) && + vap->va_mode != (mode_t)VNOVAL || + vap->va_flags != (u_long)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { @@ -4754,6 +4763,15 @@ nfs_pathconf(struct vop_pathconf_args *ap) else *ap->a_retval = 0; break; + case _PC_HAS_HIDDENSYSTEM: + if (NFS_ISV4(vp) && NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, + NFSATTRBIT_HIDDEN) && + NFSISSET_ATTRBIT(&np->n_vattr.na_suppattr, + NFSATTRBIT_SYSTEM)) + *ap->a_retval = 1; + else + *ap->a_retval = 0; + break; default: error = vop_stdpathconf(ap); diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 3bf54d82b959..a81f1492ef95 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -449,6 +449,7 @@ nfsvno_getattr(struct vnode *vp, struct nfsvattr *nvap, } nvap->na_bsdflags = 0; + nvap->na_flags = 0; error = VOP_GETATTR(vp, &nvap->na_vattr, nd->nd_cred); if (lockedit != 0) NFSVOPUNLOCK(vp); @@ -3127,6 +3128,9 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, bitpos = NFSATTRBIT_MAX; } else { bitpos = 0; + if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_HIDDEN) || + NFSISSET_ATTRBIT(attrbitp, 
NFSATTRBIT_SYSTEM)) + nvap->na_flags = 0; } moderet = 0; for (; bitpos < NFSATTRBIT_MAX; bitpos++) { @@ -3163,9 +3167,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_HIDDEN: - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - if (!nd->nd_repstat) - nd->nd_repstat = NFSERR_ATTRNOTSUPP; + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (nd->nd_repstat == 0) { + if (*tl == newnfs_true) + nvap->na_flags |= UF_HIDDEN; + } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_MIMETYPE: @@ -3240,9 +3246,11 @@ nfsv4_sattr(struct nfsrv_descript *nd, vnode_t vp, struct nfsvattr *nvap, attrsum += (NFSX_UNSIGNED + NFSM_RNDUP(j)); break; case NFSATTRBIT_SYSTEM: - NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); - if (!nd->nd_repstat) - nd->nd_repstat = NFSERR_ATTRNOTSUPP; + NFSM_DISSECT(tl, uint32_t *, NFSX_UNSIGNED); + if (nd->nd_repstat == 0) { + if (*tl == newnfs_true) + nvap->na_flags |= UF_SYSTEM; + } attrsum += NFSX_UNSIGNED; break; case NFSATTRBIT_TIMEACCESSSET: diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index 4e15d55eb312..f7564ade401b 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -403,8 +403,10 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram, if (error) goto nfsmout; - /* For NFSv4, only va_uid is used from nva2. */ + /* For NFSv4, only va_uid and va_flags is used from nva2. 
*/ NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_OWNER); + NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_HIDDEN); + NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SYSTEM); preat_ret = nfsvno_getattr(vp, &nva2, nd, p, 1, &retbits); if (!nd->nd_repstat) nd->nd_repstat = preat_ret; @@ -463,6 +465,9 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram, &nva, &attrbits, exp, p); if (!nd->nd_repstat && (nd->nd_flag & ND_NFSV4)) { + u_long oldflags; + + oldflags = nva2.na_flags; /* * For V4, try setting the attributes in sets, so that the * reply bitmap will be correct for an error case. @@ -532,6 +537,32 @@ nfsrvd_setattr(struct nfsrv_descript *nd, __unused int isdgram, NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_MODESETMASKED); } } + if (!nd->nd_repstat && + (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN) || + NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM))) { + if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN)) { + if ((nva.na_flags & UF_HIDDEN) != 0) + oldflags |= UF_HIDDEN; + else + oldflags &= ~UF_HIDDEN; + } + if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM)) { + if ((nva.na_flags & UF_SYSTEM) != 0) + oldflags |= UF_SYSTEM; + else + oldflags &= ~UF_SYSTEM; + } + NFSVNO_ATTRINIT(&nva2); + NFSVNO_SETATTRVAL(&nva2, flags, oldflags); + nd->nd_repstat = nfsvno_setattr(vp, &nva2, nd->nd_cred, p, + exp); + if (!nd->nd_repstat) { + if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_HIDDEN)) + NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_HIDDEN); + if (NFSISSET_ATTRBIT(&attrbits, NFSATTRBIT_SYSTEM)) + NFSSETBIT_ATTRBIT(&retbits, NFSATTRBIT_SYSTEM); + } + } #ifdef NFS4_ACL_EXTATTR_NAME if (!nd->nd_repstat && aclp->acl_cnt > 0 && diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c index 0356877eaf05..7dcc83880bb9 100644 --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -245,6 +245,10 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp) vp->v_object = lowervp->v_object; vn_irflag_set(vp, VIRF_PGREAD); } + if ((vn_irflag_read(lowervp) & 
VIRF_INOTIFY) != 0) + vn_irflag_set(vp, VIRF_INOTIFY); + if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0) + vn_irflag_set(vp, VIRF_INOTIFY_PARENT); if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp) vp->v_vflag |= VV_ROOT; diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index 8608216e10e5..74c1a8f3acb6 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -190,6 +190,26 @@ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, &null_bug_bypass, 0, ""); /* + * Synchronize inotify flags with the lower vnode: + * - If the upper vnode has the flag set and the lower does not, then the lower + * vnode is unwatched and the upper vnode does not need to go through + * VOP_INOTIFY. + * - If the lower vnode is watched, then the upper vnode should go through + * VOP_INOTIFY, so copy the flag up. + */ +static void +null_copy_inotify(struct vnode *vp, struct vnode *lvp, short flag) +{ + if ((vn_irflag_read(vp) & flag) != 0) { + if (__predict_false((vn_irflag_read(lvp) & flag) == 0)) + vn_irflag_unset(vp, flag); + } else if ((vn_irflag_read(lvp) & flag) != 0) { + if (__predict_false((vn_irflag_read(vp) & flag) == 0)) + vn_irflag_set(vp, flag); + } +} + +/* * This is the 10-Apr-92 bypass routine. * This version has been optimized for speed, throwing away some * safety checks. It should still always work, but it's not as @@ -305,7 +325,10 @@ null_bypass(struct vop_generic_args *ap) lvp = *(vps_p[i]); /* - * Get rid of the transient hold on lvp. + * Get rid of the transient hold on lvp. Copy inotify + * flags up in case something is watching the lower + * layer. + * * If lowervp was unlocked during VOP * operation, nullfs upper vnode could have * been reclaimed, which changes its v_vnlock @@ -314,6 +337,10 @@ null_bypass(struct vop_generic_args *ap) * upper (reclaimed) vnode. 
*/ if (lvp != NULLVP) { + null_copy_inotify(old_vps[i], lvp, + VIRF_INOTIFY); + null_copy_inotify(old_vps[i], lvp, + VIRF_INOTIFY_PARENT); if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE && old_vps[i]->v_vnlock != lvp->v_vnlock) { VOP_UNLOCK(lvp); diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c index c30995508c00..5d412cabadb8 100644 --- a/sys/fs/smbfs/smbfs_vnops.c +++ b/sys/fs/smbfs/smbfs_vnops.c @@ -810,6 +810,9 @@ smbfs_pathconf(struct vop_pathconf_args *ap) case _PC_NO_TRUNC: *retval = 1; break; + case _PC_HAS_HIDDENSYSTEM: + *retval = 1; + break; default: error = vop_stdpathconf(ap); } diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index c99d0732be50..9d2a587b177a 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -1691,6 +1691,10 @@ tmpfs_pathconf(struct vop_pathconf_args *v) *retval = PAGE_SIZE; break; + case _PC_HAS_HIDDENSYSTEM: + *retval = 1; + break; + default: error = vop_stdpathconf(v); } diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h index aa2dfbb68745..49f002a633d2 100644 --- a/sys/i386/linux/linux_proto.h +++ b/sys/i386/linux/linux_proto.h @@ -981,10 +981,13 @@ struct linux_inotify_init_args { syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; @@ -1178,7 +1181,7 @@ struct linux_pipe2_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_init1_args { - syscallarg_t dummy; + char 
flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_preadv_args { char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)]; diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c index 7be646f34144..b8893008944b 100644 --- a/sys/i386/linux/linux_sysent.c +++ b/sys/i386/linux/linux_sysent.c @@ -306,8 +306,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 
= linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */ @@ -346,7 +346,7 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */ { .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ + { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ { .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */ { .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */ { .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */ diff --git a/sys/i386/linux/linux_systrace_args.c b/sys/i386/linux/linux_systrace_args.c index f3e3c32a2bbf..563d1a795ae1 100644 --- a/sys/i386/linux/linux_systrace_args.c +++ b/sys/i386/linux/linux_systrace_args.c @@ -2071,12 +2071,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ 
case 292: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 293: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -2410,7 +2417,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_init1 */ case 332: { - *n_args = 0; + struct linux_inotify_init1_args *p = params; + iarg[a++] = p->flags; /* l_int */ + *n_args = 1; break; } /* linux_preadv */ @@ -6604,9 +6613,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 292: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 293: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 294: @@ -7172,6 +7204,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + switch (ndx) { + case 0: + p = "l_int"; + break; + default: + break; + }; break; /* linux_preadv */ case 333: @@ -9889,8 +9928,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 291: /* linux_inotify_add_watch */ case 292: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 294: /* linux_openat */ @@ -10062,6 +10107,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + if (ndx == 0 || ndx == 1) 
+ p = "int"; + break; /* linux_preadv */ case 333: if (ndx == 0 || ndx == 1) diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index 958336be0f08..2113ea51ac5d 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -1605,10 +1605,17 @@ int linux_inotify_init(void); } 292 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 293 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } ; Linux 2.6.16: 294 AUE_NULL STD { @@ -1872,7 +1879,9 @@ ); } 332 AUE_NULL STD { - int linux_inotify_init1(void); + int linux_inotify_init1( + l_int flags + ); } ; Linux 2.6.30: 333 AUE_NULL STD { diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index a48a513aa3b5..91792430d24c 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -658,5 +658,7 @@ struct sysent sysent[] = { { .sy_narg = AS(getrlimitusage_args), .sy_call = (sy_call_t *)sys_getrlimitusage, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 589 = getrlimitusage */ { .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */ { .sy_narg = AS(setcred_args), .sy_call = (sy_call_t *)sys_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = setcred */ - { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = 
SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ + { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ }; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index ac4b6ac3f457..406236fc2723 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -38,9 +38,11 @@ #include "opt_ddb.h" #include "opt_ktrace.h" +#define EXTERR_CATEGORY EXTERR_CAT_FILEDESC #include <sys/systm.h> #include <sys/capsicum.h> #include <sys/conf.h> +#include <sys/exterrvar.h> #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filedesc.h> @@ -492,6 +494,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) int error, flg, kif_sz, seals, tmp, got_set, got_cleared; uint64_t bsize; off_t foffset; + int flags; error = 0; flg = F_POSIX; @@ -511,6 +514,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOEXEC, fd, tmp); break; + case F_DUPFD_CLOFORK: + tmp = arg; + error = kern_dup(td, FDDUP_FCNTL, FDDUP_FLAG_CLOFORK, fd, tmp); + break; + case F_DUP2FD: tmp = arg; error = kern_dup(td, FDDUP_FIXED, 0, fd, tmp); @@ -528,6 +536,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) if (fde != NULL) { td->td_retval[0] = ((fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0) | + ((fde->fde_flags & UF_FOCLOSE) ? FD_CLOFORK : 0) | ((fde->fde_flags & UF_RESOLVE_BENEATH) ? FD_RESOLVE_BENEATH : 0); error = 0; @@ -545,6 +554,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) */ fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | ((arg & FD_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | + ((arg & FD_CLOFORK) != 0 ? UF_FOCLOSE : 0) | ((arg & FD_RESOLVE_BENEATH) != 0 ? 
UF_RESOLVE_BENEATH : 0); error = 0; @@ -916,7 +926,17 @@ revert_f_setfl: break; default: - error = EINVAL; + if ((cmd & ((1u << F_DUP3FD_SHIFT) - 1)) != F_DUP3FD) + return (EXTERROR(EINVAL, "invalid fcntl cmd")); + /* Handle F_DUP3FD */ + flags = (cmd >> F_DUP3FD_SHIFT); + if ((flags & ~(FD_CLOEXEC | FD_CLOFORK)) != 0) + return (EXTERROR(EINVAL, "invalid flags for F_DUP3FD")); + tmp = arg; + error = kern_dup(td, FDDUP_FIXED, + ((flags & FD_CLOEXEC) != 0 ? FDDUP_FLAG_CLOEXEC : 0) | + ((flags & FD_CLOFORK) != 0 ? FDDUP_FLAG_CLOFORK : 0), + fd, tmp); break; } return (error); @@ -946,7 +966,7 @@ kern_dup(struct thread *td, u_int mode, int flags, int old, int new) fdp = p->p_fd; oioctls = NULL; - MPASS((flags & ~(FDDUP_FLAG_CLOEXEC)) == 0); + MPASS((flags & ~(FDDUP_FLAG_CLOEXEC | FDDUP_FLAG_CLOFORK)) == 0); MPASS(mode < FDDUP_LASTMODE); AUDIT_ARG_FD(old); @@ -971,8 +991,10 @@ kern_dup(struct thread *td, u_int mode, int flags, int old, int new) goto unlock; if (mode == FDDUP_FIXED && old == new) { td->td_retval[0] = new; - if (flags & FDDUP_FLAG_CLOEXEC) + if ((flags & FDDUP_FLAG_CLOEXEC) != 0) fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; + if ((flags & FDDUP_FLAG_CLOFORK) != 0) + fdp->fd_ofiles[new].fde_flags |= UF_FOCLOSE; error = 0; goto unlock; } @@ -1047,10 +1069,9 @@ kern_dup(struct thread *td, u_int mode, int flags, int old, int new) fde_copy(oldfde, newfde); filecaps_copy_finish(&oldfde->fde_caps, &newfde->fde_caps, nioctls); - if ((flags & FDDUP_FLAG_CLOEXEC) != 0) - newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; - else - newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; + newfde->fde_flags = (oldfde->fde_flags & ~(UF_EXCLOSE | UF_FOCLOSE)) | + ((flags & FDDUP_FLAG_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | + ((flags & FDDUP_FLAG_CLOFORK) != 0 ? 
UF_FOCLOSE : 0); #ifdef CAPABILITIES seqc_write_end(&newfde->fde_seqc); #endif @@ -1416,13 +1437,15 @@ kern_close(struct thread *td, int fd) } static int -close_range_cloexec(struct thread *td, u_int lowfd, u_int highfd) +close_range_flags(struct thread *td, u_int lowfd, u_int highfd, int flags) { struct filedesc *fdp; struct fdescenttbl *fdt; struct filedescent *fde; - int fd; + int fd, fde_flags; + fde_flags = ((flags & CLOSE_RANGE_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | + ((flags & CLOSE_RANGE_CLOFORK) != 0 ? UF_FOCLOSE : 0); fdp = td->td_proc->p_fd; FILEDESC_XLOCK(fdp); fdt = atomic_load_ptr(&fdp->fd_files); @@ -1434,7 +1457,7 @@ close_range_cloexec(struct thread *td, u_int lowfd, u_int highfd) for (; fd <= highfd; fd++) { fde = &fdt->fdt_ofiles[fd]; if (fde->fde_file != NULL) - fde->fde_flags |= UF_EXCLOSE; + fde->fde_flags |= fde_flags; } out_locked: FILEDESC_XUNLOCK(fdp); @@ -1492,8 +1515,8 @@ kern_close_range(struct thread *td, int flags, u_int lowfd, u_int highfd) return (EINVAL); } - if ((flags & CLOSE_RANGE_CLOEXEC) != 0) - return (close_range_cloexec(td, lowfd, highfd)); + if ((flags & (CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_CLOFORK)) != 0) + return (close_range_flags(td, lowfd, highfd, flags)); return (close_range_impl(td, lowfd, highfd)); } @@ -1513,7 +1536,7 @@ sys_close_range(struct thread *td, struct close_range_args *uap) AUDIT_ARG_CMD(uap->highfd); AUDIT_ARG_FFLAGS(uap->flags); - if ((uap->flags & ~(CLOSE_RANGE_CLOEXEC)) != 0) + if ((uap->flags & ~(CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_CLOFORK)) != 0) return (EINVAL); return (kern_close_range(td, uap->flags, uap->lowfd, uap->highfd)); } @@ -2172,6 +2195,7 @@ _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags, #endif fde->fde_file = fp; fde->fde_flags = ((flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0) | + ((flags & O_CLOFORK) != 0 ? UF_FOCLOSE : 0) | ((flags & O_RESOLVE_BENEATH) != 0 ? 
UF_RESOLVE_BENEATH : 0); if (fcaps != NULL) filecaps_move(fcaps, &fde->fde_caps); @@ -2432,6 +2456,7 @@ fdcopy(struct filedesc *fdp) newfdp->fd_freefile = fdp->fd_freefile; FILEDESC_FOREACH_FDE(fdp, i, ofde) { if ((ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) == 0 || + (ofde->fde_flags & UF_FOCLOSE) != 0 || !fhold(ofde->fde_file)) { if (newfdp->fd_freefile == fdp->fd_freefile) newfdp->fd_freefile = i; @@ -2729,6 +2754,12 @@ fdcloseexec(struct thread *td) fdfree(fdp, i); (void) closefp(fdp, i, fp, td, false, false); FILEDESC_UNLOCK_ASSERT(fdp); + } else if (fde->fde_flags & UF_FOCLOSE) { + /* + * https://austingroupbugs.net/view.php?id=1851 + * FD_CLOFORK should not be preserved across exec + */ + fde->fde_flags &= ~UF_FOCLOSE; } } } diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index c8b01afeab4f..dcd38c6e6fbe 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1637,6 +1637,12 @@ uifree(struct uidinfo *uip) if (uip->ui_pipecnt != 0) printf("freeing uidinfo: uid = %d, pipecnt = %ld\n", uip->ui_uid, uip->ui_pipecnt); + if (uip->ui_inotifycnt != 0) + printf("freeing uidinfo: uid = %d, inotifycnt = %ld\n", + uip->ui_uid, uip->ui_inotifycnt); + if (uip->ui_inotifywatchcnt != 0) + printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n", + uip->ui_uid, uip->ui_inotifywatchcnt); free(uip, M_UIDINFO); } @@ -1742,6 +1748,21 @@ chgpipecnt(struct uidinfo *uip, int diff, rlim_t max) return (chglimit(uip, &uip->ui_pipecnt, diff, max, "pipecnt")); } +int +chginotifycnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + return (chglimit(uip, &uip->ui_inotifycnt, diff, max, "inotifycnt")); +} + +int +chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + return (chglimit(uip, &uip->ui_inotifywatchcnt, diff, max, + "inotifywatchcnt")); +} + static int sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS) { diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index 17b53208157a..35b258e68701 100644 --- 
a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -27,12 +27,12 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> #include "opt_kern_tls.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/capsicum.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/ktls.h> @@ -1246,6 +1246,8 @@ out: */ if (error == 0) { td->td_retval[0] = 0; + if (sbytes > 0 && vp != NULL) + INOTIFY(vp, IN_ACCESS); } if (sent != NULL) { (*sent) = sbytes; diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 4565abc4b540..5d51aa675cb7 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1050,8 +1050,7 @@ osigaction(struct thread *td, struct osigaction_args *uap) int osigreturn(struct thread *td, struct osigreturn_args *uap) { - - return (nosys(td, (struct nosys_args *)uap)); + return (kern_nosys(td, 0)); } #endif #endif /* COMPAT_43 */ @@ -4139,7 +4138,7 @@ coredump(struct thread *td) struct flock lf; struct vattr vattr; size_t fullpathsize; - int error, error1, locked; + int error, error1, jid, locked, ppid, sig; char *name; /* name of corefile */ void *rl_cookie; off_t limit; @@ -4168,6 +4167,10 @@ coredump(struct thread *td) PROC_UNLOCK(p); return (EFBIG); } + + ppid = p->p_oppid; + sig = p->p_sig; + jid = p->p_ucred->cr_prison->pr_id; PROC_UNLOCK(p); error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, @@ -4253,6 +4256,9 @@ coredump(struct thread *td) } devctl_safe_quote_sb(sb, name); sbuf_putc(sb, '"'); + + sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", + jid, p->p_pid, ppid, sig); if (sbuf_finish(sb) == 0) devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); out2: @@ -4281,6 +4287,12 @@ struct nosys_args { int nosys(struct thread *td, struct nosys_args *args) { + return (kern_nosys(td, args->dummy)); +} + +int +kern_nosys(struct thread *td, int dummy) +{ struct proc *p; p = td->td_proc; diff --git a/sys/kern/kern_syscalls.c b/sys/kern/kern_syscalls.c index 24406763a93a..a93d711e7597 100644 --- 
a/sys/kern/kern_syscalls.c +++ b/sys/kern/kern_syscalls.c @@ -35,6 +35,7 @@ #include <sys/resourcevar.h> #include <sys/sx.h> #include <sys/syscall.h> +#include <sys/syscallsubr.h> #include <sys/sysent.h> #include <sys/sysproto.h> #include <sys/systm.h> @@ -50,14 +51,14 @@ int lkmnosys(struct thread *td, struct nosys_args *args) { - return (nosys(td, args)); + return (kern_nosys(td, 0)); } int lkmressys(struct thread *td, struct nosys_args *args) { - return (nosys(td, args)); + return (kern_nosys(td, 0)); } struct sysent nosys_sysent = { diff --git a/sys/kern/subr_capability.c b/sys/kern/subr_capability.c index 7cc6fb593697..5ad5b0af1681 100644 --- a/sys/kern/subr_capability.c +++ b/sys/kern/subr_capability.c @@ -74,6 +74,10 @@ const cap_rights_t cap_getsockopt_rights = CAP_RIGHTS_INITIALIZER(CAP_GETSOCKOPT); const cap_rights_t cap_getsockname_rights = CAP_RIGHTS_INITIALIZER(CAP_GETSOCKNAME); +const cap_rights_t cap_inotify_add_rights = + CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_ADD); +const cap_rights_t cap_inotify_rm_rights = + CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_RM); const cap_rights_t cap_ioctl_rights = CAP_RIGHTS_INITIALIZER(CAP_IOCTL); const cap_rights_t cap_listen_rights = CAP_RIGHTS_INITIALIZER(CAP_LISTEN); const cap_rights_t cap_linkat_source_rights = diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index d31ff3b939cc..94e44d888181 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -37,16 +37,17 @@ #include "opt_capsicum.h" #include "opt_ktrace.h" -#define EXTERR_CATEGORY EXTERR_CAT_FILEDESC +#define EXTERR_CATEGORY EXTERR_CAT_GENIO #include <sys/param.h> #include <sys/systm.h> #include <sys/sysproto.h> #include <sys/capsicum.h> +#include <sys/exterrvar.h> #include <sys/filedesc.h> #include <sys/filio.h> #include <sys/fcntl.h> #include <sys/file.h> -#include <sys/exterrvar.h> +#include <sys/inotify.h> #include <sys/lock.h> #include <sys/proc.h> #include <sys/signalvar.h> @@ -195,7 +196,7 @@ sys_read(struct thread *td, struct read_args 
*uap) int error; if (uap->nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; @@ -233,7 +234,7 @@ kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset) int error; if (nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = buf; aiov.iov_len = nbyte; auio.uio_iov = &aiov; @@ -329,7 +330,7 @@ kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset) error = ESPIPE; else if (offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) - error = EINVAL; + error = EXTERROR(EINVAL, "neg offset"); else error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); fdrop(fp, td); @@ -396,7 +397,7 @@ sys_write(struct thread *td, struct write_args *uap) int error; if (uap->nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = (void *)(uintptr_t)uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; @@ -435,7 +436,7 @@ kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, int error; if (nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = (void *)(uintptr_t)buf; aiov.iov_len = nbyte; auio.uio_iov = &aiov; @@ -531,7 +532,7 @@ kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset) error = ESPIPE; else if (offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) - error = EINVAL; + error = EXTERROR(EINVAL, "neg offset"); else error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); fdrop(fp, td); @@ -602,14 +603,14 @@ kern_ftruncate(struct thread *td, int fd, off_t length) AUDIT_ARG_FD(fd); if (length < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "negative length")); error = fget(td, fd, &cap_ftruncate_rights, &fp); if (error) return (error); AUDIT_ARG_FILE(td->td_proc, fp); if (!(fp->f_flag & FWRITE)) { fdrop(fp, td); - 
return (EINVAL); + return (EXTERROR(EINVAL, "non-writable")); } error = fo_truncate(fp, length, td->td_ucred, td); fdrop(fp, td); @@ -840,8 +841,10 @@ kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) int error; AUDIT_ARG_FD(fd); - if (offset < 0 || len <= 0) - return (EINVAL); + if (offset < 0) + return (EXTERROR(EINVAL, "negative offset")); + if (len <= 0) + return (EXTERROR(EINVAL, "negative length")); /* Check for wrap. */ if (offset > OFF_MAX - len) return (EFBIG); @@ -898,16 +901,21 @@ kern_fspacectl(struct thread *td, int fd, int cmd, AUDIT_ARG_FFLAGS(flags); if (rqsr == NULL) - return (EINVAL); + return (EXTERROR(EINVAL, "no range")); rmsr = *rqsr; if (rmsrp != NULL) *rmsrp = rmsr; - if (cmd != SPACECTL_DEALLOC || - rqsr->r_offset < 0 || rqsr->r_len <= 0 || - rqsr->r_offset > OFF_MAX - rqsr->r_len || - (flags & ~SPACECTL_F_SUPPORTED) != 0) - return (EINVAL); + if (cmd != SPACECTL_DEALLOC) + return (EXTERROR(EINVAL, "cmd", cmd)); + if (rqsr->r_offset < 0) + return (EXTERROR(EINVAL, "neg offset")); + if (rqsr->r_len <= 0) + return (EXTERROR(EINVAL, "neg len")); + if (rqsr->r_offset > OFF_MAX - rqsr->r_len) + return (EXTERROR(EINVAL, "offset too large")); + if ((flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EXTERROR(EINVAL, "reserved flags", flags)); error = fget_write(td, fd, &cap_pwrite_rights, &fp); if (error != 0) @@ -939,7 +947,6 @@ int kern_specialfd(struct thread *td, int type, void *arg) { struct file *fp; - struct specialfd_eventfd *ae; int error, fd, fflags; fflags = 0; @@ -948,14 +955,24 @@ kern_specialfd(struct thread *td, int type, void *arg) return (error); switch (type) { - case SPECIALFD_EVENTFD: + case SPECIALFD_EVENTFD: { + struct specialfd_eventfd *ae; + ae = arg; if ((ae->flags & EFD_CLOEXEC) != 0) fflags |= O_CLOEXEC; error = eventfd_create_file(td, fp, ae->initval, ae->flags); break; + } + case SPECIALFD_INOTIFY: { + struct specialfd_inotify *si; + + si = arg; + error = inotify_create_file(td, fp, si->flags, 
&fflags); + break; + } default: - error = EINVAL; + error = EXTERROR(EINVAL, "invalid type", type); break; } @@ -970,13 +987,14 @@ kern_specialfd(struct thread *td, int type, void *arg) int sys___specialfd(struct thread *td, struct __specialfd_args *args) { - struct specialfd_eventfd ae; int error; switch (args->type) { - case SPECIALFD_EVENTFD: + case SPECIALFD_EVENTFD: { + struct specialfd_eventfd ae; + if (args->len != sizeof(struct specialfd_eventfd)) { - error = EINVAL; + error = EXTERROR(EINVAL, "eventfd params ABI"); break; } error = copyin(args->req, &ae, sizeof(ae)); @@ -984,13 +1002,27 @@ sys___specialfd(struct thread *td, struct __specialfd_args *args) break; if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE)) != 0) { - error = EINVAL; + error = EXTERROR(EINVAL, "reserved flag"); break; } error = kern_specialfd(td, args->type, &ae); break; + } + case SPECIALFD_INOTIFY: { + struct specialfd_inotify si; + + if (args->len != sizeof(si)) { + error = EINVAL; + break; + } + error = copyin(args->req, &si, sizeof(si)); + if (error != 0) + break; + error = kern_specialfd(td, args->type, &si); + break; + } default: - error = EINVAL; + error = EXTERROR(EINVAL, "unknown type", args->type); break; } return (error); @@ -1166,7 +1198,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, int error, lf, ndu; if (nd < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "negative ndescs")); fdp = td->td_proc->p_fd; ndu = nd; lf = fdp->fd_nfiles; @@ -1259,7 +1291,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, rtv = *tvp; if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || rtv.tv_usec >= 1000000) { - error = EINVAL; + error = EXTERROR(EINVAL, "invalid timeval"); goto done; } if (!timevalisset(&rtv)) @@ -1491,7 +1523,7 @@ sys_poll(struct thread *td, struct poll_args *uap) if (uap->timeout != INFTIM) { if (uap->timeout < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timeout")); ts.tv_sec = uap->timeout / 1000; ts.tv_nsec 
= (uap->timeout % 1000) * 1000000; tsp = &ts; @@ -1516,7 +1548,7 @@ kern_poll_kfds(struct thread *td, struct pollfd *kfds, u_int nfds, precision = 0; if (tsp != NULL) { if (!timespecvalid_interval(tsp)) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timespec")); if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) sbt = 0; else { @@ -1619,7 +1651,7 @@ kern_poll(struct thread *td, struct pollfd *ufds, u_int nfds, int error; if (kern_poll_maxfds(nfds)) - return (EINVAL); + return (EXTERROR(EINVAL, "too large nfds")); if (nfds > nitems(stackfds)) kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); else @@ -1796,7 +1828,7 @@ selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) rtv = *tvp; if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || rtv.tv_usec >= 1000000) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timeval")); if (!timevalisset(&rtv)) asbt = 0; else if (rtv.tv_sec <= INT32_MAX) { @@ -2173,7 +2205,7 @@ kern_kcmp(struct thread *td, pid_t pid1, pid_t pid2, int type, (uintptr_t)p2->p_vmspace); break; default: - error = EINVAL; + error = EXTERROR(EINVAL, "unknown op"); break; } diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 9340779918a2..ed651da96b14 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -548,7 +548,7 @@ sys_pipe2(struct thread *td, struct pipe2_args *uap) { int error, fildes[2]; - if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK)) + if ((uap->flags & ~(O_CLOEXEC | O_CLOFORK | O_NONBLOCK)) != 0) return (EINVAL); error = kern_pipe(td, fildes, uap->flags, NULL, NULL); if (error) diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index fa36cc824078..90a4f3a7dad8 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -598,4 +598,6 @@ const char *syscallnames[] = { "fchroot", /* 590 = fchroot */ "setcred", /* 591 = setcred */ "exterrctl", /* 592 = exterrctl */ + "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ + "inotify_rm_watch", /* 594 = inotify_rm_watch */ }; diff --git 
a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 08b557a7a540..90559fab6086 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3349,11 +3349,26 @@ size_t size ); } -592 AUE_NULL STD { +592 AUE_NULL STD|CAPENABLED { int exterrctl( u_int op, u_int flags, _In_reads_bytes_(4) void *ptr ); } +593 AUE_INOTIFY STD|CAPENABLED { + int inotify_add_watch_at( + int fd, + int dfd, + _In_z_ const char *path, + uint32_t mask + ); + } +594 AUE_INOTIFY STD|CAPENABLED { + int inotify_rm_watch( + int fd, + int wd + ); + } + ; vim: syntax=off diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index 15789d3eb5fa..90b21616a558 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3482,6 +3482,24 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } + /* inotify_add_watch_at */ + case 593: { + struct inotify_add_watch_at_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->dfd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 4; + break; + } + /* inotify_rm_watch */ + case 594: { + struct inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->wd; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9317,6 +9335,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* inotify_add_watch_at */ + case 593: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const char *"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* inotify_rm_watch */ + case 594: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11305,6 +11355,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* 
inotify_add_watch_at */ + case 593: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* inotify_rm_watch */ + case 594: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c index 11141d197aec..a545a0a54c25 100644 --- a/sys/kern/sysv_msg.c +++ b/sys/kern/sysv_msg.c @@ -1724,7 +1724,7 @@ freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap) return (sys_msgsys(td, (struct msgsys_args *)uap)); } #else - return (nosys(td, NULL)); + return (kern_nosys(td, 0)); #endif } diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c index e399517010fc..a99e1a4de14e 100644 --- a/sys/kern/sysv_sem.c +++ b/sys/kern/sysv_sem.c @@ -1904,7 +1904,7 @@ freebsd32_semsys(struct thread *td, struct freebsd32_semsys_args *uap) return (sys_semsys(td, (struct semsys_args *)uap)); } #else - return (nosys(td, NULL)); + return (kern_nosys(td, 0)); #endif } diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 60e3fe92a4b7..8d1a469127c6 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1474,7 +1474,7 @@ freebsd32_shmsys(struct thread *td, struct freebsd32_shmsys_args *uap) return (EINVAL); } #else - return (nosys(td, NULL)); + return (kern_nosys(td, 0)); #endif } diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index ad8485028987..133724ac76c5 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -151,6 +151,10 @@ kern_socket(struct thread *td, int domain, int type, int protocol) type &= ~SOCK_CLOEXEC; oflag |= O_CLOEXEC; } + if ((type & SOCK_CLOFORK) != 0) { + type &= ~SOCK_CLOFORK; + oflag |= O_CLOFORK; + } if ((type & SOCK_NONBLOCK) != 0) { type &= ~SOCK_NONBLOCK; fflag |= FNONBLOCK; @@ -352,7 +356,8 @@ kern_accept4(struct thread *td, int s, struct sockaddr *sa, int flags, goto done; #endif error = falloc_caps(td, &nfp, &fd, - (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps); + ((flags & SOCK_CLOEXEC) != 0 ? 
O_CLOEXEC : 0) | + ((flags & SOCK_CLOFORK) != 0 ? O_CLOFORK : 0), &fcaps); if (error != 0) goto done; SOCK_LOCK(head); @@ -435,7 +440,7 @@ int sys_accept4(struct thread *td, struct accept4_args *uap) { - if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + if ((uap->flags & ~(SOCK_CLOEXEC | SOCK_CLOFORK | SOCK_NONBLOCK)) != 0) return (EINVAL); return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); @@ -557,6 +562,10 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol, type &= ~SOCK_CLOEXEC; oflag |= O_CLOEXEC; } + if ((type & SOCK_CLOFORK) != 0) { + type &= ~SOCK_CLOFORK; + oflag |= O_CLOFORK; + } if ((type & SOCK_NONBLOCK) != 0) { type &= ~SOCK_NONBLOCK; fflag |= FNONBLOCK; diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 72bd0246db11..0056dac65c7d 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -3463,7 +3463,8 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags) UNP_LINK_UNLOCK_ASSERT(); - fdflags = (flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0; + fdflags = ((flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0) | + ((flags & MSG_CMSG_CLOFORK) ? 
O_CLOFORK : 0); error = 0; if (controlp != NULL) /* controlp == NULL => free control messages */ diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 97dc854c9386..02973146068d 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -301,7 +301,7 @@ static TAILQ_HEAD(,kaiocb) aio_jobs; /* (c) Async job list */ static struct unrhdr *aiod_unr; static void aio_biocleanup(struct bio *bp); -void aio_init_aioinfo(struct proc *p); +static int aio_init_aioinfo(struct proc *p); static int aio_onceonly(void); static int aio_free_entry(struct kaiocb *job); static void aio_process_rw(struct kaiocb *job); @@ -309,7 +309,7 @@ static void aio_process_sync(struct kaiocb *job); static void aio_process_mlock(struct kaiocb *job); static void aio_schedule_fsync(void *context, int pending); static int aio_newproc(int *); -int aio_aqueue(struct thread *td, struct aiocb *ujob, +static int aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lio, int type, struct aiocb_ops *ops); static int aio_queue_file(struct file *fp, struct kaiocb *job); static void aio_biowakeup(struct bio *bp); @@ -422,10 +422,11 @@ aio_onceonly(void) * Init the per-process aioinfo structure. The aioinfo limits are set * per-process for user limit (resource) management. */ -void +static int aio_init_aioinfo(struct proc *p) { struct kaioinfo *ki; + int error; ki = uma_zalloc(kaio_zone, M_WAITOK); mtx_init(&ki->kaio_mtx, "aiomtx", NULL, MTX_DEF | MTX_NEW); @@ -451,8 +452,20 @@ aio_init_aioinfo(struct proc *p) uma_zfree(kaio_zone, ki); } - while (num_aio_procs < MIN(target_aio_procs, max_aio_procs)) - aio_newproc(NULL); + error = 0; + while (num_aio_procs < MIN(target_aio_procs, max_aio_procs)) { + error = aio_newproc(NULL); + if (error != 0) { + /* + * At least one worker is enough to have AIO + * functional. Clear error in that case. 
+ */ + if (num_aio_procs > 0) + error = 0; + break; + } + } + return (error); } static int @@ -1476,7 +1489,7 @@ static struct aiocb_ops aiocb_ops_osigevent = { * Queue a new AIO request. Choosing either the threaded or direct bio VCHR * technique is done in this code. */ -int +static int aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, int type, struct aiocb_ops *ops) { @@ -1490,8 +1503,11 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, int fd, kqfd; u_short evflags; - if (p->p_aioinfo == NULL) - aio_init_aioinfo(p); + if (p->p_aioinfo == NULL) { + error = aio_init_aioinfo(p); + if (error != 0) + goto err1; + } ki = p->p_aioinfo; @@ -2213,8 +2229,11 @@ kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list, if (nent < 0 || nent > max_aio_queue_per_proc) return (EINVAL); - if (p->p_aioinfo == NULL) - aio_init_aioinfo(p); + if (p->p_aioinfo == NULL) { + error = aio_init_aioinfo(p); + if (error != 0) + return (error); + } ki = p->p_aioinfo; @@ -2503,8 +2522,11 @@ kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp, timo = tvtohz(&atv); } - if (p->p_aioinfo == NULL) - aio_init_aioinfo(p); + if (p->p_aioinfo == NULL) { + error = aio_init_aioinfo(p); + if (error != 0) + return (error); + } ki = p->p_aioinfo; error = 0; diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 883beaf6d1da..3d455b3874cc 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -41,6 +41,7 @@ #include <sys/counter.h> #include <sys/filedesc.h> #include <sys/fnv_hash.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> @@ -2629,6 +2630,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, atomic_store_ptr(&dvp->v_cache_dd, ncp); } else if (vp != NULL) { /* + * Take the slow path in INOTIFY(). This flag will be lazily + * cleared by cache_vop_inotify() once all directories referring + * to vp are unwatched. 
+ */ + if (__predict_false((vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) + vn_irflag_set_cond(vp, VIRF_INOTIFY_PARENT); + + /* * For this case, the cache entry maps both the * directory name in it and the name ".." for the * directory's parent. @@ -4008,6 +4017,56 @@ out: return (error); } +void +cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie) +{ + struct mtx *vlp; + struct namecache *ncp; + int isdir; + bool logged, self; + + isdir = vp->v_type == VDIR ? IN_ISDIR : 0; + self = (vn_irflag_read(vp) & VIRF_INOTIFY) != 0 && + (vp->v_type != VDIR || (event & ~_IN_DIR_EVENTS) != 0); + + if (self) { + int selfevent; + + if (event == _IN_ATTRIB_LINKCOUNT) + selfevent = IN_ATTRIB; + else + selfevent = event; + inotify_log(vp, NULL, 0, selfevent | isdir, cookie); + } + if ((event & IN_ALL_EVENTS) == 0) + return; + + logged = false; + vlp = VP2VNODELOCK(vp); + mtx_lock(vlp); + TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) { + if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) + continue; + if ((vn_irflag_read(ncp->nc_dvp) & VIRF_INOTIFY) != 0) { + /* + * XXX-MJ if the vnode has two links in the same + * dir, we'll log the same event twice. + */ + inotify_log(ncp->nc_dvp, ncp->nc_name, ncp->nc_nlen, + event | isdir, cookie); + logged = true; + } + } + if (!logged && (vn_irflag_read(vp) & VIRF_INOTIFY_PARENT) != 0) { + /* + * We didn't find a watched directory that contains this vnode, + * so stop calling VOP_INOTIFY for operations on the vnode. 
+ */ + vn_irflag_unset(vp, VIRF_INOTIFY_PARENT); + } + mtx_unlock(vlp); +} + #ifdef DDB static void db_print_vpath(struct vnode *vp) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index be49c0887609..fd6202a1424c 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -39,6 +39,7 @@ #include <sys/conf.h> #include <sys/event.h> #include <sys/filio.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/limits.h> #include <sys/lock.h> @@ -119,6 +120,8 @@ struct vop_vector default_vnodeops = { .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_need_inactive = vop_stdneed_inactive, + .vop_inotify = vop_stdinotify, + .vop_inotify_add_watch = vop_stdinotify_add_watch, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, @@ -453,6 +456,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap) case _PC_MAC_PRESENT: case _PC_NAMEDATTR_ENABLED: case _PC_HAS_NAMEDATTR: + case _PC_HAS_HIDDENSYSTEM: *ap->a_retval = 0; return (0); default: @@ -1306,6 +1310,20 @@ vop_stdneed_inactive(struct vop_need_inactive_args *ap) } int +vop_stdinotify(struct vop_inotify_args *ap) +{ + vn_inotify(ap->a_vp, ap->a_dvp, ap->a_cnp, ap->a_event, ap->a_cookie); + return (0); +} + +int +vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *ap) +{ + return (vn_inotify_add_watch(ap->a_vp, ap->a_sc, ap->a_mask, + ap->a_wdp, ap->a_td)); +} + +int vop_stdioctl(struct vop_ioctl_args *ap) { struct vnode *vp; diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c new file mode 100644 index 000000000000..9562350c897f --- /dev/null +++ b/sys/kern/vfs_inotify.c @@ -0,0 +1,1008 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. 
+ */ + +#include "opt_ktrace.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/caprights.h> +#include <sys/counter.h> +#include <sys/dirent.h> +#define EXTERR_CATEGORY EXTERR_CAT_INOTIFY +#include <sys/exterrvar.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/filio.h> +#include <sys/inotify.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/ktrace.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/resourcevar.h> +#include <sys/selinfo.h> +#include <sys/stat.h> +#include <sys/syscallsubr.h> +#include <sys/sysctl.h> +#include <sys/sysent.h> +#include <sys/syslimits.h> +#include <sys/sysproto.h> +#include <sys/tree.h> +#include <sys/user.h> +#include <sys/vnode.h> + +uint32_t inotify_rename_cookie; + +static SYSCTL_NODE(_vfs, OID_AUTO, inotify, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "inotify configuration"); + +static int inotify_max_queued_events = 16384; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_queued_events, CTLFLAG_RWTUN, + &inotify_max_queued_events, 0, + "Maximum number of events to queue on an inotify descriptor"); + +static int inotify_max_user_instances = 256; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_instances, CTLFLAG_RWTUN, + &inotify_max_user_instances, 0, + "Maximum number of inotify descriptors per user"); + +static int inotify_max_user_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_watches, CTLFLAG_RWTUN, + &inotify_max_user_watches, 0, + "Maximum number of inotify watches per user"); + +static int inotify_max_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_watches, CTLFLAG_RWTUN, + &inotify_max_watches, 0, + "Maximum number of inotify watches system-wide"); + +static int inotify_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, watches, CTLFLAG_RD, + &inotify_watches, 0, + "Total number of inotify watches currently in use"); + +static int inotify_coalesce = 1; +SYSCTL_INT(_vfs_inotify, 
OID_AUTO, coalesce, CTLFLAG_RWTUN, + &inotify_coalesce, 0, + "Coalesce inotify events when possible"); + +static COUNTER_U64_DEFINE_EARLY(inotify_event_drops); +SYSCTL_COUNTER_U64(_vfs_inotify, OID_AUTO, event_drops, CTLFLAG_RD, + &inotify_event_drops, + "Number of inotify events dropped due to limits or allocation failures"); + +static fo_rdwr_t inotify_read; +static fo_ioctl_t inotify_ioctl; +static fo_poll_t inotify_poll; +static fo_kqfilter_t inotify_kqfilter; +static fo_stat_t inotify_stat; +static fo_close_t inotify_close; +static fo_fill_kinfo_t inotify_fill_kinfo; + +static const struct fileops inotifyfdops = { + .fo_read = inotify_read, + .fo_write = invfo_rdwr, + .fo_truncate = invfo_truncate, + .fo_ioctl = inotify_ioctl, + .fo_poll = inotify_poll, + .fo_kqfilter = inotify_kqfilter, + .fo_stat = inotify_stat, + .fo_close = inotify_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_fill_kinfo = inotify_fill_kinfo, + .fo_cmp = file_kcmp_generic, + .fo_flags = DFLAG_PASSABLE, +}; + +static void filt_inotifydetach(struct knote *kn); +static int filt_inotifyevent(struct knote *kn, long hint); + +static const struct filterops inotify_rfiltops = { + .f_isfd = 1, + .f_detach = filt_inotifydetach, + .f_event = filt_inotifyevent, +}; + +static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures"); + +struct inotify_record { + STAILQ_ENTRY(inotify_record) link; + struct inotify_event ev; +}; + +static uint64_t inotify_ino = 1; + +/* + * On LP64 systems this occupies 64 bytes, so we don't get internal + * fragmentation by allocating watches with malloc(9). If the size changes, + * consider using a UMA zone to improve memory efficiency. 
+ */ +struct inotify_watch { + struct inotify_softc *sc; /* back-pointer */ + int wd; /* unique ID */ + uint32_t mask; /* event mask */ + struct vnode *vp; /* vnode being watched, refed */ + RB_ENTRY(inotify_watch) ilink; /* inotify linkage */ + TAILQ_ENTRY(inotify_watch) vlink; /* vnode linkage */ +}; + +static void +inotify_init(void *arg __unused) +{ + /* Don't let a user hold too many vnodes. */ + inotify_max_user_watches = desiredvnodes / 3; + /* Don't let the system hold too many vnodes. */ + inotify_max_watches = desiredvnodes / 2; +} +SYSINIT(inotify, SI_SUB_VFS, SI_ORDER_ANY, inotify_init, NULL); + +static int +inotify_watch_cmp(const struct inotify_watch *a, + const struct inotify_watch *b) +{ + if (a->wd < b->wd) + return (-1); + else if (a->wd > b->wd) + return (1); + else + return (0); +} +RB_HEAD(inotify_watch_tree, inotify_watch); +RB_GENERATE_STATIC(inotify_watch_tree, inotify_watch, ilink, inotify_watch_cmp); + +struct inotify_softc { + struct mtx lock; /* serialize all softc writes */ + STAILQ_HEAD(, inotify_record) pending; /* events waiting to be read */ + struct inotify_record overflow; /* preallocated record */ + int nextwatch; /* next watch ID to try */ + int npending; /* number of pending events */ + size_t nbpending; /* bytes available to read */ + uint64_t ino; /* unique identifier */ + struct inotify_watch_tree watches; /* active watches */ + struct selinfo sel; /* select/poll/kevent info */ + struct ucred *cred; /* credential ref */ +}; + +static struct inotify_record * +inotify_dequeue(struct inotify_softc *sc) +{ + struct inotify_record *rec; + + mtx_assert(&sc->lock, MA_OWNED); + KASSERT(!STAILQ_EMPTY(&sc->pending), + ("%s: queue for %p is empty", __func__, sc)); + + rec = STAILQ_FIRST(&sc->pending); + STAILQ_REMOVE_HEAD(&sc->pending, link); + sc->npending--; + sc->nbpending -= sizeof(rec->ev) + rec->ev.len; + return (rec); +} + +static void +inotify_enqueue(struct inotify_softc *sc, struct inotify_record *rec, bool head) +{ + 
mtx_assert(&sc->lock, MA_OWNED); + + if (head) + STAILQ_INSERT_HEAD(&sc->pending, rec, link); + else + STAILQ_INSERT_TAIL(&sc->pending, rec, link); + sc->npending++; + sc->nbpending += sizeof(rec->ev) + rec->ev.len; +} + +static int +inotify_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags, + struct thread *td) +{ + struct inotify_softc *sc; + struct inotify_record *rec; + int error; + bool first; + + sc = fp->f_data; + error = 0; + + mtx_lock(&sc->lock); + while (STAILQ_EMPTY(&sc->pending)) { + if ((flags & IO_NDELAY) != 0 || (fp->f_flag & FNONBLOCK) != 0) { + mtx_unlock(&sc->lock); + return (EWOULDBLOCK); + } + error = msleep(&sc->pending, &sc->lock, PCATCH, "inotify", 0); + if (error != 0) { + mtx_unlock(&sc->lock); + return (error); + } + } + for (first = true; !STAILQ_EMPTY(&sc->pending); first = false) { + size_t len; + + rec = inotify_dequeue(sc); + len = sizeof(rec->ev) + rec->ev.len; + if (uio->uio_resid < (ssize_t)len) { + inotify_enqueue(sc, rec, true); + if (first) { + error = EXTERROR(EINVAL, + "read buffer is too small"); + } + break; + } + mtx_unlock(&sc->lock); + error = uiomove(&rec->ev, len, uio); +#ifdef KTRACE + if (error == 0 && KTRPOINT(td, KTR_STRUCT)) + ktrstruct("inotify", &rec->ev, len); +#endif + mtx_lock(&sc->lock); + if (error != 0) { + inotify_enqueue(sc, rec, true); + mtx_unlock(&sc->lock); + return (error); + } + if (rec == &sc->overflow) { + /* + * Signal to inotify_queue_record() that the overflow + * record can be reused. 
+ */ + memset(rec, 0, sizeof(*rec)); + } else { + free(rec, M_INOTIFY); + } + } + mtx_unlock(&sc->lock); + return (error); +} + +static int +inotify_ioctl(struct file *fp, u_long com, void *data, struct ucred *cred, + struct thread *td) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + switch (com) { + case FIONREAD: + *(int *)data = (int)sc->nbpending; + return (0); + case FIONBIO: + case FIOASYNC: + return (0); + default: + return (ENOTTY); + } + + return (0); +} + +static int +inotify_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) +{ + struct inotify_softc *sc; + int revents; + + sc = fp->f_data; + revents = 0; + + mtx_lock(&sc->lock); + if ((events & (POLLIN | POLLRDNORM)) != 0 && sc->npending > 0) + revents |= events & (POLLIN | POLLRDNORM); + else + selrecord(td, &sc->sel); + mtx_unlock(&sc->lock); + return (revents); +} + +static void +filt_inotifydetach(struct knote *kn) +{ + struct inotify_softc *sc; + + sc = kn->kn_hook; + knlist_remove(&sc->sel.si_note, kn, 0); +} + +static int +filt_inotifyevent(struct knote *kn, long hint) +{ + struct inotify_softc *sc; + + sc = kn->kn_hook; + mtx_assert(&sc->lock, MA_OWNED); + kn->kn_data = sc->nbpending; + return (kn->kn_data > 0); +} + +static int +inotify_kqfilter(struct file *fp, struct knote *kn) +{ + struct inotify_softc *sc; + + if (kn->kn_filter != EVFILT_READ) + return (EINVAL); + sc = fp->f_data; + kn->kn_fop = &inotify_rfiltops; + kn->kn_hook = sc; + knlist_add(&sc->sel.si_note, kn, 0); + return (0); +} + +static int +inotify_stat(struct file *fp, struct stat *sb, struct ucred *cred) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + memset(sb, 0, sizeof(*sb)); + sb->st_mode = S_IFREG | S_IRUSR; + sb->st_blksize = sizeof(struct inotify_event) + _IN_NAMESIZE(NAME_MAX); + mtx_lock(&sc->lock); + sb->st_size = sc->nbpending; + sb->st_blocks = sc->npending; + sb->st_uid = sc->cred->cr_ruid; + sb->st_gid = sc->cred->cr_rgid; + sb->st_ino = sc->ino; + mtx_unlock(&sc->lock); + 
return (0); +} + +static void +inotify_unlink_watch_locked(struct inotify_softc *sc, struct inotify_watch *watch) +{ + struct vnode *vp; + + vp = watch->vp; + mtx_assert(&vp->v_pollinfo->vpi_lock, MA_OWNED); + + atomic_subtract_int(&inotify_watches, 1); + (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0); + + TAILQ_REMOVE(&vp->v_pollinfo->vpi_inotify, watch, vlink); + if (TAILQ_EMPTY(&vp->v_pollinfo->vpi_inotify)) + vn_irflag_unset_locked(vp, VIRF_INOTIFY); +} + +/* + * Assumes that the watch has already been removed from its softc. + */ +static void +inotify_remove_watch(struct inotify_watch *watch) +{ + struct inotify_softc *sc; + struct vnode *vp; + + sc = watch->sc; + + vp = watch->vp; + mtx_lock(&vp->v_pollinfo->vpi_lock); + inotify_unlink_watch_locked(sc, watch); + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + vrele(vp); + free(watch, M_INOTIFY); +} + +static int +inotify_close(struct file *fp, struct thread *td) +{ + struct inotify_softc *sc; + struct inotify_record *rec; + struct inotify_watch *watch; + + sc = fp->f_data; + + mtx_lock(&sc->lock); + (void)chginotifycnt(sc->cred->cr_ruidinfo, -1, 0); + while ((watch = RB_MIN(inotify_watch_tree, &sc->watches)) != NULL) { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + mtx_unlock(&sc->lock); + inotify_remove_watch(watch); + mtx_lock(&sc->lock); + } + while (!STAILQ_EMPTY(&sc->pending)) { + rec = inotify_dequeue(sc); + if (rec != &sc->overflow) + free(rec, M_INOTIFY); + } + mtx_unlock(&sc->lock); + seldrain(&sc->sel); + knlist_destroy(&sc->sel.si_note); + mtx_destroy(&sc->lock); + crfree(sc->cred); + free(sc, M_INOTIFY); + return (0); +} + +static int +inotify_fill_kinfo(struct file *fp, struct kinfo_file *kif, + struct filedesc *fdp) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + kif->kf_type = KF_TYPE_INOTIFY; + kif->kf_un.kf_inotify.kf_inotify_npending = sc->npending; + kif->kf_un.kf_inotify.kf_inotify_nbpending = sc->nbpending; + return (0); +} + +int +inotify_create_file(struct thread 
*td, struct file *fp, int flags, int *fflagsp) +{ + struct inotify_softc *sc; + int fflags; + + if ((flags & ~(IN_NONBLOCK | IN_CLOEXEC)) != 0) + return (EINVAL); + + if (!chginotifycnt(td->td_ucred->cr_ruidinfo, 1, + inotify_max_user_instances)) + return (EMFILE); + + sc = malloc(sizeof(*sc), M_INOTIFY, M_WAITOK | M_ZERO); + sc->nextwatch = 1; /* Required for compatibility. */ + STAILQ_INIT(&sc->pending); + RB_INIT(&sc->watches); + mtx_init(&sc->lock, "inotify", NULL, MTX_DEF); + knlist_init_mtx(&sc->sel.si_note, &sc->lock); + sc->cred = crhold(td->td_ucred); + sc->ino = atomic_fetchadd_64(&inotify_ino, 1); + + fflags = FREAD; + if ((flags & IN_NONBLOCK) != 0) + fflags |= FNONBLOCK; + if ((flags & IN_CLOEXEC) != 0) + *fflagsp |= O_CLOEXEC; + finit(fp, fflags, DTYPE_INOTIFY, sc, &inotifyfdops); + + return (0); +} + +static struct inotify_record * +inotify_alloc_record(uint32_t wd, const char *name, size_t namelen, int event, + uint32_t cookie, int waitok) +{ + struct inotify_event *evp; + struct inotify_record *rec; + + rec = malloc(sizeof(*rec) + _IN_NAMESIZE(namelen), M_INOTIFY, + waitok | M_ZERO); + if (rec == NULL) + return (NULL); + evp = &rec->ev; + evp->wd = wd; + evp->mask = event; + evp->cookie = cookie; + evp->len = _IN_NAMESIZE(namelen); + if (name != NULL) + memcpy(evp->name, name, namelen); + return (rec); +} + +static bool +inotify_can_coalesce(struct inotify_softc *sc, struct inotify_event *evp) +{ + struct inotify_record *prev; + + mtx_assert(&sc->lock, MA_OWNED); + + prev = STAILQ_LAST(&sc->pending, inotify_record, link); + return (prev != NULL && prev->ev.mask == evp->mask && + prev->ev.wd == evp->wd && prev->ev.cookie == evp->cookie && + prev->ev.len == evp->len && + (evp->len == 0 || strcmp(prev->ev.name, evp->name) == 0)); +} + +static void +inotify_overflow_event(struct inotify_event *evp) +{ + evp->mask = IN_Q_OVERFLOW; + evp->wd = -1; + evp->cookie = 0; + evp->len = 0; +} + +/* + * Put an event record on the queue for an inotify desscriptor. 
Return false if + * the record was not enqueued for some reason, true otherwise. + */ +static bool +inotify_queue_record(struct inotify_softc *sc, struct inotify_record *rec) +{ + struct inotify_event *evp; + + mtx_assert(&sc->lock, MA_OWNED); + + evp = &rec->ev; + if (__predict_false(rec == &sc->overflow)) { + /* + * Is the overflow record already in the queue? If so, there's + * not much else we can do: we're here because a kernel memory + * shortage prevented new record allocations. + */ + counter_u64_add(inotify_event_drops, 1); + if (evp->mask == IN_Q_OVERFLOW) + return (false); + inotify_overflow_event(evp); + } else { + /* Try to coalesce duplicate events. */ + if (inotify_coalesce && inotify_can_coalesce(sc, evp)) + return (false); + + /* + * Would this one overflow the queue? If so, convert it to an + * overflow event and try again to coalesce. + */ + if (sc->npending >= inotify_max_queued_events) { + counter_u64_add(inotify_event_drops, 1); + inotify_overflow_event(evp); + if (inotify_can_coalesce(sc, evp)) + return (false); + } + } + inotify_enqueue(sc, rec, false); + selwakeup(&sc->sel); + KNOTE_LOCKED(&sc->sel.si_note, 0); + wakeup(&sc->pending); + return (true); +} + +static int +inotify_log_one(struct inotify_watch *watch, const char *name, size_t namelen, + int event, uint32_t cookie) +{ + struct inotify_watch key; + struct inotify_softc *sc; + struct inotify_record *rec; + int relecount; + bool allocfail; + + relecount = 0; + + sc = watch->sc; + rec = inotify_alloc_record(watch->wd, name, namelen, event, cookie, + M_NOWAIT); + if (rec == NULL) { + rec = &sc->overflow; + allocfail = true; + } else { + allocfail = false; + } + + mtx_lock(&sc->lock); + if (!inotify_queue_record(sc, rec) && rec != &sc->overflow) + free(rec, M_INOTIFY); + if ((watch->mask & IN_ONESHOT) != 0 || + (event & (IN_DELETE_SELF | IN_UNMOUNT)) != 0) { + if (!allocfail) { + rec = inotify_alloc_record(watch->wd, NULL, 0, + IN_IGNORED, 0, M_NOWAIT); + if (rec == NULL) + rec = 
&sc->overflow; + if (!inotify_queue_record(sc, rec) && + rec != &sc->overflow) + free(rec, M_INOTIFY); + } + + /* + * Remove the watch, taking care to handle races with + * inotify_close(). + */ + key.wd = watch->wd; + if (RB_FIND(inotify_watch_tree, &sc->watches, &key) != NULL) { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + inotify_unlink_watch_locked(sc, watch); + free(watch, M_INOTIFY); + + /* Defer vrele() to until locks are dropped. */ + relecount++; + } + } + mtx_unlock(&sc->lock); + return (relecount); +} + +void +inotify_log(struct vnode *vp, const char *name, size_t namelen, int event, + uint32_t cookie) +{ + struct inotify_watch *watch, *tmp; + int relecount; + + KASSERT((event & ~(IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT)) == 0, + ("inotify_log: invalid event %#x", event)); + + relecount = 0; + mtx_lock(&vp->v_pollinfo->vpi_lock); + TAILQ_FOREACH_SAFE(watch, &vp->v_pollinfo->vpi_inotify, vlink, tmp) { + KASSERT(watch->vp == vp, + ("inotify_log: watch %p vp != vp", watch)); + if ((watch->mask & event) != 0 || event == IN_UNMOUNT) { + relecount += inotify_log_one(watch, name, namelen, event, + cookie); + } + } + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + for (int i = 0; i < relecount; i++) + vrele(vp); +} + +/* + * An inotify event occurred on a watched vnode. + */ +void +vn_inotify(struct vnode *vp, struct vnode *dvp, struct componentname *cnp, + int event, uint32_t cookie) +{ + int isdir; + + VNPASS(vp->v_holdcnt > 0, vp); + + isdir = vp->v_type == VDIR ? IN_ISDIR : 0; + + if (dvp != NULL) { + VNPASS(dvp->v_holdcnt > 0, dvp); + + /* + * Should we log an event for the vnode itself? + */ + if ((vn_irflag_read(vp) & VIRF_INOTIFY) != 0) { + int selfevent; + + switch (event) { + case _IN_MOVE_DELETE: + case IN_DELETE: + /* + * IN_DELETE_SELF is only generated when the + * last hard link of a file is removed. 
+ */ + selfevent = IN_DELETE_SELF; + if (vp->v_type != VDIR) { + struct vattr va; + int error; + + error = VOP_GETATTR(vp, &va, cnp->cn_cred); + if (error == 0 && va.va_nlink != 0) + selfevent = 0; + } + break; + case IN_MOVED_FROM: + cookie = 0; + selfevent = IN_MOVE_SELF; + break; + case _IN_ATTRIB_LINKCOUNT: + selfevent = IN_ATTRIB; + break; + default: + selfevent = event; + break; + } + + if ((selfevent & ~_IN_DIR_EVENTS) != 0) { + inotify_log(vp, NULL, 0, selfevent | isdir, + cookie); + } + } + + /* + * Something is watching the directory through which this vnode + * was referenced, so we may need to log the event. + */ + if ((event & IN_ALL_EVENTS) != 0 && + (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0) { + inotify_log(dvp, cnp->cn_nameptr, + cnp->cn_namelen, event | isdir, cookie); + } + } else { + /* + * We don't know which watched directory might contain the + * vnode, so we have to fall back to searching the name cache. + */ + cache_vop_inotify(vp, event, cookie); + } +} + +int +vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask, + uint32_t *wdp, struct thread *td) +{ + struct inotify_watch *watch, *watch1; + uint32_t wd; + + /* + * If this is a directory, make sure all of its entries are present in + * the name cache so that we're able to look them up if an event occurs. + * The persistent reference on the directory prevents the outgoing name + * cache entries from being reclaimed. + */ + if (vp->v_type == VDIR) { + struct dirent *dp; + char *buf; + off_t off; + size_t buflen, len; + int eof, error; + + buflen = 128 * sizeof(struct dirent); + buf = malloc(buflen, M_TEMP, M_WAITOK); + + error = 0; + len = off = eof = 0; + for (;;) { + struct nameidata nd; + + error = vn_dir_next_dirent(vp, td, buf, buflen, &dp, + &len, &off, &eof); + if (error != 0) + break; + if (len == 0) + /* Finished reading. 
*/ + break; + if (strcmp(dp->d_name, ".") == 0 || + strcmp(dp->d_name, "..") == 0) + continue; + + /* + * namei() consumes a reference on the starting + * directory if it's specified as a vnode. + */ + vrefact(vp); + NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, + dp->d_name, vp); + error = namei(&nd); + if (error != 0) + break; + vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT); + vrele(nd.ni_vp); + } + free(buf, M_TEMP); + if (error != 0) + return (error); + } + + /* + * The vnode referenced in kern_inotify_add_watch() might be different + * than this one if nullfs is in the picture. + */ + vrefact(vp); + watch = malloc(sizeof(*watch), M_INOTIFY, M_WAITOK | M_ZERO); + watch->sc = sc; + watch->vp = vp; + watch->mask = mask; + + /* + * Are we updating an existing watch? Search the vnode's list rather + * than that of the softc, as the former is likely to be shorter. + */ + v_addpollinfo(vp); + mtx_lock(&vp->v_pollinfo->vpi_lock); + TAILQ_FOREACH(watch1, &vp->v_pollinfo->vpi_inotify, vlink) { + if (watch1->sc == sc) + break; + } + mtx_lock(&sc->lock); + if (watch1 != NULL) { + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + /* + * We found an existing watch, update it based on our flags. + */ + if ((mask & IN_MASK_CREATE) != 0) { + mtx_unlock(&sc->lock); + vrele(vp); + free(watch, M_INOTIFY); + return (EEXIST); + } + if ((mask & IN_MASK_ADD) != 0) + watch1->mask |= mask; + else + watch1->mask = mask; + *wdp = watch1->wd; + mtx_unlock(&sc->lock); + vrele(vp); + free(watch, M_INOTIFY); + return (EJUSTRETURN); + } + + /* + * We're creating a new watch. Add it to the softc and vnode watch + * lists. + */ + do { + struct inotify_watch key; + + /* + * Search for the next available watch descriptor. This is + * implemented so as to avoid reusing watch descriptors for as + * long as possible. 
+ */ + key.wd = wd = sc->nextwatch++; + watch1 = RB_FIND(inotify_watch_tree, &sc->watches, &key); + } while (watch1 != NULL || wd == 0); + watch->wd = wd; + RB_INSERT(inotify_watch_tree, &sc->watches, watch); + TAILQ_INSERT_TAIL(&vp->v_pollinfo->vpi_inotify, watch, vlink); + mtx_unlock(&sc->lock); + mtx_unlock(&vp->v_pollinfo->vpi_lock); + vn_irflag_set_cond(vp, VIRF_INOTIFY); + + *wdp = wd; + + return (0); +} + +void +vn_inotify_revoke(struct vnode *vp) +{ + if (vp->v_pollinfo == NULL) { + /* This is a nullfs vnode which shadows a watched vnode. */ + return; + } + inotify_log(vp, NULL, 0, IN_UNMOUNT, 0); +} + +static int +fget_inotify(struct thread *td, int fd, const cap_rights_t *needrightsp, + struct file **fpp) +{ + struct file *fp; + int error; + + error = fget(td, fd, needrightsp, &fp); + if (error != 0) + return (error); + if (fp->f_type != DTYPE_INOTIFY) { + fdrop(fp, td); + return (EINVAL); + } + *fpp = fp; + return (0); +} + +int +kern_inotify_add_watch(int fd, int dfd, const char *path, uint32_t mask, + struct thread *td) +{ + struct nameidata nd; + struct file *fp; + struct inotify_softc *sc; + struct vnode *vp; + uint32_t wd; + int count, error; + + fp = NULL; + vp = NULL; + + if ((mask & IN_ALL_EVENTS) == 0) + return (EXTERROR(EINVAL, "no events specified")); + if ((mask & (IN_MASK_ADD | IN_MASK_CREATE)) == + (IN_MASK_ADD | IN_MASK_CREATE)) + return (EXTERROR(EINVAL, + "IN_MASK_ADD and IN_MASK_CREATE are mutually exclusive")); + if ((mask & ~(IN_ALL_EVENTS | _IN_ALL_FLAGS | IN_UNMOUNT)) != 0) + return (EXTERROR(EINVAL, "unrecognized flag")); + + error = fget_inotify(td, fd, &cap_inotify_add_rights, &fp); + if (error != 0) + return (error); + sc = fp->f_data; + + NDINIT_AT(&nd, LOOKUP, + ((mask & IN_DONT_FOLLOW) ? 
NOFOLLOW : FOLLOW) | LOCKLEAF | + LOCKSHARED | AUDITVNODE1, UIO_USERSPACE, path, dfd); + error = namei(&nd); + if (error != 0) + goto out; + NDFREE_PNBUF(&nd); + vp = nd.ni_vp; + + error = VOP_ACCESS(vp, VREAD, td->td_ucred, td); + if (error != 0) + goto out; + + if ((mask & IN_ONLYDIR) != 0 && vp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + + count = atomic_fetchadd_int(&inotify_watches, 1); + if (count > inotify_max_watches) { + atomic_subtract_int(&inotify_watches, 1); + error = ENOSPC; + goto out; + } + if (!chginotifywatchcnt(sc->cred->cr_ruidinfo, 1, + inotify_max_user_watches)) { + atomic_subtract_int(&inotify_watches, 1); + error = ENOSPC; + goto out; + } + error = VOP_INOTIFY_ADD_WATCH(vp, sc, mask, &wd, td); + if (error != 0) { + atomic_subtract_int(&inotify_watches, 1); + (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0); + if (error == EJUSTRETURN) { + /* We updated an existing watch, everything is ok. */ + error = 0; + } else { + goto out; + } + } + td->td_retval[0] = wd; + +out: + if (vp != NULL) + vput(vp); + fdrop(fp, td); + return (error); +} + +int +sys_inotify_add_watch_at(struct thread *td, + struct inotify_add_watch_at_args *uap) +{ + return (kern_inotify_add_watch(uap->fd, uap->dfd, uap->path, + uap->mask, td)); +} + +int +kern_inotify_rm_watch(int fd, uint32_t wd, struct thread *td) +{ + struct file *fp; + struct inotify_softc *sc; + struct inotify_record *rec; + struct inotify_watch key, *watch; + int error; + + error = fget_inotify(td, fd, &cap_inotify_rm_rights, &fp); + if (error != 0) + return (error); + sc = fp->f_data; + + rec = inotify_alloc_record(wd, NULL, 0, IN_IGNORED, 0, M_WAITOK); + + /* + * For compatibility with Linux, we do not remove pending events + * associated with the watch. Watch descriptors are implemented so as + * to avoid being reused for as long as possible, so one hopes that any + * pending events from the removed watch descriptor will be removed + * before the watch descriptor is recycled. 
+ */ + key.wd = wd; + mtx_lock(&sc->lock); + watch = RB_FIND(inotify_watch_tree, &sc->watches, &key); + if (watch == NULL) { + free(rec, M_INOTIFY); + error = EINVAL; + } else { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + if (!inotify_queue_record(sc, rec)) { + free(rec, M_INOTIFY); + error = 0; + } + } + mtx_unlock(&sc->lock); + if (watch != NULL) + inotify_remove_watch(watch); + fdrop(fp, td); + return (error); +} + +int +sys_inotify_rm_watch(struct thread *td, struct inotify_rm_watch_args *uap) +{ + return (kern_inotify_rm_watch(uap->fd, uap->wd, td)); +} diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 86c7bdaa02c0..fb3e6a7a2534 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -75,14 +75,20 @@ static void NDVALIDATE_impl(struct nameidata *, int); #endif /* + * Reset ndp to its original state. + */ +#define NDRESET(ndp) do { \ + NDREINIT_DBG(ndp); \ + ndp->ni_resflags = 0; \ + ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \ +} while (0) +/* * Prepare namei() to restart. Reset components to its original state and set * ISRESTARTED flag which signals the underlying lookup code to change the root * from ABI root to actual root and prevents a further restarts. */ #define NDRESTART(ndp) do { \ - NDREINIT_DBG(ndp); \ - ndp->ni_resflags = 0; \ - ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \ + NDRESET(ndp); \ ndp->ni_cnd.cn_flags |= ISRESTARTED; \ } while (0) @@ -162,8 +168,8 @@ static struct vop_vector crossmp_vnodeops = { */ struct nameicap_tracker { - struct vnode *dp; TAILQ_ENTRY(nameicap_tracker) nm_link; + struct mount *mp; }; /* Zone for cap mode tracker elements used for dotdot capability checks. 
*/ @@ -192,49 +198,75 @@ SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN, "enables \"..\" components in path lookup in capability mode " "on non-local mount"); -static void +static int nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) { struct nameicap_tracker *nt; + struct mount *mp; + int error; if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) - return; + return (0); + mp = NULL; + error = VOP_GETWRITEMOUNT(dp, &mp); + if (error != 0) + return (error); nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head); - if (nt != NULL && nt->dp == dp) - return; + if (nt != NULL && nt->mp == mp) { + vfs_rel(mp); + return (0); + } nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK); - vhold(dp); - nt->dp = dp; - TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); + nt->mp = mp; + error = lockmgr(&mp->mnt_renamelock, LK_SHARED | LK_NOWAIT, 0); + if (error != 0) { + MPASS(ndp->ni_nctrack_mnt == NULL); + ndp->ni_nctrack_mnt = mp; + free(nt, M_NAMEITRACKER); + error = ERESTART; + } else { + TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); + } + return (error); } static void -nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first) +nameicap_cleanup(struct nameidata *ndp, int error) { struct nameicap_tracker *nt, *nt1; + struct mount *mp; + + KASSERT((ndp->ni_nctrack_mnt == NULL && + TAILQ_EMPTY(&ndp->ni_cap_tracker)) || + (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, + ("tracker active and not strictrelative")); - nt = first; - TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + mp = nt->mp; + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(mp); TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); - vdrop(nt->dp); free(nt, M_NAMEITRACKER); } -} -static void -nameicap_cleanup(struct nameidata *ndp) -{ - KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || - (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); - 
nameicap_cleanup_from(ndp, NULL); + mp = ndp->ni_nctrack_mnt; + if (mp != NULL) { + if (error == ERESTART) { + lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0); + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + } + vfs_rel(mp); + ndp->ni_nctrack_mnt = NULL; + } } /* - * For dotdot lookups in capability mode, only allow the component - * lookup to succeed if the resulting directory was already traversed - * during the operation. This catches situations where already - * traversed directory is moved to different parent, and then we walk - * over it with dotdots. + * For dotdot lookups in capability mode, disallow walking over the + * directory no_rbeneath_dpp that was used as the starting point of + * the lookup. Since we take the mnt_renamelocks of all mounts we + * ever walked over during lookup, parallel renames are disabled. + * This prevents the situation where we circumvent walk over + * ni_rbeneath_dpp following dotdots. * * Also allow to force failure of dotdot lookups for non-local * filesystems, where external agents might assist local lookups to @@ -243,7 +275,6 @@ nameicap_cleanup(struct nameidata *ndp) static int nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) { - struct nameicap_tracker *nt; struct mount *mp; if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf & @@ -253,22 +284,16 @@ nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) NI_LCF_CAP_DOTDOT_KTR)) == NI_LCF_STRICTREL_KTR)) NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf); if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0) - return (ENOTCAPABLE); + goto violation; + if (dp == ndp->ni_rbeneath_dpp) + goto violation; mp = dp->v_mount; if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && (mp->mnt_flag & MNT_LOCAL) == 0) - goto capfail; - TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, - nm_link) { - if (dp == nt->dp) { - nt = TAILQ_NEXT(nt, nm_link); - if (nt != NULL) - nameicap_cleanup_from(ndp, nt); - return (0); - } - } + goto violation; + return (0); 
-capfail: +violation: if (__predict_false((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0)) NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf); return (ENOTCAPABLE); @@ -394,6 +419,8 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) NI_LCF_CAP_DOTDOT; } } + if (error == 0 && (ndp->ni_lcf & NI_LCF_STRICTREL) != 0) + ndp->ni_rbeneath_dpp = *dpp; /* * If we are auditing the kernel pathname, save the user pathname. @@ -631,6 +658,7 @@ restart: error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); return (error); @@ -661,12 +689,12 @@ restart: else if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir && (cnp->cn_flags & ISRESTARTED) == 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, ERESTART); NDRESTART(ndp); goto restart; } return (error); case CACHE_FPL_STATUS_PARTIAL: - TAILQ_INIT(&ndp->ni_cap_tracker); dp = ndp->ni_startdir; break; case CACHE_FPL_STATUS_DESTROYED: @@ -674,18 +702,21 @@ restart: error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); return (error); } cnp->cn_nameptr = cnp->cn_pnbuf; /* FALLTHROUGH */ case CACHE_FPL_STATUS_ABORTED: - TAILQ_INIT(&ndp->ni_cap_tracker); MPASS(ndp->ni_lcf == 0); if (*cnp->cn_pnbuf == '\0') { if ((cnp->cn_flags & EMPTYPATH) != 0) { - return (namei_emptypath(ndp)); + error = namei_emptypath(ndp); + nameicap_cleanup(ndp, error); + return (error); } namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, ENOENT); SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL, false, ndp); return (ENOENT); @@ -693,6 +724,7 @@ restart: error = namei_setup(ndp, &dp, &pwd); if (error != 0) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); return (error); } break; @@ -705,16 +737,23 @@ restart: ndp->ni_startdir = dp; error = vfs_lookup(ndp); if (error != 0) { - if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir && - error == ENOENT 
&& - (cnp->cn_flags & ISRESTARTED) == 0)) { - nameicap_cleanup(ndp); - pwd_drop(pwd); - namei_cleanup_cnp(cnp); - NDRESTART(ndp); - goto restart; - } else + uint64_t was_restarted; + bool abi_restart; + + was_restarted = ndp->ni_cnd.cn_flags & + ISRESTARTED; + abi_restart = pwd->pwd_adir != pwd->pwd_rdir && + error == ENOENT && was_restarted == 0; + if (error != ERESTART && !abi_restart) goto out; + nameicap_cleanup(ndp, error); + pwd_drop(pwd); + namei_cleanup_cnp(cnp); + NDRESET(ndp); + if (abi_restart) + was_restarted = ISRESTARTED; + ndp->ni_cnd.cn_flags |= was_restarted; + goto restart; } /* @@ -723,7 +762,7 @@ restart: if ((cnp->cn_flags & ISSYMLINK) == 0) { SDT_PROBE4(vfs, namei, lookup, return, error, ndp->ni_vp, false, ndp); - nameicap_cleanup(ndp); + nameicap_cleanup(ndp, 0); pwd_drop(pwd); NDVALIDATE(ndp); return (0); @@ -756,10 +795,10 @@ restart: ndp->ni_vp = NULL; vrele(ndp->ni_dvp); out: - MPASS(error != 0); + MPASS(error != 0 && error != ERESTART); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); namei_cleanup_cnp(cnp); - nameicap_cleanup(ndp); + nameicap_cleanup(ndp, error); pwd_drop(pwd); return (error); } @@ -1185,7 +1224,9 @@ dirloop: } } - nameicap_tracker_add(ndp, dp); + error = nameicap_tracker_add(ndp, dp); + if (error != 0) + goto bad; /* * Make sure degenerate names don't get here, their handling was @@ -1210,9 +1251,7 @@ dirloop: * the jail or chroot, don't let them out. * 5. If doing a capability lookup and lookup_cap_dotdot is * enabled, return ENOTCAPABLE if the lookup would escape - * from the initial file descriptor directory. Checks are - * done by ensuring that namei() already traversed the - * result of dotdot lookup. + * from the initial file descriptor directory. 
*/ if (cnp->cn_flags & ISDOTDOT) { if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL_KTR | @@ -1238,7 +1277,7 @@ dirloop: NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf); if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0) { error = ENOTCAPABLE; - goto capdotdot; + goto bad; } } if (isroot || ((dp->v_vflag & VV_ROOT) != 0 && @@ -1261,11 +1300,6 @@ dirloop: vn_lock(dp, enforce_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); - error = nameicap_check_dotdot(ndp, dp); - if (error != 0) { -capdotdot: - goto bad; - } } } @@ -1314,7 +1348,9 @@ unionlookup: vn_lock(dp, enforce_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); - nameicap_tracker_add(ndp, dp); + error = nameicap_tracker_add(ndp, dp); + if (error != 0) + goto bad; goto unionlookup; } @@ -1415,7 +1451,7 @@ nextname: goto dirloop; } if (cnp->cn_flags & ISDOTDOT) { - error = nameicap_check_dotdot(ndp, ndp->ni_vp); + error = nameicap_check_dotdot(ndp, ndp->ni_dvp); if (error != 0) goto bad2; } @@ -1485,8 +1521,11 @@ success: } success_right_lock: if (ndp->ni_vp != NULL) { - if ((cnp->cn_flags & ISDOTDOT) == 0) - nameicap_tracker_add(ndp, ndp->ni_vp); + if ((cnp->cn_flags & ISDOTDOT) == 0) { + error = nameicap_tracker_add(ndp, ndp->ni_vp); + if (error != 0) + goto bad2; + } if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS) return (vfs_lookup_failifexists(ndp)); } diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index cb18468d28bc..8e64a7fe966b 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -156,6 +156,7 @@ mount_init(void *mem, int size, int flags) mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF); lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); + lockinit(&mp->mnt_renamelock, PVFS, "rename", 0, 0); mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO); mp->mnt_ref = 0; mp->mnt_vfs_ops = 1; @@ -170,6 +171,7 @@ mount_fini(void *mem, int size) mp = (struct mount *)mem; uma_zfree_pcpu(pcpu_zone_16, 
mp->mnt_pcpu); + lockdestroy(&mp->mnt_renamelock); lockdestroy(&mp->mnt_explock); mtx_destroy(&mp->mnt_listmtx); mtx_destroy(&mp->mnt_mtx); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index dc2fb59fb81c..918b256e6c59 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -38,7 +38,6 @@ * External virtual filesystem routines */ -#include <sys/cdefs.h> #include "opt_ddb.h" #include "opt_watchdog.h" @@ -57,6 +56,7 @@ #include <sys/extattr.h> #include <sys/file.h> #include <sys/fcntl.h> +#include <sys/inotify.h> #include <sys/jail.h> #include <sys/kdb.h> #include <sys/kernel.h> @@ -5246,7 +5246,8 @@ destroy_vpollinfo_free(struct vpollinfo *vi) static void destroy_vpollinfo(struct vpollinfo *vi) { - + KASSERT(TAILQ_EMPTY(&vi->vpi_inotify), + ("%s: pollinfo %p has lingering watches", __func__, vi)); knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); @@ -5260,12 +5261,13 @@ v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; - if (vp->v_pollinfo != NULL) + if (atomic_load_ptr(&vp->v_pollinfo) != NULL) return; vi = malloc(sizeof(*vi), M_VNODEPOLL, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_lock); + TAILQ_INIT(&vi->vpi_inotify); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); @@ -5851,6 +5853,8 @@ vop_rename_pre(void *ap) struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS + struct mount *tmp; + if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); @@ -5868,6 +5872,11 @@ vop_rename_pre(void *ap) if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); + + tmp = NULL; + VOP_GETWRITEMOUNT(a->a_tdvp, &tmp); + lockmgr_assert(&tmp->mnt_renamelock, KA_XLOCKED); + vfs_rel(tmp); #endif /* * It may be tempting to add vn_seqc_write_begin/end calls here and @@ 
-6057,6 +6066,28 @@ vop_need_inactive_debugpost(void *ap, int rc) #endif void +vop_allocate_post(void *ap, int rc) +{ + struct vop_allocate_args *a; + + a = ap; + if (rc == 0) + INOTIFY(a->a_vp, IN_MODIFY); +} + +void +vop_copy_file_range_post(void *ap, int rc) +{ + struct vop_copy_file_range_args *a; + + a = ap; + if (rc == 0) { + INOTIFY(a->a_invp, IN_ACCESS); + INOTIFY(a->a_outvp, IN_MODIFY); + } +} + +void vop_create_pre(void *ap) { struct vop_create_args *a; @@ -6076,8 +6107,20 @@ vop_create_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } +} + +void +vop_deallocate_post(void *ap, int rc) +{ + struct vop_deallocate_args *a; + + a = ap; + if (rc == 0) + INOTIFY(a->a_vp, IN_MODIFY); } void @@ -6122,8 +6165,10 @@ vop_deleteextattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6153,6 +6198,8 @@ vop_link_post(void *ap, int rc) if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_LINK); VFS_KNOTE_LOCKED(tdvp, NOTE_WRITE); + INOTIFY_NAME(vp, tdvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT); + INOTIFY_NAME(vp, tdvp, a->a_cnp, IN_CREATE); } } @@ -6176,8 +6223,10 @@ vop_mkdir_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } #ifdef DEBUG_VFS_LOCKS @@ -6212,8 +6261,10 @@ vop_mknod_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } void @@ -6225,8 +6276,10 @@ vop_reclaim_post(void *ap, int rc) a = ap; vp = a->a_vp; ASSERT_VOP_IN_SEQC(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_REVOKE); + INOTIFY_REVOKE(vp); + } } void @@ -6257,6 +6310,8 @@ 
vop_remove_post(void *ap, int rc) if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); + INOTIFY_NAME(vp, dvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT); + INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE); } } @@ -6288,6 +6343,8 @@ vop_rename_post(void *ap, int rc) VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); + INOTIFY_MOVE(a->a_fvp, a->a_fdvp, a->a_fcnp, a->a_tvp, + a->a_tdvp, a->a_tcnp); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); @@ -6327,6 +6384,7 @@ vop_rmdir_post(void *ap, int rc) vp->v_vflag |= VV_UNLINKED; VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); + INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE); } } @@ -6350,8 +6408,10 @@ vop_setattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6396,8 +6456,10 @@ vop_setextattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6420,8 +6482,10 @@ vop_symlink_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } void @@ -6429,8 +6493,10 @@ vop_open_post(void *ap, int rc) { struct vop_open_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN); + INOTIFY(a->a_vp, IN_OPEN); + } } void @@ -6442,6 +6508,8 @@ vop_close_post(void *ap, int rc) !VN_IS_DOOMED(a->a_vp))) { VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ? NOTE_CLOSE_WRITE : NOTE_CLOSE); + INOTIFY(a->a_vp, (a->a_fflag & FWRITE) != 0 ? 
+ IN_CLOSE_WRITE : IN_CLOSE_NOWRITE); } } @@ -6450,8 +6518,10 @@ vop_read_post(void *ap, int rc) { struct vop_read_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + INOTIFY(a->a_vp, IN_ACCESS); + } } void @@ -6468,8 +6538,10 @@ vop_readdir_post(void *ap, int rc) { struct vop_readdir_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + INOTIFY(a->a_vp, IN_ACCESS); + } } static struct knlist fs_knlist; diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index c236f241bf20..d880733cbfe7 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3766,7 +3766,7 @@ int kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg) { - struct mount *mp = NULL; + struct mount *mp, *tmp; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; uint64_t tondflags; @@ -3774,6 +3774,7 @@ kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, short irflag; again: + tmp = mp = NULL; bwillwrite(); #ifdef MAC if (mac_vnode_check_rename_from_enabled()) { @@ -3809,6 +3810,7 @@ again: tvp = tond.ni_vp; error = vn_start_write(fvp, &mp, V_NOWAIT); if (error != 0) { +again1: NDFREE_PNBUF(&fromnd); NDFREE_PNBUF(&tond); if (tvp != NULL) @@ -3819,11 +3821,25 @@ again: vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); + if (tmp != NULL) { + lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); + lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); + vfs_rel(tmp); + tmp = NULL; + } error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); if (error != 0) return (error); goto again; } + error = VOP_GETWRITEMOUNT(tdvp, &tmp); + if (error != 0 || tmp == NULL) + goto again1; + error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); + if (error != 0) { + vn_finished_write(mp); + goto again1; + } irflag = vn_irflag_read(fvp); if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || (irflag & VIRF_NAMEDDIR) != 0) { @@ -3884,6 +3900,8 @@ out: 
vrele(fromnd.ni_dvp); vrele(fvp); } + lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(tmp); vn_finished_write(mp); out1: if (error == ERESTART) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 7487f93e4880..6451c9e07a60 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -52,6 +52,7 @@ #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filio.h> +#include <sys/inotify.h> #include <sys/ktr.h> #include <sys/ktrace.h> #include <sys/limits.h> @@ -308,7 +309,8 @@ restart: NDREINIT(ndp); goto restart; } - if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0) + if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0 || + (vn_irflag_read(ndp->ni_dvp) & VIRF_INOTIFY) != 0) ndp->ni_cnd.cn_flags |= MAKEENTRY; #ifdef MAC error = mac_vnode_check_create(cred, ndp->ni_dvp, @@ -484,6 +486,7 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, if (vp->v_type != VFIFO && vp->v_type != VSOCK && VOP_ACCESS(vp, VREAD, cred, td) == 0) fp->f_flag |= FKQALLOWED; + INOTIFY(vp, IN_OPEN); return (0); } @@ -1746,6 +1749,8 @@ vn_truncate_locked(struct vnode *vp, off_t length, bool sync, vattr.va_vaflags |= VA_SYNC; error = VOP_SETATTR(vp, &vattr, cred); VOP_ADD_WRITECOUNT_CHECKED(vp, -1); + if (error == 0) + INOTIFY(vp, IN_MODIFY); } return (error); } diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index a2b6a7c8ff9f..38138a4af921 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -702,6 +702,7 @@ vop_vptocnp { %% allocate vp E E E +%! allocate post vop_allocate_post vop_allocate { IN struct vnode *vp; @@ -786,6 +787,7 @@ vop_fdatasync { %% copy_file_range invp U U U %% copy_file_range outvp U U U +%! copy_file_range post vop_copy_file_range_post vop_copy_file_range { IN struct vnode *invp; @@ -810,6 +812,7 @@ vop_vput_pair { %% deallocate vp L L L +%! 
deallocate post vop_deallocate_post vop_deallocate { IN struct vnode *vp; @@ -821,6 +824,27 @@ vop_deallocate { }; +%% inotify vp - - - + +vop_inotify { + IN struct vnode *vp; + IN struct vnode *dvp; + IN struct componentname *cnp; + IN int event; + IN uint32_t cookie; +}; + + +%% inotify_add_watch vp L L L + +vop_inotify_add_watch { + IN struct vnode *vp; + IN struct inotify_softc *sc; + IN uint32_t mask; + OUT uint32_t *wdp; + IN struct thread *td; +}; + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, diff --git a/sys/modules/sound/sound/Makefile b/sys/modules/sound/sound/Makefile index d2cfed2f4b6a..f3978e9bd9cc 100644 --- a/sys/modules/sound/sound/Makefile +++ b/sys/modules/sound/sound/Makefile @@ -13,11 +13,11 @@ SRCS+= feeder.c feeder_rate.c feeder_volume.c SRCS+= feeder_chain.c feeder_eq.c feeder_format.c SRCS+= feeder_matrix.c feeder_mixer.c SRCS+= feeder_eq_gen.h feeder_rate_gen.h snd_fxdiv_gen.h -SRCS+= mpu_if.h mpufoi_if.h synth_if.h -SRCS+= mpu_if.c mpufoi_if.c synth_if.c +SRCS+= mpu_if.h mpufoi_if.h +SRCS+= mpu_if.c mpufoi_if.c SRCS+= ac97.c buffer.c channel.c dsp.c SRCS+= mixer.c sndstat.c sound.c vchan.c -SRCS+= midi.c mpu401.c sequencer.c +SRCS+= midi.c mpu401.c feeder_eq_gen.h: ${SYSDIR}/tools/sound/feeder_eq_mkfilter.awk ${AWK} -f ${SYSDIR}/tools/sound/feeder_eq_mkfilter.awk -- ${FEEDER_EQ_PRESETS} > ${.TARGET} diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index 6eefedba8775..cf4f75bd0b6c 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -81,6 +81,23 @@ struct ether_addr { (addr)[3] | (addr)[4] | (addr)[5]) == 0x00) /* + * 802.1q VID constants from IEEE 802.1Q-2014, table 9-2. + */ + +/* Null VID: The tag contains only PCP (priority) and DEI information. */ +#define DOT1Q_VID_NULL 0x0 +/* The default PVID for a bridge port. 
NB: bridge(4) does not honor this. */ +#define DOT1Q_VID_DEF_PVID 0x1 +/* The default SR_PVID for SRP Stream related traffic. */ +#define DOT1Q_VID_DEF_SR_PVID 0x2 +/* A VID reserved for implementation use, not permitted on the wire. */ +#define DOT1Q_VID_RSVD_IMPL 0xfff +/* The lowest valid VID. */ +#define DOT1Q_VID_MIN 0x1 +/* The highest valid VID. */ +#define DOT1Q_VID_MAX 0xffe + +/* * This is the type of the VLAN ID inside the tag, not the tag itself. */ typedef uint16_t ether_vlanid_t; diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index bc421a8e156d..5b3ee740d75e 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -254,6 +254,8 @@ struct bridge_iflist { uint32_t bif_addrcnt; /* cur. # of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; + ether_vlanid_t bif_untagged; /* untagged vlan id */ + ifbvlan_set_t bif_vlan_set; /* allowed tagged vlans */ }; /* @@ -331,13 +333,12 @@ static void bridge_inject(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int bridge_enqueue(struct bridge_softc *, struct ifnet *, - struct mbuf *); + struct mbuf *, struct bridge_iflist *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static bool bridge_member_ifaddrs(void); - static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, @@ -353,6 +354,9 @@ static void bridge_rtage(struct bridge_softc *); static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, ether_vlanid_t); +static bool bridge_vfilter_in(const struct bridge_iflist *, struct mbuf *); +static bool bridge_vfilter_out(const struct bridge_iflist *, + const struct mbuf *); static void bridge_rtable_init(struct bridge_softc *); static void 
bridge_rtable_fini(struct bridge_softc *); @@ -400,6 +404,9 @@ static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); +static int bridge_ioctl_sifuntagged(struct bridge_softc *, void *); +static int bridge_ioctl_sifvlanset(struct bridge_softc *, void *); +static int bridge_ioctl_gifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); @@ -618,6 +625,14 @@ static const struct bridge_control bridge_control_table[] = { { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, + { bridge_ioctl_sifuntagged, sizeof(struct ifbreq), + BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_sifvlanset, sizeof(struct ifbif_vlan_req), + BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_gifvlanset, sizeof(struct ifbif_vlan_req), + BC_F_COPYIN|BC_F_COPYOUT }, }; static const int bridge_control_table_size = nitems(bridge_control_table); @@ -832,6 +847,7 @@ bridge_clone_create(struct if_clone *ifc, char *name, size_t len, ifp->if_softc = sc; if_initname(ifp, bridge_name, ifd->unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_capabilities = ifp->if_capenable = IFCAP_VLAN_HWTAGGING; ifp->if_ioctl = bridge_ioctl; #ifdef ALTQ ifp->if_start = bridge_altq_start; @@ -954,6 +970,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; + struct ifbif_vlan_req ifvlanreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; @@ -1495,6 +1512,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = 
bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; + req->ifbr_untagged = bif->bif_untagged; /* Copy STP state options as flags */ if (bp->bp_operedge) @@ -1873,6 +1891,84 @@ bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) } static int +bridge_ioctl_sifuntagged(struct bridge_softc *sc, void *arg) +{ + struct ifbreq *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->ifbr_ifsname); + if (bif == NULL) + return (ENOENT); + + if (req->ifbr_untagged > DOT1Q_VID_MAX) + return (EINVAL); + + if (req->ifbr_untagged != DOT1Q_VID_NULL) + bif->bif_flags |= IFBIF_VLANFILTER; + bif->bif_untagged = req->ifbr_untagged; + return (0); +} + +static int +bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg) +{ + struct ifbif_vlan_req *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->bv_ifname); + if (bif == NULL) + return (ENOENT); + + /* Reject invalid VIDs. */ + if (BRVLAN_TEST(&req->bv_set, DOT1Q_VID_NULL) || + BRVLAN_TEST(&req->bv_set, DOT1Q_VID_RSVD_IMPL)) + return (EINVAL); + + switch (req->bv_op) { + /* Replace the existing vlan set with the new set */ + case BRDG_VLAN_OP_SET: + BIT_COPY(BRVLAN_SETSIZE, &req->bv_set, &bif->bif_vlan_set); + break; + + /* Modify the existing vlan set to add the given vlans */ + case BRDG_VLAN_OP_ADD: + BIT_OR(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + break; + + /* Modify the existing vlan set to remove the given vlans */ + case BRDG_VLAN_OP_DEL: + BIT_ANDNOT(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + break; + + /* Invalid or unknown operation */ + default: + return (EINVAL); + } + + /* + * The only reason to modify the VLAN access list is to use VLAN + * filtering on this interface, so enable it automatically. 
+ */ + bif->bif_flags |= IFBIF_VLANFILTER; + + return (0); +} + +static int +bridge_ioctl_gifvlanset(struct bridge_softc *sc, void *arg) +{ + struct ifbif_vlan_req *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->bv_ifname); + if (bif == NULL) + return (ENOENT); + + BIT_COPY(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + return (0); +} + +static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; @@ -2150,12 +2246,25 @@ bridge_stop(struct ifnet *ifp, int disable) * */ static int -bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) +bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, + struct bridge_iflist *bif) { int len, err = 0; short mflags; struct mbuf *m0; + /* + * Find the bridge member port this packet is being sent on, if the + * caller didn't already provide it. + */ + if (bif == NULL) + bif = bridge_lookup_member_if(sc, dst_ifp); + if (bif == NULL) { + /* Perhaps the interface was removed from the bridge */ + m_freem(m); + return (EINVAL); + } + /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; @@ -2164,6 +2273,18 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) mflags = m->m_flags; /* + * If VLAN filtering is enabled, and the native VLAN ID of the + * outgoing interface matches the VLAN ID of the frame, remove + * the VLAN header. + */ + if ((bif->bif_flags & IFBIF_VLANFILTER) && + bif->bif_untagged != DOT1Q_VID_NULL && + VLANTAGOF(m) == bif->bif_untagged) { + m->m_flags &= ~M_VLANTAG; + m->m_pkthdr.ether_vtag = 0; + } + + /* * If underlying interface can not do VLAN tag insertion itself * then attach a packet tag that holds it. 
*/ @@ -2234,7 +2355,7 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp) return; } - bridge_enqueue(sc, ifp, m); + bridge_enqueue(sc, ifp, m, NULL); } /* @@ -2329,7 +2450,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, } } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, bif); } if (used == 0) m_freem(m); @@ -2347,7 +2468,7 @@ sendunicast: return (0); } - bridge_enqueue(sc, dst_if, m); + bridge_enqueue(sc, dst_if, m, NULL); return (0); } @@ -2364,17 +2485,18 @@ bridge_transmit(struct ifnet *ifp, struct mbuf *m) struct ether_header *eh; struct ifnet *dst_if; int error = 0; + ether_vlanid_t vlan; sc = ifp->if_softc; ETHER_BPF_MTAP(ifp, m); eh = mtod(m, struct ether_header *); + vlan = VLANTAGOF(m); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && - (dst_if = bridge_rtlookup(sc, eh->ether_dhost, DOT1Q_VID_NULL)) != - NULL) { - error = bridge_enqueue(sc, dst_if, m); + (dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan)) != NULL) { + error = bridge_enqueue(sc, dst_if, m, NULL); } else bridge_broadcast(sc, ifp, m, 0); @@ -2435,18 +2557,18 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; struct ether_header *eh; - uint16_t vlan; uint8_t *dst; int error; + ether_vlanid_t vlan; NET_EPOCH_ASSERT(); src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; + vlan = VLANTAGOF(m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); - vlan = VLANTAGOF(m); if ((sbif->bif_flags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) @@ -2555,6 +2677,10 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; + /* Do VLAN filtering. 
*/ + if (!bridge_vfilter_out(dbif, m)) + goto drop; + if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; @@ -2566,7 +2692,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, return; } - bridge_enqueue(sc, dst_if, m); + bridge_enqueue(sc, dst_if, m, dbif); return; drop: @@ -2636,6 +2762,15 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) return (NULL); } + /* Do VLAN filtering. */ + if (!bridge_vfilter_in(bif, m)) { + if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + return (NULL); + } + /* bridge_vfilter_in() may add a tag */ + vlan = VLANTAGOF(m); + bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { @@ -2761,6 +2896,15 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) } \ if ((iface) != bifp) \ ETHER_BPF_MTAP(iface, m); \ + /* Pass tagged packets to if_vlan, if it's loaded */ \ + if (VLANTAGOF(m) != 0) { \ + if (bifp->if_vlantrunk == NULL) { \ + m_freem(m); \ + return (NULL); \ + } \ + (*vlan_input_p)(bifp, m); \ + return (NULL); \ + } \ return (m); \ } \ \ @@ -2817,6 +2961,30 @@ bridge_inject(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; + if (ifp->if_type == IFT_L2VLAN) { + /* + * vlan(4) gives us the vlan ifnet, so we need to get the + * bridge softc to get a pointer to ether_input to send the + * packet to. + */ + struct ifnet *bifp = NULL; + + if (vlan_trunkdev_p == NULL) { + m_freem(m); + return; + } + + bifp = vlan_trunkdev_p(ifp); + if (bifp == NULL) { + m_freem(m); + return; + } + + sc = if_getsoftc(bifp); + sc->sc_if_input(ifp, m); + return; + } + KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0, ("%s: iface %s is not running in netmap mode", __func__, if_name(ifp))); @@ -2867,6 +3035,10 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; + /* Do VLAN filtering. 
*/ + if (!bridge_vfilter_out(dbif, m)) + continue; + if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; @@ -2910,7 +3082,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, continue; } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, dbif); } if (used == 0) m_freem(m); @@ -2946,11 +3118,116 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m) continue; } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, bif); } } /* + * Incoming VLAN filtering. Given a frame and the member interface it was + * received on, decide whether the port configuration allows it. + */ +static bool +bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) +{ + ether_vlanid_t vlan; + + vlan = VLANTAGOF(m); + /* Make sure the vlan id is reasonable. */ + if (vlan > DOT1Q_VID_MAX) + return (false); + + /* If VLAN filtering isn't enabled, pass everything. */ + if ((sbif->bif_flags & IFBIF_VLANFILTER) == 0) + return (true); + + if (vlan == DOT1Q_VID_NULL) { + /* + * The frame doesn't have a tag. If the interface does not + * have an untagged vlan configured, drop the frame. + */ + if (sbif->bif_untagged == DOT1Q_VID_NULL) + return (false); + + /* + * Otherwise, insert a new tag based on the interface's + * untagged vlan id. + */ + m->m_pkthdr.ether_vtag = sbif->bif_untagged; + m->m_flags |= M_VLANTAG; + } else { + /* + * The frame has a tag, so check it matches the interface's + * vlan access list. We explicitly do not accept tagged + * frames for the untagged vlan id here (unless it's also + * in the access list). + */ + if (!BRVLAN_TEST(&sbif->bif_vlan_set, vlan)) + return (false); + } + + /* Accept the frame. */ + return (true); +} + +/* + * Outgoing VLAN filtering. Given a frame, its vlan, and the member interface + * we intend to send it to, decide whether the port configuration allows it to + * be sent. 
+ */ +static bool +bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m) +{ + struct ether_header *eh; + ether_vlanid_t vlan; + + NET_EPOCH_ASSERT(); + + /* If VLAN filtering isn't enabled, pass everything. */ + if ((dbif->bif_flags & IFBIF_VLANFILTER) == 0) + return (true); + + vlan = VLANTAGOF(m); + + /* + * Always allow untagged 802.1D STP frames, even if they would + * otherwise be dropped. This is required for STP to work on + * a filtering bridge. + * + * Tagged STP (Cisco PVST+) is a non-standard extension, so + * handle those frames via the normal filtering path. + */ + eh = mtod(m, struct ether_header *); + if (vlan == DOT1Q_VID_NULL && + memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) + return (true); + + /* + * If the frame wasn't assigned to a vlan at ingress, drop it. + * We can't forward these frames to filtering ports because we + * don't know what VLAN they're supposed to be in. + */ + if (vlan == DOT1Q_VID_NULL) + return (false); + + /* + * If the frame's vlan matches the interface's untagged vlan, + * allow it. + */ + if (vlan == dbif->bif_untagged) + return (true); + + /* + * If the frame's vlan is on the interface's tagged access list, + * allow it. + */ + if (BRVLAN_TEST(&dbif->bif_vlan_set, vlan)) + return (true); + + /* The frame was not permitted, so drop it. */ + return (false); +} + +/* + * bridge_rtupdate: * * Add a bridge routing entry. 
diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h index 90beb6c96d82..97b63e3d4416 100644 --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -78,6 +78,8 @@ #define _NET_IF_BRIDGEVAR_H_ #include <sys/types.h> +#include <sys/_bitset.h> +#include <sys/bitset.h> #include <sys/callout.h> #include <sys/queue.h> #include <sys/condvar.h> @@ -122,6 +124,9 @@ #define BRDGSPROTO 28 /* set protocol (ifbrparam) */ #define BRDGSTXHC 29 /* set tx hold count (ifbrparam) */ #define BRDGSIFAMAX 30 /* set max interface addrs (ifbreq) */ +#define BRDGSIFUNTAGGED 31 /* set if untagged vlan */ +#define BRDGSIFVLANSET 32 /* set if vlan set */ +#define BRDGGIFVLANSET 33 /* get if vlan set */ /* * Generic bridge control request. @@ -139,6 +144,7 @@ struct ifbreq { uint32_t ifbr_addrcnt; /* member if addr number */ uint32_t ifbr_addrmax; /* member if addr max */ uint32_t ifbr_addrexceeded; /* member if addr violations */ + ether_vlanid_t ifbr_untagged; /* member if untagged vlan */ uint8_t pad[32]; }; @@ -155,10 +161,11 @@ struct ifbreq { #define IFBIF_BSTP_ADMEDGE 0x0200 /* member stp admin edge enabled */ #define IFBIF_BSTP_ADMCOST 0x0400 /* member stp admin path cost */ #define IFBIF_PRIVATE 0x0800 /* if is a private segment */ +#define IFBIF_VLANFILTER 0x1000 /* if does vlan filtering */ #define IFBIFBITS "\020\001LEARNING\002DISCOVER\003STP\004SPAN" \ "\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \ - "\011AUTOPTP" + "\011AUTOPTP\015VLANFILTER" #define IFBIFMASK ~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \ IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \ IFBIF_BSTP_ADMCOST) /* not saved */ @@ -304,6 +311,26 @@ struct ifbpstpconf { eaddr[5] = pv >> 0; \ } while (0) +/* + * Bridge VLAN access request. 
+ */ +#define BRVLAN_SETSIZE 4096 +typedef __BITSET_DEFINE(ifbvlan_set, BRVLAN_SETSIZE) ifbvlan_set_t; + +#define BRVLAN_SET(set, bit) __BIT_SET(BRVLAN_SETSIZE, (bit), set) +#define BRVLAN_CLR(set, bit) __BIT_CLR(BRVLAN_SETSIZE, (bit), set) +#define BRVLAN_TEST(set, bit) __BIT_ISSET(BRVLAN_SETSIZE, (bit), set) + +#define BRDG_VLAN_OP_SET 1 /* replace current vlan set */ +#define BRDG_VLAN_OP_ADD 2 /* add vlans to current set */ +#define BRDG_VLAN_OP_DEL 3 /* remove vlans from current set */ + +struct ifbif_vlan_req { + char bv_ifname[IFNAMSIZ]; + uint8_t bv_op; + ifbvlan_set_t bv_set; +}; + #ifdef _KERNEL #define BRIDGE_INPUT(_ifp, _m) do { \ diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index e9e1c82cb688..22fcb7bf7c64 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1673,6 +1673,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, */ if (p->if_type != IFT_ETHER && p->if_type != IFT_L2VLAN && + p->if_type != IFT_BRIDGE && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) diff --git a/sys/net/if_vlan_var.h b/sys/net/if_vlan_var.h index f0b09445d04b..695bb81f77b3 100644 --- a/sys/net/if_vlan_var.h +++ b/sys/net/if_vlan_var.h @@ -126,13 +126,6 @@ struct vlanreq { #define VLAN_PCP_MAX 7 -#define DOT1Q_VID_NULL 0x0 -#define DOT1Q_VID_DEF_PVID 0x1 -#define DOT1Q_VID_DEF_SR_PVID 0x2 -#define DOT1Q_VID_RSVD_IMPL 0xfff -#define DOT1Q_VID_MIN 1 /* minimum valid vlan id */ -#define DOT1Q_VID_MAX 4094 /* maximum valid vlan id */ - /* * 802.1q full tag. Proto and vid are stored in host byte order. 
*/ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 3ea561e63503..687b0d538666 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -1520,7 +1520,8 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td) INP_WLOCK_ASSERT(inp); if (__predict_false((so->so_state & - (SS_ISCONNECTING | SS_ISCONNECTED)) != 0)) + (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING | + SS_ISDISCONNECTED)) != 0)) return (EISCONN); if (__predict_false((so->so_options & SO_REUSEPORT_LB) != 0)) return (EOPNOTSUPP); diff --git a/sys/powerpc/mpc85xx/mpc85xx_gpio.c b/sys/powerpc/mpc85xx/mpc85xx_gpio.c index 0f333feb747f..cb96d768adef 100644 --- a/sys/powerpc/mpc85xx/mpc85xx_gpio.c +++ b/sys/powerpc/mpc85xx/mpc85xx_gpio.c @@ -226,14 +226,14 @@ mpc85xx_gpio_attach(device_t dev) return (ENOMEM); } + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { mpc85xx_gpio_detach(dev); return (ENOMEM); } - OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - return (0); } diff --git a/sys/riscv/allwinner/files.allwinner b/sys/riscv/allwinner/files.allwinner index 423a89c10c78..73fa9660e2d2 100644 --- a/sys/riscv/allwinner/files.allwinner +++ b/sys/riscv/allwinner/files.allwinner @@ -1,5 +1,6 @@ arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt +arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_syscon.c optional syscon arm/allwinner/aw_sid.c optional aw_sid nvmem arm/allwinner/aw_timer.c optional aw_timer fdt diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner index 1bf6b027a4cb..2b1e0d4e09dc 100644 --- a/sys/riscv/conf/std.allwinner +++ b/sys/riscv/conf/std.allwinner @@ -7,6 +7,7 @@ options SOC_ALLWINNER_D1 device aw_ccu # Allwinner clock controller device aw_gpio # Allwinner GPIO controller +device aw_rtc # Allwinner Real-time Clock device aw_sid # Allwinner Secure ID EFUSE device aw_timer # Allwinner 
Timer device aw_usbphy # Allwinner USB PHY diff --git a/sys/sys/caprights.h b/sys/sys/caprights.h index 48c75afc62a0..6a5a17eda5ee 100644 --- a/sys/sys/caprights.h +++ b/sys/sys/caprights.h @@ -79,6 +79,8 @@ extern const cap_rights_t cap_futimes_rights; extern const cap_rights_t cap_getpeername_rights; extern const cap_rights_t cap_getsockopt_rights; extern const cap_rights_t cap_getsockname_rights; +extern const cap_rights_t cap_inotify_add_rights; +extern const cap_rights_t cap_inotify_rm_rights; extern const cap_rights_t cap_ioctl_rights; extern const cap_rights_t cap_linkat_source_rights; extern const cap_rights_t cap_linkat_target_rights; diff --git a/sys/sys/capsicum.h b/sys/sys/capsicum.h index d493535454e9..3847c4c73e75 100644 --- a/sys/sys/capsicum.h +++ b/sys/sys/capsicum.h @@ -279,11 +279,15 @@ #define CAP_KQUEUE (CAP_KQUEUE_EVENT | CAP_KQUEUE_CHANGE) +/* Allows operations on inotify descriptors. */ +#define CAP_INOTIFY_ADD CAPRIGHT(1, 0x0000000000200000ULL) +#define CAP_INOTIFY_RM CAPRIGHT(1, 0x0000000000400000ULL) + /* All used bits for index 1. */ -#define CAP_ALL1 CAPRIGHT(1, 0x00000000001FFFFFULL) +#define CAP_ALL1 CAPRIGHT(1, 0x00000000007FFFFFULL) /* Available bits for index 1. */ -#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000200000ULL) +#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000800000ULL) /* ... 
*/ #define CAP_UNUSED1_57 CAPRIGHT(1, 0x0100000000000000ULL) diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h index d770c274d7b7..cab94ac511a5 100644 --- a/sys/sys/exterr_cat.h +++ b/sys/sys/exterr_cat.h @@ -16,6 +16,8 @@ #define EXTERR_KTRACE 3 /* To allow inclusion of this file into kern_ktrace.c */ #define EXTERR_CAT_FUSE 4 +#define EXTERR_CAT_INOTIFY 5 +#define EXTERR_CAT_GENIO 6 #endif diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index dd9fccf5cf38..18d3928e91c7 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -144,6 +144,10 @@ typedef __pid_t pid_t; #define O_XATTR O_NAMEDATTR /* Solaris compatibility */ #endif +#if __POSIX_VISIBLE >= 202405 +#define O_CLOFORK 0x08000000 +#endif + /* * !!! DANGER !!! * @@ -280,6 +284,16 @@ typedef __pid_t pid_t; #define F_GET_SEALS 20 #define F_ISUNIONSTACK 21 /* Kludge for libc, don't use it. */ #define F_KINFO 22 /* Return kinfo_file for this fd */ +#endif /* __BSD_VISIBLE */ + +#if __POSIX_VISIBLE >= 202405 +#define F_DUPFD_CLOFORK 23 /* Like F_DUPFD, but FD_CLOFORK is set */ +#endif + +#if __BSD_VISIBLE +#define F_DUP3FD 24 /* Used with dup3() */ + +#define F_DUP3FD_SHIFT 16 /* Shift used for F_DUP3FD */ /* Seals (F_ADD_SEALS, F_GET_SEALS). 
*/ #define F_SEAL_SEAL 0x0001 /* Prevent adding sealings */ @@ -292,6 +306,9 @@ typedef __pid_t pid_t; #define FD_CLOEXEC 1 /* close-on-exec flag */ #define FD_RESOLVE_BENEATH 2 /* all lookups relative to fd have O_RESOLVE_BENEATH semantics */ +#if __POSIX_VISIBLE >= 202405 +#define FD_CLOFORK 4 /* close-on-fork flag */ +#endif /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */ #define F_RDLCK 1 /* shared or read lock */ diff --git a/sys/sys/file.h b/sys/sys/file.h index 284d523147b6..63313926c4f0 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -71,6 +71,7 @@ struct nameidata; #define DTYPE_PROCDESC 12 /* process descriptor */ #define DTYPE_EVENTFD 13 /* eventfd */ #define DTYPE_TIMERFD 14 /* timerfd */ +#define DTYPE_INOTIFY 15 /* inotify descriptor */ #ifdef _KERNEL diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 55969b2ff4b3..0a388c90de26 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -149,6 +149,7 @@ struct filedesc_to_leader { */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ #define UF_RESOLVE_BENEATH 0x02 /* lookups must be beneath this dir */ +#define UF_FOCLOSE 0x04 /* auto-close on fork */ #ifdef _KERNEL @@ -221,6 +222,7 @@ enum { /* Flags for kern_dup(). */ #define FDDUP_FLAG_CLOEXEC 0x1 /* Atomically set UF_EXCLOSE. */ +#define FDDUP_FLAG_CLOFORK 0x2 /* Atomically set UF_FOCLOSE. */ /* For backward compatibility. */ #define falloc(td, resultfp, resultfd, flags) \ diff --git a/sys/sys/inotify.h b/sys/sys/inotify.h new file mode 100644 index 000000000000..65dc5dba43f3 --- /dev/null +++ b/sys/sys/inotify.h @@ -0,0 +1,150 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +#ifndef _INOTIFY_H_ +#define _INOTIFY_H_ + +#include <sys/_types.h> + +/* Flags for inotify_init1(). 
*/ +#define IN_NONBLOCK 0x00000004 /* O_NONBLOCK */ +#define IN_CLOEXEC 0x00100000 /* O_CLOEXEC */ + +struct inotify_event { + int wd; + __uint32_t mask; + __uint32_t cookie; + __uint32_t len; + char name[0]; +}; + +/* Events, set in the mask field. */ +#define IN_ACCESS 0x00000001 +#define IN_MODIFY 0x00000002 +#define IN_ATTRIB 0x00000004 +#define IN_CLOSE_WRITE 0x00000008 +#define IN_CLOSE_NOWRITE 0x00000010 +#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) +#define IN_OPEN 0x00000020 +#define IN_MOVED_FROM 0x00000040 +#define IN_MOVED_TO 0x00000080 +#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) +#define IN_CREATE 0x00000100 +#define IN_DELETE 0x00000200 +#define IN_DELETE_SELF 0x00000400 +#define IN_MOVE_SELF 0x00000800 +#define IN_ALL_EVENTS 0x00000fff + +/* Events reported only for entries in a watched dir, not the dir itself. */ +#define _IN_DIR_EVENTS (IN_CLOSE_WRITE | IN_DELETE | IN_MODIFY | \ + IN_MOVED_FROM | IN_MOVED_TO) + +#ifdef _KERNEL +/* + * An unlink that's done as part of a rename only records IN_DELETE if the + * unlinked vnode itself is watched, and not when the containing directory is + * watched. + */ +#define _IN_MOVE_DELETE 0x40000000 +/* + * Inode link count changes only trigger IN_ATTRIB events if the inode itself is + * watched, and not when the containing directory is watched. + */ +#define _IN_ATTRIB_LINKCOUNT 0x80000000 +#endif + +/* Flags, set in the mask field. */ +#define IN_ONLYDIR 0x01000000 +#define IN_DONT_FOLLOW 0x02000000 +#define IN_EXCL_UNLINK 0x04000000 +#define IN_MASK_CREATE 0x10000000 +#define IN_MASK_ADD 0x20000000 +#define IN_ONESHOT 0x80000000 +#define _IN_ALL_FLAGS (IN_ONLYDIR | IN_DONT_FOLLOW | \ + IN_EXCL_UNLINK | IN_MASK_CREATE | \ + IN_MASK_ADD | IN_ONESHOT) + +/* Flags returned by the kernel. 
*/ +#define IN_UNMOUNT 0x00002000 +#define IN_Q_OVERFLOW 0x00004000 +#define IN_IGNORED 0x00008000 +#define IN_ISDIR 0x40000000 +#define _IN_ALL_RETFLAGS (IN_Q_OVERFLOW | IN_UNMOUNT | IN_IGNORED | \ + IN_ISDIR) + +#define _IN_ALIGN _Alignof(struct inotify_event) +#define _IN_NAMESIZE(namelen) \ + ((namelen) == 0 ? 0 : __align_up((namelen) + 1, _IN_ALIGN)) + +#ifdef _KERNEL +struct componentname; +struct file; +struct inotify_softc; +struct thread; +struct vnode; + +int inotify_create_file(struct thread *, struct file *, int, int *); +void inotify_log(struct vnode *, const char *, size_t, int, __uint32_t); + +int kern_inotify_rm_watch(int, uint32_t, struct thread *); +int kern_inotify_add_watch(int, int, const char *, uint32_t, + struct thread *); + +void vn_inotify(struct vnode *, struct vnode *, struct componentname *, int, + uint32_t); +int vn_inotify_add_watch(struct vnode *, struct inotify_softc *, + __uint32_t, __uint32_t *, struct thread *); +void vn_inotify_revoke(struct vnode *); + +/* Log an inotify event. */ +#define INOTIFY(vp, ev) do { \ + if (__predict_false((vn_irflag_read(vp) & (VIRF_INOTIFY | \ + VIRF_INOTIFY_PARENT)) != 0)) \ + VOP_INOTIFY((vp), NULL, NULL, (ev), 0); \ +} while (0) + +/* Log an inotify event using a specific name for the vnode. 
*/ +#define INOTIFY_NAME(vp, dvp, cnp, ev) do { \ + if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) \ + VOP_INOTIFY((vp), (dvp), (cnp), (ev), 0); \ +} while (0) + +extern __uint32_t inotify_rename_cookie; + +#define INOTIFY_MOVE(vp, fdvp, fcnp, tvp, tdvp, tcnp) do { \ + if (__predict_false((vn_irflag_read(fdvp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(tdvp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) { \ + __uint32_t cookie; \ + \ + cookie = atomic_fetchadd_32(&inotify_rename_cookie, 1); \ + VOP_INOTIFY((vp), (fdvp), (fcnp), IN_MOVED_FROM, cookie); \ + VOP_INOTIFY((vp), (tdvp), (tcnp), IN_MOVED_TO, cookie); \ + } \ + if ((tvp) != NULL) \ + INOTIFY_NAME((tvp), (tdvp), (tcnp), _IN_MOVE_DELETE); \ +} while (0) + +#define INOTIFY_REVOKE(vp) do { \ + if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) \ + vn_inotify_revoke((vp)); \ +} while (0) + +#else +#include <sys/cdefs.h> + +__BEGIN_DECLS +int inotify_init(void); +int inotify_init1(int flags); +int inotify_add_watch(int fd, const char *pathname, __uint32_t mask); +int inotify_add_watch_at(int fd, int dfd, const char *pathname, + __uint32_t mask); +int inotify_rm_watch(int fd, int wd); +__END_DECLS +#endif /* !_KERNEL */ + +#endif /* !_INOTIFY_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index a6f858e02395..f6480b173a5c 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -267,6 +267,7 @@ struct mount { int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */ int mnt_upper_pending; /* (i) # of pending ops on mnt_uppers */ struct lock mnt_explock; /* vfs_export walkers lock */ + struct lock mnt_renamelock; /* renames and O_RESOLVE_BENEATH */ TAILQ_HEAD(, mount_upper_node) mnt_uppers; /* (i) upper mounts over us */ TAILQ_HEAD(, mount_upper_node) mnt_notify; /* (i) upper mounts for notification */ STAILQ_ENTRY(mount) mnt_taskqueue_link; /* (d) our place in deferred unmount list */ diff --git a/sys/sys/namei.h 
b/sys/sys/namei.h index 5c245235ace5..6008d83f729d 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -108,7 +108,12 @@ struct nameidata { * through the VOP interface. */ struct componentname ni_cnd; + + /* Serving RBENEATH. */ struct nameicap_tracker_head ni_cap_tracker; + struct vnode *ni_rbeneath_dpp; + struct mount *ni_nctrack_mnt; + /* * Private helper data for UFS, must be at the end. See * NDINIT_PREFILL(). @@ -235,6 +240,10 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, panic("namei data not inited"); \ if (((arg)->ni_debugflags & NAMEI_DBG_HADSTARTDIR) != 0) \ panic("NDREINIT on namei data with NAMEI_DBG_HADSTARTDIR"); \ + if ((arg)->ni_nctrack_mnt != NULL) \ + panic("NDREINIT on namei data with leaked ni_nctrack_mnt"); \ + if (!TAILQ_EMPTY(&(arg)->ni_cap_tracker)) \ + panic("NDREINIT on namei data with leaked ni_cap_tracker"); \ (arg)->ni_debugflags = NAMEI_DBG_INITED; \ } #else @@ -259,6 +268,9 @@ do { \ _ndp->ni_resflags = 0; \ filecaps_init(&_ndp->ni_filecaps); \ _ndp->ni_rightsneeded = _rightsp; \ + _ndp->ni_rbeneath_dpp = NULL; \ + _ndp->ni_nctrack_mnt = NULL; \ + TAILQ_INIT(&_ndp->ni_cap_tracker); \ } while (0) #define NDREINIT(ndp) do { \ diff --git a/sys/sys/param.h b/sys/sys/param.h index 57eb8ebcf12c..af116d6e3f7a 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 1500050 +#define __FreeBSD_version 1500051 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index b15dace8cfa0..61411890c85b 100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -122,6 +122,8 @@ struct uidinfo { long ui_kqcnt; /* (b) number of kqueues */ long ui_umtxcnt; /* (b) number of shared umtxs */ long ui_pipecnt; /* (b) consumption of pipe buffers */ + long ui_inotifycnt; /* (b) number of inotify descriptors */ + long ui_inotifywatchcnt; /* (b) number of inotify watches */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ #ifdef RACCT @@ -144,6 +146,8 @@ int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgpipecnt(struct uidinfo *uip, int diff, rlim_t max); +int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval); +int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval); int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp); struct plimit diff --git a/sys/sys/socket.h b/sys/sys/socket.h index 5e7c554c34cf..cdd4fa3b4b89 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -111,10 +111,11 @@ typedef __uintptr_t uintptr_t; */ #define SOCK_CLOEXEC 0x10000000 #define SOCK_NONBLOCK 0x20000000 +#define SOCK_CLOFORK 0x40000000 #ifdef _KERNEL /* * Flags for accept1(), kern_accept4() and solisten_dequeue, in addition - * to SOCK_CLOEXEC and SOCK_NONBLOCK. + * to SOCK_CLOEXEC, SOCK_CLOFORK and SOCK_NONBLOCK. */ #define ACCEPT4_INHERIT 0x1 #define ACCEPT4_COMPAT 0x2 @@ -478,6 +479,9 @@ struct msghdr { #define MSG_MORETOCOME 0x00100000 /* additional data pending */ #define MSG_TLSAPPDATA 0x00200000 /* do not soreceive() alert rec. 
(TLS) */ #endif +#if __BSD_VISIBLE +#define MSG_CMSG_CLOFORK 0x00400000 /* make received fds close-on-fork */ +#endif /* * Header for ancillary data objects in msg_control buffer. diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h index dc4d88ce689f..0b79c841d149 100644 --- a/sys/sys/specialfd.h +++ b/sys/sys/specialfd.h @@ -30,6 +30,7 @@ enum specialfd_type { SPECIALFD_EVENTFD = 1, + SPECIALFD_INOTIFY = 2, }; struct specialfd_eventfd { @@ -37,4 +38,8 @@ struct specialfd_eventfd { int flags; }; +struct specialfd_inotify { + int flags; +}; + #endif /* !_SYS_SPECIALFD_H_ */ diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 68406a2dfc29..eec923d0b82e 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -529,4 +529,6 @@ #define SYS_fchroot 590 #define SYS_setcred 591 #define SYS_exterrctl 592 -#define SYS_MAXSYSCALL 593 +#define SYS_inotify_add_watch_at 593 +#define SYS_inotify_rm_watch 594 +#define SYS_MAXSYSCALL 595 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index 9a90a63f35a3..547242a73277 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -434,4 +434,6 @@ MIASM = \ getrlimitusage.o \ fchroot.o \ setcred.o \ - exterrctl.o + exterrctl.o \ + inotify_add_watch_at.o \ + inotify_rm_watch.o diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index fe6dd9e14fb4..fd183ffbc7a4 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -257,6 +257,7 @@ int kern_munlock(struct thread *td, uintptr_t addr, size_t size); int kern_munmap(struct thread *td, uintptr_t addr, size_t size); int kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt); +int kern_nosys(struct thread *td, int dummy); int kern_ntp_adjtime(struct thread *td, struct timex *ntv, int *retvalp); int kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, long *ploff); diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h index 6314b03142e7..4ddfc8516053 100644 --- a/sys/sys/sysent.h +++ b/sys/sys/sysent.h @@ -79,11 +79,10 @@ struct 
sysent { /* system call table */ */ #define SYF_CAPENABLED 0x00000001 -#define SY_THR_FLAGMASK 0x7 -#define SY_THR_STATIC 0x1 -#define SY_THR_DRAINING 0x2 -#define SY_THR_ABSENT 0x4 -#define SY_THR_INCR 0x8 +#define SY_THR_STATIC 0x01 +#define SY_THR_DRAINING 0x02 +#define SY_THR_ABSENT 0x04 +#define SY_THR_INCR 0x08 #ifdef KLD_MODULE #define SY_THR_STATIC_KLD 0 diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 94da81c84d25..94b5a0a7a95e 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -1891,6 +1891,16 @@ struct exterrctl_args { char flags_l_[PADL_(u_int)]; u_int flags; char flags_r_[PADR_(u_int)]; char ptr_l_[PADL_(void *)]; void * ptr; char ptr_r_[PADR_(void *)]; }; +struct inotify_add_watch_at_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char dfd_l_[PADL_(int)]; int dfd; char dfd_r_[PADR_(int)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; +}; +struct inotify_rm_watch_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char wd_l_[PADL_(int)]; int wd; char wd_r_[PADR_(int)]; +}; int sys_exit(struct thread *, struct exit_args *); int sys_fork(struct thread *, struct fork_args *); int sys_read(struct thread *, struct read_args *); @@ -2293,6 +2303,8 @@ int sys_getrlimitusage(struct thread *, struct getrlimitusage_args *); int sys_fchroot(struct thread *, struct fchroot_args *); int sys_setcred(struct thread *, struct setcred_args *); int sys_exterrctl(struct thread *, struct exterrctl_args *); +int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *); +int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *); #ifdef COMPAT_43 @@ -3275,6 +3287,8 @@ int freebsd13_swapoff(struct thread *, struct freebsd13_swapoff_args *); #define SYS_AUE_fchroot AUE_NULL #define SYS_AUE_setcred AUE_SETCRED #define SYS_AUE_exterrctl AUE_NULL +#define 
SYS_AUE_inotify_add_watch_at AUE_INOTIFY +#define SYS_AUE_inotify_rm_watch AUE_INOTIFY #undef PAD_ #undef PADL_ diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index f5caea2e3919..c291c1dc2b95 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -156,6 +156,7 @@ #define _PC_DEALLOC_PRESENT 65 #define _PC_NAMEDATTR_ENABLED 66 #define _PC_HAS_NAMEDATTR 67 +#define _PC_HAS_HIDDENSYSTEM 68 #endif /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */ @@ -210,6 +211,7 @@ * close_range() options. */ #define CLOSE_RANGE_CLOEXEC (1<<2) +#define CLOSE_RANGE_CLOFORK (1<<3) #endif /* __BSD_VISIBLE */ diff --git a/sys/sys/user.h b/sys/sys/user.h index f94a91ca1238..103236b6ed1b 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -265,6 +265,7 @@ struct user { #define KF_TYPE_DEV 12 #define KF_TYPE_EVENTFD 13 #define KF_TYPE_TIMERFD 14 +#define KF_TYPE_INOTIFY 15 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -456,6 +457,10 @@ struct kinfo_file { int32_t kf_kqueue_count; int32_t kf_kqueue_state; } kf_kqueue; + struct { + uint64_t kf_inotify_npending; + uint64_t kf_inotify_nbpending; + } kf_inotify; } kf_un; }; uint16_t kf_status; /* Status flags. */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index bed20f607339..2c6947103c94 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -86,11 +86,13 @@ enum vgetstate { * it from v_data. If non-null, this area is freed in getnewvnode(). 
*/ -struct namecache; struct cache_fpl; +struct inotify_watch; +struct namecache; struct vpollinfo { struct mtx vpi_lock; /* lock to protect below */ + TAILQ_HEAD(, inotify_watch) vpi_inotify; /* list of inotify watchers */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ @@ -248,6 +250,9 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes"); #define VIRF_CROSSMP 0x0010 /* Cross-mp vnode, no locking */ #define VIRF_NAMEDDIR 0x0020 /* Named attribute directory */ #define VIRF_NAMEDATTR 0x0040 /* Named attribute */ +#define VIRF_INOTIFY 0x0080 /* This vnode is being watched */ +#define VIRF_INOTIFY_PARENT 0x0100 /* A parent of this vnode may be being + watched */ #define VI_UNUSED0 0x0001 /* unused */ #define VI_MOUNT 0x0002 /* Mount in progress */ @@ -667,6 +672,7 @@ char *cache_symlink_alloc(size_t size, int flags); void cache_symlink_free(char *string, size_t size); int cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len); +void cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie); void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp); void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp); @@ -869,8 +875,10 @@ int vop_stdfsync(struct vop_fsync_args *); int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); -int vop_stdioctl(struct vop_ioctl_args *); int vop_stdneed_inactive(struct vop_need_inactive_args *); +int vop_stdinotify(struct vop_inotify_args *); +int vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *); +int vop_stdioctl(struct vop_ioctl_args *); int vop_stdkqfilter(struct vop_kqfilter_args *); int vop_stdlock(struct vop_lock1_args *); int vop_stdunlock(struct vop_unlock_args *); @@ 
-910,9 +918,12 @@ int dead_read(struct vop_read_args *ap); int dead_write(struct vop_write_args *ap); /* These are called from within the actual VOPS. */ +void vop_allocate_post(void *a, int rc); +void vop_copy_file_range_post(void *ap, int rc); void vop_close_post(void *a, int rc); void vop_create_pre(void *a); void vop_create_post(void *a, int rc); +void vop_deallocate_post(void *a, int rc); void vop_whiteout_pre(void *a); void vop_whiteout_post(void *a, int rc); void vop_deleteextattr_pre(void *a); @@ -1020,9 +1031,12 @@ void vop_rename_fail(struct vop_rename_args *ap); #define VOP_WRITE_POST(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ - if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \ - VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \ - | (noffset > osize ? NOTE_EXTEND : 0)); \ + if (noffset > ooffset) { \ + if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \ + VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE | \ + (noffset > osize ? NOTE_EXTEND : 0)); \ + } \ + INOTIFY((ap)->a_vp, IN_MODIFY); \ } #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__) diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk index d23c2af9bd9a..e829105197cc 100644 --- a/sys/tools/vnode_if.awk +++ b/sys/tools/vnode_if.awk @@ -193,6 +193,7 @@ if (cfile) { printc(common_head \ "#include <sys/param.h>\n" \ "#include <sys/event.h>\n" \ + "#include <sys/inotify.h>\n" \ "#include <sys/kernel.h>\n" \ "#include <sys/mount.h>\n" \ "#include <sys/sdt.h>\n" \ diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 891e490a7031..75f5fe716c31 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1012,7 +1012,6 @@ ffs_mountfs(struct vnode *odevvp, struct mount *mp, struct thread *td) else ump->um_check_blkno = NULL; mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); - sx_init(&ump->um_checkpath_lock, "uchpth"); fs->fs_ronly = ronly; fs->fs_active = NULL; mp->mnt_data = ump; @@ -1182,7 +1181,6 @@ out: } if (ump != NULL) { mtx_destroy(UFS_MTX(ump)); - 
sx_destroy(&ump->um_checkpath_lock); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; @@ -1306,7 +1304,6 @@ ffs_unmount(struct mount *mp, int mntflags) vrele(ump->um_odevvp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); - sx_destroy(&ump->um_checkpath_lock); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index eaf37c58756b..3f9c95e934fc 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1412,7 +1412,6 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, vp = tvp = ITOV(target); mp = vp->v_mount; *wait_ino = 0; - sx_assert(&VFSTOUFS(mp)->um_checkpath_lock, SA_XLOCKED); if (target->i_number == source_ino) return (EEXIST); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 9aea01e70951..53fac4b0665e 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1273,9 +1273,9 @@ ufs_rename( struct mount *mp; ino_t ino; seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s; - bool checkpath_locked, want_seqc_end; + bool want_seqc_end; - checkpath_locked = want_seqc_end = false; + want_seqc_end = false; endoff = 0; mp = tdvp->v_mount; @@ -1427,10 +1427,6 @@ relock: } vfs_ref(mp); MPASS(!want_seqc_end); - if (checkpath_locked) { - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; - } VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); @@ -1484,8 +1480,6 @@ relock: if (error) goto unlockout; - sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = true; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* @@ -1493,8 +1487,6 @@ relock: * everything else and VGET before restarting. 
*/ if (ino) { - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); VOP_UNLOCK(tdvp); @@ -1574,9 +1566,6 @@ relock: vn_seqc_write_end(fdvp); want_seqc_end = false; vfs_ref(mp); - MPASS(checkpath_locked); - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); @@ -1763,9 +1752,6 @@ unlockout: vn_seqc_write_end(fdvp); } - if (checkpath_locked) - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - vput(fdvp); vput(fvp); @@ -2734,6 +2720,9 @@ ufs_pathconf( case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; break; + case _PC_HAS_HIDDENSYSTEM: + *ap->a_retval = 1; + break; default: error = vop_stdpathconf(ap); diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index 5c7fa11dae6a..d33b01e4425e 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -97,8 +97,6 @@ struct ufsmount { uint64_t um_maxsymlinklen; /* (c) max size of short symlink */ struct mtx um_lock; /* (c) Protects ufsmount & fs */ - struct sx um_checkpath_lock; /* (c) Protects ufs_checkpath() - result */ struct mount_softdeps *um_softdep; /* (c) softdep mgmt structure */ struct vnode *um_quotas[MAXQUOTAS]; /* (q) pointer to quota files */ struct ucred *um_cred[MAXQUOTAS]; /* (q) quota file access cred */ diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 21584abacfa3..3e57e8d4f1d0 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1441,8 +1441,7 @@ vm_fault_busy_sleep(struct faultstate *fs) } vm_object_pip_wakeup(fs->object); vm_fault_unlock_map(fs); - if (fs->m != vm_page_lookup(fs->object, fs->pindex) || - !vm_page_busy_sleep(fs->m, "vmpfw", 0)) + if (!vm_page_busy_sleep(fs->m, "vmpfw", 0)) VM_OBJECT_UNLOCK(fs->object); VM_CNT_INC(v_intrans); vm_object_deallocate(fs->first_object); diff --git a/sys/x86/linux/linux_dummy_x86.c b/sys/x86/linux/linux_dummy_x86.c index ae1d23e811e7..221f5dbf5ba3 100644 --- a/sys/x86/linux/linux_dummy_x86.c +++ 
b/sys/x86/linux/linux_dummy_x86.c @@ -46,7 +46,5 @@ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); DUMMY(sysfs); DUMMY(quotactl); -/* Linux 2.6.13: */ -DUMMY(inotify_init); /* Linux 2.6.22: */ DUMMY(signalfd); diff --git a/tests/Makefile b/tests/Makefile index e8dd7793f169..451d55498a26 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,3 +1,5 @@ +.include <src.opts.mk> + PACKAGE= tests TESTSDIR= ${TESTSBASE} @@ -11,6 +13,9 @@ SUBDIR+= examples SUBDIR+= include SUBDIR+= sys SUBDIR+= atf_python +.if ${MK_CDDL} != "no" +SUBDIR+= oclo +.endif SUBDIR_PARALLEL= diff --git a/tests/ci/tools/freebsdci b/tests/ci/tools/freebsdci index f0030fe00aba..7b4ce9669ab2 100755 --- a/tests/ci/tools/freebsdci +++ b/tests/ci/tools/freebsdci @@ -34,6 +34,7 @@ rcvar=freebsdci_enable start_cmd="firstboot_ci_run" stop_cmd=":" os_arch=$(uname -p) +parallelism=$(nproc) tardev=/dev/vtbd1 metadir=/meta istar=$(file -s ${tardev} | grep "POSIX tar archive" | wc -l) @@ -74,7 +75,7 @@ full_tests() tar xvf ${tardev} -C ${metadir} cd /usr/tests set +e - kyua test + kyua -v parallelism=${parallelism} test rc=$? set -e if [ ${rc} -ne 0 ] && [ ${rc} -ne 1 ]; then diff --git a/tests/oclo/Makefile b/tests/oclo/Makefile new file mode 100644 index 000000000000..350c9f857c85 --- /dev/null +++ b/tests/oclo/Makefile @@ -0,0 +1,11 @@ +.PATH: ${SRCTOP}/cddl/contrib/opensolaris/tests/os-tests/tests/oclo + +TESTSDIR= ${TESTSBASE}/cddl/oclo + +PLAIN_TESTS_C= oclo oclo_errors ocloexec_verify + +SRCS.oclo= oclo.c +LIBADD.oclo+= openbsd +LIBADD.ocloexec_verify+= util + +.include <bsd.test.mk> diff --git a/tests/sys/file/closefrom_test.c b/tests/sys/file/closefrom_test.c index e30c5eb3d591..7dccf858c772 100644 --- a/tests/sys/file/closefrom_test.c +++ b/tests/sys/file/closefrom_test.c @@ -144,7 +144,7 @@ main(void) pid_t pid; int fd, flags, i, start; - printf("1..21\n"); + printf("1..22\n"); /* We'd better start up with fd's 0, 1, and 2 open. 
*/ start = devnull(); @@ -356,5 +356,38 @@ main(void) fail_err("close_range"); ok("close_range(..., CLOSE_RANGE_CLOEXEC)"); + /* test CLOSE_RANGE_CLOFORK */ + for (i = 0; i < 8; i++) + (void)devnull(); + fd = highest_fd(); + start = fd - 8; + if (close_range(start + 1, start + 4, CLOSE_RANGE_CLOFORK) < 0) + fail_err("close_range(..., CLOSE_RANGE_CLOFORK)"); + flags = fcntl(start, F_GETFD); + if (flags < 0) + fail_err("fcntl(.., F_GETFD)"); + if ((flags & FD_CLOFORK) != 0) + fail("close_range", "CLOSE_RANGE_CLOFORK set close-on-fork " + "when it should not have on fd %d", start); + for (i = start + 1; i <= start + 4; i++) { + flags = fcntl(i, F_GETFD); + if (flags < 0) + fail_err("fcntl(.., F_GETFD)"); + if ((flags & FD_CLOFORK) == 0) + fail("close_range", "CLOSE_RANGE_CLOFORK did not set " + "close-on-fork on fd %d", i); + } + for (; i < start + 8; i++) { + flags = fcntl(i, F_GETFD); + if (flags < 0) + fail_err("fcntl(.., F_GETFD)"); + if ((flags & FD_CLOFORK) != 0) + fail("close_range", "CLOSE_RANGE_CLOFORK set close-on-fork " + "when it should not have on fd %d", i); + } + if (close_range(start, start + 8, 0) < 0) + fail_err("close_range"); + ok("close_range(..., CLOSE_RANGE_CLOFORK)"); + return (0); } diff --git a/tests/sys/file/dup_test.c b/tests/sys/file/dup_test.c index b024e72d0d1a..455115eda8c8 100644 --- a/tests/sys/file/dup_test.c +++ b/tests/sys/file/dup_test.c @@ -46,6 +46,8 @@ * Test #31: check if dup3(0) fails if oldfd == newfd. * Test #32: check if dup3(O_CLOEXEC) to a fd > current maximum number of * open files limit work. + * Tests #33-43 : Same as #18-26, 30 & 32 with O_CLOFORK instead of O_CLOEXEC, + * except F_DUP2FD_CLOEXEC. */ #include <sys/types.h> @@ -82,7 +84,7 @@ main(int __unused argc, char __unused *argv[]) orgfd = getafile(); - printf("1..32\n"); + printf("1..43\n"); /* If dup(2) ever work? 
*/ if ((fd1 = dup(orgfd)) < 0) @@ -380,5 +382,99 @@ main(int __unused argc, char __unused *argv[]) printf("ok %d - dup3(O_CLOEXEC) didn't bypass NOFILE limit\n", test); + /* Does fcntl(F_DUPFD_CLOFORK) work? */ + if ((fd2 = fcntl(fd1, F_DUPFD_CLOFORK, 10)) < 0) + err(1, "fcntl(F_DUPFD_CLOFORK)"); + if (fd2 < 10) + printf("not ok %d - fcntl(F_DUPFD_CLOFORK) returned wrong fd %d\n", + ++test, fd2); + else + printf("ok %d - fcntl(F_DUPFD_CLOFORK) works\n", ++test); + + /* Was close-on-fork set? */ + ++test; + if (fcntl(fd2, F_GETFD) != FD_CLOFORK) + printf( + "not ok %d - fcntl(F_DUPFD_CLOFORK) didn't set close-on-fork\n", + test); + else + printf("ok %d - fcntl(F_DUPFD_CLOFORK) set close-on-fork\n", + test); + + /* Does dup3(O_CLOFORK) ever work? */ + if ((fd2 = dup3(fd1, fd1 + 1, O_CLOFORK)) < 0) + err(1, "dup3(O_CLOFORK)"); + printf("ok %d - dup3(O_CLOFORK) works\n", ++test); + + /* Do we get the right fd? */ + ++test; + if (fd2 != fd1 + 1) + printf( + "not ok %d - dup3(O_CLOFORK) didn't give us the right fd\n", + test); + else + printf("ok %d - dup3(O_CLOFORK) returned a correct fd\n", + test); + + /* Was close-on-fork set? */ + ++test; + if (fcntl(fd2, F_GETFD) != FD_CLOFORK) + printf( + "not ok %d - dup3(O_CLOFORK) didn't set close-on-fork\n", + test); + else + printf("ok %d - dup3(O_CLOFORK) set close-on-fork\n", + test); + + /* Does dup3(0) ever work? */ + if ((fd2 = dup3(fd1, fd1 + 1, 0)) < 0) + err(1, "dup3(0)"); + printf("ok %d - dup3(0) works\n", ++test); + + /* Do we get the right fd? */ + ++test; + if (fd2 != fd1 + 1) + printf( + "not ok %d - dup3(0) didn't give us the right fd\n", + test); + else + printf("ok %d - dup3(0) returned a correct fd\n", + test); + + /* Was close-on-fork cleared? 
*/ + ++test; + if (fcntl(fd2, F_GETFD) != 0) + printf( + "not ok %d - dup3(0) didn't clear close-on-fork\n", + test); + else + printf("ok %d - dup3(0) cleared close-on-fork\n", + test); + + /* dup3() does not allow duplicating to the same fd */ + ++test; + if (dup3(fd1, fd1, O_CLOFORK) != -1) + printf( + "not ok %d - dup3(fd1, fd1, O_CLOFORK) succeeded\n", test); + else + printf("ok %d - dup3(fd1, fd1, O_CLOFORK) failed\n", test); + + ++test; + if (dup3(fd1, fd1, 0) != -1) + printf( + "not ok %d - dup3(fd1, fd1, 0) succeeded\n", test); + else + printf("ok %d - dup3(fd1, fd1, 0) failed\n", test); + + ++test; + if (getrlimit(RLIMIT_NOFILE, &rlp) < 0) + err(1, "getrlimit"); + if ((fd2 = dup3(fd1, rlp.rlim_cur + 1, O_CLOFORK)) >= 0) + printf("not ok %d - dup3(O_CLOFORK) bypassed NOFILE limit\n", + test); + else + printf("ok %d - dup3(O_CLOFORK) didn't bypass NOFILE limit\n", + test); + return (0); } diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile index 8cc7beade3f3..26c0013696c7 100644 --- a/tests/sys/kern/Makefile +++ b/tests/sys/kern/Makefile @@ -19,6 +19,7 @@ ATF_TESTS_C+= kern_descrip_test TEST_METADATA.kern_descrip_test+= is_exclusive="true" ATF_TESTS_C+= fdgrowtable_test ATF_TESTS_C+= jail_lookup_root +ATF_TESTS_C+= inotify_test ATF_TESTS_C+= kill_zombie .if ${MK_OPENSSL} != "no" ATF_TESTS_C+= ktls_test @@ -85,6 +86,7 @@ LIBADD.sys_getrandom+= c LIBADD.sys_getrandom+= pthread LIBADD.ptrace_test+= pthread LIBADD.unix_seqpacket_test+= pthread +LIBADD.inotify_test+= util LIBADD.kcov+= pthread CFLAGS.ktls_test+= -DOPENSSL_API_COMPAT=0x10100000L LIBADD.ktls_test+= crypto util diff --git a/tests/sys/kern/inotify_test.c b/tests/sys/kern/inotify_test.c new file mode 100644 index 000000000000..ed7cef5d148c --- /dev/null +++ b/tests/sys/kern/inotify_test.c @@ -0,0 +1,862 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. 
+ */ + +#include <sys/capsicum.h> +#include <sys/filio.h> +#include <sys/inotify.h> +#include <sys/ioccom.h> +#include <sys/mount.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysctl.h> +#include <sys/un.h> + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <mntopts.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <atf-c.h> + +static const char * +ev2name(int event) +{ + switch (event) { + case IN_ACCESS: + return ("IN_ACCESS"); + case IN_ATTRIB: + return ("IN_ATTRIB"); + case IN_CLOSE_WRITE: + return ("IN_CLOSE_WRITE"); + case IN_CLOSE_NOWRITE: + return ("IN_CLOSE_NOWRITE"); + case IN_CREATE: + return ("IN_CREATE"); + case IN_DELETE: + return ("IN_DELETE"); + case IN_DELETE_SELF: + return ("IN_DELETE_SELF"); + case IN_MODIFY: + return ("IN_MODIFY"); + case IN_MOVE_SELF: + return ("IN_MOVE_SELF"); + case IN_MOVED_FROM: + return ("IN_MOVED_FROM"); + case IN_MOVED_TO: + return ("IN_MOVED_TO"); + case IN_OPEN: + return ("IN_OPEN"); + default: + return (NULL); + } +} + +static void +close_checked(int fd) +{ + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Make sure that no other events are pending, and close the inotify descriptor. + */ +static void +close_inotify(int fd) +{ + int n; + + ATF_REQUIRE(ioctl(fd, FIONREAD, &n) == 0); + ATF_REQUIRE(n == 0); + close_checked(fd); +} + +static uint32_t +consume_event_cookie(int ifd, int wd, int event, int flags, const char *name) +{ + struct inotify_event *ev; + size_t evsz, namelen; + ssize_t n; + uint32_t cookie; + + /* Only read one record. */ + namelen = name == NULL ? 
0 : strlen(name); + evsz = sizeof(*ev) + _IN_NAMESIZE(namelen); + ev = malloc(evsz); + ATF_REQUIRE(ev != NULL); + + n = read(ifd, ev, evsz); + ATF_REQUIRE_MSG(n >= 0, "failed to read event %s", ev2name(event)); + ATF_REQUIRE((size_t)n >= sizeof(*ev)); + ATF_REQUIRE((size_t)n == sizeof(*ev) + ev->len); + ATF_REQUIRE((size_t)n == evsz); + + ATF_REQUIRE_MSG((ev->mask & IN_ALL_EVENTS) == event, + "expected event %#x, got %#x", event, ev->mask); + ATF_REQUIRE_MSG((ev->mask & _IN_ALL_RETFLAGS) == flags, + "expected flags %#x, got %#x", flags, ev->mask); + ATF_REQUIRE_MSG(ev->wd == wd, + "expected wd %d, got %d", wd, ev->wd); + ATF_REQUIRE_MSG(name == NULL || strcmp(name, ev->name) == 0, + "expected name '%s', got '%s'", name, ev->name); + cookie = ev->cookie; + if ((ev->mask & (IN_MOVED_FROM | IN_MOVED_TO)) == 0) + ATF_REQUIRE(cookie == 0); + free(ev); + return (cookie); +} + +/* + * Read an event from the inotify file descriptor and check that it + * matches the expected values. + */ +static void +consume_event(int ifd, int wd, int event, int flags, const char *name) +{ + (void)consume_event_cookie(ifd, wd, event, flags, name); +} + +static int +inotify(int flags) +{ + int ifd; + + ifd = inotify_init1(flags); + ATF_REQUIRE(ifd != -1); + return (ifd); +} + +static void +mount_nullfs(char *dir, char *src) +{ + struct iovec *iov; + char errmsg[1024]; + int error, iovlen; + + iov = NULL; + iovlen = 0; + + build_iovec(&iov, &iovlen, "fstype", "nullfs", (size_t)-1); + build_iovec(&iov, &iovlen, "fspath", dir, (size_t)-1); + build_iovec(&iov, &iovlen, "target", src, (size_t)-1); + build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); + + errmsg[0] = '\0'; + error = nmount(iov, iovlen, 0); + ATF_REQUIRE_MSG(error == 0, + "mount nullfs %s %s: %s", src, dir, + errmsg[0] == '\0' ? 
strerror(errno) : errmsg); + + free_iovec(&iov, &iovlen); +} + +static void +mount_tmpfs(const char *dir) +{ + struct iovec *iov; + char errmsg[1024]; + int error, iovlen; + + iov = NULL; + iovlen = 0; + + build_iovec(&iov, &iovlen, "fstype", "tmpfs", (size_t)-1); + build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, dir), + (size_t)-1); + build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); + + errmsg[0] = '\0'; + error = nmount(iov, iovlen, 0); + ATF_REQUIRE_MSG(error == 0, + "mount tmpfs %s: %s", dir, + errmsg[0] == '\0' ? strerror(errno) : errmsg); + + free_iovec(&iov, &iovlen); +} + +static int +watch_file(int ifd, int events, char *path) +{ + int fd, wd; + + strlcpy(path, "test.XXXXXX", PATH_MAX); + fd = mkstemp(path); + ATF_REQUIRE(fd != -1); + close_checked(fd); + + wd = inotify_add_watch(ifd, path, events); + ATF_REQUIRE(wd != -1); + + return (wd); +} + +static int +watch_dir(int ifd, int events, char *path) +{ + char *p; + int wd; + + strlcpy(path, "test.XXXXXX", PATH_MAX); + p = mkdtemp(path); + ATF_REQUIRE(p == path); + + wd = inotify_add_watch(ifd, path, events); + ATF_REQUIRE(wd != -1); + + return (wd); +} + +/* + * Verify that Capsicum restrictions are applied as expected. + */ +ATF_TC_WITHOUT_HEAD(inotify_capsicum); +ATF_TC_BODY(inotify_capsicum, tc) +{ + int error, dfd, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + ATF_REQUIRE(ifd != -1); + + dfd = open(".", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + + error = mkdirat(dfd, "testdir", 0755); + ATF_REQUIRE(error == 0); + + error = cap_enter(); + ATF_REQUIRE(error == 0); + + /* + * Plain inotify_add_watch() is disallowed. + */ + wd = inotify_add_watch(ifd, ".", IN_DELETE_SELF); + ATF_REQUIRE_ERRNO(ECAPMODE, wd == -1); + wd = inotify_add_watch_at(ifd, dfd, "testdir", IN_DELETE_SELF); + ATF_REQUIRE(wd >= 0); + + /* + * Generate a record and consume it. 
+ */ + error = unlinkat(dfd, "testdir", AT_REMOVEDIR); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_checked(dfd); + close_inotify(ifd); +} + +/* + * Make sure that duplicate, back-to-back events are coalesced. + */ +ATF_TC_WITHOUT_HEAD(inotify_coalesce); +ATF_TC_BODY(inotify_coalesce, tc) +{ + char file[PATH_MAX], path[PATH_MAX]; + int fd, fd1, ifd, n, wd; + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it. */ + wd = watch_dir(ifd, IN_OPEN, path); + /* Create a file in the directory and open it. */ + snprintf(file, sizeof(file), "%s/file", path); + fd = open(file, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + fd = open(file, O_RDWR); + ATF_REQUIRE(fd != -1); + fd1 = open(file, O_RDONLY); + ATF_REQUIRE(fd1 != -1); + close_checked(fd1); + close_checked(fd); + + consume_event(ifd, wd, IN_OPEN, 0, "file"); + ATF_REQUIRE(ioctl(ifd, FIONREAD, &n) == 0); + ATF_REQUIRE(n == 0); + + close_inotify(ifd); +} + +/* + * Check handling of IN_MASK_CREATE. + */ +ATF_TC_WITHOUT_HEAD(inotify_mask_create); +ATF_TC_BODY(inotify_mask_create, tc) +{ + char path[PATH_MAX]; + int ifd, wd, wd1; + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it. */ + wd = watch_dir(ifd, IN_CREATE, path); + /* Updating the watch with IN_MASK_CREATE should result in an error. */ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_CREATE); + ATF_REQUIRE_ERRNO(EEXIST, wd1 == -1); + /* It's an error to specify IN_MASK_ADD with IN_MASK_CREATE. */ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_ADD | + IN_MASK_CREATE); + ATF_REQUIRE_ERRNO(EINVAL, wd1 == -1); + /* Updating the watch without IN_MASK_CREATE should work. 
*/ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY); + ATF_REQUIRE(wd1 != -1); + ATF_REQUIRE_EQ(wd, wd1); + + close_inotify(ifd); +} + +/* + * Make sure that inotify cooperates with nullfs: if a lower vnode is the + * subject of an event, the upper vnode should be notified, and if the upper + * vnode is the subject of an event, the lower vnode should be notified. + */ +ATF_TC_WITH_CLEANUP(inotify_nullfs); +ATF_TC_HEAD(inotify_nullfs, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} +ATF_TC_BODY(inotify_nullfs, tc) +{ + char path[PATH_MAX], *p; + int dfd, error, fd, ifd, mask, wd; + + mask = IN_CREATE | IN_OPEN; + + ifd = inotify(IN_NONBLOCK); + + strlcpy(path, "./test.XXXXXX", sizeof(path)); + p = mkdtemp(path); + ATF_REQUIRE(p == path); + + error = mkdir("./mnt", 0755); + ATF_REQUIRE(error == 0); + + /* Mount the testdir onto ./mnt. */ + mount_nullfs("./mnt", path); + + wd = inotify_add_watch(ifd, "./mnt", mask); + ATF_REQUIRE(wd != -1); + + /* Create a file in the lower directory and open it. */ + dfd = open(path, O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + fd = openat(dfd, "file", O_RDWR | O_CREAT, 0644); + close_checked(fd); + close_checked(dfd); + + /* We should see events via the nullfs mount. */ + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + consume_event(ifd, wd, IN_CREATE, 0, "file"); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + error = inotify_rm_watch(ifd, wd); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + /* Watch the lower directory. */ + wd = inotify_add_watch(ifd, path, mask); + ATF_REQUIRE(wd != -1); + /* ... and create a file in the upper directory and open it. */ + dfd = open("./mnt", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + fd = openat(dfd, "file2", O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + close_checked(dfd); + + /* We should see events via the lower directory. 
*/ + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + consume_event(ifd, wd, IN_CREATE, 0, "file2"); + consume_event(ifd, wd, IN_OPEN, 0, "file2"); + + close_inotify(ifd); +} +ATF_TC_CLEANUP(inotify_nullfs, tc) +{ + int error; + + error = unmount("./mnt", 0); + if (error != 0) { + perror("unmount"); + exit(1); + } +} + +/* + * Make sure that exceeding max_events pending events results in an overflow + * event. + */ +ATF_TC_WITHOUT_HEAD(inotify_queue_overflow); +ATF_TC_BODY(inotify_queue_overflow, tc) +{ + char path[PATH_MAX]; + size_t size; + int error, dfd, ifd, max, wd; + + size = sizeof(max); + error = sysctlbyname("vfs.inotify.max_queued_events", &max, &size, NULL, + 0); + ATF_REQUIRE(error == 0); + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it for file creation events. */ + wd = watch_dir(ifd, IN_CREATE, path); + dfd = open(path, O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + /* Generate max+1 file creation events. */ + for (int i = 0; i < max + 1; i++) { + char name[NAME_MAX]; + int fd; + + (void)snprintf(name, sizeof(name), "file%d", i); + fd = openat(dfd, name, O_CREAT | O_RDWR, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + } + + /* + * Read our events. We should see files 0..max-1 and then an overflow + * event. + */ + for (int i = 0; i < max; i++) { + char name[NAME_MAX]; + + (void)snprintf(name, sizeof(name), "file%d", i); + consume_event(ifd, wd, IN_CREATE, 0, name); + } + + /* Look for an overflow event. */ + consume_event(ifd, -1, 0, IN_Q_OVERFLOW, NULL); + + close_checked(dfd); + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_access_file); +ATF_TC_BODY(inotify_event_access_file, tc) +{ + char path[PATH_MAX], buf[16]; + off_t nb; + ssize_t n; + int error, fd, fd1, ifd, s[2], wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_ACCESS, path); + + fd = open(path, O_RDWR); + n = write(fd, "test", 4); + ATF_REQUIRE(n == 4); + + /* A simple read(2) should generate an access. 
*/ + ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0); + n = read(fd, buf, sizeof(buf)); + ATF_REQUIRE(n == 4); + ATF_REQUIRE(memcmp(buf, "test", 4) == 0); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + + /* copy_file_range(2) should as well. */ + ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0); + fd1 = open("sink", O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd1 != -1); + n = copy_file_range(fd, NULL, fd1, NULL, 4, 0); + ATF_REQUIRE(n == 4); + close_checked(fd1); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + + /* As should sendfile(2). */ + error = socketpair(AF_UNIX, SOCK_STREAM, 0, s); + ATF_REQUIRE(error == 0); + error = sendfile(fd, s[0], 0, 4, NULL, &nb, 0); + ATF_REQUIRE(error == 0); + ATF_REQUIRE(nb == 4); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + close_checked(s[0]); + close_checked(s[1]); + + close_checked(fd); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_access_dir); +ATF_TC_BODY(inotify_event_access_dir, tc) +{ + char root[PATH_MAX], path[PATH_MAX]; + struct dirent *ent; + DIR *dir; + int error, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_ACCESS, root); + snprintf(path, sizeof(path), "%s/dir", root); + error = mkdir(path, 0755); + ATF_REQUIRE(error == 0); + + /* Read an entry and generate an access. */ + dir = opendir(path); + ATF_REQUIRE(dir != NULL); + ent = readdir(dir); + ATF_REQUIRE(ent != NULL); + ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 || + strcmp(ent->d_name, "..") == 0); + ATF_REQUIRE(closedir(dir) == 0); + consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, "dir"); + + /* + * Reading the watched directory should generate an access event. + * This is contrary to Linux's inotify man page, which states that + * IN_ACCESS is only generated for accesses to objects in a watched + * directory. 
+ */ + dir = opendir(root); + ATF_REQUIRE(dir != NULL); + ent = readdir(dir); + ATF_REQUIRE(ent != NULL); + ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 || + strcmp(ent->d_name, "..") == 0); + ATF_REQUIRE(closedir(dir) == 0); + consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_attrib); +ATF_TC_BODY(inotify_event_attrib, tc) +{ + char path[PATH_MAX]; + int error, ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_ATTRIB, path); + + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + error = fchmod(fd, 0600); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_ATTRIB, 0, NULL); + + error = fchown(fd, getuid(), getgid()); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_ATTRIB, 0, NULL); + + close_checked(fd); + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_close_nowrite); +ATF_TC_BODY(inotify_event_close_nowrite, tc) +{ + char file[PATH_MAX], file1[PATH_MAX], dir[PATH_MAX]; + int ifd, fd, wd1, wd2; + + ifd = inotify(IN_NONBLOCK); + + wd1 = watch_dir(ifd, IN_CLOSE_NOWRITE, dir); + wd2 = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, file); + + fd = open(dir, O_DIRECTORY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd1, IN_CLOSE_NOWRITE, IN_ISDIR, NULL); + + fd = open(file, O_RDONLY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd2, IN_CLOSE_NOWRITE, 0, NULL); + + snprintf(file1, sizeof(file1), "%s/file", dir); + fd = open(file1, O_RDONLY | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd1, IN_CLOSE_NOWRITE, 0, "file"); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_close_write); +ATF_TC_BODY(inotify_event_close_write, tc) +{ + char path[PATH_MAX]; + int ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, path); + + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, 
wd, IN_CLOSE_WRITE, 0, NULL); + + close_inotify(ifd); +} + +/* Verify that various operations in a directory generate IN_CREATE events. */ +ATF_TC_WITHOUT_HEAD(inotify_event_create); +ATF_TC_BODY(inotify_event_create, tc) +{ + struct sockaddr_un sun; + char path[PATH_MAX], path1[PATH_MAX], root[PATH_MAX]; + ssize_t n; + int error, ifd, ifd1, fd, s, wd, wd1; + char b; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_CREATE, root); + + /* Regular file. */ + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + /* + * Make sure we get an event triggered by the fd used to create the + * file. + */ + ifd1 = inotify(IN_NONBLOCK); + wd1 = inotify_add_watch(ifd1, root, IN_MODIFY); + b = 42; + n = write(fd, &b, sizeof(b)); + ATF_REQUIRE(n == sizeof(b)); + close_checked(fd); + consume_event(ifd, wd, IN_CREATE, 0, "file"); + consume_event(ifd1, wd1, IN_MODIFY, 0, "file"); + close_inotify(ifd1); + + /* Hard link. */ + snprintf(path1, sizeof(path1), "%s/link", root); + error = link(path, path1); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "link"); + + /* Directory. */ + snprintf(path, sizeof(path), "%s/dir", root); + error = mkdir(path, 0755); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, IN_ISDIR, "dir"); + + /* Symbolic link. */ + snprintf(path1, sizeof(path1), "%s/symlink", root); + error = symlink(path, path1); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "symlink"); + + /* FIFO. */ + snprintf(path, sizeof(path), "%s/fifo", root); + error = mkfifo(path, 0644); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "fifo"); + + /* Binding a socket. 
*/ + s = socket(AF_UNIX, SOCK_STREAM, 0); + memset(&sun, 0, sizeof(sun)); + sun.sun_family = AF_UNIX; + sun.sun_len = sizeof(sun); + snprintf(sun.sun_path, sizeof(sun.sun_path), "%s/socket", root); + error = bind(s, (struct sockaddr *)&sun, sizeof(sun)); + ATF_REQUIRE(error == 0); + close_checked(s); + consume_event(ifd, wd, IN_CREATE, 0, "socket"); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_delete); +ATF_TC_BODY(inotify_event_delete, tc) +{ + char root[PATH_MAX], path[PATH_MAX], file[PATH_MAX]; + int error, fd, ifd, wd, wd2; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_DELETE | IN_DELETE_SELF, root); + + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + error = unlink(path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE, 0, "file"); + close_checked(fd); + + /* + * Make sure that renaming over a file generates a delete event when and + * only when that file is watched. + */ + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + wd2 = inotify_add_watch(ifd, path, IN_DELETE | IN_DELETE_SELF); + ATF_REQUIRE(wd2 != -1); + snprintf(file, sizeof(file), "%s/file2", root); + fd = open(file, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + error = rename(file, path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd2, IN_DELETE_SELF, 0, NULL); + consume_event(ifd, wd2, 0, IN_IGNORED, NULL); + + error = unlink(path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE, 0, "file"); + error = rmdir(root); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_move); +ATF_TC_BODY(inotify_event_move, tc) +{ + char dir1[PATH_MAX], dir2[PATH_MAX], path1[PATH_MAX], path2[PATH_MAX]; + char path3[PATH_MAX]; + int error, ifd, fd, wd1, wd2, wd3; + uint32_t 
cookie1, cookie2; + + ifd = inotify(IN_NONBLOCK); + + wd1 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir1); + wd2 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir2); + + snprintf(path1, sizeof(path1), "%s/file", dir1); + fd = open(path1, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + snprintf(path2, sizeof(path2), "%s/file2", dir2); + error = rename(path1, path2); + ATF_REQUIRE(error == 0); + cookie1 = consume_event_cookie(ifd, wd1, IN_MOVED_FROM, 0, "file"); + cookie2 = consume_event_cookie(ifd, wd2, IN_MOVED_TO, 0, "file2"); + ATF_REQUIRE_MSG(cookie1 == cookie2, + "expected cookie %u, got %u", cookie1, cookie2); + + snprintf(path2, sizeof(path2), "%s/dir", dir2); + error = rename(dir1, path2); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd1, IN_MOVE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd2, IN_MOVED_TO, IN_ISDIR, "dir"); + + wd3 = watch_file(ifd, IN_MOVE_SELF, path3); + error = rename(path3, "foo"); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd3, IN_MOVE_SELF, 0, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_open); +ATF_TC_BODY(inotify_event_open, tc) +{ + char root[PATH_MAX], path[PATH_MAX]; + int error, ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_OPEN, root); + + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + fd = open(path, O_PATH); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + fd = open(root, O_DIRECTORY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + + snprintf(path, sizeof(path), "%s/fifo", root); + error = mkfifo(path, 0644); + ATF_REQUIRE(error == 0); + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "fifo"); + + close_inotify(ifd); +} + 
+ATF_TC_WITH_CLEANUP(inotify_event_unmount); +ATF_TC_HEAD(inotify_event_unmount, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} +ATF_TC_BODY(inotify_event_unmount, tc) +{ + int error, fd, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + + error = mkdir("./root", 0755); + ATF_REQUIRE(error == 0); + + mount_tmpfs("./root"); + + error = mkdir("./root/dir", 0755); + ATF_REQUIRE(error == 0); + wd = inotify_add_watch(ifd, "./root/dir", IN_OPEN); + ATF_REQUIRE(wd >= 0); + + fd = open("./root/dir", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(fd != -1); + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + close_checked(fd); + + /* A regular unmount should fail, as inotify holds a vnode reference. */ + error = unmount("./root", 0); + ATF_REQUIRE_ERRNO(EBUSY, error == -1); + error = unmount("./root", MNT_FORCE); + ATF_REQUIRE_MSG(error == 0, + "unmounting ./root failed: %s", strerror(errno)); + + consume_event(ifd, wd, 0, IN_UNMOUNT, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_inotify(ifd); +} +ATF_TC_CLEANUP(inotify_event_unmount, tc) +{ + (void)unmount("./root", MNT_FORCE); +} + +ATF_TP_ADD_TCS(tp) +{ + /* Tests for the inotify syscalls. */ + ATF_TP_ADD_TC(tp, inotify_capsicum); + ATF_TP_ADD_TC(tp, inotify_coalesce); + ATF_TP_ADD_TC(tp, inotify_mask_create); + ATF_TP_ADD_TC(tp, inotify_nullfs); + ATF_TP_ADD_TC(tp, inotify_queue_overflow); + /* Tests for the various inotify event types. 
*/ + ATF_TP_ADD_TC(tp, inotify_event_access_file); + ATF_TP_ADD_TC(tp, inotify_event_access_dir); + ATF_TP_ADD_TC(tp, inotify_event_attrib); + ATF_TP_ADD_TC(tp, inotify_event_close_nowrite); + ATF_TP_ADD_TC(tp, inotify_event_close_write); + ATF_TP_ADD_TC(tp, inotify_event_create); + ATF_TP_ADD_TC(tp, inotify_event_delete); + ATF_TP_ADD_TC(tp, inotify_event_move); + ATF_TP_ADD_TC(tp, inotify_event_open); + ATF_TP_ADD_TC(tp, inotify_event_unmount); + return (atf_no_error()); +} diff --git a/tests/sys/kern/unix_passfd_test.c b/tests/sys/kern/unix_passfd_test.c index 95271c04a16b..7dc4541ad402 100644 --- a/tests/sys/kern/unix_passfd_test.c +++ b/tests/sys/kern/unix_passfd_test.c @@ -380,6 +380,30 @@ ATF_TC_BODY(simple_send_fd_msg_cmsg_cloexec, tc) } /* + * Like simple_send_fd but also sets MSG_CMSG_CLOFORK and checks that the + * received file descriptor has the FD_CLOFORK flag set. + */ +ATF_TC_WITHOUT_HEAD(simple_send_fd_msg_cmsg_clofork); +ATF_TC_BODY(simple_send_fd_msg_cmsg_clofork, tc) +{ + struct stat getfd_stat, putfd_stat; + int fd[2], getfd, putfd; + + domainsocketpair(fd); + tempfile(&putfd); + dofstat(putfd, &putfd_stat); + sendfd(fd[0], putfd); + recvfd(fd[1], &getfd, MSG_CMSG_CLOFORK); + dofstat(getfd, &getfd_stat); + samefile(&putfd_stat, &getfd_stat); + ATF_REQUIRE_EQ_MSG(fcntl(getfd, F_GETFD) & FD_CLOFORK, FD_CLOFORK, + "FD_CLOFORK not set on the received file descriptor"); + close(putfd); + close(getfd); + closesocketpair(fd); +} + +/* * Same as simple_send_fd, only close the file reference after sending, so that * the only reference is the descriptor in the UNIX domain socket buffer. 
*/ @@ -1170,6 +1194,7 @@ ATF_TP_ADD_TCS(tp) ATF_TP_ADD_TC(tp, simple_send_fd); ATF_TP_ADD_TC(tp, simple_send_fd_msg_cmsg_cloexec); + ATF_TP_ADD_TC(tp, simple_send_fd_msg_cmsg_clofork); ATF_TP_ADD_TC(tp, send_and_close); ATF_TP_ADD_TC(tp, send_and_cancel); ATF_TP_ADD_TC(tp, send_and_shutdown); diff --git a/tests/sys/net/if_bridge_test.sh b/tests/sys/net/if_bridge_test.sh index 2c6b039048e3..cc0b212aebd2 100755 --- a/tests/sys/net/if_bridge_test.sh +++ b/tests/sys/net/if_bridge_test.sh @@ -829,6 +829,398 @@ member_ifaddrs_vlan_cleanup() vnet_cleanup } +atf_test_case "vlan_pvid" "cleanup" +vlan_pvid_head() +{ + atf_set descr 'bridge with two ports with pvid set' + atf_set require.user root +} + +vlan_pvid_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 20 + + # With VLAN filtering enabled, traffic should be passed. + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Removed the untagged VLAN on one port; traffic should not be passed. 
+ ifconfig ${bridge} -untagged ${epone}a + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_filtered" "cleanup" +vlan_pvid_filtered_head() +{ + atf_set descr 'bridge with two ports with different pvids' + atf_set require.user root +} + +vlan_pvid_filtered_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 30 + + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_filtered_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_tagged" "cleanup" +vlan_pvid_tagged_head() +{ + atf_set descr 'bridge pvid with tagged frames for pvid' + atf_set require.user root +} + +vlan_pvid_tagged_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Create two tagged interfaces on the appropriate VLANs + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 20 + + # Tagged frames should not be passed. 
+ atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_tagged_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_1q" "cleanup" +vlan_pvid_1q_head() +{ + atf_set descr '802.1q tag addition and removal' + atf_set require.user root +} + +vlan_pvid_1q_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Set up one jail with an access port, and the other with a trunk port. + # This forces the bridge to add and remove .1q tags to bridge the + # traffic. + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_1q_cleanup() +{ + vnet_cleanup +} + +# +# Test vlan filtering. 
+# +atf_test_case "vlan_filtering" "cleanup" +vlan_filtering_head() +{ + atf_set descr 'tagged traffic with filtering' + atf_set require.user root +} + +vlan_filtering_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a vlanfilter ${epone}a + ifconfig ${bridge} addm ${eptwo}a vlanfilter ${eptwo}a + + # Right now there are no VLANs on the access list, so everything + # should be blocked. + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Set the untagged vlan on both ports to 20 and make sure traffic is + # still blocked. We intentionally do not pass tagged traffic for the + # untagged vlan. + atf_check -s exit:0 ifconfig ${bridge} untagged ${epone}a 20 + atf_check -s exit:0 ifconfig ${bridge} untagged ${eptwo}a 20 + + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + atf_check -s exit:0 ifconfig ${bridge} -untagged ${epone}a + atf_check -s exit:0 ifconfig ${bridge} -untagged ${eptwo}a + + # Add VLANs 10-30 to the access list; now access should be allowed. + ifconfig ${bridge} +tagged ${epone}a 10-30 + ifconfig ${bridge} +tagged ${eptwo}a 10-30 + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Remove vlan 20 from the access list, now access should be blocked + # again. 
+ ifconfig ${bridge} -tagged ${epone}a 20 + ifconfig ${bridge} -tagged ${eptwo}a 20 + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_filtering_cleanup() +{ + vnet_cleanup +} + +# +# Test the ifconfig 'tagged' option. +# +atf_test_case "vlan_ifconfig_tagged" "cleanup" +vlan_ifconfig_tagged_head() +{ + atf_set descr 'test the ifconfig tagged option' + atf_set require.user root +} + +vlan_ifconfig_tagged_body() +{ + vnet_init + vnet_init_bridge + + ep=$(vnet_mkepair) + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} addm ${ep}a vlanfilter ${ep}a up + ifconfig ${ep}a up + + # To start with, no vlans should be configured. + atf_check -s exit:0 -o not-match:"tagged" ifconfig ${bridge} + + # Add vlans 100-149. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a 100-149 + atf_check -s exit:0 -o match:"tagged 100-149" ifconfig ${bridge} + + # Replace the vlan list with 139-199. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a 139-199 + atf_check -s exit:0 -o match:"tagged 139-199" ifconfig ${bridge} + + # Add vlans 100-170. + atf_check -s exit:0 ifconfig ${bridge} +tagged ${ep}a 100-170 + atf_check -s exit:0 -o match:"tagged 100-199" ifconfig ${bridge} + + # Remove vlans 104, 105, and 150-159 + atf_check -s exit:0 ifconfig ${bridge} -tagged ${ep}a 104,105,150-159 + atf_check -s exit:0 -o match:"tagged 100-103,106-149,160-199" \ + ifconfig ${bridge} + + # Remove the entire vlan list. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a none + atf_check -s exit:0 -o not-match:"tagged" ifconfig ${bridge} + + # Test some invalid vlans sets. + for bad_vlan in -1 0 4096 4097 foo 0-10 4000-5000 foo-40 40-foo; do + atf_check -s exit:1 -e ignore \ + ifconfig ${bridge} tagged "$bad_vlan" + done +} + +vlan_ifconfig_tagged_cleanup() +{ + vnet_cleanup +} + +# +# Test a vlan(4) "SVI" interface on top of a bridge. 
+# +atf_test_case "vlan_svi" "cleanup" +vlan_svi_head() +{ + atf_set descr 'vlan bridge with an SVI' + atf_set require.user root +} + +vlan_svi_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${bridge} addm ${epone}a tagged ${epone}a 20 + + svi=$(vnet_mkvlan) + ifconfig ${svi} vlan 20 vlandev ${bridge} + ifconfig ${svi} inet 192.0.2.2/24 up + + atf_check -s exit:0 -o ignore ping -c 3 -t 1 192.0.2.1 +} + +vlan_svi_cleanup() +{ + vnet_cleanup +} + +# +# Test QinQ (802.1ad). +# +atf_test_case "vlan_qinq" "cleanup" +vlan_qinq_head() +{ + atf_set descr 'vlan filtering with QinQ traffic' + atf_set require.user root +} + +vlan_qinq_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Create a QinQ trunk between the two jails. The outer (provider) tag + # is 5, and the inner tag is 10. + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.5 create vlanproto 802.1ad up + jexec one ifconfig ${epone}b.5.10 create inet 192.0.2.1/24 up + + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.5 create vlanproto 802.1ad up + jexec two ifconfig ${eptwo}b.5.10 create inet 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a vlanfilter ${epone}a + ifconfig ${bridge} addm ${eptwo}a vlanfilter ${eptwo}a + + # Right now there are no VLANs on the access list, so everything + # should be blocked. + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Add the provider tag to the access list; now traffic should be passed. 
+ ifconfig ${bridge} +tagged ${epone}a 5 + ifconfig ${bridge} +tagged ${eptwo}a 5 + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_qinq_cleanup() +{ + vnet_cleanup +} + atf_init_test_cases() { atf_add_test_case "bridge_transmit_ipv4_unicast" @@ -847,4 +1239,12 @@ atf_init_test_cases() atf_add_test_case "member_ifaddrs_enabled" atf_add_test_case "member_ifaddrs_disabled" atf_add_test_case "member_ifaddrs_vlan" + atf_add_test_case "vlan_pvid" + atf_add_test_case "vlan_pvid_1q" + atf_add_test_case "vlan_pvid_filtered" + atf_add_test_case "vlan_pvid_tagged" + atf_add_test_case "vlan_filtering" + atf_add_test_case "vlan_ifconfig_tagged" + atf_add_test_case "vlan_svi" + atf_add_test_case "vlan_qinq" } diff --git a/tests/sys/netpfil/pf/debug.sh b/tests/sys/netpfil/pf/debug.sh index 18a7febfbb5b..404d37ab8932 100644 --- a/tests/sys/netpfil/pf/debug.sh +++ b/tests/sys/netpfil/pf/debug.sh @@ -50,7 +50,57 @@ basic_cleanup() pft_cleanup } +atf_test_case "reset" "cleanup" +reset_head() +{ + atf_set descr 'Test resetting debug level' + atf_set require.user root +} + +reset_body() +{ + pft_init + + vnet_mkjail debug + + # Default is Urgent + atf_check -s exit:0 -o match:'Debug: Urgent' \ + jexec debug pfctl -sa + state_limit=$(jexec debug pfctl -sa | grep 'states.*hard limit' | awk '{ print $4; }') + + # Change defaults + pft_set_rules debug \ + "set limit states 42" + atf_check -s exit:0 -e ignore \ + jexec debug pfctl -x loud + + atf_check -s exit:0 -o match:'Debug: Loud' \ + jexec debug pfctl -sa + new_state_limit=$(jexec debug pfctl -sa | grep 'states.*hard limit' | awk '{ print $4; }') + if [ $state_limit -eq $new_state_limit ]; then + jexec debug pfctl -sa + atf_fail "Failed to change state limit" + fi + + # Reset + atf_check -s exit:0 -o ignore -e ignore \ + jexec debug pfctl -FR + atf_check -s exit:0 -o match:'Debug: Urgent' \ + jexec debug pfctl -sa + new_state_limit=$(jexec 
debug pfctl -sa | grep 'states.*hard limit' | awk '{ print $4; }') + if [ $state_limit -ne $new_state_limit ]; then + jexec debug pfctl -sa + atf_fail "Failed to reset state limit" + fi +} + +reset_cleanup() +{ + pft_cleanup +} + atf_init_test_cases() { atf_add_test_case "basic" + atf_add_test_case "reset" } diff --git a/tools/build/options/WITH_RUN_TESTS b/tools/build/options/WITH_RUN_TESTS new file mode 100644 index 000000000000..91b30522a3d3 --- /dev/null +++ b/tools/build/options/WITH_RUN_TESTS @@ -0,0 +1 @@ +Run tests as part of the build. diff --git a/usr.bin/clang/Makefile b/usr.bin/clang/Makefile index a0cc015590f0..e2debfb8c582 100644 --- a/usr.bin/clang/Makefile +++ b/usr.bin/clang/Makefile @@ -5,6 +5,10 @@ SUBDIR+= clang .endif .if !defined(TOOLS_PREFIX) +.if ${MK_CLANG} != "no" +SUBDIR+= clang-scan-deps +.endif + # LLVM binutils are needed to support features such as LTO, so we build them # by default if clang is enabled. If MK_LLVM_BINUTILS is set, we also use them # as the default binutils (ar,nm,addr2line, etc.). 
diff --git a/usr.bin/clang/clang-scan-deps/Makefile b/usr.bin/clang/clang-scan-deps/Makefile new file mode 100644 index 000000000000..16fecdb88867 --- /dev/null +++ b/usr.bin/clang/clang-scan-deps/Makefile @@ -0,0 +1,26 @@ +.include <src.opts.mk> + +PROG_CXX= clang-scan-deps +MAN= + +SRCDIR= clang/tools/clang-scan-deps +SRCS+= ClangScanDeps.cpp \ + clang-scan-deps-driver.cpp + +.include "${SRCTOP}/lib/clang/clang.pre.mk" + +CFLAGS+= -I${.OBJDIR} +TDFILE= Opts.td +INCFILE= ${TDFILE:.td=.inc} +GENOPT= -gen-opt-parser-defs + +${INCFILE}: ${TDFILE} + ${LLVM_TBLGEN} ${GENOPT} -I ${LLVM_SRCS}/include -d ${.TARGET:C/$/.d/} \ + -o ${.TARGET} ${.ALLSRC} +TGHDRS+= ${INCFILE} + +DEPENDFILES+= ${TGHDRS:C/$/.d/} +DPSRCS+= ${TGHDRS} +CLEANFILES+= ${TGHDRS} ${TGHDRS:C/$/.d/} + +.include "../clang.prog.mk" diff --git a/usr.bin/clang/clang-scan-deps/clang-scan-deps-driver.cpp b/usr.bin/clang/clang-scan-deps/clang-scan-deps-driver.cpp new file mode 100644 index 000000000000..f941cc434ff6 --- /dev/null +++ b/usr.bin/clang/clang-scan-deps/clang-scan-deps-driver.cpp @@ -0,0 +1,18 @@ +//===-- driver-template.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/LLVMDriver.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/InitLLVM.h" + +int clang_scan_deps_main(int argc, char **, const llvm::ToolContext &); + +int main(int argc, char **argv) { + llvm::InitLLVM X(argc, argv); + return clang_scan_deps_main(argc, argv, {argv[0], nullptr, false}); +} diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 9cc22d382de5..17ed43b55c5a 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -46,6 +46,7 @@ #include <sys/ktrace.h> #include <sys/mman.h> #include <sys/ioctl.h> +#include <sys/inotify.h> #include <sys/poll.h> #include <sys/socket.h> #include <sys/stat.h> @@ -105,6 +106,7 @@ static void ktrcsw(struct ktr_csw *); static void ktrcsw_old(struct ktr_csw_old *); static void ktruser(int, void *); static void ktrcaprights(cap_rights_t *); +static void ktrinotify(struct inotify_event *); static void ktritimerval(struct itimerval *it); static void ktrsockaddr(struct sockaddr *); static void ktrsplice(struct splice *); @@ -1861,6 +1863,14 @@ ktrtimeval(struct timeval *tv) } static void +ktrinotify(struct inotify_event *ev) +{ + printf( + "inotify { .wd = %d, .mask = %#x, .cookie = %u, .len = %u, .name = %s }\n", + ev->wd, ev->mask, ev->cookie, ev->len, ev->name); +} + +static void ktritimerval(struct itimerval *it) { @@ -2128,6 +2138,17 @@ ktrstruct(char *buf, size_t buflen) goto invalid; memcpy(&rights, data, datalen); ktrcaprights(&rights); + } else if (strcmp(name, "inotify") == 0) { + struct inotify_event *ev; + + if (datalen < sizeof(struct inotify_event) || + datalen > sizeof(struct inotify_event) + NAME_MAX + 1) + goto invalid; + ev = malloc(datalen); + if (ev == NULL) + err(1, "malloc"); + memcpy(ev, data, datalen); + ktrinotify(ev); } else if (strcmp(name, "itimerval") == 0) { if (datalen != sizeof(struct itimerval)) goto 
invalid; diff --git a/usr.bin/procstat/procstat.1 b/usr.bin/procstat/procstat.1 index cc775ffe133b..1e05e235e619 100644 --- a/usr.bin/procstat/procstat.1 +++ b/usr.bin/procstat/procstat.1 @@ -377,6 +377,8 @@ eventfd fifo .It h shared memory +.It i +inotify descriptor .It k kqueue .It m @@ -862,6 +864,7 @@ procstat: procstat_getprocs() .Xr sockstat 1 , .Xr cap_enter 2 , .Xr cap_rights_limit 2 , +.Xr inotify 2 , .Xr mlock 2 , .Xr mlockall 2 , .Xr libprocstat 3 , diff --git a/usr.bin/procstat/procstat_files.c b/usr.bin/procstat/procstat_files.c index d61cf1693053..aa4850632aa7 100644 --- a/usr.bin/procstat/procstat_files.c +++ b/usr.bin/procstat/procstat_files.c @@ -226,6 +226,10 @@ static struct cap_desc { { CAP_BINDAT, "ba" }, { CAP_CONNECTAT, "ca" }, + /* Inotify descriptor rights. */ + { CAP_INOTIFY_ADD, "ina" }, + { CAP_INOTIFY_RM, "inr" }, + /* Aliases and defines that combine multiple rights. */ { CAP_PREAD, "prd" }, { CAP_PWRITE, "pwr" }, @@ -416,6 +420,11 @@ procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) xo_emit("{eq:fd_type/eventfd}"); break; + case PS_FST_TYPE_INOTIFY: + str = "i"; + xo_emit("{eq:fd_type/inotify}"); + break; + case PS_FST_TYPE_NONE: str = "?"; xo_emit("{eq:fd_type/none}"); diff --git a/usr.bin/sockstat/sockstat.c b/usr.bin/sockstat/sockstat.c index 52243910a31c..1a24ff67c321 100644 --- a/usr.bin/sockstat/sockstat.c +++ b/usr.bin/sockstat/sockstat.c @@ -951,7 +951,7 @@ formataddr(struct sockaddr_storage *ss, char *buf, size_t bufsize) } if (addrstr[0] == '\0') { error = cap_getnameinfo(capnet, sstosa(ss), ss->ss_len, - addrstr, sizeof(addrstr), buf, bufsize, NI_NUMERICHOST); + addrstr, sizeof(addrstr), NULL, 0, NI_NUMERICHOST); if (error) errx(1, "cap_getnameinfo()"); } diff --git a/usr.bin/truss/syscall.h b/usr.bin/truss/syscall.h index d79ef882cff0..47d973326dfb 100644 --- a/usr.bin/truss/syscall.h +++ b/usr.bin/truss/syscall.h @@ -99,6 +99,7 @@ enum Argtype { Getfsstatmode, Idtype, Ioctl, + Inotifyflags, Itimerwhich, 
Kldsymcmd, Kldunloadflags, diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index 47d6aef8f6ff..656d642e1f19 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -31,7 +31,6 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> /* * This file has routines used to print out system calls and their * arguments. @@ -316,6 +315,9 @@ static const struct syscall_decode decoded_syscalls[] = { { Ptr | OUT, 3 }, { Ptr | OUT, 4 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, + { .name = "inotify_add_watch_at", .ret_type = 1, .nargs = 4, + .args = { { Int, 0 }, { Atfd, 1 }, { Name | IN, 2 }, + { Inotifyflags, 3 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } }, { .name = "kevent", .ret_type = 1, .nargs = 6, @@ -2447,6 +2449,9 @@ print_arg(struct syscall_arg *sc, syscallarg_t *args, syscallarg_t *retval, print_integer_arg(sysdecode_getfsstat_mode, fp, args[sc->offset]); break; + case Inotifyflags: + print_mask_arg(sysdecode_inotifyflags, fp, args[sc->offset]); + break; case Itimerwhich: print_integer_arg(sysdecode_itimer, fp, args[sc->offset]); break; diff --git a/usr.sbin/mfiutil/Makefile b/usr.sbin/mfiutil/Makefile index da1c9249f8cd..85b66d4b6f49 100644 --- a/usr.sbin/mfiutil/Makefile +++ b/usr.sbin/mfiutil/Makefile @@ -9,7 +9,7 @@ MLINKS= mfiutil.8 mrsasutil.8 CFLAGS.gcc+= -fno-builtin-strftime -LIBADD= util +LIBADD= sbuf util # Here be dragons .ifdef DEBUG diff --git a/usr.sbin/mfiutil/mfi_bbu.c b/usr.sbin/mfiutil/mfi_bbu.c index e97227d47c70..3e78e791dfc2 100644 --- a/usr.sbin/mfiutil/mfi_bbu.c +++ b/usr.sbin/mfiutil/mfi_bbu.c @@ -40,41 +40,23 @@ /* The autolearn period is given in seconds. 
*/ void -mfi_autolearn_period(uint32_t period, char *buf, size_t sz) +mfi_autolearn_period(FILE *fp, uint32_t period) { unsigned int d, h; - char *tmp; d = period / (24 * 3600); h = (period % (24 * 3600)) / 3600; - tmp = buf; if (d != 0) { - int fmt_len; - fmt_len = snprintf(buf, sz, "%u day%s", d, d == 1 ? "" : "s"); - if (fmt_len < 0) { - *buf = 0; - return; - } - if ((size_t)fmt_len >= sz) { - return; - } - tmp += fmt_len; - sz -= tmp - buf; - if (h != 0) { - fmt_len = snprintf(tmp, sz, ", "); - if (fmt_len < 0 || (size_t)fmt_len >= sz) { - return; - } - tmp += fmt_len; - sz -= 2; - } + fprintf(fp, "%u day%s", d, d == 1 ? "" : "s"); + if (h != 0) + fprintf(fp, ", "); } if (h != 0) - snprintf(tmp, sz, "%u hour%s", h, h == 1 ? "" : "s"); + fprintf(fp, "%u hour%s", h, h == 1 ? "" : "s"); if (d == 0 && h == 0) - snprintf(tmp, sz, "less than 1 hour"); + fprintf(fp, "less than 1 hour"); } /* The time to the next relearn is given in seconds since 1/1/2000. */ @@ -89,28 +71,28 @@ mfi_next_learn_time(uint32_t next_learn_time, char *buf, size_t sz) tm.tm_year = 100; basetime = timegm(&tm); basetime += (time_t)next_learn_time; - len = snprintf(buf, sz, "%s", ctime(&basetime)); - if (len > 0) + len = strlcpy(buf, ctime(&basetime), sz); + if (len < sz) /* Get rid of the newline added by ctime(3). 
*/ buf[len - 1] = '\0'; } void -mfi_autolearn_mode(uint8_t mode, char *buf, size_t sz) +mfi_autolearn_mode(FILE *fp, uint8_t mode) { switch (mode) { case 0: - snprintf(buf, sz, "enabled"); + fprintf(fp, "enabled"); break; case 1: - snprintf(buf, sz, "disabled"); + fprintf(fp, "disabled"); break; case 2: - snprintf(buf, sz, "warn via event"); + fprintf(fp, "warn via event"); break; default: - snprintf(buf, sz, "mode 0x%02x", mode); + fprintf(fp, "mode 0x%02x", mode); break; } } diff --git a/usr.sbin/mfiutil/mfi_drive.c b/usr.sbin/mfiutil/mfi_drive.c index e8e945c566c4..c7c5aeb02f14 100644 --- a/usr.sbin/mfiutil/mfi_drive.c +++ b/usr.sbin/mfiutil/mfi_drive.c @@ -31,6 +31,7 @@ #include <sys/types.h> #include <sys/errno.h> +#include <sys/sbuf.h> #include <ctype.h> #include <err.h> #include <fcntl.h> @@ -56,9 +57,9 @@ const char * mfi_drive_name(struct mfi_pd_info *pinfo, uint16_t device_id, uint32_t def) { struct mfi_pd_info info; + struct sbuf sb; static char buf[16]; - char *p; - int error, fd, len; + int fd; if ((def & MFI_DNAME_HONOR_OPTS) != 0 && (mfi_opts & (MFI_DNAME_ES|MFI_DNAME_DEVICE_ID)) != 0) @@ -89,40 +90,29 @@ mfi_drive_name(struct mfi_pd_info *pinfo, uint16_t device_id, uint32_t def) pinfo = &info; } - p = buf; - len = sizeof(buf); + sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); if (def & MFI_DNAME_DEVICE_ID) { if (device_id == 0xffff) - error = snprintf(p, len, "MISSING"); + sbuf_printf(&sb, "MISSING"); else - error = snprintf(p, len, "%2u", device_id); - if (error >= 0) { - p += error; - len -= error; - } + sbuf_printf(&sb, "%2u", device_id); } if ((def & (MFI_DNAME_ES|MFI_DNAME_DEVICE_ID)) == - (MFI_DNAME_ES|MFI_DNAME_DEVICE_ID) && len >= 2) { - *p++ = ' '; - len--; - *p = '\0'; - len--; + (MFI_DNAME_ES|MFI_DNAME_DEVICE_ID)) { + sbuf_cat(&sb, " "); } if (def & MFI_DNAME_ES) { if (pinfo->encl_device_id == 0xffff) - error = snprintf(p, len, "S%u", + sbuf_printf(&sb, "S%u", pinfo->slot_number); else if (pinfo->encl_device_id == pinfo->ref.v.device_id) 
- error = snprintf(p, len, "E%u", + sbuf_printf(&sb, "E%u", pinfo->encl_index); else - error = snprintf(p, len, "E%u:S%u", + sbuf_printf(&sb, "E%u:S%u", pinfo->encl_index, pinfo->slot_number); - if (error >= 0) { - p += error; - len -= error; - } } + sbuf_finish(&sb); return (buf); } diff --git a/usr.sbin/mfiutil/mfi_show.c b/usr.sbin/mfiutil/mfi_show.c index bf85c8b82d69..2d413f2a46b4 100644 --- a/usr.sbin/mfiutil/mfi_show.c +++ b/usr.sbin/mfiutil/mfi_show.c @@ -218,8 +218,9 @@ show_battery(int ac, char **av __unused) printf(" Current Voltage: %d mV\n", stat.voltage); printf(" Temperature: %d C\n", stat.temperature); if (show_props) { - mfi_autolearn_period(props.auto_learn_period, buf, sizeof(buf)); - printf(" Autolearn period: %s\n", buf); + printf(" Autolearn period: "); + mfi_autolearn_period(stdout, props.auto_learn_period); + printf("\n"); if (props.auto_learn_mode != 0) snprintf(buf, sizeof(buf), "never"); else @@ -229,8 +230,9 @@ show_battery(int ac, char **av __unused) printf(" Learn delay interval: %u hour%s\n", props.learn_delay_interval, props.learn_delay_interval != 1 ? 
"s" : ""); - mfi_autolearn_mode(props.auto_learn_mode, buf, sizeof(buf)); - printf(" Autolearn mode: %s\n", buf); + printf(" Autolearn mode: "); + mfi_autolearn_mode(stdout, props.auto_learn_mode); + printf("\n"); if (props.bbu_mode != 0) printf(" BBU Mode: %d\n", props.bbu_mode); } diff --git a/usr.sbin/mfiutil/mfiutil.h b/usr.sbin/mfiutil/mfiutil.h index 34b423098862..86b03998163c 100644 --- a/usr.sbin/mfiutil/mfiutil.h +++ b/usr.sbin/mfiutil/mfiutil.h @@ -175,9 +175,9 @@ int mfi_bbu_get_props(int fd, struct mfi_bbu_properties *props, uint8_t *statusp); int mfi_bbu_set_props(int fd, struct mfi_bbu_properties *props, uint8_t *statusp); -void mfi_autolearn_period(uint32_t, char *, size_t); +void mfi_autolearn_period(FILE *, uint32_t); void mfi_next_learn_time(uint32_t, char *, size_t); -void mfi_autolearn_mode(uint8_t, char *, size_t); +void mfi_autolearn_mode(FILE *, uint8_t); int get_mfi_unit(const char *dev); char *get_mfi_type(const char *dev); diff --git a/usr.sbin/rip6query/rip6query.8 b/usr.sbin/rip6query/rip6query.8 index 856a59138bc1..92e49f5ade58 100644 --- a/usr.sbin/rip6query/rip6query.8 +++ b/usr.sbin/rip6query/rip6query.8 @@ -29,13 +29,19 @@ .\" .\" $Id: rip6query.8,v 1.2 2000/01/19 06:24:55 itojun Exp $ .\" -.Dd October 7, 1999 +.Dd May 20, 2025 .Dt RIP6QUERY 8 .Os .Sh NAME .Nm rip6query .Nd RIPng debugging tool .\" +.Sh DEPRECATION NOTICE +The +.Nm +utility is deprecated and will be removed in +.Fx 16.0 . +.\" .Sh SYNOPSIS .Nm .Op Fl I Ar interface diff --git a/usr.sbin/route6d/route6d.8 b/usr.sbin/route6d/route6d.8 index 3a7bc8721923..e9ad3266ba26 100644 --- a/usr.sbin/route6d/route6d.8 +++ b/usr.sbin/route6d/route6d.8 @@ -14,12 +14,17 @@ .\" LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR .\" A PARTICULAR PURPOSE. .\" -.Dd November 18, 2012 +.Dd May 20, 2025 .Dt ROUTE6D 8 .Os .Sh NAME .Nm route6d .Nd RIP6 Routing Daemon +.Sh DEPRECATION NOTICE +The +.Nm +utility is deprecated and will be removed in +.Fx 16.0 . 
.Sh SYNOPSIS .Nm .Op Fl adDhlnqsS |