248 files changed, 10249 insertions, 4239 deletions
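The headline change in this commit is the new inotify(2) family of system calls: the diff below adds the libc wrappers (lib/libc/gen/inotify.c), the inotify.2 manual page, and a full libxo/kqueue-based demonstration program installed at /usr/share/examples/inotify/inotify.c. For orientation before reading the patch, here is a minimal sketch of the documented interface. It is not part of the commit; it assumes only the calls (inotify_init1, inotify_add_watch) and the struct inotify_event fields (wd, mask, len, name) described in the added inotify.2 page, and the shipped example program remains the canonical demonstration.

/*
 * Minimal usage sketch of the inotify(2) interface added by this commit.
 * Watches one directory for file creation and deletion and prints the
 * records returned by read(2).
 */
#include <sys/inotify.h>

#include <err.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	struct inotify_event *ev;
	size_t bufsz;
	ssize_t n;
	char *buf, *p;
	int fd, wd;

	if (argc != 2)
		errx(1, "usage: watch <directory>");

	/* One inotify descriptor queues events for all watches added to it. */
	fd = inotify_init1(IN_CLOEXEC);
	if (fd < 0)
		err(1, "inotify_init1");

	/* Watch the named directory for creations and deletions within it. */
	wd = inotify_add_watch(fd, argv[1], IN_CREATE | IN_DELETE);
	if (wd < 0)
		err(1, "inotify_add_watch");

	/* A buffer large enough for one record plus the longest file name. */
	bufsz = sizeof(struct inotify_event) + NAME_MAX + 1;
	buf = malloc(bufsz);
	if (buf == NULL)
		err(1, "malloc");

	/* Each read(2) returns one or more variable-length records. */
	while ((n = read(fd, buf, bufsz)) > 0) {
		for (p = buf; p < buf + n; p += sizeof(*ev) + ev->len) {
			ev = (struct inotify_event *)(void *)p;
			printf("wd %d mask %#x name %s\n", ev->wd,
			    (unsigned)ev->mask,
			    ev->len > 0 ? ev->name : "(watched object)");
		}
	}
	free(buf);
	return (0);
}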
diff --git a/.cirrus.yml b/.cirrus.yml index d6c4df7a9776..b03fac2b26b5 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -194,11 +194,13 @@ precommit_task: matrix: - name: amd64 smoke test using internal ci systems only_if: $CIRRUS_REPO_FULL_NAME != 'freebsd/freebsd-src' || $CIRRUS_BRANCH =~ 'pull/.*' + trigger_type: manual env: TARGET: amd64 TARGET_ARCH: amd64 - name: aarch64 smoke test using internal ci systems only_if: $CIRRUS_REPO_FULL_NAME != 'freebsd/freebsd-src' || $CIRRUS_BRANCH =~ 'pull/.*' + trigger_type: manual env: TARGET: arm64 TARGET_ARCH: aarch64 @@ -10,6 +10,9 @@ newline. Entries should be separated by a newline. Changes to this file should not be MFCed. +f1f230439fa4: + FreeBSD now implements the inotify(2) family of system calls. + 50e733f19b37, 171f66b0c2ca: These commits helped improve utilization of NFSv4.1/4.2 delegations. The changes are only used when the NFSv4 @@ -27,6 +27,10 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 15.x IS SLOW: world, or to merely disable the most expensive debugging functionality at runtime, run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20250704: + LinuxKPI device.h and acpi changes effecting drivers and drm-kmod. + Bump __FreeBSD_version 1500050 to be able to detect these changes. + 20250630: Commits 171f66b0c2ca and 8e2a90ac8089 changed the internal api between nfscommon.ko and the other nfs modules. diff --git a/etc/mtree/BSD.usr.dist b/etc/mtree/BSD.usr.dist index 97b555e50dc1..ffdd82ae9911 100644 --- a/etc/mtree/BSD.usr.dist +++ b/etc/mtree/BSD.usr.dist @@ -304,6 +304,8 @@ .. indent .. + inotify + .. ipfilter .. ipfw diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc index ad13aaa65621..4d064d18d36e 100644 --- a/lib/libc/gen/Makefile.inc +++ b/lib/libc/gen/Makefile.inc @@ -89,6 +89,7 @@ SRCS+= \ glob.c \ glob-compat11.c \ initgroups.c \ + inotify.c \ isatty.c \ isinf.c \ isnan.c \ diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map index 8faecf4b3048..50dbf3425964 100644 --- a/lib/libc/gen/Symbol.map +++ b/lib/libc/gen/Symbol.map @@ -464,6 +464,9 @@ FBSD_1.8 { fdscandir_b; fts_open_b; glob_b; + inotify_add_watch; + inotify_init; + inotify_init1; psiginfo; rtld_get_var; rtld_set_var; diff --git a/lib/libc/gen/inotify.c b/lib/libc/gen/inotify.c new file mode 100644 index 000000000000..7ce53aaccd58 --- /dev/null +++ b/lib/libc/gen/inotify.c @@ -0,0 +1,48 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +#include "namespace.h" +#include <sys/fcntl.h> +#include <sys/inotify.h> +#include <sys/specialfd.h> +#include "un-namespace.h" +#include "libc_private.h" + +/* + * Provide compatibility with libinotify, which uses different values for these + * flags. 
+ */ +#define IN_NONBLOCK_OLD 0x80000 +#define IN_CLOEXEC_OLD 0x00800 + +int +inotify_add_watch(int fd, const char *pathname, uint32_t mask) +{ + return (inotify_add_watch_at(fd, AT_FDCWD, pathname, mask)); +} + +int +inotify_init1(int flags) +{ + struct specialfd_inotify args; + + if ((flags & IN_NONBLOCK_OLD) != 0) { + flags &= ~IN_NONBLOCK_OLD; + flags |= IN_NONBLOCK; + } + if ((flags & IN_CLOEXEC_OLD) != 0) { + flags &= ~IN_CLOEXEC_OLD; + flags |= IN_CLOEXEC; + } + args.flags = flags; + return (__sys___specialfd(SPECIALFD_INOTIFY, &args, sizeof(args))); +} + +int +inotify_init(void) +{ + return (inotify_init1(0)); +} diff --git a/lib/libifconfig/libifconfig.h b/lib/libifconfig/libifconfig.h index 8d5ca01b0ce6..fc835485a51e 100644 --- a/lib/libifconfig/libifconfig.h +++ b/lib/libifconfig/libifconfig.h @@ -29,6 +29,7 @@ #include <sys/types.h> #include <net/if.h> +#include <net/if_bridgevar.h> /* for ifbvlan_set_t */ #include <netinet/in.h> #include <netinet/ip_carp.h> @@ -64,6 +65,7 @@ struct lagg_reqport; struct ifconfig_bridge_status { struct ifbropreq *params; /**< current operational parameters */ struct ifbreq *members; /**< list of bridge members */ + ifbvlan_set_t *member_vlans; /**< bridge member vlan sets */ size_t members_count; /**< how many member interfaces */ uint32_t cache_size; /**< size of address cache */ uint32_t cache_lifetime; /**< address cache entry lifetime */ diff --git a/lib/libifconfig/libifconfig_bridge.c b/lib/libifconfig/libifconfig_bridge.c index 2a9bbc35858b..b4a920f488c5 100644 --- a/lib/libifconfig/libifconfig_bridge.c +++ b/lib/libifconfig/libifconfig_bridge.c @@ -66,40 +66,37 @@ ifconfig_bridge_get_bridge_status(ifconfig_handle_t *h, { struct ifbifconf members; struct ifbrparam cache_param; - struct _ifconfig_bridge_status *bridge; - char *buf; + struct _ifconfig_bridge_status *bridge = NULL; + char *buf = NULL; + members.ifbic_buf = NULL; *bridgep = NULL; bridge = calloc(1, sizeof(struct _ifconfig_bridge_status)); if (bridge == NULL) { h->error.errtype = OTHER; h->error.errcode = ENOMEM; - return (-1); + goto err; } bridge->inner.params = &bridge->params; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGCACHE, &cache_param, sizeof(cache_param), false) != 0) { - free(bridge); - return (-1); + goto err; } bridge->inner.cache_size = cache_param.ifbrp_csize; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGTO, &cache_param, sizeof(cache_param), false) != 0) { - free(bridge); - return (-1); + goto err; } bridge->inner.cache_lifetime = cache_param.ifbrp_ctime; if (ifconfig_bridge_ioctlwrap(h, name, BRDGPARAM, &bridge->params, sizeof(bridge->params), false) != 0) { - free(bridge); - return (-1); + goto err; } - members.ifbic_buf = NULL; for (size_t len = 8192; (buf = realloc(members.ifbic_buf, len)) != NULL; len *= 2) { @@ -107,27 +104,52 @@ ifconfig_bridge_get_bridge_status(ifconfig_handle_t *h, members.ifbic_len = len; if (ifconfig_bridge_ioctlwrap(h, name, BRDGGIFS, &members, sizeof(members), false) != 0) { - free(buf); - free(bridge); - return (-1); + goto err; } if ((members.ifbic_len + sizeof(*members.ifbic_req)) < len) break; } if (buf == NULL) { - free(members.ifbic_buf); - free(bridge); h->error.errtype = OTHER; h->error.errcode = ENOMEM; - return (-1); + goto err; } bridge->inner.members = members.ifbic_req; bridge->inner.members_count = members.ifbic_len / sizeof(*members.ifbic_req); + bridge->inner.member_vlans = calloc(bridge->inner.members_count, + sizeof(ifbvlan_set_t)); + if (bridge->inner.member_vlans == NULL) { + h->error.errtype = OTHER; + 
h->error.errcode = ENOMEM; + goto err; + } + for (size_t i = 0; i < bridge->inner.members_count; ++i) { + struct ifbif_vlan_req vreq; + memset(&vreq, 0, sizeof(vreq)); + strlcpy(vreq.bv_ifname, bridge->inner.members[i].ifbr_ifsname, + sizeof(vreq.bv_ifname)); + + if (ifconfig_bridge_ioctlwrap(h, name, BRDGGIFVLANSET, &vreq, + sizeof(vreq), false) != 0) { + goto err; + } + + __BIT_COPY(BRVLAN_SETSIZE, &vreq.bv_set, + &bridge->inner.member_vlans[i]); + } + *bridgep = &bridge->inner; return (0); + +err: + free(members.ifbic_buf); + if (bridge) + free(bridge->inner.member_vlans); + free(bridge); + return (-1); } void diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c index 29f464ef6414..eb8137f6c76f 100644 --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -625,6 +625,10 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap type = PS_FST_TYPE_EVENTFD; data = file.f_data; break; + case DTYPE_INOTIFY: + type = PS_FST_TYPE_INOTIFY; + data = file.f_data; + break; default: continue; } @@ -717,6 +721,7 @@ kinfo_type2fst(int kftype) { KF_TYPE_SOCKET, PS_FST_TYPE_SOCKET }, { KF_TYPE_VNODE, PS_FST_TYPE_VNODE }, { KF_TYPE_EVENTFD, PS_FST_TYPE_EVENTFD }, + { KF_TYPE_INOTIFY, PS_FST_TYPE_INOTIFY }, { KF_TYPE_UNKNOWN, PS_FST_TYPE_UNKNOWN } }; #define NKFTYPES (sizeof(kftypes2fst) / sizeof(*kftypes2fst)) diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h index 0e9a4214414c..548747f90171 100644 --- a/lib/libprocstat/libprocstat.h +++ b/lib/libprocstat/libprocstat.h @@ -71,6 +71,7 @@ #define PS_FST_TYPE_PROCDESC 13 #define PS_FST_TYPE_DEV 14 #define PS_FST_TYPE_EVENTFD 15 +#define PS_FST_TYPE_INOTIFY 16 /* * Special descriptor numbers. diff --git a/lib/libsys/Makefile.sys b/lib/libsys/Makefile.sys index 491c765e9416..3eb4bf85153d 100644 --- a/lib/libsys/Makefile.sys +++ b/lib/libsys/Makefile.sys @@ -224,6 +224,7 @@ MAN+= abort2.2 \ getsockopt.2 \ gettimeofday.2 \ getuid.2 \ + inotify.2 \ intro.2 \ ioctl.2 \ issetugid.2 \ @@ -448,6 +449,11 @@ MLINKS+=getrlimit.2 setrlimit.2 MLINKS+=getsockopt.2 setsockopt.2 MLINKS+=gettimeofday.2 settimeofday.2 MLINKS+=getuid.2 geteuid.2 +MLINKS+=inotify.2 inotify_init.2 \ + inotify.2 inotify_init1.2 \ + inotify.2 inotify_add_watch.2 \ + inotify.2 inotify_add_watch_at.2 \ + inotify.2 inotify_rm_watch.2 MLINKS+=intro.2 errno.2 MLINKS+=jail.2 jail_attach.2 \ jail.2 jail_get.2 \ diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map index 7fac1ed6160d..45e0160100af 100644 --- a/lib/libsys/Symbol.sys.map +++ b/lib/libsys/Symbol.sys.map @@ -381,6 +381,8 @@ FBSD_1.8 { exterrctl; fchroot; getrlimitusage; + inotify_add_watch_at; + inotify_rm_watch; kcmp; setcred; }; diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h index e2a8f2253814..1799906eb885 100644 --- a/lib/libsys/_libsys.h +++ b/lib/libsys/_libsys.h @@ -466,6 +466,8 @@ typedef int (__sys_getrlimitusage_t)(u_int, int, rlim_t *); typedef int (__sys_fchroot_t)(int); typedef int (__sys_setcred_t)(u_int, const struct setcred *, size_t); typedef int (__sys_exterrctl_t)(u_int, u_int, void *); +typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t); +typedef int (__sys_inotify_rm_watch_t)(int, int); void __sys_exit(int rval); int __sys_fork(void); @@ -868,6 +870,8 @@ int __sys_getrlimitusage(u_int which, int flags, rlim_t * res); int __sys_fchroot(int fd); int __sys_setcred(u_int flags, const struct setcred * wcred, size_t size); int __sys_exterrctl(u_int op, u_int flags, void * ptr); +int 
__sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask); +int __sys_inotify_rm_watch(int fd, int wd); __END_DECLS #endif /* __LIBSYS_H_ */ diff --git a/lib/libsys/inotify.2 b/lib/libsys/inotify.2 new file mode 100644 index 000000000000..f94509d6f59e --- /dev/null +++ b/lib/libsys/inotify.2 @@ -0,0 +1,379 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Klara, Inc. +.\" +.Dd May 19, 2025 +.Dt INOTIFY 2 +.Os +.Sh NAME +.Nm inotify_init , +.Nm inotify_init1 , +.Nm inotify_add_watch , +.Nm inotify_add_watch_at , +.Nm inotify_rm_watch +.Nd monitor file system events +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/inotify.h +.Ft int +.Fo inotify_init +.Fc +.Ft int +.Fo inotify_init1 +.Fa "int flags" +.Fc +.Ft int +.Fo inotify_add_watch +.Fa "int fd" +.Fa "const char *pathname" +.Fa "uint32_t mask" +.Fc +.Ft int +.Fo inotify_add_watch_at +.Fa "int fd" +.Fa "int dfd" +.Fa "const char *pathname" +.Fa "uint32_t mask" +.Fc +.Ft int +.Fo inotify_rm_watch +.Fa "int fd" +.Fa "uint32_t wd" +.Fc +.Bd -literal +struct inotify_event { + int wd; /* Watch descriptor */ + uint32_t mask; /* Event and flags */ + uint32_t cookie; /* Unique ID which links rename events */ + uint32_t len; /* Name field size, including nul bytes */ + char name[0]; /* Filename (nul-terminated) */ +}; +.Ed +.Sh DESCRIPTION +The inotify system calls provide an interface to monitor file system events. +They aim to be compatible with the Linux inotify interface. +The provided functionality is similar to the +.Dv EVFILT_VNODE +filter of the +.Xr kevent 2 +system call, but further allows monitoring of a directory without needing to +open each object in that directory. +This avoids races and reduces the number of file descriptors needed to monitor +a large file hierarchy. +.Pp +inotify allows one or more file system objects, generally files or directories, +to be watched for events, such as file open or close. +Watched objects are associated with a file descriptor returned +by +.Fn inotify_init +or +.Fn inotify_init1 . +When an event occurs, a record describing the event becomes available for +reading from the inotify file descriptor. +Each inotify descriptor thus refers to a queue of events waiting to be read. +inotify descriptors are inherited across +.Xr fork 2 +calls and may be passed to other processes via +.Xr unix 4 +sockets. +.Pp +The +.Fn inotify_init1 +system call accepts two flags. +The +.Dv IN_NONBLOCK +flag causes the inotify descriptor to be opened in non-blocking mode, such that +.Xr read 2 +calls will not block if no records are available to consume, and will instead +return +.Er EWOULDBLOCK . +The +.Dv IN_CLOEXEC +flag causes the inotify descriptor to be closed automatically when +.Xr execve 2 +is called. +.Pp +To watch a file or directory, the +.Fn inotify_add_watch +or +.Fn inotify_add_watch_at +system calls must be used. +They take a path and a mask of events to watch for, and return a +.Dq watch descriptor , +a non-negative integer which uniquely identifies the watched object within the +inotify descriptor. +.Pp +The +.Fn inotify_rm_watch +system call removes a watch from an inotify descriptor. +.Pp +When watching a directory, objects within the directory are monitored for events +as well as the directory itself. +A record describing an inotify event consists of a +.Dq struct inotify_event +followed by the name of the object in the directory being watched. +If the watched object itself generates an event, no name is present. 
+Extra nul bytes may follow the file name in order to provide alignment for a +subsequent record. +.Pp +The following events are defined: +.Bl -tag -width IN_CLOSE_NOWRITE +.It Dv IN_ACCESS +A file's contents were accessed, e.g., by +.Xr read 2 +.Xr copy_file_range 2 , +.Xr sendfile 2 , +or +.Xr getdirentries 2 . +.It Dv IN_ATTRIB +A file's metadata was changed, e.g., by +.Xr chmod 2 +or +.Xr unlink 2 . +.It Dv IN_CLOSE_WRITE +A file that was previously opened for writing was closed. +.It Dv IN_CLOSE_NOWRITE +A file that was previously opened read-only was closed. +.It Dv IN_CREATE +A file within a watched directory was created, e.g., by +.Xr open 2 , +.Xr mkdir 2 , +.Xr symlink 2 , +.Xr mknod 2 , +or +.Xr bind 2 . +.It Dv IN_DELETE +A file or directory within a watched directory was removed. +.It Dv IN_DELETE_SELF +The watched file or directory itself was deleted. +This event is generated only when the link count of the file drops +to zero. +.It Dv IN_MODIFY +A file's contents were modified, e.g., by +.Xr write 2 +or +.Xr copy_file_range 2 . +.It Dv IN_MOVE_SELF +The watched file or directory itself was renamed. +.It Dv IN_MOVED_FROM +A file or directory was moved from a watched directory. +.It Dv IN_MOVED_TO +A file or directory was moved into a watched directory. +A +.Xr rename 2 +call thus may generate two events, one for the old name and one for the new +name. +These are linked together by the +.Ar cookie +field in the inotify record, which can be compared to link the two records +to the same event. +.It Dv IN_OPEN +A file was opened. +.El +.Pp +Some additional flags may be set in inotify event records: +.Bl -tag -width IN_Q_OVERFLOW +.It Dv IN_IGNORED +When a watch is removed from a file, for example because it was created with the +.Dv IN_ONESHOT +flag, the file was deleted, or the watch was explicitly removed with +.Xr inotify_rm_watch 2 , +an event with this mask is generated to indicate that the watch will not +generate any more events. +Once this event is generated, the watch is automatically removed, and in +particular should not be removed manually with +.Xr inotify_rm_watch 2 . +.It Dv IN_ISDIR +When the subject of an event is a directory, this flag is set in the +.Ar mask +.It Dv IN_Q_OVERFLOW +One or more events were dropped, for example because of a kernel memory allocation +failure or because the event queue size hit a limit. +.It Dv IN_UNMOUNT +The filesystem containing the watched object was unmounted. +.El +.Pp +A number of flags may also be specified in the +.Ar mask +given to +.Fn inotify_add_watch +and +.Fn inotify_add_watch_at : +.Bl -tag -width IN_DONT_FOLLOW +.It Dv IN_DONT_FOLLOW +If +.Ar pathname +is a symbolic link, do not follow it. +.It Dv IN_EXCL_UNLINK +This currently has no effect, see the +.Sx BUGS +section. +.In Dv IN_MASK_ADD +When adding a watch to an object, and that object is already watched by the +same inotify descriptor, by default the mask of the existing watch is +overwritten. +When +.Dv IN_MASK_ADD +is specified, the mask of the existing watch is instead logically ORed with +the new mask. +.In Dv IN_MASK_CREATE +When +.Fn inotify_add watch +is used to add a watch to an object, +.Dv IN_MASK_CREATE +is specified, and that object is already watched by the same inotify descriptor, +return an error instead of updating the existing watch. +.In Dv IN_ONESHOT +Monitor the object for a single event, after which the watch is automatically +removed. +As part of removal, a +.Dv IN_IGNORED +event is generated. 
+.In Dv IN_ONLYDIR +When creating a watch, fail with +.Er ENOTDIR +if the path does not refer to a directory. +.El +.Sh SYSCTL VARIABLES +The following variables are available as both +.Xr sysctl 8 +variables and +.Xr loader 8 +tunables: +.Bl -tag -width 15 +.It Va vfs.inotify.max_events +The maximum number of inotify records that can be queued for a single +inotify descriptor. +Records in excess of this limit are discarded, and a single event with +mask equal to +.Dv IN_Q_OVERFLOW +will be present in the queue. +.It Va vfs.inotify.max_user_instances +The maximum number of inotify descriptors that can be created by a single +user. +.It Va vfs.inotify.max_user_watches +The maximum number of inotify watches per user. +.El +.Sh EXAMPLES +See the example program in +.Pa /usr/share/examples/inotify/inotify.c . +.Sh ERRORS +The +.Fn inotify_init +and +.Fn inotify_init1 +functions will fail if: +.Bl -tag -width Er +.It Bq Er ENFILE +The system limit on the total number of open files has been reached. +.It Bq Er EMFILE +A per-process limit on the number of open files has been reached. +.It Bq Er EMFILE +The system limit on the number of inotify descriptors has been reached. +.It Bq Er EINVAL +An unrecognized flag was passed to +.Fn inotify_init1 . +.El +.Pp +The +.Fn inotify_add_watch +and +.Fn inotify_add_watch_at +system calls will fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Ar fd +parameter is not a valid file descriptor. +.It Bq Er EINVAL +The +.Ar fd +parameter is not an inotify descriptor. +.It Bq Er EINVAL +The +.Ar mask +parameter does not specify an event, or +the +.Dv IN_MASK_CREATE +and +.Dv IN_MASK_ADD +flags are both set, or an unrecognized flag was passed. +.It Bq Er ENOTDIR +The +.Ar pathname +parameter refers to a file that is not a directory, and the +.Dv IN_ONLYDIR +flag was specified. +.It Bq Er ENOSPC +The per-user limit on the total number of inotify watches has been reached. +.It Bq Er ECAPMODE +The process is in capability mode and +.Fn inotify_add_watch +was called, or +.Fn inotify_add_watch_at +was called with +.Dv AT_FDCWD +as the directory file descriptor +.Ar dfd . +.It Bq Er ENOTCAPABLE +The process is in capability mode and +.Ar pathname +contains a +.Dq .. +component leading to a directory outside the directory hierarchy specified +by +.Ar dfd . +.El +.Pp +The +.Fn inotify_rm_watch +system call will fail if: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Ar fd +parameter is not a valid file descriptor. +.It Bq Er EINVAL +The +.Ar fd +parameter is not an inotify descriptor. +.It Bq Er EINVAL +The +.Ar wd +parameter is not a valid watch descriptor. +.El +.Sh SEE ALSO +.Xr kevent 2 , +.Xr capsicum 4 +.Sh STANDARDS +The +.Nm +interface originates from Linux and is non-standard. +This implementation aims to be compatible with that of Linux and is based +on the documentation available at +.Pa https://man7.org/linux/man-pages/man7/inotify.7.html . +.Sh HISTORY +The inotify system calls first appeared in +.Fx 15.0 . +.Sh BUGS +If a file in a watched directory has multiple hard links, +an access via any hard link for that file will generate an event, even +if the accessed link belongs to an unwatched directory. +This is not the case for the Linux implementation, where only accesses +via the hard link in the watched directory will generate an event. +.Pp +If a watched directory contains multiple hard links of a file, an event +on one of the hard links will generate an inotify record for each link +in the directory. 
+.Pp +When a file is unlinked, no more events will be generated for that file, +even if it continues to be accessed. +By default, the Linux implementation will continue to generate events in +this case. +Thus, the +.Fx +implementation behaves as though +.Dv IN_EXCL_UNLINK +is always set. diff --git a/lib/libsys/pathconf.2 b/lib/libsys/pathconf.2 index 4c562b9c2c9a..79ac8310000d 100644 --- a/lib/libsys/pathconf.2 +++ b/lib/libsys/pathconf.2 @@ -25,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd May 3, 2025 +.Dd July 5, 2025 .Dt PATHCONF 2 .Os .Sh NAME @@ -179,6 +179,14 @@ otherwise 0. Return 1 if named attributes are enabled for the file system, otherwise 0. .It Li _PC_HAS_NAMEDATTR Return 1 if one or more named attributes exist for the file, otherwise 0. +.It Li _PC_HAS_HIDDENSYSTEM +Return 1 if both +.Dv UF_HIDDEN +and +.Dv UF_SYSTEM +flags can be set by +.Xr chflags 2 , +otherwise 0. .El .Sh RETURN VALUES If the call to @@ -255,6 +263,7 @@ An I/O error occurred while reading from or writing to the file system. Corrupted data was detected while reading from the file system. .El .Sh SEE ALSO +.Xr chflags 2 , .Xr lseek 2 , .Xr sysctl 3 .Sh HISTORY diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map index 51be88203c17..69fce2ea7c63 100644 --- a/lib/libsys/syscalls.map +++ b/lib/libsys/syscalls.map @@ -809,4 +809,8 @@ FBSDprivate_1.0 { __sys_setcred; _exterrctl; __sys_exterrctl; + _inotify_add_watch_at; + __sys_inotify_add_watch_at; + _inotify_rm_watch; + __sys_inotify_rm_watch; }; diff --git a/lib/libsys/write.2 b/lib/libsys/write.2 index 7ff1c42715d6..d2ff41ceead9 100644 --- a/lib/libsys/write.2 +++ b/lib/libsys/write.2 @@ -195,6 +195,9 @@ is greater than if the sysctl .Va debug.iosize_max_clamp is non-zero). +.It Bq Er EINVAL +The file descriptor refers to a raw device, and the write +offset or size is not a multiple of the device's block size. .It Bq Er EINTEGRITY The backing store for .Fa fd diff --git a/lib/libsysdecode/Makefile b/lib/libsysdecode/Makefile index b01877bb8bb8..ca020552a6e9 100644 --- a/lib/libsysdecode/Makefile +++ b/lib/libsysdecode/Makefile @@ -84,6 +84,7 @@ MLINKS+=sysdecode_mask.3 sysdecode_accessmode.3 \ sysdecode_mask.3 sysdecode_fileflags.3 \ sysdecode_mask.3 sysdecode_filemode.3 \ sysdecode_mask.3 sysdecode_flock_operation.3 \ + sysdecode_mask.3 sysdecode_inotifyflags.3 \ sysdecode_mask.3 sysdecode_mlockall_flags.3 \ sysdecode_mask.3 sysdecode_mmap_flags.3 \ sysdecode_mask.3 sysdecode_mmap_prot.3 \ diff --git a/lib/libsysdecode/flags.c b/lib/libsysdecode/flags.c index 32829d35dbe0..dc09c5747968 100644 --- a/lib/libsysdecode/flags.c +++ b/lib/libsysdecode/flags.c @@ -23,7 +23,6 @@ * SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> #define L2CAP_SOCKET_CHECKED #include <sys/types.h> @@ -31,6 +30,7 @@ #include <sys/capsicum.h> #include <sys/event.h> #include <sys/extattr.h> +#include <sys/inotify.h> #include <sys/linker.h> #include <sys/mman.h> #include <sys/mount.h> @@ -351,6 +351,13 @@ sysdecode_getrusage_who(int who) return (lookup_value(rusage, who)); } +bool +sysdecode_inotifyflags(FILE *fp, int flag, int *rem) +{ + + return (print_mask_int(fp, inotifyflags, flag, rem)); +} + static struct name_table kevent_user_ffctrl[] = { X(NOTE_FFNOP) X(NOTE_FFAND) X(NOTE_FFOR) X(NOTE_FFCOPY) XEND diff --git a/lib/libsysdecode/mktables b/lib/libsysdecode/mktables index 5d7be2aad3c8..6b4f79402660 100644 --- a/lib/libsysdecode/mktables +++ b/lib/libsysdecode/mktables @@ -98,6 +98,7 @@ gen_table "extattrns" "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/ gen_table "fadvisebehav" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h" gen_table "openflags" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h" "O_RDONLY|O_RDWR|O_WRONLY" gen_table "flockops" "LOCK_[A-Z]+[[:space:]]+0x[0-9]+" "sys/fcntl.h" +gen_table "inotifyflags" "IN_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/inotify.h" "IN_CLOEXEC|IN_NONBLOCK" gen_table "kldsymcmd" "KLDSYM_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h" gen_table "kldunloadfflags" "LINKER_UNLOAD_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h" gen_table "lio_listiomodes" "LIO_(NO)?WAIT[[:space:]]+[0-9]+" "aio.h" diff --git a/lib/libsysdecode/sysdecode.h b/lib/libsysdecode/sysdecode.h index 8dc0bbea6f0d..c95d7f71379b 100644 --- a/lib/libsysdecode/sysdecode.h +++ b/lib/libsysdecode/sysdecode.h @@ -61,6 +61,7 @@ const char *sysdecode_getfsstat_mode(int _mode); const char *sysdecode_getrusage_who(int _who); const char *sysdecode_idtype(int _idtype); const char *sysdecode_ioctlname(unsigned long _val); +bool sysdecode_inotifyflags(FILE *_fp, int _flags, int *_rem); const char *sysdecode_ipproto(int _protocol); void sysdecode_kevent_fflags(FILE *_fp, short _filter, int _fflags, int _base); diff --git a/libexec/flua/modules/lposix.c b/libexec/flua/modules/lposix.c index 816d4bc688d2..9edc7e687786 100644 --- a/libexec/flua/modules/lposix.c +++ b/libexec/flua/modules/lposix.c @@ -88,18 +88,23 @@ static int lua_chown(lua_State *L) { const char *path; - uid_t owner = (uid_t) -1; - gid_t group = (gid_t) -1; + uid_t owner = (uid_t)-1; + gid_t group = (gid_t)-1; + int error; enforce_max_args(L, 3); path = luaL_checkstring(L, 1); if (lua_isinteger(L, 2)) - owner = (uid_t) lua_tointeger(L, 2); + owner = (uid_t)lua_tointeger(L, 2); else if (lua_isstring(L, 2)) { - struct passwd *p = getpwnam(lua_tostring(L, 2)); - if (p != NULL) - owner = p->pw_uid; + char buf[4096]; + struct passwd passwd, *pwd; + + error = getpwnam_r(lua_tostring(L, 2), &passwd, + buf, sizeof(buf), &pwd); + if (error == 0) + owner = pwd->pw_uid; else return (luaL_argerror(L, 2, lua_pushfstring(L, "unknown user %s", @@ -112,11 +117,15 @@ lua_chown(lua_State *L) } if (lua_isinteger(L, 3)) - group = (gid_t) lua_tointeger(L, 3); + group = (gid_t)lua_tointeger(L, 3); else if (lua_isstring(L, 3)) { - struct group *g = getgrnam(lua_tostring(L, 3)); - if (g != NULL) - group = g->gr_gid; + char buf[4096]; + struct group gr, *grp; + + error = getgrnam_r(lua_tostring(L, 3), &gr, buf, sizeof(buf), + &grp); + if (error == 0) + group = grp->gr_gid; else return (luaL_argerror(L, 3, lua_pushfstring(L, "unknown group %s", diff --git a/libexec/nuageinit/nuage.lua b/libexec/nuageinit/nuage.lua index 11958e8b5cc2..493ae11d6ca7 100644 --- 
a/libexec/nuageinit/nuage.lua +++ b/libexec/nuageinit/nuage.lua @@ -56,6 +56,21 @@ local function errmsg(str, prepend) os.exit(1) end +local function chmod(path, mode) + local mode = tonumber(mode, 8) + local _, err, msg = sys_stat.chmod(path, mode) + if err then + errmsg("chmod(" .. path .. ", " .. mode .. ") failed: " .. msg) + end +end + +local function chown(path, owner, group) + local _, err, msg = unistd.chown(path, owner, group) + if err then + errmsg("chown(" .. path .. ", " .. owner .. ", " .. group .. ") failed: " .. msg) + end +end + local function dirname(oldpath) if not oldpath then return nil @@ -252,12 +267,12 @@ local function addsshkey(homedir, key) f:write(key .. "\n") f:close() if chownak then - sys_stat.chmod(ak_path, 384) - unistd.chown(ak_path, dirattrs.uid, dirattrs.gid) + chmod(ak_path, "0600") + chown(ak_path, dirattrs.uid, dirattrs.gid) end if chowndotssh then - sys_stat.chmod(dotssh_path, 448) - unistd.chown(dotssh_path, dirattrs.uid, dirattrs.gid) + chmod(dotssh_path, "0700") + chown(dotssh_path, dirattrs.uid, dirattrs.gid) end end @@ -296,10 +311,10 @@ local function addsudo(pwd) end f:close() if chmodsudoers then - sys_stat.chmod(sudoers, 416) + chmod(sudoers, "0640") end if chmodsudoersd then - sys_stat.chmod(sudoers, 480) + chmod(sudoers, "0740") end end @@ -521,16 +536,14 @@ local function addfile(file, defer) end f:close() if file.permissions then - -- convert from octal to decimal - local perm = tonumber(file.permissions, 8) - sys_stat.chmod(filepath, perm) + chmod(filepath, file.permissions) end if file.owner then local owner, group = string.match(file.owner, "([^:]+):([^:]+)") if not owner then owner = file.owner end - unistd.chown(filepath, owner, group) + chown(filepath, owner, group) end return true end @@ -538,6 +551,8 @@ end local n = { warn = warnmsg, err = errmsg, + chmod = chmod, + chown = chown, dirname = dirname, mkdir_p = mkdir_p, sethostname = sethostname, diff --git a/libexec/nuageinit/nuageinit b/libexec/nuageinit/nuageinit index 84133d4373c5..0fcdc7274db3 100755 --- a/libexec/nuageinit/nuageinit +++ b/libexec/nuageinit/nuageinit @@ -7,7 +7,6 @@ local nuage = require("nuage") local ucl = require("ucl") local yaml = require("lyaml") -local sys_stat = require("posix.sys.stat") if #arg ~= 2 then nuage.err("Usage: " .. arg[0] .. " <cloud-init-directory> (<config-2> | <nocloud>)", false) @@ -157,7 +156,7 @@ local function ssh_keys(obj) sshkey:close() end if keytype == "private" then - sys_stat.chmod(path, 384) + nuage.chmod(path, "0600") end end end @@ -281,7 +280,7 @@ local function runcmd(obj) end if f ~= nil then f:close() - sys_stat.chmod(root .. "/var/cache/nuageinit/runcmds", 493) + nuage.chmod(root .. "/var/cache/nuageinit/runcmds", "0755") end end @@ -503,5 +502,5 @@ if line == "#cloud-config" then end elseif line:sub(1, 2) == "#!" then -- delay for execution at rc.local time -- - sys_stat.chmod(root .. "/var/cache/nuageinit/user_data", 493) + nuage.chmod(root .. 
"/var/cache/nuageinit/user_data", "0755") end diff --git a/sbin/ifconfig/ifbridge.c b/sbin/ifconfig/ifbridge.c index 2d0af1255a73..ce5d2f4894fa 100644 --- a/sbin/ifconfig/ifbridge.c +++ b/sbin/ifconfig/ifbridge.c @@ -147,6 +147,36 @@ bridge_addresses(if_ctx *ctx, const char *prefix) } static void +print_vlans(ifbvlan_set_t *vlans) +{ + unsigned printed = 0; + + for (unsigned vlan = DOT1Q_VID_MIN; vlan <= DOT1Q_VID_MAX;) { + unsigned last; + + if (!BRVLAN_TEST(vlans, vlan)) { + ++vlan; + continue; + } + + last = vlan; + while (last < DOT1Q_VID_MAX && BRVLAN_TEST(vlans, last + 1)) + ++last; + + if (printed == 0) + printf(" tagged "); + else + printf(","); + + printf("%u", vlan); + if (last != vlan) + printf("-%u", last); + ++printed; + vlan = last + 1; + } +} + +static void bridge_status(if_ctx *ctx) { struct ifconfig_bridge_status *bridge; @@ -211,6 +241,9 @@ bridge_status(if_ctx *ctx) else printf(" <unknown state %d>", state); } + if (member->ifbr_untagged != 0) + printf(" untagged %u", (unsigned)member->ifbr_untagged); + print_vlans(&bridge->member_vlans[i]); printf("\n"); } @@ -577,6 +610,45 @@ setbridge_ifpathcost(if_ctx *ctx, const char *ifn, const char *cost) } static void +setbridge_untagged(if_ctx *ctx, const char *ifn, const char *vlanid) +{ + struct ifbreq req; + u_long val; + + memset(&req, 0, sizeof(req)); + + if (get_val(vlanid, &val) < 0) + errx(1, "invalid VLAN identifier: %s", vlanid); + + /* + * Reject vlan 0, since it's not a valid vlan identifier and has a + * special meaning in the kernel interface. + */ + if (val == 0) + errx(1, "invalid VLAN identifier: %lu", val); + + strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); + req.ifbr_untagged = val; + + if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFUNTAGGED %s", vlanid); +} + +static void +unsetbridge_untagged(if_ctx *ctx, const char *ifn, int dummy __unused) +{ + struct ifbreq req; + + memset(&req, 0, sizeof(req)); + + strlcpy(req.ifbr_ifsname, ifn, sizeof(req.ifbr_ifsname)); + req.ifbr_untagged = 0; + + if (do_cmd(ctx, BRDGSIFUNTAGGED, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFUNTAGGED"); +} + +static void setbridge_ifmaxaddr(if_ctx *ctx, const char *ifn, const char *arg) { struct ifbreq req; @@ -612,17 +684,118 @@ setbridge_timeout(if_ctx *ctx, const char *arg, int dummy __unused) static void setbridge_private(if_ctx *ctx, const char *val, int dummy __unused) { - do_bridgeflag(ctx, val, IFBIF_PRIVATE, 1); } static void unsetbridge_private(if_ctx *ctx, const char *val, int dummy __unused) { - do_bridgeflag(ctx, val, IFBIF_PRIVATE, 0); } +static void +setbridge_vlanfilter(if_ctx *ctx, const char *val, int dummy __unused) +{ + do_bridgeflag(ctx, val, IFBIF_VLANFILTER, 1); +} + +static void +unsetbridge_vlanfilter(if_ctx *ctx, const char *val, int dummy __unused) +{ + do_bridgeflag(ctx, val, IFBIF_VLANFILTER, 0); +} + +static int +parse_vlans(ifbvlan_set_t *set, const char *str) +{ + char *s, *token; + + /* "none" means the empty vlan set */ + if (strcmp(str, "none") == 0) { + __BIT_ZERO(BRVLAN_SETSIZE, set); + return (0); + } + + /* "all" means all vlans, except for 0 and 4095 which are reserved */ + if (strcmp(str, "all") == 0) { + __BIT_FILL(BRVLAN_SETSIZE, set); + BRVLAN_CLR(set, DOT1Q_VID_NULL); + BRVLAN_CLR(set, DOT1Q_VID_RSVD_IMPL); + return (0); + } + + if ((s = strdup(str)) == NULL) + return (-1); + + while ((token = strsep(&s, ",")) != NULL) { + unsigned long first, last; + char *p, *lastp; + + if ((lastp = strchr(token, '-')) != NULL) + *lastp++ = '\0'; + + first = last = 
strtoul(token, &p, 10); + if (*p != '\0') + goto err; + if (first < DOT1Q_VID_MIN || first > DOT1Q_VID_MAX) + goto err; + + if (lastp) { + last = strtoul(lastp, &p, 10); + if (*p != '\0') + goto err; + if (last < DOT1Q_VID_MIN || last > DOT1Q_VID_MAX || + last < first) + goto err; + } + + for (unsigned vlan = first; vlan <= last; ++vlan) + BRVLAN_SET(set, vlan); + } + + free(s); + return (0); + +err: + free(s); + return (-1); +} + +static void +set_bridge_vlanset(if_ctx *ctx, const char *ifn, const char *vlans, int op) +{ + struct ifbif_vlan_req req; + + memset(&req, 0, sizeof(req)); + + if (parse_vlans(&req.bv_set, vlans) != 0) + errx(1, "invalid vlan set: %s", vlans); + + strlcpy(req.bv_ifname, ifn, sizeof(req.bv_ifname)); + req.bv_op = op; + + if (do_cmd(ctx, BRDGSIFVLANSET, &req, sizeof(req), 1) < 0) + err(1, "BRDGSIFVLANSET %s", vlans); +} + +static void +setbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_SET); +} + +static void +addbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_ADD); +} + +static void +delbridge_tagged(if_ctx *ctx, const char *ifn, const char *vlans) +{ + set_bridge_vlanset(ctx, ifn, vlans, BRDG_VLAN_OP_DEL); +} + static struct cmd bridge_cmds[] = { DEF_CMD_ARG("addm", setbridge_add), DEF_CMD_ARG("deletem", setbridge_delete), @@ -659,6 +832,13 @@ static struct cmd bridge_cmds[] = { DEF_CMD_ARG2("ifpriority", setbridge_ifpriority), DEF_CMD_ARG2("ifpathcost", setbridge_ifpathcost), DEF_CMD_ARG2("ifmaxaddr", setbridge_ifmaxaddr), + DEF_CMD_ARG("vlanfilter", setbridge_vlanfilter), + DEF_CMD_ARG("-vlanfilter", unsetbridge_vlanfilter), + DEF_CMD_ARG2("untagged", setbridge_untagged), + DEF_CMD_ARG("-untagged", unsetbridge_untagged), + DEF_CMD_ARG2("tagged", setbridge_tagged), + DEF_CMD_ARG2("+tagged", addbridge_tagged), + DEF_CMD_ARG2("-tagged", delbridge_tagged), DEF_CMD_ARG("timeout", setbridge_timeout), DEF_CMD_ARG("private", setbridge_private), DEF_CMD_ARG("-private", unsetbridge_private), diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index e3f094a336fb..3fb8b5f02b76 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 24, 2025 +.Dd July 5, 2025 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2696,6 +2696,57 @@ source addresses are dropped until an existing host cache entry expires or is removed. Set to 0 to disable. .El +.Ss Bridge VLAN Filtering Parameters +The behaviour of these options is described in the +.Dq VLAN SUPPORT +section of +.Xr bridge 4 . +.Bl -tag -width indent +.It Cm vlanfilter Ar interface +Enable VLAN filtering on an interface. +.It Cm -vlanfilter Ar interface +Disable VLAN filtering on an interface. +.It Cm untagged Ar interface Ar vlan-id +Set the untagged VLAN identifier for an interface. +.Pp +Setting +.Cm untagged +will automatically enable VLAN filtering on the interface. +.It Cm -untagged Ar interface Ar vlan-id +Clear the untagged VLAN identifier for an interface. +.It Cm tagged Ar interface Ar vlan-list +Set the interface's VLAN access list to the provided list of VLANs. +The list should be a comma-separated list of one or more VLAN IDs +or ranges formatted as +.Ar first-last , +the value +.Dq none +meaning the empty set, +or the value +.Dq all +meaning all VLANs (1-4094). +.Pp +Setting +.Cm tagged +will automatically enable VLAN filtering on the interface. 
+.It Cm +tagged Ar interface Ar vlan-list +Add the provided list of VLAN IDs to the interface's VLAN access list. +The list should be formatted as described for +.Cm tagged . +.Pp +Setting +.Cm +tagged +will automatically enable VLAN filtering on the interface. +.It Cm -tagged Ar interface Ar vlan-list +Remove the provided list of VLAN IDs from the interface's VLAN access +list. +The list should be formatted as described for +.Cm tagged . +.Pp +Setting +.Cm -tagged +will automatically enable VLAN filtering on the interface. +.El .Ss Link Aggregation and Link Failover Parameters The following parameters are specific to lagg interfaces: .Bl -tag -width indent diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y index f05e5608ba9d..c59204d3d5a4 100644 --- a/sbin/pfctl/parse.y +++ b/sbin/pfctl/parse.y @@ -3926,14 +3926,14 @@ uid : STRING { if (!strcmp($1, "unknown")) $$ = UID_MAX; else { - struct passwd *pw; + uid_t uid; - if ((pw = getpwnam($1)) == NULL) { + if (uid_from_user($1, &uid) == -1) { yyerror("unknown user %s", $1); free($1); YYERROR; } - $$ = pw->pw_uid; + $$ = uid; } free($1); } @@ -4004,14 +4004,14 @@ gid : STRING { if (!strcmp($1, "unknown")) $$ = GID_MAX; else { - struct group *grp; + gid_t gid; - if ((grp = getgrnam($1)) == NULL) { + if (gid_from_group($1, &gid) == -1) { yyerror("unknown group %s", $1); free($1); YYERROR; } - $$ = grp->gr_gid; + $$ = gid; } free($1); } @@ -6854,7 +6854,8 @@ top: } else if (c == '\\') { if ((next = lgetc(quotec)) == EOF) return (0); - if (next == quotec || c == ' ' || c == '\t') + if (next == quotec || next == ' ' || + next == '\t') c = next; else if (next == '\n') { file->lineno++; @@ -7149,11 +7150,10 @@ pfctl_cmdline_symset(char *s) if ((val = strrchr(s, '=')) == NULL) return (-1); - if ((sym = malloc(strlen(s) - strlen(val) + 1)) == NULL) + sym = strndup(s, val - s); + if (sym == NULL) err(1, "%s: malloc", __func__); - strlcpy(sym, s, strlen(s) - strlen(val) + 1); - ret = symset(sym, val + 1, 1); free(sym); diff --git a/sbin/pfctl/pfctl.8 b/sbin/pfctl/pfctl.8 index 0a4b8952ef74..c7fad58262dc 100644 --- a/sbin/pfctl/pfctl.8 +++ b/sbin/pfctl/pfctl.8 @@ -24,7 +24,7 @@ .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" -.Dd May 29, 2025 +.Dd June 30, 2025 .Dt PFCTL 8 .Os .Sh NAME @@ -114,8 +114,9 @@ Other rules and options are ignored. Apply flags .Fl f , .Fl F , +.Fl s , and -.Fl s +.Fl T only to the rules in the specified .Ar anchor . In addition to the main ruleset, @@ -473,7 +474,7 @@ Show the list of tables. .It Fl s Cm osfp Show the list of operating system fingerprints. .It Fl s Cm Interfaces -Show the list of interfaces and interface drivers available to PF. +Show the list of interfaces and interface groups available to PF. When used together with .Fl v , it additionally lists which interfaces have skip rules activated. @@ -543,7 +544,7 @@ Kill a table. Flush all addresses of a table. .It Fl T Cm add Add one or more addresses in a table. -Automatically create a nonexisting table. +Automatically create a persistent table if it does not exist. .It Fl T Cm delete Delete one or more addresses from a table. .It Fl T Cm expire Ar number @@ -555,7 +556,7 @@ For entries which have never had their statistics cleared, refers to the time they were added to the table. .It Fl T Cm replace Replace the addresses of the table. -Automatically create a nonexisting table. +Automatically create a persistent table if it does not exist. 
.It Fl T Cm show Show the content (addresses) of a table. .It Fl T Cm test diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c index 21befd3ca697..38d74aceba80 100644 --- a/sbin/pfctl/pfctl.c +++ b/sbin/pfctl/pfctl.c @@ -3175,6 +3175,9 @@ main(int argc, char *argv[]) } } + if (tblcmdopt == NULL ^ tableopt == NULL) + usage(); + if (tblcmdopt != NULL) { argc -= optind; argv += optind; @@ -3196,11 +3199,11 @@ main(int argc, char *argv[]) if (anchoropt != NULL) { int len = strlen(anchoropt); - if (mode == O_RDONLY && showopt == NULL) { - warnx("anchors apply to -f, -F and -s only"); + if (mode == O_RDONLY && showopt == NULL && tblcmdopt == NULL) { + warnx("anchors apply to -f, -F, -s, and -T only"); usage(); } - if (mode == O_RDWR && + if (mode == O_RDWR && tblcmdopt == NULL && (anchoropt[0] == '_' || strstr(anchoropt, "/_") != NULL)) errx(1, "anchor names beginning with '_' cannot " "be modified from the command line"); @@ -3400,7 +3403,7 @@ main(int argc, char *argv[]) pfctl_kill_src_nodes(dev, ifaceopt, opts); if (tblcmdopt != NULL) { - error = pfctl_command_tables(argc, argv, tableopt, + error = pfctl_table(argc, argv, tableopt, tblcmdopt, rulesopt, anchorname, opts); rulesopt = NULL; } diff --git a/sbin/pfctl/pfctl.h b/sbin/pfctl/pfctl.h index 5abd5ddcdf8f..08d48695709e 100644 --- a/sbin/pfctl/pfctl.h +++ b/sbin/pfctl/pfctl.h @@ -83,7 +83,7 @@ int pfi_clr_istats(const char *, int *, int); void pfctl_print_title(char *); void pfctl_do_clear_tables(const char *, int); void pfctl_show_tables(const char *, int); -int pfctl_command_tables(int, char *[], char *, const char *, char *, +int pfctl_table(int, char *[], char *, const char *, char *, const char *, int); int pfctl_show_altq(int, const char *, int, int); void warn_namespace_collision(const char *); diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c index 6b47d75f77f3..1db98c6103d4 100644 --- a/sbin/pfctl/pfctl_parser.c +++ b/sbin/pfctl/pfctl_parser.c @@ -66,6 +66,7 @@ #include "pfctl_parser.h" #include "pfctl.h" +void copy_satopfaddr(struct pf_addr *, struct sockaddr *); void print_op (u_int8_t, const char *, const char *); void print_port (u_int8_t, u_int16_t, u_int16_t, const char *, int); void print_ugid (u_int8_t, unsigned, unsigned, const char *, unsigned); @@ -75,8 +76,7 @@ void print_fromto(struct pf_rule_addr *, pf_osfp_t, int ifa_skip_if(const char *filter, struct node_host *p); struct node_host *host_if(const char *, int); -struct node_host *host_v4(const char *, int); -struct node_host *host_v6(const char *, int); +struct node_host *host_ip(const char *, int); struct node_host *host_dns(const char *, int, int); const char * const tcpflags = "FSRPAUEWe"; @@ -229,6 +229,17 @@ pfctl_parser_init(void) err(1, "Failed to create interface group query response map"); } +void +copy_satopfaddr(struct pf_addr *pfa, struct sockaddr *sa) +{ + if (sa->sa_family == AF_INET6) + pfa->v6 = ((struct sockaddr_in6 *)sa)->sin6_addr; + else if (sa->sa_family == AF_INET) + pfa->v4 = ((struct sockaddr_in *)sa)->sin_addr; + else + warnx("unhandled af %d", sa->sa_family); +} + const struct icmptypeent * geticmptypebynumber(u_int8_t type, sa_family_t af) { @@ -1378,7 +1389,6 @@ struct node_host * gen_dynnode(struct node_host *h, sa_family_t af) { struct node_host *n; - struct pf_addr *m; if (h->addr.type != PF_ADDR_DYNIFTL) return (NULL); @@ -1391,8 +1401,7 @@ gen_dynnode(struct node_host *h, sa_family_t af) n->tail = NULL; /* fix up netmask */ - m = &n->addr.v.a.mask; - if (af == AF_INET && unmask(m) > 32) + if (af == AF_INET && 
unmask(&n->addr.v.a.mask) > 32) set_ipmask(n, 32); return (n); @@ -1502,42 +1511,25 @@ ifa_load(void) } #endif n->ifindex = 0; - if (n->af == AF_INET) { - memcpy(&n->addr.v.a.addr, &((struct sockaddr_in *) - ifa->ifa_addr)->sin_addr.s_addr, - sizeof(struct in_addr)); - memcpy(&n->addr.v.a.mask, &((struct sockaddr_in *) - ifa->ifa_netmask)->sin_addr.s_addr, - sizeof(struct in_addr)); - if (ifa->ifa_broadaddr != NULL) - memcpy(&n->bcast, &((struct sockaddr_in *) - ifa->ifa_broadaddr)->sin_addr.s_addr, - sizeof(struct in_addr)); - if (ifa->ifa_dstaddr != NULL) - memcpy(&n->peer, &((struct sockaddr_in *) - ifa->ifa_dstaddr)->sin_addr.s_addr, - sizeof(struct in_addr)); - } else if (n->af == AF_INET6) { - memcpy(&n->addr.v.a.addr, &((struct sockaddr_in6 *) - ifa->ifa_addr)->sin6_addr.s6_addr, - sizeof(struct in6_addr)); - memcpy(&n->addr.v.a.mask, &((struct sockaddr_in6 *) - ifa->ifa_netmask)->sin6_addr.s6_addr, - sizeof(struct in6_addr)); - if (ifa->ifa_broadaddr != NULL) - memcpy(&n->bcast, &((struct sockaddr_in6 *) - ifa->ifa_broadaddr)->sin6_addr.s6_addr, - sizeof(struct in6_addr)); - if (ifa->ifa_dstaddr != NULL) - memcpy(&n->peer, &((struct sockaddr_in6 *) - ifa->ifa_dstaddr)->sin6_addr.s6_addr, - sizeof(struct in6_addr)); - n->ifindex = ((struct sockaddr_in6 *) - ifa->ifa_addr)->sin6_scope_id; - } else if (n->af == AF_LINK) { - ifa_add_groups_to_map(ifa->ifa_name); + if (n->af == AF_LINK) { n->ifindex = ((struct sockaddr_dl *) ifa->ifa_addr)->sdl_index; + ifa_add_groups_to_map(ifa->ifa_name); + } else { + copy_satopfaddr(&n->addr.v.a.addr, ifa->ifa_addr); + ifa->ifa_netmask->sa_family = ifa->ifa_addr->sa_family; + copy_satopfaddr(&n->addr.v.a.mask, ifa->ifa_netmask); + if (ifa->ifa_broadaddr != NULL) { + ifa->ifa_broadaddr->sa_family = ifa->ifa_addr->sa_family; + copy_satopfaddr(&n->bcast, ifa->ifa_broadaddr); + } + if (ifa->ifa_dstaddr != NULL) { + ifa->ifa_dstaddr->sa_family = ifa->ifa_addr->sa_family; + copy_satopfaddr(&n->peer, ifa->ifa_dstaddr); + } + if (n->af == AF_INET6) + n->ifindex = ((struct sockaddr_in6 *) + ifa->ifa_addr) ->sin6_scope_id; } if ((n->ifname = strdup(ifa->ifa_name)) == NULL) err(1, "%s: strdup", __func__); @@ -1810,15 +1802,14 @@ host(const char *s, int opts) goto error; } if ((ps = malloc(strlen(s) - strlen(p) + 1)) == NULL) - err(1, "host: malloc"); + err(1, "%s: malloc", __func__); strlcpy(ps, s, strlen(s) - strlen(p) + 1); } else { if ((ps = strdup(s)) == NULL) - err(1, "host: strdup"); + err(1, "%s: strdup", __func__); } - if ((h = host_v4(s, mask)) == NULL && - (h = host_v6(ps, mask)) == NULL && + if ((h = host_ip(ps, mask)) == NULL && (h = host_if(ps, mask)) == NULL && (h = host_dns(ps, mask, (opts & PF_OPT_NODNS))) == NULL) { fprintf(stderr, "no IP address found for %s\n", s); @@ -1875,59 +1866,42 @@ error: } struct node_host * -host_v4(const char *s, int mask) +host_ip(const char *s, int mask) { + struct addrinfo hints, *res; struct node_host *h = NULL; - struct in_addr ina; - - memset(&ina, 0, sizeof(ina)); - if (mask > -1) { - if (inet_net_pton(AF_INET, s, &ina, sizeof(ina)) == -1) - return (NULL); - } else { - if (inet_pton(AF_INET, s, &ina) != 1) - return (NULL); - } - h = calloc(1, sizeof(struct node_host)); + h = calloc(1, sizeof(*h)); if (h == NULL) - err(1, "address: calloc"); - h->ifname = NULL; - h->af = AF_INET; - h->addr.v.a.addr.addr32[0] = ina.s_addr; - set_ipmask(h, mask); - h->next = NULL; - h->tail = h; - - return (h); -} - -struct node_host * -host_v6(const char *s, int mask) -{ - struct addrinfo hints, *res; - struct node_host *h = 
NULL; + err(1, "%s: calloc", __func__); + if (mask != -1) { + /* Try to parse 10/8 */ + h->af = AF_INET; + if (inet_net_pton(AF_INET, s, &h->addr.v.a.addr.v4, + sizeof(h->addr.v.a.addr.v4)) != -1) + goto out; + } memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_INET6; + hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_DGRAM; /*dummy*/ hints.ai_flags = AI_NUMERICHOST; - if (getaddrinfo(s, "0", &hints, &res) == 0) { - h = calloc(1, sizeof(struct node_host)); - if (h == NULL) - err(1, "address: calloc"); - h->ifname = NULL; - h->af = AF_INET6; - memcpy(&h->addr.v.a.addr, - &((struct sockaddr_in6 *)res->ai_addr)->sin6_addr, - sizeof(h->addr.v.a.addr)); - h->ifindex = - ((struct sockaddr_in6 *)res->ai_addr)->sin6_scope_id; - set_ipmask(h, mask); + if (getaddrinfo(s, NULL, &hints, &res) == 0) { + h->af = res->ai_family; + copy_satopfaddr(&h->addr.v.a.addr, res->ai_addr); + if (h->af == AF_INET6) + h->ifindex = + ((struct sockaddr_in6 *)res->ai_addr)->sin6_scope_id; freeaddrinfo(res); - h->next = NULL; - h->tail = h; + } else { + free(h); + return (NULL); } +out: + set_ipmask(h, mask); + h->ifname = NULL; + h->next = NULL; + h->tail = h; return (h); } @@ -1974,20 +1948,10 @@ host_dns(const char *s, int mask, int numeric) err(1, "host_dns: calloc"); n->ifname = NULL; n->af = res->ai_family; - if (res->ai_family == AF_INET) { - memcpy(&n->addr.v.a.addr, - &((struct sockaddr_in *) - res->ai_addr)->sin_addr.s_addr, - sizeof(struct in_addr)); - } else { - memcpy(&n->addr.v.a.addr, - &((struct sockaddr_in6 *) - res->ai_addr)->sin6_addr.s6_addr, - sizeof(struct in6_addr)); + copy_satopfaddr(&n->addr.v.a.addr, res->ai_addr); + if (res->ai_family == AF_INET6) n->ifindex = - ((struct sockaddr_in6 *) - res->ai_addr)->sin6_scope_id; - } + ((struct sockaddr_in6 *)res->ai_addr)->sin6_scope_id; set_ipmask(n, mask); n->next = NULL; n->tail = n; diff --git a/sbin/pfctl/pfctl_table.c b/sbin/pfctl/pfctl_table.c index 3fe87b53b7f9..53abea3e1ae1 100644 --- a/sbin/pfctl/pfctl_table.c +++ b/sbin/pfctl/pfctl_table.c @@ -55,8 +55,6 @@ #include "pfctl.h" extern void usage(void); -static int pfctl_table(int, char *[], char *, const char *, char *, - const char *, int); static void print_table(const struct pfr_table *, int, int); static int print_tstats(const struct pfr_tstats *, int); static int load_addr(struct pfr_buffer *, int, char *[], char *, int, int); @@ -119,15 +117,6 @@ pfctl_show_tables(const char *anchor, int opts) } int -pfctl_command_tables(int argc, char *argv[], char *tname, - const char *command, char *file, const char *anchor, int opts) -{ - if (tname == NULL || command == NULL) - usage(); - return pfctl_table(argc, argv, tname, command, file, anchor, opts); -} - -int pfctl_table(int argc, char *argv[], char *tname, const char *command, char *file, const char *anchor, int opts) { @@ -214,7 +203,8 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, xprintf(opts, "%d/%d addresses added", nadd, b.pfrb_size); if (opts & PF_OPT_VERBOSE) PFRB_FOREACH(a, &b) - if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + if ((opts & PF_OPT_VERBOSE2) || + a->pfra_fback != PFR_FB_NONE) print_addrx(a, NULL, opts & PF_OPT_USEDNS); } else if (!strcmp(command, "delete")) { @@ -228,7 +218,8 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, xprintf(opts, "%d/%d addresses deleted", ndel, b.pfrb_size); if (opts & PF_OPT_VERBOSE) PFRB_FOREACH(a, &b) - if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + if ((opts & PF_OPT_VERBOSE2) || + a->pfra_fback != PFR_FB_NONE) print_addrx(a, 
NULL, opts & PF_OPT_USEDNS); } else if (!strcmp(command, "replace")) { @@ -259,7 +250,8 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, xprintf(opts, "no changes"); if (opts & PF_OPT_VERBOSE) PFRB_FOREACH(a, &b) - if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + if ((opts & PF_OPT_VERBOSE2) || + a->pfra_fback != PFR_FB_NONE) print_addrx(a, NULL, opts & PF_OPT_USEDNS); } else if (!strcmp(command, "expire")) { @@ -282,7 +274,7 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, break; } PFRB_FOREACH(p, &b) { - ((struct pfr_astats *)p)->pfras_a.pfra_fback = 0; + ((struct pfr_astats *)p)->pfras_a.pfra_fback = PFR_FB_NONE; if (time(NULL) - ((struct pfr_astats *)p)->pfras_tzero > lifetime) if (pfr_buf_add(&b2, @@ -297,7 +289,8 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command, xprintf(opts, "%d/%d addresses expired", ndel, b2.pfrb_size); if (opts & PF_OPT_VERBOSE) PFRB_FOREACH(a, &b2) - if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback) + if ((opts & PF_OPT_VERBOSE2) || + a->pfra_fback != PFR_FB_NONE) print_addrx(a, NULL, opts & PF_OPT_USEDNS); } else if (!strcmp(command, "reset")) { diff --git a/sbin/pfctl/tests/files/pf1071.in b/sbin/pfctl/tests/files/pf1071.in new file mode 100644 index 000000000000..9e6c2abc0621 --- /dev/null +++ b/sbin/pfctl/tests/files/pf1071.in @@ -0,0 +1 @@ +pass inet from (lo0)/24 diff --git a/sbin/pfctl/tests/files/pf1071.ok b/sbin/pfctl/tests/files/pf1071.ok new file mode 100644 index 000000000000..409b5dc4b068 --- /dev/null +++ b/sbin/pfctl/tests/files/pf1071.ok @@ -0,0 +1 @@ +pass inet from (lo0)/24 to any flags S/SA keep state diff --git a/sbin/pfctl/tests/pfctl_test_list.inc b/sbin/pfctl/tests/pfctl_test_list.inc index f87d8ba6bc73..51729bc9adad 100644 --- a/sbin/pfctl/tests/pfctl_test_list.inc +++ b/sbin/pfctl/tests/pfctl_test_list.inc @@ -179,3 +179,4 @@ PFCTL_TEST_FAIL(1067, "route-to can't be used on block rules") PFCTL_TEST(1068, "max-pkt-rate") PFCTL_TEST(1069, "max-pkt-size") PFCTL_TEST_FAIL(1070, "include line number") +PFCTL_TEST(1071, "mask length on (lo0)") diff --git a/share/examples/Makefile b/share/examples/Makefile index 560fdae6de5b..f0c050a36306 100644 --- a/share/examples/Makefile +++ b/share/examples/Makefile @@ -15,6 +15,7 @@ LDIRS= BSD_daemon \ find_interface \ flua \ indent \ + inotify \ ipfw \ jails \ kld \ @@ -97,6 +98,10 @@ SE_FLUA= libjail.lua SE_DIRS+= indent SE_INDENT= indent.pro +SE_DIRS+= inotify +SE_INOTIFY= inotify.c \ + Makefile + .if ${MK_IPFILTER} != "no" SUBDIR+= ipfilter .endif diff --git a/share/examples/inotify/Makefile b/share/examples/inotify/Makefile new file mode 100644 index 000000000000..c54c629c58d7 --- /dev/null +++ b/share/examples/inotify/Makefile @@ -0,0 +1,6 @@ +PROG= inotify +MAN= + +LIBADD= xo + +.include <bsd.prog.mk> diff --git a/share/examples/inotify/inotify.c b/share/examples/inotify/inotify.c new file mode 100644 index 000000000000..b8e300bc992c --- /dev/null +++ b/share/examples/inotify/inotify.c @@ -0,0 +1,172 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +/* + * A simple program to demonstrate inotify. Given one or more paths, it watches + * all events on those paths and prints them to standard output. 
+ */ + +#include <sys/types.h> +#include <sys/event.h> +#include <sys/inotify.h> + +#include <assert.h> +#include <err.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <libxo/xo.h> + +static void +usage(void) +{ + xo_errx(1, "usage: inotify <path1> [<path2> ...]"); +} + +static const char * +ev2str(uint32_t event) +{ + switch (event & IN_ALL_EVENTS) { + case IN_ACCESS: + return ("IN_ACCESS"); + case IN_ATTRIB: + return ("IN_ATTRIB"); + case IN_CLOSE_WRITE: + return ("IN_CLOSE_WRITE"); + case IN_CLOSE_NOWRITE: + return ("IN_CLOSE_NOWRITE"); + case IN_CREATE: + return ("IN_CREATE"); + case IN_DELETE: + return ("IN_DELETE"); + case IN_DELETE_SELF: + return ("IN_DELETE_SELF"); + case IN_MODIFY: + return ("IN_MODIFY"); + case IN_MOVE_SELF: + return ("IN_MOVE_SELF"); + case IN_MOVED_FROM: + return ("IN_MOVED_FROM"); + case IN_MOVED_TO: + return ("IN_MOVED_TO"); + case IN_OPEN: + return ("IN_OPEN"); + default: + switch (event) { + case IN_IGNORED: + return ("IN_IGNORED"); + case IN_Q_OVERFLOW: + return ("IN_Q_OVERFLOW"); + case IN_UNMOUNT: + return ("IN_UNMOUNT"); + } + warnx("unknown event %#x", event); + assert(0); + } +} + +static void +set_handler(int kq, int sig) +{ + struct kevent kev; + + (void)signal(sig, SIG_IGN); + EV_SET(&kev, sig, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); + if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) + xo_err(1, "kevent"); +} + +int +main(int argc, char **argv) +{ + struct inotify_event *iev, *iev1; + struct kevent kev; + size_t ievsz; + int ifd, kq; + + argc = xo_parse_args(argc, argv); + if (argc < 2) + usage(); + argc--; + argv++; + + ifd = inotify_init1(IN_NONBLOCK); + if (ifd < 0) + xo_err(1, "inotify"); + for (int i = 0; i < argc; i++) { + int wd; + + wd = inotify_add_watch(ifd, argv[i], IN_ALL_EVENTS); + if (wd < 0) + xo_err(1, "inotify_add_watch(%s)", argv[i]); + } + + xo_set_version("1"); + xo_open_list("events"); + + kq = kqueue(); + if (kq < 0) + xo_err(1, "kqueue"); + + /* + * Handle signals in the event loop so that we can close the xo list. + */ + set_handler(kq, SIGINT); + set_handler(kq, SIGTERM); + set_handler(kq, SIGHUP); + set_handler(kq, SIGQUIT); + + /* + * Monitor the inotify descriptor for events. + */ + EV_SET(&kev, ifd, EVFILT_READ, EV_ADD, 0, 0, NULL); + if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) + xo_err(1, "kevent"); + + ievsz = sizeof(*iev) + NAME_MAX + 1; + iev = malloc(ievsz); + if (iev == NULL) + err(1, "malloc"); + + for (;;) { + ssize_t n; + const char *ev; + + if (kevent(kq, NULL, 0, &kev, 1, NULL) < 0) + xo_err(1, "kevent"); + if (kev.filter == EVFILT_SIGNAL) + break; + + n = read(ifd, iev, ievsz); + if (n < 0) + xo_err(1, "read"); + assert(n >= (ssize_t)sizeof(*iev)); + + for (iev1 = iev; n > 0;) { + assert(n >= (ssize_t)sizeof(*iev1)); + + ev = ev2str(iev1->mask); + xo_open_instance("event"); + xo_emit("{:wd/%3d} {:event/%16s} {:name/%s}\n", + iev1->wd, ev, iev1->name); + xo_close_instance("event"); + + n -= sizeof(*iev1) + iev1->len; + iev1 = (struct inotify_event *)(void *) + ((char *)iev1 + sizeof(*iev1) + iev1->len); + } + (void)xo_flush(); + } + + xo_close_list("events"); + + if (xo_finish() < 0) + xo_err(1, "stdout"); + exit(0); +} diff --git a/share/man/man4/bridge.4 b/share/man/man4/bridge.4 index 7ce734ae87eb..2dff393ebc29 100644 --- a/share/man/man4/bridge.4 +++ b/share/man/man4/bridge.4 @@ -36,7 +36,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" -.Dd May 28, 2025 +.Dd July 5, 2025 .Dt IF_BRIDGE 4 .Os .Sh NAME @@ -271,6 +271,54 @@ by setting the .Va net.link.bridge.log_stp node using .Xr sysctl 8 . +.Sh VLAN SUPPORT +The +.Nm +driver has full support for virtual LANs (VLANs). +The bridge implements independent VLAN learning, i.e. MAC addresses are +learned on a per-VLAN basis, and the same MAC address may be learned on +multiple interfaces on different VLANs. +Incoming frames with an 802.1Q tag will be assigned to the appropriate +VLAN. +.Pp +Traffic sent to or from the host is not assigned to a VLAN by default. +To allow the host to communicate on a VLAN, configure a +.Xr vlan 4 +interface on the bridge and (if necessary) assign IP addresses there. +.Pp +By default no access control is enabled, so any interface may +participate in any VLAN. +.Pp +VLAN filtering may be enabled on an interface using the +.Xr ifconfig 8 +.Cm vlanfilter +option. +When VLAN filtering is enabled, an interface may only send and receive +frames based on its configured VLAN access list. +.Pp +The interface's untagged VLAN ID may be configured using the +.Xr ifconfig 8 +.Cm untagged +option. +If an untagged VLAN ID is configured, incoming frames will be assigned +to that VLAN, and the interface may receive outgoing untagged frames +in that VLAN. +.Pp +The tagged VLAN access list may be configured using the +.Cm tagged , +.Cm +tagged +and +.Cm -tagged +options to +.Xr ifconfig 8 . +An interface may send and receive tagged frames for any VLAN in its +access list. +.Pp +The bridge will automatically insert or remove 802.1q tags as needed, +based on the interface configuration, when forwarding frames between +interfaces. +This tag processing is only done for interfaces with VLAN filtering +enabled. .Sh PACKET FILTERING Packet filtering can be used with any firewall package that hooks in via the .Xr pfil 9 @@ -538,6 +586,7 @@ ifconfig bridge0 addm fxp0 addm gif0 up .Xr ipfw 4 , .Xr netmap 4 , .Xr pf 4 , +.Xr vlan 4 , .Xr ifconfig 8 .Sh HISTORY The diff --git a/share/man/man4/pf.4 b/share/man/man4/pf.4 index 422600a6fa44..03a4ba2bbe7f 100644 --- a/share/man/man4/pf.4 +++ b/share/man/man4/pf.4 @@ -26,7 +26,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd July 1, 2025 +.Dd July 2, 2025 .Dt PF 4 .Os .Sh NAME @@ -1114,7 +1114,7 @@ will be set to the length of the buffer actually used. .It Dv DIOCCLRSRCNODES Clear the tree of source tracking nodes. .It Dv DIOCIGETIFACES Fa "struct pfioc_iface *io" -Get the list of interfaces and interface drivers known to +Get the list of interfaces and interface groups known to .Nm . All the ioctls that manipulate interfaces use the same structure described below: @@ -1131,7 +1131,7 @@ struct pfioc_iface { .Pp If not empty, .Va pfiio_name -can be used to restrict the search to a specific interface or driver. +can be used to restrict the search to a specific interface or group. .Va pfiio_buffer[pfiio_size] is the user-supplied buffer for returning the data. On entry, diff --git a/share/man/man4/rights.4 b/share/man/man4/rights.4 index 0c24f6b45f88..8f5f6ad9c2d2 100644 --- a/share/man/man4/rights.4 +++ b/share/man/man4/rights.4 @@ -30,7 +30,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd May 1, 2024 +.Dd May 22, 2025 .Dt RIGHTS 4 .Os .Sh NAME @@ -319,6 +319,14 @@ Permit .It Dv CAP_GETSOCKOPT Permit .Xr getsockopt 2 . +.It Dv CAP_INOTIFY_ADD +Permit +.Xr inotify_add_watch 2 +and +.Xr inotify_add_watch_at 2 . 
+.It Dv CAP_INOTIFY_RM +Permit +.Xr inotify_rm_watch 2 . .It Dv CAP_IOCTL Permit .Xr ioctl 2 . diff --git a/share/man/man5/src.conf.5 b/share/man/man5/src.conf.5 index aecdde416578..63e9f471f1f1 100644 --- a/share/man/man5/src.conf.5 +++ b/share/man/man5/src.conf.5 @@ -1,5 +1,5 @@ .\" DO NOT EDIT-- this file is @generated by tools/build/options/makeman. -.Dd June 20, 2025 +.Dd July 5, 2025 .Dt SRC.CONF 5 .Os .Sh NAME @@ -1570,6 +1570,8 @@ utility. Build .Xr rpcbind 8 with warmstart support. +.It Va WITH_RUN_TESTS +Run tests as part of the build. .It Va WITHOUT_SCTP_SUPPORT Disable support in the kernel for the .Xr sctp 4 diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile index badaee1479f7..f709a4818dd5 100644 --- a/share/man/man9/Makefile +++ b/share/man/man9/Makefile @@ -434,6 +434,7 @@ MAN= accept_filter.9 \ VOP_GETEXTATTR.9 \ VOP_GETPAGES.9 \ VOP_INACTIVE.9 \ + VOP_INOTIFY.9 \ VOP_IOCTL.9 \ VOP_LINK.9 \ VOP_LISTEXTATTR.9 \ @@ -2460,6 +2461,7 @@ MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \ MLINKS+=VOP_FSYNC.9 VOP_FDATASYNC.9 MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9 MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9 +MLINKS+=VOP_INOTIFY.9 VOP_INOTIFY_ADD_WATCH.9 MLINKS+=VOP_LOCK.9 vn_lock.9 \ VOP_LOCK.9 VOP_ISLOCKED.9 \ VOP_LOCK.9 VOP_UNLOCK.9 diff --git a/share/man/man9/VOP_INOTIFY.9 b/share/man/man9/VOP_INOTIFY.9 new file mode 100644 index 000000000000..43b644682153 --- /dev/null +++ b/share/man/man9/VOP_INOTIFY.9 @@ -0,0 +1,60 @@ +.\"- +.\" SPDX-License-Identifier: BSD-2-Clause +.\" +.\" Copyright (c) 2025 Klara, Inc. +.\" +.Dd May 27, 2025 +.Dt VOP_INOTIFY 9 +.Os +.Sh NAME +.Nm VOP_INOTIFY +.Nd vnode inotify interface +.Sh SYNOPSIS +.In sys/param.h +.In sys/vnode.h +.Ft int +.Fo VOP_INOTIFY +.Fa struct vnode *vp +.Fa struct vnode *dvp +.Fa struct componentname *cnp +.Fa int event +.Fa uint32_t cookie +.Fc +.Ft int +.Fo VOP_INOTIFY_ADD_WATCH +.Fa struct vnode *vp +.Fa struct inotify_softc *sc +.Fa uint32_t mask +.Fa uint32_t *wdp +.Fa struct thread *td +.Fc +.Sh DESCRIPTION +The +.Fn VOP_INOTIFY +operation notifies the +.Xr inotify 2 +subsystem of a file system event on a vnode. +The +.Fa dvp +and +.Fa cnp +arguments are optional and are only used to obtain a file name for the event. +If they are omitted, the inotify subsystem will use the file name cache to +find a name for the vnode, but this is more expensive. +.Pp +The +.Fn VOP_INOTIFY_ADD_WATCH +operation is for internal use by the inotify subsystem to add a watch to a +vnode. +.Sh LOCKS +The +.Fn VOP_INOTIFY +operation does not assume any particular vnode lock state. +The +.Fn VOP_INOTIFY_ADD_WATCH +operation should be called with the vnode locked. +.Sh RETURN VALUES +Zero is returned on success, otherwise an error code is returned. +.Sh SEE ALSO +.Xr inotify 2 , +.Xr vnode 9 diff --git a/share/man/man9/firmware.9 b/share/man/man9/firmware.9 index f324861248d2..43f965a20fb7 100644 --- a/share/man/man9/firmware.9 +++ b/share/man/man9/firmware.9 @@ -201,7 +201,7 @@ whether compiled in, or preloaded by or manually loaded with .Xr kldload 8 . However, a system can implement additional mechanisms to bring -these images in memory before calling +these images into memory before calling .Fn firmware_register . .Pp When @@ -347,7 +347,7 @@ If .Fa imagename matches the trailing subpath of a registered image with a full path, that image is returned. .It -he kernel linker searches for a kernel module named +The kernel linker searches for a kernel module named .Fa imagename . 
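Returning to the CAP_INOTIFY_ADD and CAP_INOTIFY_RM rights documented in the rights.4 hunk above: they follow the usual Capsicum pattern. The sketch below is illustrative only; the /tmp directory, the event mask, and the particular rights split are arbitrary choices, and it assumes inotify_add_watch_at() behaves like other *at calls when given a directory descriptor and a relative path.

/*
 * Sketch: confine an inotify descriptor with Capsicum.  The directory is
 * opened before entering capability mode so that inotify_add_watch_at()
 * can be used relative to it afterwards.
 */
#include <sys/capsicum.h>
#include <sys/inotify.h>

#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	cap_rights_t rights;
	int dfd, ifd, wd;

	ifd = inotify_init();
	if (ifd < 0)
		err(1, "inotify_init");
	dfd = open("/tmp", O_RDONLY | O_DIRECTORY);	/* example path */
	if (dfd < 0)
		err(1, "open");

	/* Allow reading events and adding watches, but not removing them. */
	cap_rights_init(&rights, CAP_READ, CAP_EVENT, CAP_INOTIFY_ADD);
	if (cap_rights_limit(ifd, &rights) < 0)
		err(1, "cap_rights_limit");
	if (cap_enter() < 0)
		err(1, "cap_enter");

	wd = inotify_add_watch_at(ifd, dfd, ".", IN_CREATE | IN_DELETE);
	if (wd < 0)
		err(1, "inotify_add_watch_at");

	/*
	 * Without CAP_INOTIFY_RM, a later inotify_rm_watch(ifd, wd) is
	 * expected to be denied on this descriptor.
	 */
	return (0);
}

Since only CAP_INOTIFY_ADD is granted on the descriptor, removing the watch should fail unless CAP_INOTIFY_RM is added to the rights set, per the rights.4 entries above.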
If a kernel module is found, it is loaded, and the list of registered firmware images is searched again. diff --git a/share/misc/committers-src.dot b/share/misc/committers-src.dot index 60ef52278a53..313f40ad8e51 100644 --- a/share/misc/committers-src.dot +++ b/share/misc/committers-src.dot @@ -114,6 +114,7 @@ andre [label="Andre Oppermann\nandre@FreeBSD.org\n2003/11/12"] andreast [label="Andreas Tobler\nandreast@FreeBSD.org\n2010/09/05"] andrew [label="Andrew Turner\nandrew@FreeBSD.org\n2010/07/19"] antoine [label="Antoine Brodin\nantoine@FreeBSD.org\n2008/02/03"] +aokblast [label="ShengYi Hung\naokblast@FreeBSD.org\n2025/07/02"] araujo [label="Marcelo Araujo\naraujo@FreeBSD.org\n2015/08/04"] arichardson [label="Alex Richardson\narichardson@FreeBSD.org\n2017/10/30"] ariff [label="Ariff Abdullah\nariff@FreeBSD.org\n2005/11/14"] @@ -764,6 +765,7 @@ kp -> nick kp -> rcm kp -> zlei +lwhsu -> aokblast lwhsu -> khng manu -> corvink @@ -777,6 +779,7 @@ marcel -> nwhitehorn marcel -> sjg markj -> "0mp" +markj -> aokblast markj -> bnovkov markj -> cem markj -> christos diff --git a/stand/efi/loader/main.c b/stand/efi/loader/main.c index 70cdfb2e9328..3ef418d20df3 100644 --- a/stand/efi/loader/main.c +++ b/stand/efi/loader/main.c @@ -1547,6 +1547,7 @@ command_seed_entropy(int argc, char *argv[]) } COMMAND_SET(poweroff, "poweroff", "power off the system", command_poweroff); +COMMAND_SET(halt, "halt", "power off the system", command_poweroff); static int command_poweroff(int argc __unused, char *argv[] __unused) diff --git a/sys/amd64/amd64/efirt_machdep.c b/sys/amd64/amd64/efirt_machdep.c index 81a28ebe97ee..f70e235a0150 100644 --- a/sys/amd64/amd64/efirt_machdep.c +++ b/sys/amd64/amd64/efirt_machdep.c @@ -56,6 +56,15 @@ #include <vm/vm_pager.h> #include <vm/vm_radix.h> +/* The EFI regions we're allowed to map. */ +#define EFI_ALLOWED_TYPES_MASK ( \ + 1u << EFI_MD_TYPE_BS_CODE | 1u << EFI_MD_TYPE_BS_DATA | \ + 1u << EFI_MD_TYPE_RT_CODE | 1u << EFI_MD_TYPE_RT_DATA | \ + 1u << EFI_MD_TYPE_FIRMWARE \ +) + +uint32_t efi_map_regs; + static pml5_entry_t *efi_pml5; static pml4_entry_t *efi_pml4; static vm_object_t obj_1t1_pt; @@ -181,6 +190,7 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) vm_offset_t va; uint64_t idx; int bits, i, mode; + bool map_pz = true; obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 + NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG), @@ -198,9 +208,16 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) pmap_pinit_pml4(efi_pmltop_page); } + if ((efi_map_regs & ~EFI_ALLOWED_TYPES_MASK) != 0) { + printf("Ignoring the following runtime EFI regions: %#x\n", + efi_map_regs & ~EFI_ALLOWED_TYPES_MASK); + efi_map_regs &= EFI_ALLOWED_TYPES_MASK; + } + for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, descsz)) { - if ((p->md_attr & EFI_MD_ATTR_RT) == 0) + if ((p->md_attr & EFI_MD_ATTR_RT) == 0 && + !EFI_MAP_BOOTTYPE_ALLOWED(p->md_type)) continue; if (p->md_virt != 0 && p->md_virt != p->md_phys) { if (bootverbose) @@ -256,6 +273,22 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) } } VM_OBJECT_WUNLOCK(obj_1t1_pt); + if (p->md_phys == 0) + map_pz = false; + } + + /* + * Some BIOSes tend to access phys 0 during efirt calls, + * so map it if we haven't yet. 
+ */ + if (map_pz) { + VM_OBJECT_WLOCK(obj_1t1_pt); + pte = efi_1t1_pte(0); + /* Assume Write-Back */ + bits = pmap_cache_bits(kernel_pmap, VM_MEMATTR_WRITE_BACK, + false) | X86_PG_RW | X86_PG_V; + pte_store(pte, bits); + VM_OBJECT_WUNLOCK(obj_1t1_pt); } return (true); diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c index 05e482f7783b..7f317674907e 100644 --- a/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -325,6 +325,10 @@ initializecpu(void) wrmsr(MSR_EFER, msr); pg_nx = PG_NX; } + if ((amd_feature2 & AMDID2_TCE) != 0) { + msr = rdmsr(MSR_EFER) | EFER_TCE; + wrmsr(MSR_EFER, msr); + } hw_ibrs_recalculate(false); hw_ssb_recalculate(false); amd64_syscall_ret_flush_l1d_recalc(); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 032a134bbd4b..1e8f9b22bd19 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -645,7 +645,7 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, * NB: physmap_idx points to the next free slot. */ insert_idx = physmap_idx; - for (i = 0; i <= physmap_idx; i += 2) { + for (i = 0; i < physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; @@ -659,7 +659,7 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, } /* See if we can prepend to the next entry. */ - if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { + if (insert_idx < physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } @@ -670,8 +670,6 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, return (1); } - physmap_idx += 2; - *physmap_idxp = physmap_idx; if (physmap_idx == PHYS_AVAIL_ENTRIES) { printf( "Too many segments in the physical address map, giving up\n"); @@ -682,11 +680,14 @@ add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, * Move the last 'N' entries down to make room for the new * entry if needed. */ - for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { + for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } + physmap_idx += 2; + *physmap_idxp = physmap_idx; + /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; @@ -757,6 +758,7 @@ add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); + TUNABLE_INT_FETCH("machdep.efirt.regs", &efi_map_regs); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { @@ -794,10 +796,13 @@ add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, } switch (p->md_type) { - case EFI_MD_TYPE_CODE: - case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: + if (EFI_MAP_BOOTTYPE_ALLOWED(p->md_type)) + continue; + /* FALLTHROUGH */ + case EFI_MD_TYPE_CODE: + case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. 
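The machdep.efirt.regs tunable introduced above is a bitmask indexed by EFI memory-descriptor type: add_efi_map_entries() fetches it from the loader environment and efi_create_1t1_map() honours only bits within EFI_ALLOWED_TYPES_MASK. A rough userland sketch of computing a value follows; it is illustrative only, and the two constants are defined locally on the assumption that the kernel's EFI_MD_TYPE_BS_CODE/EFI_MD_TYPE_BS_DATA mirror the standard UEFI memory-type numbering (3 and 4).

/*
 * Illustrative only: compute a machdep.efirt.regs value requesting that
 * EFI boot-services code and data be mapped into the runtime page table.
 * Bits for types outside EFI_ALLOWED_TYPES_MASK are reported and dropped
 * by efi_create_1t1_map(), as shown in the hunk above.
 */
#include <stdint.h>
#include <stdio.h>

#define	EFI_MD_TYPE_BS_CODE	3	/* assumed, mirrors UEFI numbering */
#define	EFI_MD_TYPE_BS_DATA	4	/* assumed, mirrors UEFI numbering */

int
main(void)
{
	uint32_t regs;

	regs = (1u << EFI_MD_TYPE_BS_CODE) | (1u << EFI_MD_TYPE_BS_DATA);
	printf("machdep.efirt.regs=\"%#x\"\n", regs);	/* prints 0x18 */
	return (0);
}

The printed setting would typically go into loader.conf(5), from which the kernel environment consulted by TUNABLE_INT_FETCH() is populated at boot.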
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 2ab8c3b17e22..cae5436a1ff2 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -1301,8 +1301,10 @@ static int pmap_change_props_locked(vm_offset_t va, vm_size_t size, static bool pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static bool pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); +static bool pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, + vm_offset_t va, struct rwlock **lockp, vm_page_t mpte); static bool pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, - vm_offset_t va); + vm_offset_t va, vm_page_t m); static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, @@ -1334,7 +1336,7 @@ static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - struct spglist *free, struct rwlock **lockp); + bool remove_pt, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); @@ -5999,7 +6001,7 @@ pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, SLIST_INIT(&free); sva = trunc_2mpage(va); - pmap_remove_pde(pmap, pde, sva, &free, lockp); + pmap_remove_pde(pmap, pde, sva, true, &free, lockp); if ((oldpde & pmap_global_bit(pmap)) == 0) pmap_invalidate_pde_page(pmap, sva, oldpde); vm_page_free_pages_toq(&free, true); @@ -6011,11 +6013,17 @@ static bool pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp) { + return (pmap_demote_pde_mpte(pmap, pde, va, lockp, NULL)); +} + +static bool +pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, + struct rwlock **lockp, vm_page_t mpte) +{ pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V; vm_paddr_t mptepa; - vm_page_t mpte; int PG_PTE_CACHE; bool in_kernel; @@ -6028,61 +6036,65 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, PG_PKU_MASK = pmap_pku_mask_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - in_kernel = va >= VM_MAXUSER_ADDRESS; oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); - - /* - * Invalidate the 2MB page mapping and return "failure" if the - * mapping was never accessed. - */ - if ((oldpde & PG_A) == 0) { - KASSERT((oldpde & PG_W) == 0, - ("pmap_demote_pde: a wired mapping is missing PG_A")); - pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp); - return (false); - } - - mpte = pmap_remove_pt_page(pmap, va); + KASSERT((oldpde & PG_MANAGED) == 0 || lockp != NULL, + ("pmap_demote_pde: lockp for a managed mapping is NULL")); + in_kernel = va >= VM_MAXUSER_ADDRESS; if (mpte == NULL) { - KASSERT((oldpde & PG_W) == 0, - ("pmap_demote_pde: page table page for a wired mapping" - " is missing")); - - /* - * If the page table page is missing and the mapping - * is for a kernel address, the mapping must belong to - * the direct map. 
Page table pages are preallocated - * for every other part of the kernel address space, - * so the direct map region is the only part of the - * kernel address space that must be handled here. - */ - KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && - va < DMAP_MAX_ADDRESS), - ("pmap_demote_pde: No saved mpte for va %#lx", va)); - /* - * If the 2MB page mapping belongs to the direct map - * region of the kernel's address space, then the page - * allocation request specifies the highest possible - * priority (VM_ALLOC_INTERRUPT). Otherwise, the - * priority is normal. + * Invalidate the 2MB page mapping and return "failure" if the + * mapping was never accessed. */ - mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va), - (in_kernel ? VM_ALLOC_INTERRUPT : 0) | VM_ALLOC_WIRED); - - /* - * If the allocation of the new page table page fails, - * invalidate the 2MB page mapping and return "failure". - */ - if (mpte == NULL) { + if ((oldpde & PG_A) == 0) { + KASSERT((oldpde & PG_W) == 0, + ("pmap_demote_pde: a wired mapping is missing PG_A")); pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp); return (false); } - if (!in_kernel) - mpte->ref_count = NPTEPG; + mpte = pmap_remove_pt_page(pmap, va); + if (mpte == NULL) { + KASSERT((oldpde & PG_W) == 0, + ("pmap_demote_pde: page table page for a wired mapping is missing")); + + /* + * If the page table page is missing and the mapping + * is for a kernel address, the mapping must belong to + * the direct map. Page table pages are preallocated + * for every other part of the kernel address space, + * so the direct map region is the only part of the + * kernel address space that must be handled here. + */ + KASSERT(!in_kernel || (va >= DMAP_MIN_ADDRESS && + va < DMAP_MAX_ADDRESS), + ("pmap_demote_pde: No saved mpte for va %#lx", va)); + + /* + * If the 2MB page mapping belongs to the direct map + * region of the kernel's address space, then the page + * allocation request specifies the highest possible + * priority (VM_ALLOC_INTERRUPT). Otherwise, the + * priority is normal. + */ + mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va), + (in_kernel ? VM_ALLOC_INTERRUPT : 0) | + VM_ALLOC_WIRED); + + /* + * If the allocation of the new page table page fails, + * invalidate the 2MB page mapping and return "failure". + */ + if (mpte == NULL) { + pmap_demote_pde_abort(pmap, va, pde, oldpde, + lockp); + return (false); + } + + if (!in_kernel) + mpte->ref_count = NPTEPG; + } } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); @@ -6153,7 +6165,8 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * pmap_remove_kernel_pde: Remove a kernel superpage mapping. 
*/ static void -pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, + bool remove_pt) { pd_entry_t newpde; vm_paddr_t mptepa; @@ -6161,7 +6174,10 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mpte = pmap_remove_pt_page(pmap, va); + if (remove_pt) + mpte = pmap_remove_pt_page(pmap, va); + else + mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); @@ -6193,7 +6209,7 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) * pmap_remove_pde: do the things to unmap a superpage in a process */ static int -pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, +pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool remove_pt, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; @@ -6234,7 +6250,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, } } if (pmap == kernel_pmap) { - pmap_remove_kernel_pde(pmap, pdq, sva); + pmap_remove_kernel_pde(pmap, pdq, sva, remove_pt); } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { @@ -6476,7 +6492,8 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; - pmap_remove_pde(pmap, pde, sva, &free, &lock); + pmap_remove_pde(pmap, pde, sva, true, &free, + &lock); continue; } else if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { @@ -7552,13 +7569,36 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, /* * The reference to the PD page that was acquired by * pmap_alloc_pde() ensures that it won't be freed. - * However, if the PDE resulted from a promotion, then + * However, if the PDE resulted from a promotion, and + * the mapping is not from kernel_pmap, then * a reserved PT page could be freed. */ - (void)pmap_remove_pde(pmap, pde, va, &free, lockp); + (void)pmap_remove_pde(pmap, pde, va, + pmap != kernel_pmap, &free, lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { + if (va >= VM_MAXUSER_ADDRESS) { + /* + * Try to save the ptp in the trie + * before any changes to mappings are + * made. Abort on failure. + */ + mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); + if (pmap_insert_pt_page(pmap, mt, false, false)) { + if (pdpg != NULL) + pdpg->ref_count--; + CTR1(KTR_PMAP, + "pmap_enter_pde: cannot ins kern ptp va %#lx", + va); + return (KERN_RESOURCE_SHORTAGE); + } + /* + * Both pmap_remove_pde() and + * pmap_remove_ptes() will zero-fill + * the kernel page table page. + */ + } pmap_delayed_invl_start(); if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free, lockp)) @@ -7572,14 +7612,6 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, } else { KASSERT(SLIST_EMPTY(&free), ("pmap_enter_pde: freed kernel page table page")); - - /* - * Both pmap_remove_pde() and pmap_remove_ptes() will - * leave the kernel page table page zero filled. - */ - mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt, false, false)) - panic("pmap_enter_pde: trie insert failed"); } } @@ -9547,7 +9579,7 @@ pmap_unmapdev(void *p, vm_size_t size) * Tries to demote a 1GB page mapping. 
*/ static bool -pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) +pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; @@ -9564,12 +9596,19 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) oldpdpe = *pdpe; KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); - pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT, - VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT); - if (pdpg == NULL) { - CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" - " in pmap %p", va, pmap); - return (false); + if (m == NULL) { + pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT, + VM_ALLOC_WIRED); + if (pdpg == NULL) { + CTR2(KTR_PMAP, + "pmap_demote_pdpe: failure for va %#lx in pmap %p", + va, pmap); + return (false); + } + } else { + pdpg = m; + pdpg->pindex = va >> PDPSHIFT; + pmap_pt_page_count_adj(pmap, 1); } pdpgpa = VM_PAGE_TO_PHYS(pdpg); firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa); @@ -9779,7 +9818,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, tmpva += NBPDP; continue; } - if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) + if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva, NULL)) return (ENOMEM); } pde = pmap_pdpe_to_pde(pdpe, tmpva); @@ -9937,11 +9976,13 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, } /* - * Demotes any mapping within the direct map region that covers more than the - * specified range of physical addresses. This range's size must be a power - * of two and its starting address must be a multiple of its size. Since the - * demotion does not change any attributes of the mapping, a TLB invalidation - * is not mandatory. The caller may, however, request a TLB invalidation. + * Demotes any mapping within the direct map region that covers more + * than the specified range of physical addresses. This range's size + * must be a power of two and its starting address must be a multiple + * of its size, which means that any pdp from the mapping is fully + * covered by the range if len > NBPDP. Since the demotion does not + * change any attributes of the mapping, a TLB invalidation is not + * mandatory. The caller may, however, request a TLB invalidation. */ void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) @@ -9949,6 +9990,7 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) pdp_entry_t *pdpe; pd_entry_t *pde; vm_offset_t va; + vm_page_t m, mpte; bool changed; if (len == 0) @@ -9956,31 +9998,59 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2")); KASSERT((base & (len - 1)) == 0, ("pmap_demote_DMAP: base is not a multiple of len")); + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "pmap_demote_DMAP"); + if (len < NBPDP && base < dmaplimit) { va = PHYS_TO_DMAP(base); changed = false; + + /* + * Assume that it is fine to sleep there. + * The only existing caller of pmap_demote_DMAP() is the + * x86_mr_split_dmap() function. 
+ */ + m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK); + if (len < NBPDR) { + mpte = vm_page_alloc_noobj(VM_ALLOC_WIRED | + VM_ALLOC_WAITOK); + } else + mpte = NULL; + PMAP_LOCK(kernel_pmap); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDPE"); if ((*pdpe & PG_PS) != 0) { - if (!pmap_demote_pdpe(kernel_pmap, pdpe, va)) + if (!pmap_demote_pdpe(kernel_pmap, pdpe, va, m)) panic("pmap_demote_DMAP: PDPE failed"); changed = true; + m = NULL; } if (len < NBPDR) { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDE"); if ((*pde & PG_PS) != 0) { - if (!pmap_demote_pde(kernel_pmap, pde, va)) + mpte->pindex = pmap_pde_pindex(va); + pmap_pt_page_count_adj(kernel_pmap, 1); + if (!pmap_demote_pde_mpte(kernel_pmap, pde, va, + NULL, mpte)) panic("pmap_demote_DMAP: PDE failed"); changed = true; + mpte = NULL; } } if (changed && invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); + if (m != NULL) { + vm_page_unwire_noq(m); + vm_page_free(m); + } + if (mpte != NULL) { + vm_page_unwire_noq(mpte); + vm_page_free(mpte); + } } } diff --git a/sys/amd64/include/efi.h b/sys/amd64/include/efi.h index b47c4aa27ac7..439f2f0b317d 100644 --- a/sys/amd64/include/efi.h +++ b/sys/amd64/include/efi.h @@ -53,6 +53,10 @@ #define EFI_TIME_OWNED() mtx_assert(&atrtc_time_lock, MA_OWNED) #define EFI_RT_HANDLE_FAULTS_DEFAULT 1 + +#define EFI_MAP_BOOTTYPE_ALLOWED(type) (((efi_map_regs >> (type)) & 1) != 0) + +extern uint32_t efi_map_regs; #endif struct efirt_callinfo { diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h index 15e1dfc1a444..f1d9c96a78d7 100644 --- a/sys/amd64/linux/linux_proto.h +++ b/sys/amd64/linux/linux_proto.h @@ -914,10 +914,13 @@ struct linux_inotify_init_args { syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c index 8413d2723551..62b50cf68a32 100644 --- a/sys/amd64/linux/linux_sysent.c +++ b/sys/amd64/linux/linux_sysent.c @@ -268,8 +268,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 251 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 252 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 253 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */ + { .sy_narg = 
AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 256 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 257 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 258 = linux_mkdirat */ diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c index 20322f7a8660..1dc4de019080 100644 --- a/sys/amd64/linux/linux_systrace_args.c +++ b/sys/amd64/linux/linux_systrace_args.c @@ -1918,12 +1918,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 254: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 255: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -5860,9 +5867,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 254: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 255: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 256: @@ -8353,8 +8383,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 253: /* linux_inotify_add_watch */ case 254: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 255: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 256: /* linux_openat */ diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master index fd08c9b0279d..5e1394751ef6 100644 --- a/sys/amd64/linux/syscalls.master +++ b/sys/amd64/linux/syscalls.master @@ -1476,10 +1476,17 @@ int linux_inotify_init(void); } 254 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 255 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } 256 AUE_NULL STD { int linux_migrate_pages(void); diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h index ab0edd99df42..57a303271f1c 100644 --- a/sys/amd64/linux32/linux32_proto.h +++ b/sys/amd64/linux32/linux32_proto.h @@ -983,10 +983,13 @@ struct linux_inotify_init_args { syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char 
*)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; @@ -1184,7 +1187,7 @@ struct linux_pipe2_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_init1_args { - syscallarg_t dummy; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_preadv_args { char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)]; diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c index add9844254ce..1bc8841badf3 100644 --- a/sys/amd64/linux32/linux32_sysent.c +++ b/sys/amd64/linux32/linux32_sysent.c @@ -307,8 +307,8 @@ struct sysent linux32_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */ @@ -347,7 +347,7 @@ struct sysent linux32_sysent[] = { { .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */ { .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ + { .sy_narg = 
AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ { .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */ { .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */ { .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */ diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c index 7793124e6935..cbd1641c2a34 100644 --- a/sys/amd64/linux32/linux32_systrace_args.c +++ b/sys/amd64/linux32/linux32_systrace_args.c @@ -2036,12 +2036,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 292: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 293: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -2379,7 +2386,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_init1 */ case 332: { - *n_args = 0; + struct linux_inotify_init1_args *p = params; + iarg[a++] = p->flags; /* l_int */ + *n_args = 1; break; } /* linux_preadv */ @@ -6536,9 +6545,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 292: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 293: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 294: @@ -7116,6 +7148,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + switch (ndx) { + case 0: + p = "l_int"; + break; + default: + break; + }; break; /* linux_preadv */ case 333: @@ -9809,8 +9848,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 291: /* linux_inotify_add_watch */ case 292: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 294: /* linux_openat */ @@ -9982,6 +10027,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_preadv */ case 333: if (ndx == 0 || ndx == 1) diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master index 92d5f09c423f..7bd522a598e8 100644 --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -1589,10 +1589,17 @@ int linux_inotify_init(void); } 292 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); 
} 293 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } ; Linux 2.6.16: 294 AUE_NULL STD { @@ -1860,7 +1867,9 @@ ); } 332 AUE_NULL STD { - int linux_inotify_init1(void); + int linux_inotify_init1( + l_int flags + ); } ; Linux 2.6.30: 333 AUE_NULL STD { diff --git a/sys/arm/allwinner/aw_gpio.c b/sys/arm/allwinner/aw_gpio.c index 18b47bab12d9..2061e38a155f 100644 --- a/sys/arm/allwinner/aw_gpio.c +++ b/sys/arm/allwinner/aw_gpio.c @@ -1154,10 +1154,6 @@ aw_gpio_attach(device_t dev) aw_gpio_register_isrcs(sc); intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) - goto fail; - /* * Register as a pinctrl device */ @@ -1166,6 +1162,10 @@ aw_gpio_attach(device_t dev) fdt_pinctrl_register(dev, "allwinner,pins"); fdt_pinctrl_configure_tree(dev); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) + goto fail; + config_intrhook_oneshot(aw_gpio_enable_bank_supply, sc); return (0); diff --git a/sys/arm/allwinner/aw_rtc.c b/sys/arm/allwinner/aw_rtc.c index 9938601f17ce..4af57ab879e8 100644 --- a/sys/arm/allwinner/aw_rtc.c +++ b/sys/arm/allwinner/aw_rtc.c @@ -134,6 +134,7 @@ static struct ofw_compat_data compat_data[] = { { "allwinner,sun7i-a20-rtc", (uintptr_t) &a20_conf }, { "allwinner,sun6i-a31-rtc", (uintptr_t) &a31_conf }, { "allwinner,sun8i-h3-rtc", (uintptr_t) &h3_conf }, + { "allwinner,sun20i-d1-rtc", (uintptr_t) &h3_conf }, { "allwinner,sun50i-h5-rtc", (uintptr_t) &h3_conf }, { "allwinner,sun50i-h6-rtc", (uintptr_t) &h3_conf }, { NULL, 0 } @@ -147,11 +148,13 @@ struct aw_rtc_softc { static struct clk_fixed_def aw_rtc_osc32k = { .clkdef.id = 0, + .clkdef.name = "osc32k", .freq = 32768, }; static struct clk_fixed_def aw_rtc_iosc = { .clkdef.id = 2, + .clkdef.name = "iosc", }; static void aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev); @@ -250,23 +253,33 @@ aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev) { int nclocks; node = ofw_bus_get_node(dev); - nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames); - /* No clocks to export */ - if (nclocks <= 0) - return; - if (nclocks != 3) { - device_printf(dev, "Having only %d clocks instead of 3, aborting\n", nclocks); + /* Nothing to do. */ + if (!OF_hasprop(node, "clocks")) return; + + /* + * If the device tree gives us specific output names for the clocks, + * use them. 
+ */ + nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames); + if (nclocks > 0) { + if (nclocks != 3) { + device_printf(dev, + "Found %d clocks names instead of 3, aborting\n", + nclocks); + return; + } + + aw_rtc_osc32k.clkdef.name = clknames[0]; + aw_rtc_iosc.clkdef.name = clknames[2]; } clkdom = clkdom_create(dev); - aw_rtc_osc32k.clkdef.name = clknames[0]; if (clknode_fixed_register(clkdom, &aw_rtc_osc32k) != 0) device_printf(dev, "Cannot register osc32k clock\n"); - aw_rtc_iosc.clkdef.name = clknames[2]; aw_rtc_iosc.freq = sc->conf->iosc_freq; if (clknode_fixed_register(clkdom, &aw_rtc_iosc) != 0) device_printf(dev, "Cannot register iosc clock\n"); diff --git a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c index e4fc57b79ba5..48d1d2af5abc 100644 --- a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c +++ b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c @@ -837,12 +837,12 @@ bcm_gpio_attach(device_t dev) } sc->sc_gpio_npins = i; bcm_gpio_sysctl_init(sc); - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) - goto fail; fdt_pinctrl_register(dev, "brcm,pins"); fdt_pinctrl_configure_tree(dev); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) + goto fail; return (0); diff --git a/sys/arm/mv/mvebu_gpio.c b/sys/arm/mv/mvebu_gpio.c index 681cf20f7f9f..7acdfff539dc 100644 --- a/sys/arm/mv/mvebu_gpio.c +++ b/sys/arm/mv/mvebu_gpio.c @@ -810,7 +810,6 @@ mvebu_gpio_attach(device_t dev) return (ENXIO); } - bus_attach_children(dev); return (0); } diff --git a/sys/arm/nvidia/as3722_gpio.c b/sys/arm/nvidia/as3722_gpio.c index 073d057884c9..f7b3d4d43bab 100644 --- a/sys/arm/nvidia/as3722_gpio.c +++ b/sys/arm/nvidia/as3722_gpio.c @@ -544,7 +544,7 @@ as3722_gpio_attach(struct as3722_softc *sc, phandle_t node) sc->gpio_pins = malloc(sizeof(struct as3722_gpio_pin *) * sc->gpio_npins, M_AS3722_GPIO, M_WAITOK | M_ZERO); - sc->gpio_busdev = gpiobus_attach_bus(sc->dev); + sc->gpio_busdev = gpiobus_add_bus(sc->dev); if (sc->gpio_busdev == NULL) return (ENXIO); for (i = 0; i < sc->gpio_npins; i++) { diff --git a/sys/arm/nvidia/tegra_gpio.c b/sys/arm/nvidia/tegra_gpio.c index 16e1ef94d6a9..e37fd69a121e 100644 --- a/sys/arm/nvidia/tegra_gpio.c +++ b/sys/arm/nvidia/tegra_gpio.c @@ -824,7 +824,6 @@ tegra_gpio_attach(device_t dev) return (ENXIO); } - bus_attach_children(dev); return (0); } diff --git a/sys/arm64/apple/apple_pinctrl.c b/sys/arm64/apple/apple_pinctrl.c index ec2dd5907024..ebaaccea1d99 100644 --- a/sys/arm64/apple/apple_pinctrl.c +++ b/sys/arm64/apple/apple_pinctrl.c @@ -161,22 +161,22 @@ apple_pinctrl_attach(device_t dev) goto error; } + fdt_pinctrl_register(dev, "pinmux"); + fdt_pinctrl_configure_tree(dev); + + if (OF_hasprop(node, "interrupt-controller")) { + sc->sc_irqs = mallocarray(sc->sc_ngpios, + sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK); + intr_pic_register(dev, + OF_xref_from_node(ofw_bus_get_node(dev))); + } + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "failed to attach gpiobus\n"); goto error; } - fdt_pinctrl_register(dev, "pinmux"); - fdt_pinctrl_configure_tree(dev); - - if (!OF_hasprop(node, "interrupt-controller")) - return (0); - - sc->sc_irqs = mallocarray(sc->sc_ngpios, - sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK); - intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev))); - return (0); error: mtx_destroy(&sc->sc_mtx); diff --git a/sys/arm64/linux/linux_proto.h b/sys/arm64/linux/linux_proto.h index ae3d8569df58..82f57f77ffae 
100644 --- a/sys/arm64/linux/linux_proto.h +++ b/sys/arm64/linux/linux_proto.h @@ -141,10 +141,13 @@ struct linux_inotify_init1_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_ioctl_args { char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)]; diff --git a/sys/arm64/linux/linux_sysent.c b/sys/arm64/linux/linux_sysent.c index 722ada465730..e54a76cfd55e 100644 --- a/sys/arm64/linux/linux_sysent.c +++ b/sys/arm64/linux/linux_sysent.c @@ -41,8 +41,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 24 = linux_dup3 */ { .sy_narg = AS(linux_fcntl_args), .sy_call = (sy_call_t *)linux_fcntl, .sy_auevent = AUE_FCNTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 25 = linux_fcntl */ { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 26 = linux_inotify_init1 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */ { .sy_narg = AS(linux_ioctl_args), .sy_call = (sy_call_t *)linux_ioctl, .sy_auevent = AUE_IOCTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 29 = linux_ioctl */ { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 30 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 31 = linux_ioprio_get */ diff --git a/sys/arm64/linux/linux_systrace_args.c b/sys/arm64/linux/linux_systrace_args.c index 54e4dd82355d..1b946a9406a5 100644 --- a/sys/arm64/linux/linux_systrace_args.c +++ b/sys/arm64/linux/linux_systrace_args.c @@ -210,12 +210,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 27: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 28: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + 
uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_ioctl */ @@ -2780,9 +2787,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 27: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 28: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_ioctl */ case 29: @@ -6455,8 +6485,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 27: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 28: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_ioctl */ case 29: if (ndx == 0 || ndx == 1) diff --git a/sys/arm64/linux/syscalls.master b/sys/arm64/linux/syscalls.master index 79c04c398e00..2babdcaf03bf 100644 --- a/sys/arm64/linux/syscalls.master +++ b/sys/arm64/linux/syscalls.master @@ -170,10 +170,17 @@ ); } 27 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 28 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } 29 AUE_IOCTL STD { int linux_ioctl( diff --git a/sys/arm64/nvidia/tegra210/max77620_gpio.c b/sys/arm64/nvidia/tegra210/max77620_gpio.c index 8dcf98099dac..5d91e23324c7 100644 --- a/sys/arm64/nvidia/tegra210/max77620_gpio.c +++ b/sys/arm64/nvidia/tegra210/max77620_gpio.c @@ -672,7 +672,7 @@ max77620_gpio_attach(struct max77620_softc *sc, phandle_t node) sx_init(&sc->gpio_lock, "MAX77620 GPIO lock"); - sc->gpio_busdev = gpiobus_attach_bus(sc->dev); + sc->gpio_busdev = gpiobus_add_bus(sc->dev); if (sc->gpio_busdev == NULL) return (ENXIO); diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c index a86392f16624..847bc7394dd0 100644 --- a/sys/arm64/rockchip/rk_gpio.c +++ b/sys/arm64/rockchip/rk_gpio.c @@ -362,12 +362,6 @@ rk_gpio_attach(device_t dev) return (ENXIO); } - sc->sc_busdev = gpiobus_attach_bus(dev); - if (sc->sc_busdev == NULL) { - rk_gpio_detach(dev); - return (ENXIO); - } - /* Set the cached value to unknown */ for (i = 0; i < RK_GPIO_MAX_PINS; i++) sc->pin_cached[i].is_gpio = 2; @@ -377,6 +371,12 @@ rk_gpio_attach(device_t dev) sc->swporta_ddr = rk_gpio_read_4(sc, RK_GPIO_SWPORTA_DDR); RK_GPIO_UNLOCK(sc); + sc->sc_busdev = gpiobus_attach_bus(dev); + if (sc->sc_busdev == NULL) { + rk_gpio_detach(dev); + return (ENXIO); + } + return (0); } diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h index 0f110d5f9ddd..9381396f247c 100644 --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -663,6 +663,7 @@ #define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ #define AUE_TIMERFD 43270 /* FreeBSD/Linux. */ #define AUE_SETCRED 43271 /* FreeBSD-specific. */ +#define AUE_INOTIFY 43272 /* FreeBSD/Linux. 
*/ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/cam/mmc/mmc_da.c b/sys/cam/mmc/mmc_da.c index 1c455e1951d7..9246f95a080e 100644 --- a/sys/cam/mmc/mmc_da.c +++ b/sys/cam/mmc/mmc_da.c @@ -1198,27 +1198,6 @@ sdda_get_host_caps(struct cam_periph *periph, union ccb *ccb) return (cts->host_caps); } -static uint32_t -sdda_get_max_data(struct cam_periph *periph, union ccb *ccb) -{ - struct ccb_trans_settings_mmc *cts; - - cts = &ccb->cts.proto_specific.mmc; - memset(cts, 0, sizeof(struct ccb_trans_settings_mmc)); - - ccb->ccb_h.func_code = XPT_GET_TRAN_SETTINGS; - ccb->ccb_h.flags = CAM_DIR_NONE; - ccb->ccb_h.retry_count = 0; - ccb->ccb_h.timeout = 100; - ccb->ccb_h.cbfcnp = NULL; - xpt_action(ccb); - - if (ccb->ccb_h.status != CAM_REQ_CMP) - panic("Cannot get host max data"); - KASSERT(cts->host_max_data != 0, ("host_max_data == 0?!")); - return (cts->host_max_data); -} - static void sdda_start_init(void *context, union ccb *start_ccb) { diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index eaa086188b5f..8d2748098c00 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -511,4 +511,6 @@ #define FREEBSD32_SYS_fchroot 590 #define FREEBSD32_SYS_freebsd32_setcred 591 #define FREEBSD32_SYS_exterrctl 592 -#define FREEBSD32_SYS_MAXSYSCALL 593 +#define FREEBSD32_SYS_inotify_add_watch_at 593 +#define FREEBSD32_SYS_inotify_rm_watch 594 +#define FREEBSD32_SYS_MAXSYSCALL 595 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 989f32a5c6f0..bda373268cc5 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -598,4 +598,6 @@ const char *freebsd32_syscallnames[] = { "fchroot", /* 590 = fchroot */ "freebsd32_setcred", /* 591 = freebsd32_setcred */ "exterrctl", /* 592 = exterrctl */ + "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ + "inotify_rm_watch", /* 594 = inotify_rm_watch */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 476fe2ac3f80..3718a1b0c8ee 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -660,4 +660,6 @@ struct sysent freebsd32_sysent[] = { { .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */ { .sy_narg = AS(freebsd32_setcred_args), .sy_call = (sy_call_t *)freebsd32_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = freebsd32_setcred */ { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ + { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c index cf08938cd5de..37564a737a62 100644 --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3395,6 +3395,24 @@ 
systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } + /* inotify_add_watch_at */ + case 593: { + struct inotify_add_watch_at_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->dfd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 4; + break; + } + /* inotify_rm_watch */ + case 594: { + struct inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->wd; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9172,6 +9190,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* inotify_add_watch_at */ + case 593: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const char *"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* inotify_rm_watch */ + case 594: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11070,6 +11120,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* inotify_add_watch_at */ + case 593: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* inotify_rm_watch */ + case 594: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/compat/linux/linux_dummy.c b/sys/compat/linux/linux_dummy.c index 35d6debe0da9..19cd55849f65 100644 --- a/sys/compat/linux/linux_dummy.c +++ b/sys/compat/linux/linux_dummy.c @@ -74,9 +74,6 @@ DUMMY(kexec_load); DUMMY(add_key); DUMMY(request_key); DUMMY(keyctl); -/* Linux 2.6.13: */ -DUMMY(inotify_add_watch); -DUMMY(inotify_rm_watch); /* Linux 2.6.16: */ DUMMY(migrate_pages); DUMMY(unshare); @@ -87,7 +84,6 @@ DUMMY(vmsplice); DUMMY(move_pages); /* Linux 2.6.27: */ DUMMY(signalfd4); -DUMMY(inotify_init1); /* Linux 2.6.31: */ DUMMY(perf_event_open); /* Linux 2.6.36: */ diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index 246bc26d85d4..86834a7ecea8 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -32,11 +32,13 @@ #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filedesc.h> +#include <sys/inotify.h> #include <sys/lock.h> #include <sys/mman.h> #include <sys/selinfo.h> #include <sys/pipe.h> #include <sys/proc.h> +#include <sys/specialfd.h> #include <sys/stat.h> #include <sys/sx.h> #include <sys/syscallsubr.h> @@ -1877,3 +1879,122 @@ linux_writev(struct thread *td, struct linux_writev_args *args) freeuio(auio); return (linux_enobufs2eagain(td, args->fd, error)); } + +static int +linux_inotify_init_flags(int l_flags) +{ + int bsd_flags; + + if ((l_flags & ~(LINUX_IN_CLOEXEC | LINUX_IN_NONBLOCK)) != 0) + linux_msg(NULL, "inotify_init1 unsupported flags 0x%x", + l_flags); + + bsd_flags = 0; + if ((l_flags & LINUX_IN_CLOEXEC) != 0) + bsd_flags |= O_CLOEXEC; + if ((l_flags & LINUX_IN_NONBLOCK) != 0) + bsd_flags |= O_NONBLOCK; + return (bsd_flags); +} + +static int +inotify_init_common(struct thread *td, int flags) +{ + struct specialfd_inotify si; + + si.flags = linux_inotify_init_flags(flags); + return (kern_specialfd(td, SPECIALFD_INOTIFY, &si)); +} + +#if defined(__i386__) || defined(__amd64__) +int +linux_inotify_init(struct thread *td, struct linux_inotify_init_args *args) +{ + return (inotify_init_common(td, 0)); +} +#endif + +int +linux_inotify_init1(struct thread *td, struct linux_inotify_init1_args 
*args) +{ + return (inotify_init_common(td, args->flags)); +} + +/* + * The native implementation uses the same values for inotify events as + * libinotify, which gives us binary compatibility with Linux. This simplifies + * the shim implementation a lot, as otherwise we would have to handle read(2) + * calls on inotify descriptors and translate events to Linux's ABI. + */ +_Static_assert(LINUX_IN_ACCESS == IN_ACCESS, + "IN_ACCESS mismatch"); +_Static_assert(LINUX_IN_MODIFY == IN_MODIFY, + "IN_MODIFY mismatch"); +_Static_assert(LINUX_IN_ATTRIB == IN_ATTRIB, + "IN_ATTRIB mismatch"); +_Static_assert(LINUX_IN_CLOSE_WRITE == IN_CLOSE_WRITE, + "IN_CLOSE_WRITE mismatch"); +_Static_assert(LINUX_IN_CLOSE_NOWRITE == IN_CLOSE_NOWRITE, + "IN_CLOSE_NOWRITE mismatch"); +_Static_assert(LINUX_IN_OPEN == IN_OPEN, + "IN_OPEN mismatch"); +_Static_assert(LINUX_IN_MOVED_FROM == IN_MOVED_FROM, + "IN_MOVED_FROM mismatch"); +_Static_assert(LINUX_IN_MOVED_TO == IN_MOVED_TO, + "IN_MOVED_TO mismatch"); +_Static_assert(LINUX_IN_CREATE == IN_CREATE, + "IN_CREATE mismatch"); +_Static_assert(LINUX_IN_DELETE == IN_DELETE, + "IN_DELETE mismatch"); +_Static_assert(LINUX_IN_DELETE_SELF == IN_DELETE_SELF, + "IN_DELETE_SELF mismatch"); +_Static_assert(LINUX_IN_MOVE_SELF == IN_MOVE_SELF, + "IN_MOVE_SELF mismatch"); + +_Static_assert(LINUX_IN_UNMOUNT == IN_UNMOUNT, + "IN_UNMOUNT mismatch"); +_Static_assert(LINUX_IN_Q_OVERFLOW == IN_Q_OVERFLOW, + "IN_Q_OVERFLOW mismatch"); +_Static_assert(LINUX_IN_IGNORED == IN_IGNORED, + "IN_IGNORED mismatch"); + +_Static_assert(LINUX_IN_ISDIR == IN_ISDIR, + "IN_ISDIR mismatch"); +_Static_assert(LINUX_IN_ONLYDIR == IN_ONLYDIR, + "IN_ONLYDIR mismatch"); +_Static_assert(LINUX_IN_DONT_FOLLOW == IN_DONT_FOLLOW, + "IN_DONT_FOLLOW mismatch"); +_Static_assert(LINUX_IN_MASK_CREATE == IN_MASK_CREATE, + "IN_MASK_CREATE mismatch"); +_Static_assert(LINUX_IN_MASK_ADD == IN_MASK_ADD, + "IN_MASK_ADD mismatch"); +_Static_assert(LINUX_IN_ONESHOT == IN_ONESHOT, + "IN_ONESHOT mismatch"); +_Static_assert(LINUX_IN_EXCL_UNLINK == IN_EXCL_UNLINK, + "IN_EXCL_UNLINK mismatch"); + +static int +linux_inotify_watch_flags(int l_flags) +{ + if ((l_flags & ~(LINUX_IN_ALL_EVENTS | LINUX_IN_ALL_FLAGS)) != 0) { + linux_msg(NULL, "inotify_add_watch unsupported flags 0x%x", + l_flags); + } + + return (l_flags); +} + +int +linux_inotify_add_watch(struct thread *td, + struct linux_inotify_add_watch_args *args) +{ + return (kern_inotify_add_watch(args->fd, AT_FDCWD, args->pathname, + linux_inotify_watch_flags(args->mask), td)); +} + +int +linux_inotify_rm_watch(struct thread *td, + struct linux_inotify_rm_watch_args *args) +{ + return (kern_inotify_rm_watch(args->fd, args->wd, td)); +} diff --git a/sys/compat/linux/linux_file.h b/sys/compat/linux/linux_file.h index 2e56942b0f40..7448dc597230 100644 --- a/sys/compat/linux/linux_file.h +++ b/sys/compat/linux/linux_file.h @@ -189,6 +189,38 @@ #define LINUX_HUGETLB_FLAG_ENCODE_2GB (31 << LINUX_HUGETLB_FLAG_ENCODE_SHIFT) #define LINUX_HUGETLB_FLAG_ENCODE_16GB (34U << LINUX_HUGETLB_FLAG_ENCODE_SHIFT) +/* inotify flags */ +#define LINUX_IN_ACCESS 0x00000001 +#define LINUX_IN_MODIFY 0x00000002 +#define LINUX_IN_ATTRIB 0x00000004 +#define LINUX_IN_CLOSE_WRITE 0x00000008 +#define LINUX_IN_CLOSE_NOWRITE 0x00000010 +#define LINUX_IN_OPEN 0x00000020 +#define LINUX_IN_MOVED_FROM 0x00000040 +#define LINUX_IN_MOVED_TO 0x00000080 +#define LINUX_IN_CREATE 0x00000100 +#define LINUX_IN_DELETE 0x00000200 +#define LINUX_IN_DELETE_SELF 0x00000400 +#define LINUX_IN_MOVE_SELF 0x00000800 + +#define 
LINUX_IN_UNMOUNT 0x00002000 +#define LINUX_IN_Q_OVERFLOW 0x00004000 +#define LINUX_IN_IGNORED 0x00008000 + +#define LINUX_IN_ONLYDIR 0x01000000 +#define LINUX_IN_DONT_FOLLOW 0x02000000 +#define LINUX_IN_EXCL_UNLINK 0x04000000 +#define LINUX_IN_MASK_CREATE 0x10000000 +#define LINUX_IN_MASK_ADD 0x20000000 +#define LINUX_IN_ISDIR 0x40000000 +#define LINUX_IN_ONESHOT 0x80000000 + +#define LINUX_IN_ALL_EVENTS 0x00000fff +#define LINUX_IN_ALL_FLAGS 0xf700e000 + +#define LINUX_IN_NONBLOCK 0x00000800 +#define LINUX_IN_CLOEXEC 0x00080000 + #if defined(_KERNEL) struct l_file_handle { l_uint handle_bytes; diff --git a/sys/compat/linuxkpi/common/include/acpi/acpi.h b/sys/compat/linuxkpi/common/include/acpi/acpi.h index e0218bdde12e..1e398d05ba20 100644 --- a/sys/compat/linuxkpi/common/include/acpi/acpi.h +++ b/sys/compat/linuxkpi/common/include/acpi/acpi.h @@ -3,6 +3,10 @@ * * Copyright (c) 2017 Mark Johnston <markj@FreeBSD.org> * Copyright (c) 2020 Vladimir Kondratyev <wulf@FreeBSD.org> + * Copyright (c) 2025 The FreeBSD Foundation + * + * Portions of this software were developed by Björn Zeeb + * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -31,6 +35,13 @@ #define _LINUXKPI_ACPI_ACPI_H_ /* + * LINUXKPI_WANT_LINUX_ACPI is a temporary workaround to allow drm-kmod + * to update all needed branches without breaking builds. + * Once that happened and checks are implemented based on __FreeBSD_verison + * we will remove these conditions again. + */ + +/* * FreeBSD import of ACPICA has a typedef for BOOLEAN which conflicts with * amdgpu driver. Workaround it on preprocessor level. */ @@ -46,8 +57,8 @@ typedef int64_t INT64; #include <contrib/dev/acpica/include/acpi.h> #undef BOOLEAN +typedef ACPI_IO_ADDRESS acpi_io_address; typedef ACPI_HANDLE acpi_handle; -typedef ACPI_OBJECT acpi_object; typedef ACPI_OBJECT_HANDLER acpi_object_handler; typedef ACPI_OBJECT_TYPE acpi_object_type; typedef ACPI_STATUS acpi_status; @@ -55,12 +66,62 @@ typedef ACPI_STRING acpi_string; typedef ACPI_SIZE acpi_size; typedef ACPI_WALK_CALLBACK acpi_walk_callback; +union linuxkpi_acpi_object { + acpi_object_type type; + struct { + acpi_object_type type; + UINT64 value; + } integer; + struct { + acpi_object_type type; + UINT32 length; + char *pointer; + } string; + struct { + acpi_object_type type; + UINT32 length; + UINT8 *pointer; + } buffer; + struct { + acpi_object_type type; + UINT32 count; + union linuxkpi_acpi_object *elements; + } package; + struct { + acpi_object_type type; + acpi_object_type actual_type; + acpi_handle handle; + } reference; + struct { + acpi_object_type type; + UINT32 proc_id; + acpi_io_address pblk_address; + UINT32 pblk_length; + } processor; + struct { + acpi_object_type type; + UINT32 system_level; + UINT32 resource_order; + } power_resource; +}; + +#ifdef LINUXKPI_WANT_LINUX_ACPI +struct linuxkpi_acpi_buffer { + acpi_size length; /* Length in bytes of the buffer */ + void *pointer; /* pointer to buffer */ +}; + +typedef struct linuxkpi_acpi_buffer lkpi_acpi_buffer_t; +#else +typedef ACPI_BUFFER lkpi_acpi_buffer_t; +#endif + static inline ACPI_STATUS acpi_evaluate_object(ACPI_HANDLE Object, ACPI_STRING Pathname, - ACPI_OBJECT_LIST *ParameterObjects, ACPI_BUFFER *ReturnObjectBuffer) + ACPI_OBJECT_LIST *ParameterObjects, lkpi_acpi_buffer_t *ReturnObjectBuffer) { return (AcpiEvaluateObject( - Object, Pathname, ParameterObjects, ReturnObjectBuffer)); + Object, 
Pathname, ParameterObjects, (ACPI_BUFFER *)ReturnObjectBuffer)); } static inline const char * @@ -83,9 +144,9 @@ acpi_get_data(ACPI_HANDLE ObjHandle, ACPI_OBJECT_HANDLER Handler, void **Data) } static inline ACPI_STATUS -acpi_get_name(ACPI_HANDLE Object, UINT32 NameType, ACPI_BUFFER *RetPathPtr) +acpi_get_name(ACPI_HANDLE Object, UINT32 NameType, lkpi_acpi_buffer_t *RetPathPtr) { - return (AcpiGetName(Object, NameType, RetPathPtr)); + return (AcpiGetName(Object, NameType, (ACPI_BUFFER *)RetPathPtr)); } static inline ACPI_STATUS @@ -101,4 +162,9 @@ acpi_put_table(ACPI_TABLE_HEADER *Table) AcpiPutTable(Table); } +#ifdef LINUXKPI_WANT_LINUX_ACPI +#define acpi_object linuxkpi_acpi_object +#define acpi_buffer linuxkpi_acpi_buffer +#endif + #endif /* _LINUXKPI_ACPI_ACPI_H_ */ diff --git a/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h b/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h index 65bcbe7f1bdd..47195e7d66a6 100644 --- a/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h +++ b/sys/compat/linuxkpi/common/include/acpi/acpi_bus.h @@ -58,4 +58,10 @@ bool lkpi_acpi_dev_present(const char *hid, const char *uid, struct acpi_device *lkpi_acpi_dev_get_first_match_dev(const char *hid, const char *uid, int64_t hrv); +union linuxkpi_acpi_object; + +union linuxkpi_acpi_object * +acpi_evaluate_dsm(ACPI_HANDLE ObjHandle, const guid_t *guid, + UINT64 rev, UINT64 func, union linuxkpi_acpi_object *arg); + #endif /* _LINUXKPI_ACPI_ACPI_BUS_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/device.h b/sys/compat/linuxkpi/common/include/linux/device.h index a5f6874a07f6..2556b0c45e49 100644 --- a/sys/compat/linuxkpi/common/include/linux/device.h +++ b/sys/compat/linuxkpi/common/include/linux/device.h @@ -90,6 +90,8 @@ struct dev_pm_ops { struct device_driver { const char *name; const struct dev_pm_ops *pm; + + void (*shutdown) (struct device *); }; struct device_type { diff --git a/sys/compat/linuxkpi/common/include/linux/pci.h b/sys/compat/linuxkpi/common/include/linux/pci.h index 174015ba7a58..af19829f1cbb 100644 --- a/sys/compat/linuxkpi/common/include/linux/pci.h +++ b/sys/compat/linuxkpi/common/include/linux/pci.h @@ -72,6 +72,10 @@ struct pci_device_id { uintptr_t driver_data; }; +#define MODULE_DEVICE_TABLE_BUS_pci(_bus, _table) \ +MODULE_PNP_INFO("U32:vendor;U32:device;V32:subvendor;V32:subdevice", \ + _bus, lkpi_ ## _table, _table, nitems(_table) - 1) + /* Linux has an empty element at the end of the ID table -> nitems() - 1. */ #define MODULE_DEVICE_TABLE(_bus, _table) \ \ @@ -85,11 +89,10 @@ static driver_t _ ## _bus ## _ ## _table ## _driver = { \ 0 \ }; \ \ -DRIVER_MODULE(lkpi_ ## _table, pci, _ ## _bus ## _ ## _table ## _driver,\ +DRIVER_MODULE(lkpi_ ## _table, _bus, _ ## _bus ## _ ## _table ## _driver,\ 0, 0); \ \ -MODULE_PNP_INFO("U32:vendor;U32:device;V32:subvendor;V32:subdevice", \ - _bus, lkpi_ ## _table, _table, nitems(_table) - 1) +MODULE_DEVICE_TABLE_BUS_ ## _bus(_bus, _table) #define PCI_ANY_ID -1U diff --git a/sys/compat/linuxkpi/common/src/linux_acpi.c b/sys/compat/linuxkpi/common/src/linux_acpi.c index 6a9afb3ddff0..d18c69d9210d 100644 --- a/sys/compat/linuxkpi/common/src/linux_acpi.c +++ b/sys/compat/linuxkpi/common/src/linux_acpi.c @@ -39,6 +39,7 @@ #include <linux/notifier.h> #include <linux/suspend.h> +#include <linux/uuid.h> #include <acpi/acpi_bus.h> #include <acpi/video.h> @@ -99,6 +100,17 @@ acpi_evaluate_dsm_typed(ACPI_HANDLE handle, const char *uuid, int rev, argv4, &buf, type)) ? 
(ACPI_OBJECT *)buf.Pointer : NULL); } +union linuxkpi_acpi_object * +acpi_evaluate_dsm(ACPI_HANDLE ObjHandle, const guid_t *guid, + UINT64 rev, UINT64 func, union linuxkpi_acpi_object *pkg) +{ + ACPI_BUFFER buf; + + return (ACPI_SUCCESS(acpi_EvaluateDSM(ObjHandle, (const uint8_t *)guid, + rev, func, (ACPI_OBJECT *)pkg, &buf)) ? + (union linuxkpi_acpi_object *)buf.Pointer : NULL); +} + static void linux_handle_power_suspend_event(void *arg __unused) { @@ -323,6 +335,13 @@ acpi_evaluate_dsm_typed(ACPI_HANDLE handle, const char *uuid, int rev, return (NULL); } +union linuxkpi_acpi_object * +acpi_evaluate_dsm(ACPI_HANDLE ObjHandle, const guid_t *guid, + UINT64 rev, UINT64 func, union linuxkpi_acpi_object *pkg) +{ + return (NULL); +} + int register_acpi_notifier(struct notifier_block *nb) { diff --git a/sys/conf/files b/sys/conf/files index 75ee10be5896..866901ba4c51 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1776,6 +1776,19 @@ dev/hwpmc/hwpmc_soft.c optional hwpmc dev/hwreset/hwreset.c optional hwreset dev/hwreset/hwreset_array.c optional hwreset dev/hwreset/hwreset_if.m optional hwreset +dev/hwt/hwt.c optional hwt +dev/hwt/hwt_backend.c optional hwt +dev/hwt/hwt_config.c optional hwt +dev/hwt/hwt_context.c optional hwt +dev/hwt/hwt_contexthash.c optional hwt +dev/hwt/hwt_cpu.c optional hwt +dev/hwt/hwt_hook.c optional hwt +dev/hwt/hwt_ioctl.c optional hwt +dev/hwt/hwt_owner.c optional hwt +dev/hwt/hwt_ownerhash.c optional hwt +dev/hwt/hwt_record.c optional hwt +dev/hwt/hwt_thread.c optional hwt +dev/hwt/hwt_vm.c optional hwt dev/ichiic/ig4_acpi.c optional ig4 acpi iicbus dev/ichiic/ig4_iic.c optional ig4 iicbus dev/ichiic/ig4_pci.c optional ig4 pci iicbus @@ -3160,8 +3173,6 @@ dev/sound/midi/midi.c optional sound dev/sound/midi/mpu401.c optional sound dev/sound/midi/mpu_if.m optional sound dev/sound/midi/mpufoi_if.m optional sound -dev/sound/midi/sequencer.c optional sound -dev/sound/midi/synth_if.m optional sound dev/spibus/acpi_spibus.c optional acpi spibus dev/spibus/ofw_spibus.c optional fdt spibus dev/spibus/spibus.c optional spibus \ @@ -3979,6 +3990,7 @@ kern/vfs_export.c standard kern/vfs_extattr.c standard kern/vfs_hash.c standard kern/vfs_init.c standard +kern/vfs_inotify.c standard kern/vfs_lookup.c standard kern/vfs_mount.c standard kern/vfs_mountroot.c standard diff --git a/sys/conf/options b/sys/conf/options index 03e8964e965d..a637b0b74a77 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -885,6 +885,9 @@ DCONS_FORCE_GDB opt_dcons.h HWPMC_DEBUG opt_global.h HWPMC_HOOKS +# Hardware Trace (HWT) framework options +HWT_HOOKS + # 802.11 support layer IEEE80211_DEBUG opt_wlan.h IEEE80211_DEBUG_REFCNT opt_wlan.h diff --git a/sys/contrib/dev/iwlwifi/iwl-debug.h b/sys/contrib/dev/iwlwifi/iwl-debug.h index 43288a5a8d74..7b3b402766b4 100644 --- a/sys/contrib/dev/iwlwifi/iwl-debug.h +++ b/sys/contrib/dev/iwlwifi/iwl-debug.h @@ -47,7 +47,7 @@ enum iwl_dl { IWL_DL_DROP = 0x00000010, IWL_DL_EEPROM = 0x00000020, IWL_DL_FW = 0x00000040, - /* = 0x00000080, */ + IWL_DL_DEV_RADIO = 0x00000080, IWL_DL_HC = 0x00000100, IWL_DL_HT = 0x00000200, IWL_DL_INFO = 0x00000400, @@ -195,6 +195,8 @@ void __iwl_dbg(struct device *, u32, bool, const char *, const char *fmt, ...); IWL_DPRINTF(_subsys, IWL_DL_WEP, _fmt, ##__VA_ARGS__) #define IWL_DEBUG_WOWLAN(_subsys, _fmt, ...) \ IWL_DPRINTF(_subsys, IWL_DL_WOWLAN, _fmt, ##__VA_ARGS__) +#define IWL_DEBUG_DEV_RADIO(_dev, _fmt, ...) \ + IWL_DPRINTF_DEV((_dev), IWL_DL_DEV_RADIO, _fmt, ##__VA_ARGS__) #define IWL_DEBUG_PCI_RW(_subsys, _fmt, ...) 
\ IWL_DPRINTF(_subsys, IWL_DL_PCI_RW, _fmt, ##__VA_ARGS__) diff --git a/sys/contrib/dev/rtw89/acpi.c b/sys/contrib/dev/rtw89/acpi.c index 02d4526c1538..f5dedb12c129 100644 --- a/sys/contrib/dev/rtw89/acpi.c +++ b/sys/contrib/dev/rtw89/acpi.c @@ -8,7 +8,6 @@ #include "acpi.h" #include "debug.h" -#if defined(__linux__) static const guid_t rtw89_guid = GUID_INIT(0xD2A8C3E8, 0x4B69, 0x4F00, 0x82, 0xBD, 0xFE, 0x86, 0x07, 0x80, 0x3A, 0xA7); @@ -149,14 +148,6 @@ int rtw89_acpi_evaluate_dsm(struct rtw89_dev *rtwdev, ACPI_FREE(obj); return ret; } -#elif defined(__FreeBSD__) -int rtw89_acpi_evaluate_dsm(struct rtw89_dev *rtwdev, - enum rtw89_acpi_dsm_func func, - struct rtw89_acpi_dsm_result *res) -{ - return -ENOENT; -} -#endif int rtw89_acpi_evaluate_rtag(struct rtw89_dev *rtwdev, struct rtw89_acpi_rtag_result *res) @@ -180,28 +171,15 @@ int rtw89_acpi_evaluate_rtag(struct rtw89_dev *rtwdev, if (ACPI_FAILURE(status)) return -EIO; -#if defined(__linux__) obj = buf.pointer; if (obj->type != ACPI_TYPE_BUFFER) { -#elif defined(__FreeBSD__) - obj = buf.Pointer; - if (obj->Type != ACPI_TYPE_BUFFER) { -#endif rtw89_debug(rtwdev, RTW89_DBG_ACPI, -#if defined(__linux__) "acpi: expect buffer but type: %d\n", obj->type); -#elif defined(__FreeBSD__) - "acpi: expect buffer but type: %d\n", obj->Type); -#endif ret = -EINVAL; goto out; } -#if defined(__linux__) buf_len = obj->buffer.length; -#elif defined(__FreeBSD__) - buf_len = obj->Buffer.Length; -#endif if (buf_len != sizeof(*res)) { rtw89_debug(rtwdev, RTW89_DBG_ACPI, "%s: invalid buffer length: %u\n", __func__, buf_len); @@ -209,11 +187,7 @@ int rtw89_acpi_evaluate_rtag(struct rtw89_dev *rtwdev, goto out; } -#if defined(__linux__) *res = *(struct rtw89_acpi_rtag_result *)obj->buffer.pointer; -#elif defined(__FreeBSD__) - *res = *(struct rtw89_acpi_rtag_result *)obj->Buffer.Pointer; -#endif rtw89_hex_dump(rtwdev, RTW89_DBG_ACPI, "antenna_gain: ", res, sizeof(*res)); diff --git a/sys/dev/bnxt/bnxt_en/bnxt_auxbus_compat.h b/sys/dev/bnxt/bnxt_en/bnxt_auxbus_compat.h index 1d844a67c928..c4c9e789cf3e 100644 --- a/sys/dev/bnxt/bnxt_en/bnxt_auxbus_compat.h +++ b/sys/dev/bnxt/bnxt_en/bnxt_auxbus_compat.h @@ -39,6 +39,7 @@ struct auxiliary_device_id { char name[AUXILIARY_NAME_SIZE]; uint64_t driver_data; }; +#define MODULE_DEVICE_TABLE_BUS_auxiliary(_bus, _table) struct auxiliary_device { struct device dev; diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c index 2987af634866..f9468e0deda0 100644 --- a/sys/dev/gpio/acpi_gpiobus.c +++ b/sys/dev/gpio/acpi_gpiobus.c @@ -36,6 +36,7 @@ #include <dev/gpio/gpiobusvar.h> #include <dev/gpio/acpi_gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include "gpiobus_if.h" diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c index 2e2618805e7b..ab7f13177969 100644 --- a/sys/dev/gpio/gpiobus.c +++ b/sys/dev/gpio/gpiobus.c @@ -39,6 +39,7 @@ #include <sys/sbuf.h> #include <dev/gpio/gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include "gpiobus_if.h" @@ -213,20 +214,40 @@ gpio_pin_is_active(gpio_pin_t pin, bool *active) return (0); } +/* + * Note that this function should only + * be used in cases where a pre-existing + * gpiobus_pin structure exists. In most + * cases, the gpio_pin_get_by_* functions + * suffice. 
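A minimal sketch of the intended gpio_pin_acquire()/gpio_pin_release() pairing, assuming a hypothetical consumer driver (foo_softc and foo_claim_pin are illustrative; only the two gpiobus functions come from the code above):

	/* Hypothetical consumer that was handed an existing gpio_pin_t. */
	static int
	foo_claim_pin(struct foo_softc *sc)
	{
		int error;

		/* Mark the pin as mapped on its gpiobus before using it. */
		error = gpio_pin_acquire(sc->sc_pin);
		if (error != 0)
			return (error);	/* ENXIO without a bus, EBUSY if already mapped */
		/* ... configure and use the pin ... */
		return (0);
	}

On detach, gpio_pin_release(sc->sc_pin) unmaps the pin on the bus and frees the gpio_pin_t.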
+ */ +int +gpio_pin_acquire(gpio_pin_t gpio) +{ + device_t busdev; + + KASSERT(gpio != NULL, ("GPIO pin is NULL.")); + KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL.")); + + busdev = GPIO_GET_BUS(gpio->dev); + if (busdev == NULL) + return (ENXIO); + + return (gpiobus_acquire_pin(busdev, gpio->pin)); +} + void gpio_pin_release(gpio_pin_t gpio) { device_t busdev; - if (gpio == NULL) - return; - + KASSERT(gpio != NULL, ("GPIO pin is NULL.")); KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL.")); busdev = GPIO_GET_BUS(gpio->dev); - if (busdev != NULL) - gpiobus_release_pin(busdev, gpio->pin); + KASSERT(busdev != NULL, ("gpiobus dev is NULL.")); + gpiobus_release_pin(busdev, gpio->pin); free(gpio, M_DEVBUF); } @@ -293,7 +314,7 @@ gpiobus_print_pins(struct gpiobus_ivar *devi, struct sbuf *sb) } device_t -gpiobus_attach_bus(device_t dev) +gpiobus_add_bus(device_t dev) { device_t busdev; @@ -307,8 +328,24 @@ gpiobus_attach_bus(device_t dev) #ifdef FDT ofw_gpiobus_register_provider(dev); #endif - bus_attach_children(dev); + return (busdev); +} + +/* + * Attach a gpiobus child. + * Note that the controller is expected + * to be fully initialized at this point. + */ +device_t +gpiobus_attach_bus(device_t dev) +{ + device_t busdev; + busdev = gpiobus_add_bus(dev); + if (busdev == NULL) + return (NULL); + + bus_attach_children(dev); return (busdev); } @@ -385,14 +422,13 @@ gpiobus_acquire_pin(device_t bus, uint32_t pin) sc = device_get_softc(bus); /* Consistency check. */ if (pin >= sc->sc_npins) { - device_printf(bus, - "invalid pin %d, max: %d\n", pin, sc->sc_npins - 1); - return (-1); + panic("%s: invalid pin %d, max: %d", + device_get_nameunit(bus), pin, sc->sc_npins - 1); } /* Mark pin as mapped and give warning if it's already mapped. */ if (sc->sc_pins[pin].mapped) { device_printf(bus, "warning: pin %d is already mapped\n", pin); - return (-1); + return (EBUSY); } sc->sc_pins[pin].mapped = 1; @@ -400,7 +436,7 @@ gpiobus_acquire_pin(device_t bus, uint32_t pin) } /* Release mapped pin */ -int +void gpiobus_release_pin(device_t bus, uint32_t pin) { struct gpiobus_softc *sc; @@ -408,19 +444,15 @@ gpiobus_release_pin(device_t bus, uint32_t pin) sc = device_get_softc(bus); /* Consistency check. */ if (pin >= sc->sc_npins) { - device_printf(bus, - "invalid pin %d, max=%d\n", - pin, sc->sc_npins - 1); - return (-1); + panic("%s: invalid pin %d, max: %d", + device_get_nameunit(bus), pin, sc->sc_npins - 1); } - if (!sc->sc_pins[pin].mapped) { - device_printf(bus, "pin %d is not mapped\n", pin); - return (-1); - } - sc->sc_pins[pin].mapped = 0; + if (!sc->sc_pins[pin].mapped) + panic("%s: pin %d is not mapped", device_get_nameunit(bus), + pin); - return (0); + sc->sc_pins[pin].mapped = 0; } static int @@ -435,8 +467,7 @@ gpiobus_acquire_child_pins(device_t dev, device_t child) device_printf(child, "cannot acquire pin %d\n", devi->pins[i]); while (--i >= 0) { - (void)gpiobus_release_pin(dev, - devi->pins[i]); + gpiobus_release_pin(dev, devi->pins[i]); } gpiobus_free_ivars(devi); return (EBUSY); diff --git a/sys/dev/sound/midi/sequencer.h b/sys/dev/gpio/gpiobus_internal.h index 22ea0ae6c1b6..de3f57663132 100644 --- a/sys/dev/sound/midi/sequencer.h +++ b/sys/dev/gpio/gpiobus_internal.h @@ -1,8 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2003 Mathew Kanner - * Copyright (c) 1999 Seigo Tanimura + * Copyright (c) 2009 Oleksandr Tymoshenko <gonzo@freebsd.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -25,65 +24,24 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * */ -/* - * Include file for the midi sequence driver. - */ - -#ifndef _SEQUENCER_H_ -#define _SEQUENCER_H_ - -#define NSEQ_MAX 16 +#ifndef __GPIOBUS_INTERNAL_H__ +#define __GPIOBUS_INTERNAL_H__ /* - * many variables should be reduced to a range. Here define a macro + * Functions shared between gpiobus and other bus classes that derive from it; + * these should not be called directly by other drivers. */ - -#define RANGE(var, low, high) (var) = \ -((var)<(low)?(low) : (var)>(high)?(high) : (var)) - -#ifdef _KERNEL - -void seq_timer(void *arg); - -SYSCTL_DECL(_hw_midi_seq); - -extern int seq_debug; - -#define SEQ_DEBUG(y, x) \ - do { \ - if (seq_debug >= y) { \ - (x); \ - } \ - } while (0) - -SYSCTL_DECL(_hw_midi); - -#endif /* _KERNEL */ - -#define SYNTHPROP_MIDI 1 -#define SYNTHPROP_SYNTH 2 -#define SYNTHPROP_RX 4 -#define SYNTHPROP_TX 8 - -struct _midi_cmdtab { - int cmd; - char *name; -}; -typedef struct _midi_cmdtab midi_cmdtab; -extern midi_cmdtab cmdtab_seqevent[]; -extern midi_cmdtab cmdtab_seqioctl[]; -extern midi_cmdtab cmdtab_timer[]; -extern midi_cmdtab cmdtab_seqcv[]; -extern midi_cmdtab cmdtab_seqccmn[]; - -char *midi_cmdname(int cmd, midi_cmdtab * tab); - -enum { - MORE, - TIMERARMED, - QUEUEFULL -}; - +int gpiobus_attach(device_t); +int gpiobus_detach(device_t); +int gpiobus_init_softc(device_t); +int gpiobus_alloc_ivars(struct gpiobus_ivar *); +void gpiobus_free_ivars(struct gpiobus_ivar *); +int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); +int gpiobus_acquire_pin(device_t, uint32_t); +void gpiobus_release_pin(device_t, uint32_t); + +extern driver_t gpiobus_driver; #endif diff --git a/sys/dev/gpio/gpiobusvar.h b/sys/dev/gpio/gpiobusvar.h index 74783e112f89..7f504236a774 100644 --- a/sys/dev/gpio/gpiobusvar.h +++ b/sys/dev/gpio/gpiobusvar.h @@ -156,6 +156,8 @@ int gpio_pin_get_by_bus_pinnum(device_t _bus, uint32_t _pinnum, gpio_pin_t *_gp) /* Acquire a pin by child and index (used by direct children of gpiobus). */ int gpio_pin_get_by_child_index(device_t _child, uint32_t _idx, gpio_pin_t *_gp); +/* Acquire a pin from an existing gpio_pin_t. */ +int gpio_pin_acquire(gpio_pin_t gpio); /* Release a pin acquired via any gpio_pin_get_xxx() function. */ void gpio_pin_release(gpio_pin_t gpio); @@ -167,22 +169,9 @@ int gpio_pin_setflags(gpio_pin_t pin, uint32_t flags); struct resource *gpio_alloc_intr_resource(device_t consumer_dev, int *rid, u_int alloc_flags, gpio_pin_t pin, uint32_t intr_mode); -/* - * Functions shared between gpiobus and other bus classes that derive from it; - * these should not be called directly by other drivers. 
- */ int gpio_check_flags(uint32_t, uint32_t); +device_t gpiobus_add_bus(device_t); device_t gpiobus_attach_bus(device_t); int gpiobus_detach_bus(device_t); -int gpiobus_attach(device_t); -int gpiobus_detach(device_t); -int gpiobus_init_softc(device_t); -int gpiobus_alloc_ivars(struct gpiobus_ivar *); -void gpiobus_free_ivars(struct gpiobus_ivar *); -int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *); -int gpiobus_acquire_pin(device_t, uint32_t); -int gpiobus_release_pin(device_t, uint32_t); - -extern driver_t gpiobus_driver; #endif /* __GPIOBUS_H__ */ diff --git a/sys/dev/gpio/gpiopps.c b/sys/dev/gpio/gpiopps.c index bb8afa5e062c..82620a50a798 100644 --- a/sys/dev/gpio/gpiopps.c +++ b/sys/dev/gpio/gpiopps.c @@ -160,7 +160,7 @@ gpiopps_detach(device_t dev) if (sc->ires != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->irid, sc->ires); if (sc->gpin != NULL) - gpiobus_release_pin(GPIO_GET_BUS(sc->gpin->dev), sc->gpin->pin); + gpio_pin_release(sc->gpin); return (0); } diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c index 32dc5b55e698..fc5fb03d6824 100644 --- a/sys/dev/gpio/ofw_gpiobus.c +++ b/sys/dev/gpio/ofw_gpiobus.c @@ -36,6 +36,7 @@ #include <sys/module.h> #include <dev/gpio/gpiobusvar.h> +#include <dev/gpio/gpiobus_internal.h> #include <dev/ofw/ofw_bus.h> #include "gpiobus_if.h" diff --git a/sys/dev/gpio/pl061.c b/sys/dev/gpio/pl061.c index cc39790322b6..87d4310a6396 100644 --- a/sys/dev/gpio/pl061.c +++ b/sys/dev/gpio/pl061.c @@ -487,14 +487,21 @@ pl061_attach(device_t dev) } } + mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN); + + if (sc->sc_xref != 0 && !intr_pic_register(dev, sc->sc_xref)) { + device_printf(dev, "couldn't register PIC\n"); + PL061_LOCK_DESTROY(sc); + goto free_isrc; + } + sc->sc_busdev = gpiobus_attach_bus(dev); if (sc->sc_busdev == NULL) { device_printf(dev, "couldn't attach gpio bus\n"); + PL061_LOCK_DESTROY(sc); goto free_isrc; } - mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN); - return (0); free_isrc: @@ -503,6 +510,7 @@ free_isrc: * for (irq = 0; irq < PL061_NUM_GPIO; irq++) * intr_isrc_deregister(PIC_INTR_ISRC(sc, irq)); */ + bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_irq_hdlr); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid, sc->sc_irq_res); free_pic: diff --git a/sys/dev/gpio/pl061.h b/sys/dev/gpio/pl061.h index 809a1168493d..d9fe23e502b1 100644 --- a/sys/dev/gpio/pl061.h +++ b/sys/dev/gpio/pl061.h @@ -46,6 +46,7 @@ struct pl061_softc { struct resource *sc_mem_res; struct resource *sc_irq_res; void *sc_irq_hdlr; + intptr_t sc_xref; int sc_mem_rid; int sc_irq_rid; struct pl061_pin_irqsrc sc_isrcs[PL061_NUM_GPIO]; diff --git a/sys/dev/gpio/pl061_acpi.c b/sys/dev/gpio/pl061_acpi.c index f5885025083e..8e9921261e4e 100644 --- a/sys/dev/gpio/pl061_acpi.c +++ b/sys/dev/gpio/pl061_acpi.c @@ -67,19 +67,12 @@ pl061_acpi_probe(device_t dev) static int pl061_acpi_attach(device_t dev) { - int error; + struct pl061_softc *sc; - error = pl061_attach(dev); - if (error != 0) - return (error); + sc = device_get_softc(dev); + sc->sc_xref = ACPI_GPIO_XREF; - if (!intr_pic_register(dev, ACPI_GPIO_XREF)) { - device_printf(dev, "couldn't register PIC\n"); - pl061_detach(dev); - error = ENXIO; - } - - return (error); + return (pl061_attach(dev)); } static device_method_t pl061_acpi_methods[] = { diff --git a/sys/dev/gpio/pl061_fdt.c b/sys/dev/gpio/pl061_fdt.c index aa22298b43c6..681b3ccdfdeb 100644 --- a/sys/dev/gpio/pl061_fdt.c +++ b/sys/dev/gpio/pl061_fdt.c @@ -61,19 +61,12 @@ 
pl061_fdt_probe(device_t dev) static int pl061_fdt_attach(device_t dev) { - int error; + struct pl061_softc *sc; - error = pl061_attach(dev); - if (error != 0) - return (error); + sc = device_get_softc(dev); + sc->sc_xref = OF_xref_from_node(ofw_bus_get_node(dev)); - if (!intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev)))) { - device_printf(dev, "couldn't register PIC\n"); - pl061_detach(dev); - error = ENXIO; - } - - return (error); + return (pl061_attach(dev)); } static device_method_t pl061_fdt_methods[] = { diff --git a/sys/dev/gpio/qoriq_gpio.c b/sys/dev/gpio/qoriq_gpio.c index 25dfccede29f..8b44cd256c79 100644 --- a/sys/dev/gpio/qoriq_gpio.c +++ b/sys/dev/gpio/qoriq_gpio.c @@ -369,11 +369,6 @@ qoriq_gpio_attach(device_t dev) for (i = 0; i <= MAXPIN; i++) sc->sc_pins[i].gp_caps = DEFAULT_CAPS; - sc->busdev = gpiobus_attach_bus(dev); - if (sc->busdev == NULL) { - qoriq_gpio_detach(dev); - return (ENOMEM); - } /* * Enable the GPIO Input Buffer for all GPIOs. * This is safe on devices without a GPIBE register, because those @@ -384,6 +379,12 @@ qoriq_gpio_attach(device_t dev) OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + sc->busdev = gpiobus_attach_bus(dev); + if (sc->busdev == NULL) { + qoriq_gpio_detach(dev); + return (ENOMEM); + } + return (0); } diff --git a/sys/dev/hwt/hwt.c b/sys/dev/hwt/hwt.c new file mode 100644 index 000000000000..c476e6031ba8 --- /dev/null +++ b/sys/dev/hwt/hwt.c @@ -0,0 +1,242 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Hardware Tracing framework. + * + * The framework manages hardware tracing units that collect information + * about software execution and store it as events in highly compressed format + * into DRAM. The events cover information about control flow changes of a + * program, whether branches taken or not, exceptions taken, timing information, + * cycles elapsed and more. That allows us to restore entire program flow of a + * given application without performance impact. + * + * Design overview. 
+ * + * The framework provides character devices for mmap(2) and ioctl(2) system + * calls to allow the user to manage CPU (hardware) tracing units. + * + * /dev/hwt: + * .ioctl: + * hwt_ioctl(): + * a) HWT_IOC_ALLOC + * Allocates a kernel tracing context CTX based on the requested mode + * of operation. Verifies the information that comes with the + * request (pid, cpus), allocates a unique ID for the context. + * Creates a new character device for CTX management. + * + * /dev/hwt_%d[_%d], ident[, thread_id] + * .mmap + * Maps tracing buffers of the corresponding thread to userspace. + * .ioctl + * hwt_thread_ioctl(): + * a) HWT_IOC_START + * Enables the tracing unit for a given context. + * b) HWT_IOC_RECORD_GET + * Transfers (small) record entries collected during program + * execution for a given context to userspace, such as mapping + * tables of the executable and dynamic libraries, the interpreter, + * kernel mappings, tids of threads created, etc. + * c) HWT_IOC_SET_CONFIG + * Allows the user to specify a backend-specific configuration of the + * trace unit. + * d) HWT_IOC_WAKEUP + * Wakes up a thread that is currently sleeping. + * e) HWT_IOC_BUFPTR_GET + * Transfers the current hardware pointer in the filling buffer + * to userspace. + * f) HWT_IOC_SVC_BUF + * To avoid data loss, userspace may notify the kernel that it has + * copied out the given buffer, so the kernel is free to overwrite it. + * + * HWT context lifecycle in THREAD mode of operation: + * 1. User invokes the HWT_IOC_ALLOC ioctl with information about the pid to trace and + * the size of the buffers to allocate for the trace data. + * Some architectures may have several different tracing units supported, so the user + * also provides the backend name to use for this context, e.g. "coresight". + * 2. Kernel allocates the context and looks up the proc for the given pid. Then it + * creates the first hwt_thread in the context and allocates trace buffers for + * it. Immediately, the kernel initializes the tracing backend. + * Kernel creates a character device and returns the unique identifier of the + * trace context to the user. + * 3. To manage the new context, the user opens the character device created. + * User invokes the HWT_IOC_START ioctl; the kernel marks the context as RUNNING. + * At this point any HWT hook invocation by the scheduler enables/disables + * tracing for threads associated with the context (threads of the proc). + * Creation of new threads in the target proc invokes the + * corresponding hooks in the HWT framework, so that a new hwt_thread and buffers + * are allocated and a character device for mmap(2) is created on the fly. + * 4. User issues the HWT_IOC_RECORD_GET ioctl to fetch information about mapping + * tables and threads created during application startup. + * 5. User mmaps the tracing buffers of each thread to userspace (using the + * /dev/hwt_%d_%d % (ident, thread_id) character devices). + * 6. User can repeat 4 if an expected thread has not yet been created during target + * application execution. + * 7. User issues the HWT_IOC_BUFPTR_GET ioctl to get the current filling level of the + * hardware buffer of a given thread. + * 8. User invokes a trace decoder library to process the available data and see the + * results in human readable form. + * 9. User repeats 7 if needed. + * + * HWT context lifecycle in CPU mode of operation: + * 1. User invokes the HWT_IOC_ALLOC ioctl providing a set of CPUs to trace within a + * single CTX. + * 2. Kernel verifies the set of CPUs and allocates a tracing context, creating + * a buffer for each CPU. + * Kernel creates a character device for every CPU provided in the request. + * Kernel initializes the tracing backend.
+ * 3. User opens character devices of interest to map the buffers to userspace. + * User can start tracing by invoking HWT_IOC_START on any of character + * device within the context, entire context will be marked as RUNNING. + * 4. The rest is similar to the THREAD mode. + * + */ + +#include <sys/param.h> +#include <sys/conf.h> +#include <sys/eventhandler.h> +#include <sys/kernel.h> +#include <sys/module.h> + +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_owner.h> +#include <dev/hwt/hwt_ownerhash.h> +#include <dev/hwt/hwt_backend.h> +#include <dev/hwt/hwt_record.h> +#include <dev/hwt/hwt_ioctl.h> +#include <dev/hwt/hwt_hook.h> + +#define HWT_DEBUG +#undef HWT_DEBUG + +#ifdef HWT_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +static eventhandler_tag hwt_exit_tag; +static struct cdev *hwt_cdev; +static struct cdevsw hwt_cdevsw = { + .d_version = D_VERSION, + .d_name = "hwt", + .d_mmap_single = NULL, + .d_ioctl = hwt_ioctl +}; + +static void +hwt_process_exit(void *arg __unused, struct proc *p) +{ + struct hwt_owner *ho; + + /* Stop HWTs associated with exiting owner, if any. */ + ho = hwt_ownerhash_lookup(p); + if (ho) + hwt_owner_shutdown(ho); +} + +static int +hwt_load(void) +{ + struct make_dev_args args; + int error; + + make_dev_args_init(&args); + args.mda_devsw = &hwt_cdevsw; + args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; + args.mda_uid = UID_ROOT; + args.mda_gid = GID_WHEEL; + args.mda_mode = 0660; + args.mda_si_drv1 = NULL; + + hwt_backend_load(); + hwt_ctx_load(); + hwt_contexthash_load(); + hwt_ownerhash_load(); + hwt_record_load(); + + error = make_dev_s(&args, &hwt_cdev, "hwt"); + if (error != 0) + return (error); + + hwt_exit_tag = EVENTHANDLER_REGISTER(process_exit, hwt_process_exit, + NULL, EVENTHANDLER_PRI_ANY); + + hwt_hook_load(); + + return (0); +} + +static int +hwt_unload(void) +{ + + hwt_hook_unload(); + EVENTHANDLER_DEREGISTER(process_exit, hwt_exit_tag); + destroy_dev(hwt_cdev); + hwt_record_unload(); + hwt_ownerhash_unload(); + hwt_contexthash_unload(); + hwt_ctx_unload(); + hwt_backend_unload(); + + return (0); +} + +static int +hwt_modevent(module_t mod, int type, void *data) +{ + int error; + + switch (type) { + case MOD_LOAD: + error = hwt_load(); + break; + case MOD_UNLOAD: + error = hwt_unload(); + break; + default: + error = 0; + break; + } + + return (error); +} + +static moduledata_t hwt_mod = { + "hwt", + hwt_modevent, + NULL +}; + +DECLARE_MODULE(hwt, hwt_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); +MODULE_VERSION(hwt, 1); diff --git a/sys/dev/hwt/hwt_backend.c b/sys/dev/hwt/hwt_backend.c new file mode 100644 index 000000000000..1ba5db0d3d09 --- /dev/null +++ b/sys/dev/hwt/hwt_backend.c @@ -0,0 +1,289 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Hardware Trace (HWT) framework. */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/hwt.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_backend.h> + +#define HWT_BACKEND_DEBUG +#undef HWT_BACKEND_DEBUG + +#ifdef HWT_BACKEND_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +static struct mtx hwt_backend_mtx; + +struct hwt_backend_entry { + struct hwt_backend *backend; + LIST_ENTRY(hwt_backend_entry) next; +}; + +static LIST_HEAD(, hwt_backend_entry) hwt_backends; + +static MALLOC_DEFINE(M_HWT_BACKEND, "hwt_backend", "HWT backend"); + +int +hwt_backend_init(struct hwt_context *ctx) +{ + int error; + + dprintf("%s\n", __func__); + + error = ctx->hwt_backend->ops->hwt_backend_init(ctx); + + return (error); +} + +void +hwt_backend_deinit(struct hwt_context *ctx) +{ + + dprintf("%s\n", __func__); + + ctx->hwt_backend->ops->hwt_backend_deinit(ctx); +} + +int +hwt_backend_configure(struct hwt_context *ctx, int cpu_id, int thread_id) +{ + int error; + + dprintf("%s\n", __func__); + + error = ctx->hwt_backend->ops->hwt_backend_configure(ctx, cpu_id, + thread_id); + + return (error); +} + +void +hwt_backend_enable(struct hwt_context *ctx, int cpu_id) +{ + + dprintf("%s\n", __func__); + + ctx->hwt_backend->ops->hwt_backend_enable(ctx, cpu_id); +} + +void +hwt_backend_disable(struct hwt_context *ctx, int cpu_id) +{ + + dprintf("%s\n", __func__); + + ctx->hwt_backend->ops->hwt_backend_disable(ctx, cpu_id); +} + +void +hwt_backend_enable_smp(struct hwt_context *ctx) +{ + + dprintf("%s\n", __func__); + + ctx->hwt_backend->ops->hwt_backend_enable_smp(ctx); +} + +void +hwt_backend_disable_smp(struct hwt_context *ctx) +{ + + dprintf("%s\n", __func__); + + ctx->hwt_backend->ops->hwt_backend_disable_smp(ctx); +} + +void __unused +hwt_backend_dump(struct hwt_context *ctx, int cpu_id) +{ + + dprintf("%s\n", __func__); + + ctx->hwt_backend->ops->hwt_backend_dump(cpu_id); +} + +int +hwt_backend_read(struct hwt_context *ctx, struct hwt_vm *vm, int *ident, + vm_offset_t *offset, uint64_t *data) +{ + int error; + + dprintf("%s\n", __func__); + + error = ctx->hwt_backend->ops->hwt_backend_read(vm, ident, + offset, data); + + return (error); +} + +struct hwt_backend * +hwt_backend_lookup(const char *name) +{ + struct hwt_backend_entry *entry; + struct hwt_backend *backend; + + HWT_BACKEND_LOCK(); + 
LIST_FOREACH(entry, &hwt_backends, next) { + backend = entry->backend; + if (strcmp(backend->name, name) == 0) { + HWT_BACKEND_UNLOCK(); + return (backend); + } + } + HWT_BACKEND_UNLOCK(); + + return (NULL); +} + +int +hwt_backend_register(struct hwt_backend *backend) +{ + struct hwt_backend_entry *entry; + + if (backend == NULL || + backend->name == NULL || + backend->ops == NULL) + return (EINVAL); + + entry = malloc(sizeof(struct hwt_backend_entry), M_HWT_BACKEND, + M_WAITOK | M_ZERO); + entry->backend = backend; + + HWT_BACKEND_LOCK(); + LIST_INSERT_HEAD(&hwt_backends, entry, next); + HWT_BACKEND_UNLOCK(); + + return (0); +} + +int +hwt_backend_unregister(struct hwt_backend *backend) +{ + struct hwt_backend_entry *entry, *tmp; + + if (backend == NULL) + return (EINVAL); + + /* TODO: check if not in use */ + + HWT_BACKEND_LOCK(); + LIST_FOREACH_SAFE(entry, &hwt_backends, next, tmp) { + if (entry->backend == backend) { + LIST_REMOVE(entry, next); + HWT_BACKEND_UNLOCK(); + free(entry, M_HWT_BACKEND); + return (0); + } + } + HWT_BACKEND_UNLOCK(); + + return (ENOENT); +} + +void +hwt_backend_load(void) +{ + + mtx_init(&hwt_backend_mtx, "hwt backend", NULL, MTX_DEF); + LIST_INIT(&hwt_backends); +} + +void +hwt_backend_unload(void) +{ + + /* TODO: ensure all unregistered */ + + mtx_destroy(&hwt_backend_mtx); +} + +void +hwt_backend_stop(struct hwt_context *ctx) +{ + dprintf("%s\n", __func__); + + ctx->hwt_backend->ops->hwt_backend_stop(ctx); +} + +int +hwt_backend_svc_buf(struct hwt_context *ctx, void *data, size_t data_size, + int data_version) +{ + int error; + + dprintf("%s\n", __func__); + + error = ctx->hwt_backend->ops->hwt_backend_svc_buf(ctx, data, data_size, + data_version); + + return (error); +} + +int +hwt_backend_thread_alloc(struct hwt_context *ctx, struct hwt_thread *thr) +{ + int error; + + dprintf("%s\n", __func__); + + if (ctx->hwt_backend->ops->hwt_backend_thread_alloc == NULL) + return (0); + KASSERT(thr->private == NULL, + ("%s: thread private data is not NULL\n", __func__)); + error = ctx->hwt_backend->ops->hwt_backend_thread_alloc(thr); + + return (error); +} + +void +hwt_backend_thread_free(struct hwt_thread *thr) +{ + dprintf("%s\n", __func__); + + if (thr->backend->ops->hwt_backend_thread_free == NULL) + return; + KASSERT(thr->private != NULL, + ("%s: thread private data is NULL\n", __func__)); + thr->backend->ops->hwt_backend_thread_free(thr); + + return; +} diff --git a/sys/dev/hwt/hwt_backend.h b/sys/dev/hwt/hwt_backend.h new file mode 100644 index 000000000000..3b6c9442a7a6 --- /dev/null +++ b/sys/dev/hwt/hwt_backend.h @@ -0,0 +1,87 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_BACKEND_H_ +#define _DEV_HWT_HWT_BACKEND_H_ + +struct hwt_backend_ops { + int (*hwt_backend_init)(struct hwt_context *); + int (*hwt_backend_deinit)(struct hwt_context *); + int (*hwt_backend_configure)(struct hwt_context *, int cpu_id, + int thread_id); + int (*hwt_backend_svc_buf)(struct hwt_context *, void *data, + size_t data_size, int data_version); + void (*hwt_backend_enable)(struct hwt_context *, int cpu_id); + void (*hwt_backend_disable)(struct hwt_context *, int cpu_id); + int (*hwt_backend_read)(struct hwt_vm *, int *ident, + vm_offset_t *offset, uint64_t *data); + void (*hwt_backend_stop)(struct hwt_context *); + /* For backends that are tied to local CPU registers */ + int (*hwt_backend_enable_smp)(struct hwt_context *); + int (*hwt_backend_disable_smp)(struct hwt_context *); + /* Allocation and initialization of backend-specific thread data. */ + int (*hwt_backend_thread_alloc)(struct hwt_thread *); + void (*hwt_backend_thread_free)(struct hwt_thread *); + /* Debugging only. 
*/ + void (*hwt_backend_dump)(int cpu_id); +}; + +struct hwt_backend { + const char *name; + struct hwt_backend_ops *ops; + /* buffers require kernel virtual addresses */ + bool kva_req; +}; + +int hwt_backend_init(struct hwt_context *ctx); +void hwt_backend_deinit(struct hwt_context *ctx); +int hwt_backend_configure(struct hwt_context *ctx, int cpu_id, int thread_id); +void hwt_backend_enable(struct hwt_context *ctx, int cpu_id); +void hwt_backend_disable(struct hwt_context *ctx, int cpu_id); +void hwt_backend_enable_smp(struct hwt_context *ctx); +void hwt_backend_disable_smp(struct hwt_context *ctx); +void hwt_backend_dump(struct hwt_context *ctx, int cpu_id); +int hwt_backend_read(struct hwt_context *ctx, struct hwt_vm *vm, int *ident, + vm_offset_t *offset, uint64_t *data); +int hwt_backend_register(struct hwt_backend *); +int hwt_backend_unregister(struct hwt_backend *); +void hwt_backend_stop(struct hwt_context *); +int hwt_backend_svc_buf(struct hwt_context *ctx, void *data, size_t data_size, + int data_version); +struct hwt_backend * hwt_backend_lookup(const char *name); +int hwt_backend_thread_alloc(struct hwt_context *ctx, struct hwt_thread *); +void hwt_backend_thread_free(struct hwt_thread *); + +void hwt_backend_load(void); +void hwt_backend_unload(void); + +#define HWT_BACKEND_LOCK() mtx_lock(&hwt_backend_mtx) +#define HWT_BACKEND_UNLOCK() mtx_unlock(&hwt_backend_mtx) + +#endif /* !_DEV_HWT_HWT_BACKEND_H_ */ + diff --git a/sys/dev/hwt/hwt_config.c b/sys/dev/hwt/hwt_config.c new file mode 100644 index 000000000000..30688e7fc76b --- /dev/null +++ b/sys/dev/hwt/hwt_config.c @@ -0,0 +1,108 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
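A rough sketch of how a tracing driver might plug into the backend interface declared above, assuming a hypothetical "foo" trace unit; struct hwt_backend, struct hwt_backend_ops and hwt_backend_register()/hwt_backend_unregister() come from the header above, everything else is illustrative:

	static int
	foo_backend_init(struct hwt_context *ctx)
	{
		/* Program the foo trace unit for this context. */
		return (0);
	}

	static struct hwt_backend_ops foo_backend_ops = {
		.hwt_backend_init = foo_backend_init,
		/*
		 * Remaining callbacks elided; most are invoked unconditionally
		 * by the hwt_backend_*() wrappers, so a real backend must
		 * provide them.
		 */
	};

	static struct hwt_backend foo_backend = {
		.name = "foo",
		.ops = &foo_backend_ops,
		.kva_req = false,	/* trace buffers need no kernel mapping */
	};

	/* Typically done from the driver's MOD_LOAD handler: */
	error = hwt_backend_register(&foo_backend);

hwt_backend_register() rejects a NULL backend, name or ops with EINVAL; on module unload the driver drops the registration with hwt_backend_unregister(&foo_backend).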
+ */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/lock.h> +#include <sys/hwt.h> + +#include <vm/vm.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_record.h> + +#define HWT_MAXCONFIGSIZE PAGE_SIZE + +#define HWT_CONFIG_DEBUG +#undef HWT_CONFIG_DEBUG + +#ifdef HWT_CONFIG_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +static MALLOC_DEFINE(M_HWT_CONFIG, "hwt_config", "HWT config"); + +int +hwt_config_set(struct thread *td, struct hwt_context *ctx, + struct hwt_set_config *sconf) +{ + size_t config_size; + void *old_config; + void *config; + int error; + + config_size = sconf->config_size; + if (config_size == 0) + return (0); + + if (config_size > HWT_MAXCONFIGSIZE) + return (EFBIG); + + config = malloc(config_size, M_HWT_CONFIG, M_WAITOK | M_ZERO); + + error = copyin(sconf->config, config, config_size); + if (error) { + free(config, M_HWT_CONFIG); + return (error); + } + + HWT_CTX_LOCK(ctx); + old_config = ctx->config; + ctx->config = config; + ctx->config_size = sconf->config_size; + ctx->config_version = sconf->config_version; + HWT_CTX_UNLOCK(ctx); + + if (old_config != NULL) + free(old_config, M_HWT_CONFIG); + + return (error); +} + +void +hwt_config_free(struct hwt_context *ctx) +{ + + if (ctx->config == NULL) + return; + + free(ctx->config, M_HWT_CONFIG); + + ctx->config = NULL; +} diff --git a/sys/dev/hwt/hwt_config.h b/sys/dev/hwt/hwt_config.h new file mode 100644 index 000000000000..47485583063c --- /dev/null +++ b/sys/dev/hwt/hwt_config.h @@ -0,0 +1,36 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
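For reference, a rough sketch of how userspace could feed a backend-specific configuration blob to hwt_config_set() via the HWT_IOC_SET_CONFIG ioctl named in the design overview; the hwt_set_config field names are taken from the code above, while the file descriptor handling and the assumption that the ioctl takes this structure directly are illustrative:

	struct foo_trace_cfg cfg = { /* backend-defined layout */ };
	struct hwt_set_config sconf;

	sconf.config = &cfg;
	sconf.config_size = sizeof(cfg);	/* capped at HWT_MAXCONFIGSIZE (PAGE_SIZE) */
	sconf.config_version = 1;
	if (ioctl(ctx_fd, HWT_IOC_SET_CONFIG, &sconf) == -1)
		err(1, "HWT_IOC_SET_CONFIG");

The kernel copies the blob in with copyin(), so the buffer only has to stay valid for the duration of the ioctl; an oversized blob fails with EFBIG and a zero-sized one is accepted as a no-op.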
+ */ + +#ifndef _DEV_HWT_HWT_CONFIG_H_ +#define _DEV_HWT_HWT_CONFIG_H_ + +int hwt_config_set(struct thread *td, struct hwt_context *ctx, + struct hwt_set_config *sconf); +void hwt_config_free(struct hwt_context *ctx); + +#endif /* !_DEV_HWT_HWT_CONFIG_H_ */ diff --git a/sys/dev/hwt/hwt_context.c b/sys/dev/hwt/hwt_context.c new file mode 100644 index 000000000000..9af76cffc928 --- /dev/null +++ b/sys/dev/hwt/hwt_context.c @@ -0,0 +1,201 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/bitstring.h> +#include <sys/conf.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/hwt.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_owner.h> +#include <dev/hwt/hwt_vm.h> +#include <dev/hwt/hwt_cpu.h> + +#define HWT_DEBUG +#undef HWT_DEBUG + +#ifdef HWT_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) 
+#endif + +static MALLOC_DEFINE(M_HWT_CTX, "hwt_ctx", "Hardware Trace"); + +static bitstr_t *ident_set; +static int ident_set_size; +static struct mtx ident_set_mutex; + +static int +hwt_ctx_ident_alloc(int *new_ident) +{ + + mtx_lock(&ident_set_mutex); + bit_ffc(ident_set, ident_set_size, new_ident); + if (*new_ident == -1) { + mtx_unlock(&ident_set_mutex); + return (ENOMEM); + } + bit_set(ident_set, *new_ident); + mtx_unlock(&ident_set_mutex); + + return (0); +} + +static void +hwt_ctx_ident_free(int ident) +{ + + mtx_lock(&ident_set_mutex); + bit_clear(ident_set, ident); + mtx_unlock(&ident_set_mutex); +} + +int +hwt_ctx_alloc(struct hwt_context **ctx0) +{ + struct hwt_context *ctx; + int error; + + ctx = malloc(sizeof(struct hwt_context), M_HWT_CTX, M_WAITOK | M_ZERO); + + TAILQ_INIT(&ctx->records); + TAILQ_INIT(&ctx->threads); + TAILQ_INIT(&ctx->cpus); + mtx_init(&ctx->mtx, "ctx", NULL, MTX_SPIN); + mtx_init(&ctx->rec_mtx, "ctx_rec", NULL, MTX_DEF); + refcount_init(&ctx->refcnt, 0); + + error = hwt_ctx_ident_alloc(&ctx->ident); + if (error) { + printf("could not allocate ident bit str\n"); + return (error); + } + + *ctx0 = ctx; + + return (0); +} + +static void +hwt_ctx_free_cpus(struct hwt_context *ctx) +{ + struct hwt_cpu *cpu; + + do { + HWT_CTX_LOCK(ctx); + cpu = TAILQ_FIRST(&ctx->cpus); + if (cpu) + TAILQ_REMOVE(&ctx->cpus, cpu, next); + HWT_CTX_UNLOCK(ctx); + + if (cpu == NULL) + break; + + /* TODO: move vm_free() to cpu_free()? */ + hwt_vm_free(cpu->vm); + hwt_cpu_free(cpu); + } while (1); +} + +static void +hwt_ctx_free_threads(struct hwt_context *ctx) +{ + struct hwt_thread *thr; + + dprintf("%s: remove threads\n", __func__); + + do { + HWT_CTX_LOCK(ctx); + thr = TAILQ_FIRST(&ctx->threads); + if (thr) + TAILQ_REMOVE(&ctx->threads, thr, next); + HWT_CTX_UNLOCK(ctx); + + if (thr == NULL) + break; + + HWT_THR_LOCK(thr); + /* TODO: check if thr is sleeping before waking it up. */ + wakeup(thr); + HWT_THR_UNLOCK(thr); + + if (refcount_release(&thr->refcnt)) + hwt_thread_free(thr); + } while (1); +} + +void +hwt_ctx_free(struct hwt_context *ctx) +{ + + if (ctx->mode == HWT_MODE_CPU) + hwt_ctx_free_cpus(ctx); + else + hwt_ctx_free_threads(ctx); + + hwt_config_free(ctx); + hwt_ctx_ident_free(ctx->ident); + free(ctx, M_HWT_CTX); +} + +void +hwt_ctx_put(struct hwt_context *ctx) +{ + + refcount_release(&ctx->refcnt); +} + +void +hwt_ctx_load(void) +{ + + ident_set_size = (1 << 8); + ident_set = bit_alloc(ident_set_size, M_HWT_CTX, M_WAITOK); + mtx_init(&ident_set_mutex, "ident set", NULL, MTX_DEF); +} + +void +hwt_ctx_unload(void) +{ + + mtx_destroy(&ident_set_mutex); + free(ident_set, M_HWT_CTX); +} diff --git a/sys/dev/hwt/hwt_context.h b/sys/dev/hwt/hwt_context.h new file mode 100644 index 000000000000..cafb197ae348 --- /dev/null +++ b/sys/dev/hwt/hwt_context.h @@ -0,0 +1,86 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_CONTEXT_H_ +#define _DEV_HWT_HWT_CONTEXT_H_ + +enum hwt_ctx_state { + CTX_STATE_STOPPED, + CTX_STATE_RUNNING, +}; + +struct hwt_context { + TAILQ_HEAD(, hwt_record_entry) records; + + LIST_ENTRY(hwt_context) next_hch; /* Entry in contexthash. */ + LIST_ENTRY(hwt_context) next_hwts; /* Entry in ho->hwts. */ + + int mode; + int ident; + + int kqueue_fd; + struct thread *hwt_td; + + /* CPU mode. */ + cpuset_t cpu_map; + TAILQ_HEAD(, hwt_cpu) cpus; + + /* Thread mode. */ + struct proc *proc; /* Target proc. */ + pid_t pid; /* Target pid. */ + TAILQ_HEAD(, hwt_thread) threads; + int thread_counter; + int pause_on_mmap; + + size_t bufsize; /* Trace bufsize for each vm.*/ + + void *config; + size_t config_size; + int config_version; + + struct hwt_owner *hwt_owner; + struct hwt_backend *hwt_backend; + + struct mtx mtx; + struct mtx rec_mtx; + enum hwt_ctx_state state; + int refcnt; +}; + +#define HWT_CTX_LOCK(ctx) mtx_lock_spin(&(ctx)->mtx) +#define HWT_CTX_UNLOCK(ctx) mtx_unlock_spin(&(ctx)->mtx) +#define HWT_CTX_ASSERT_LOCKED(ctx) mtx_assert(&(ctx)->mtx, MA_OWNED) + +int hwt_ctx_alloc(struct hwt_context **ctx0); +void hwt_ctx_free(struct hwt_context *ctx); +void hwt_ctx_put(struct hwt_context *ctx); + +void hwt_ctx_load(void); +void hwt_ctx_unload(void); + +#endif /* !_DEV_HWT_HWT_CONTEXT_H_ */ diff --git a/sys/dev/hwt/hwt_contexthash.c b/sys/dev/hwt/hwt_contexthash.c new file mode 100644 index 000000000000..5682b7d38e5e --- /dev/null +++ b/sys/dev/hwt/hwt_contexthash.c @@ -0,0 +1,134 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/hwt.h> + +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_config.h> + +#define HWT_DEBUG +#undef HWT_DEBUG + +#ifdef HWT_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +#define HWT_CONTEXTHASH_SIZE 1024 + +static MALLOC_DEFINE(M_HWT_CONTEXTHASH, "hwt_chash", "Hardware Trace"); + +/* + * Hash function. Discard the lower 2 bits of the pointer since + * these are always zero for our uses. The hash multiplier is + * round((2^LONG_BIT) * ((sqrt(5)-1)/2)). + */ + +#define _HWT_HM 11400714819323198486u /* hash multiplier */ +#define HWT_HASH_PTR(P, M) ((((unsigned long) (P) >> 2) * _HWT_HM) & (M)) + +static struct mtx hwt_contexthash_mtx; +static u_long hwt_contexthashmask; +static LIST_HEAD(hwt_contexthash, hwt_context) *hwt_contexthash; + +/* + * To use by hwt_switch_in/out() and hwt_record() only. + * This function returns with refcnt acquired. + */ +struct hwt_context * +hwt_contexthash_lookup(struct proc *p) +{ + struct hwt_contexthash *hch; + struct hwt_context *ctx; + int hindex; + + hindex = HWT_HASH_PTR(p, hwt_contexthashmask); + hch = &hwt_contexthash[hindex]; + + HWT_CTXHASH_LOCK(); + LIST_FOREACH(ctx, hch, next_hch) { + if (ctx->proc == p) { + refcount_acquire(&ctx->refcnt); + HWT_CTXHASH_UNLOCK(); + return (ctx); + } + } + HWT_CTXHASH_UNLOCK(); + + return (NULL); +} + +void +hwt_contexthash_insert(struct hwt_context *ctx) +{ + struct hwt_contexthash *hch; + int hindex; + + hindex = HWT_HASH_PTR(ctx->proc, hwt_contexthashmask); + hch = &hwt_contexthash[hindex]; + + HWT_CTXHASH_LOCK(); + LIST_INSERT_HEAD(hch, ctx, next_hch); + HWT_CTXHASH_UNLOCK(); +} + +void +hwt_contexthash_remove(struct hwt_context *ctx) +{ + + HWT_CTXHASH_LOCK(); + LIST_REMOVE(ctx, next_hch); + HWT_CTXHASH_UNLOCK(); +} + +void +hwt_contexthash_load(void) +{ + + hwt_contexthash = hashinit(HWT_CONTEXTHASH_SIZE, M_HWT_CONTEXTHASH, + &hwt_contexthashmask); + mtx_init(&hwt_contexthash_mtx, "hwt ctx hash", "hwt ctx", MTX_SPIN); +} + +void +hwt_contexthash_unload(void) +{ + + mtx_destroy(&hwt_contexthash_mtx); + hashdestroy(hwt_contexthash, M_HWT_CONTEXTHASH, hwt_contexthashmask); +} diff --git a/sys/dev/hwt/hwt_contexthash.h b/sys/dev/hwt/hwt_contexthash.h new file mode 100644 index 000000000000..c3ab7acd2a74 --- /dev/null +++ b/sys/dev/hwt/hwt_contexthash.h @@ -0,0 +1,42 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_CONTEXTHASH_H_ +#define _DEV_HWT_HWT_CONTEXTHASH_H_ + +struct hwt_context * hwt_contexthash_lookup(struct proc *p); +void hwt_contexthash_insert(struct hwt_context *ctx); +void hwt_contexthash_remove(struct hwt_context *ctx); + +void hwt_contexthash_load(void); +void hwt_contexthash_unload(void); + +#define HWT_CTXHASH_LOCK() mtx_lock_spin(&hwt_contexthash_mtx) +#define HWT_CTXHASH_UNLOCK() mtx_unlock_spin(&hwt_contexthash_mtx) + +#endif /* !_DEV_HWT_HWT_CONTEXTHASH_H_ */ diff --git a/sys/dev/hwt/hwt_cpu.c b/sys/dev/hwt/hwt_cpu.c new file mode 100644 index 000000000000..7d38eb082e65 --- /dev/null +++ b/sys/dev/hwt/hwt_cpu.c @@ -0,0 +1,115 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/hwt.h> + +#include <vm/vm.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_record.h> +#include <dev/hwt/hwt_cpu.h> + +#define HWT_CPU_DEBUG +#undef HWT_CPU_DEBUG + +#ifdef HWT_CPU_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +static MALLOC_DEFINE(M_HWT_CPU, "hwt_cpu", "HWT cpu"); + +struct hwt_cpu * +hwt_cpu_alloc(void) +{ + struct hwt_cpu *cpu; + + cpu = malloc(sizeof(struct hwt_cpu), M_HWT_CPU, M_WAITOK | M_ZERO); + + return (cpu); +} + +void +hwt_cpu_free(struct hwt_cpu *cpu) +{ + + free(cpu, M_HWT_CPU); +} + +struct hwt_cpu * +hwt_cpu_first(struct hwt_context *ctx) +{ + struct hwt_cpu *cpu; + + HWT_CTX_ASSERT_LOCKED(ctx); + + cpu = TAILQ_FIRST(&ctx->cpus); + + KASSERT(cpu != NULL, ("cpu is NULL")); + + return (cpu); +} + +struct hwt_cpu * +hwt_cpu_get(struct hwt_context *ctx, int cpu_id) +{ + struct hwt_cpu *cpu, *tcpu; + + HWT_CTX_ASSERT_LOCKED(ctx); + + TAILQ_FOREACH_SAFE(cpu, &ctx->cpus, next, tcpu) { + KASSERT(cpu != NULL, ("cpu is NULL")); + if (cpu->cpu_id == cpu_id) { + return cpu; + } + } + + return (NULL); +} + +void +hwt_cpu_insert(struct hwt_context *ctx, struct hwt_cpu *cpu) +{ + + HWT_CTX_ASSERT_LOCKED(ctx); + + TAILQ_INSERT_TAIL(&ctx->cpus, cpu, next); +} diff --git a/sys/dev/hwt/hwt_cpu.h b/sys/dev/hwt/hwt_cpu.h new file mode 100644 index 000000000000..92b89229b6e4 --- /dev/null +++ b/sys/dev/hwt/hwt_cpu.h @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _DEV_HWT_HWT_CPU_H_ +#define _DEV_HWT_HWT_CPU_H_ + +struct hwt_cpu { + int cpu_id; + struct hwt_vm *vm; + TAILQ_ENTRY(hwt_cpu) next; +}; + +struct hwt_cpu * hwt_cpu_alloc(void); +void hwt_cpu_free(struct hwt_cpu *cpu); + +struct hwt_cpu * hwt_cpu_first(struct hwt_context *ctx); +struct hwt_cpu * hwt_cpu_get(struct hwt_context *ctx, int cpu_id); +void hwt_cpu_insert(struct hwt_context *ctx, struct hwt_cpu *cpu); + +#endif /* !_DEV_HWT_HWT_CPU_H_ */ diff --git a/sys/dev/hwt/hwt_hook.c b/sys/dev/hwt/hwt_hook.c new file mode 100644 index 000000000000..258279b14f20 --- /dev/null +++ b/sys/dev/hwt/hwt_hook.c @@ -0,0 +1,323 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Hardware Trace (HWT) framework. */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/refcount.h> +#include <sys/hwt.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_owner.h> +#include <dev/hwt/hwt_backend.h> +#include <dev/hwt/hwt_record.h> +#include <dev/hwt/hwt_vm.h> + +#define HWT_DEBUG +#undef HWT_DEBUG + +#ifdef HWT_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) 
+#endif + +static void +hwt_switch_in(struct thread *td) +{ + struct hwt_context *ctx; + struct hwt_thread *thr; + struct proc *p; + int cpu_id; + + p = td->td_proc; + + cpu_id = PCPU_GET(cpuid); + + ctx = hwt_contexthash_lookup(p); + if (ctx == NULL) + return; + + if (ctx->state != CTX_STATE_RUNNING) { + hwt_ctx_put(ctx); + return; + } + + thr = hwt_thread_lookup(ctx, td); + if (thr == NULL) { + hwt_ctx_put(ctx); + return; + } + + dprintf("%s: thr %p index %d tid %d on cpu_id %d\n", __func__, thr, + thr->thread_id, td->td_tid, cpu_id); + + hwt_backend_configure(ctx, cpu_id, thr->thread_id); + hwt_backend_enable(ctx, cpu_id); + + hwt_ctx_put(ctx); +} + +static void +hwt_switch_out(struct thread *td) +{ + struct hwt_context *ctx; + struct hwt_thread *thr; + struct proc *p; + int cpu_id; + + p = td->td_proc; + + cpu_id = PCPU_GET(cpuid); + + ctx = hwt_contexthash_lookup(p); + if (ctx == NULL) + return; + + if (ctx->state != CTX_STATE_RUNNING) { + hwt_ctx_put(ctx); + return; + } + thr = hwt_thread_lookup(ctx, td); + if (thr == NULL) { + hwt_ctx_put(ctx); + return; + } + + dprintf("%s: thr %p index %d tid %d on cpu_id %d\n", __func__, thr, + thr->thread_id, td->td_tid, cpu_id); + + hwt_backend_disable(ctx, cpu_id); + + hwt_ctx_put(ctx); +} + +static void +hwt_hook_thread_exit(struct thread *td) +{ + struct hwt_context *ctx; + struct hwt_thread *thr; + struct proc *p; + int cpu_id; + + p = td->td_proc; + + cpu_id = PCPU_GET(cpuid); + + ctx = hwt_contexthash_lookup(p); + if (ctx == NULL) + return; + + thr = hwt_thread_lookup(ctx, td); + if (thr == NULL) { + hwt_ctx_put(ctx); + return; + } + + thr->state = HWT_THREAD_STATE_EXITED; + + dprintf("%s: thr %p index %d tid %d on cpu_id %d\n", __func__, thr, + thr->thread_id, td->td_tid, cpu_id); + + if (ctx->state == CTX_STATE_RUNNING) + hwt_backend_disable(ctx, cpu_id); + + hwt_ctx_put(ctx); +} + +static void +hwt_hook_mmap(struct thread *td) +{ + struct hwt_context *ctx; + struct hwt_thread *thr; + struct proc *p; + int pause; + + p = td->td_proc; + + ctx = hwt_contexthash_lookup(p); + if (ctx == NULL) + return; + + /* The ctx state could be any here. */ + + pause = ctx->pause_on_mmap ? 1 : 0; + + thr = hwt_thread_lookup(ctx, td); + if (thr == NULL) { + hwt_ctx_put(ctx); + return; + } + + /* + * msleep(9) atomically releases the mtx lock, so take refcount + * to ensure that thr is not destroyed. + * It could not be destroyed prior to this call as we are holding ctx + * refcnt. + */ + refcount_acquire(&thr->refcnt); + hwt_ctx_put(ctx); + + if (pause) { + HWT_THR_LOCK(thr); + msleep(thr, &thr->mtx, PCATCH, "hwt-mmap", 0); + HWT_THR_UNLOCK(thr); + } + + if (refcount_release(&thr->refcnt)) + hwt_thread_free(thr); +} + +static int +hwt_hook_thread_create(struct thread *td) +{ + struct hwt_record_entry *entry; + struct hwt_context *ctx; + struct hwt_thread *thr; + char path[MAXPATHLEN]; + size_t bufsize; + struct proc *p; + int thread_id, kva_req; + int error; + + p = td->td_proc; + + /* Step 1. Get CTX and collect information needed. */ + ctx = hwt_contexthash_lookup(p); + if (ctx == NULL) + return (ENXIO); + thread_id = atomic_fetchadd_int(&ctx->thread_counter, 1); + bufsize = ctx->bufsize; + kva_req = ctx->hwt_backend->kva_req; + sprintf(path, "hwt_%d_%d", ctx->ident, thread_id); + hwt_ctx_put(ctx); + + /* Step 2. Allocate some memory without holding ctx ref. 
*/ + error = hwt_thread_alloc(&thr, path, bufsize, kva_req); + if (error) { + printf("%s: could not allocate thread, error %d\n", + __func__, error); + return (error); + } + + entry = hwt_record_entry_alloc(); + entry->record_type = HWT_RECORD_THREAD_CREATE; + entry->thread_id = thread_id; + + /* Step 3. Get CTX once again. */ + ctx = hwt_contexthash_lookup(p); + if (ctx == NULL) { + hwt_record_entry_free(entry); + hwt_thread_free(thr); + /* ctx->thread_counter does not matter. */ + return (ENXIO); + } + /* Allocate backend-specific thread data. */ + error = hwt_backend_thread_alloc(ctx, thr); + if (error != 0) { + dprintf("%s: failed to allocate backend thread data\n", + __func__); + return (error); + } + + thr->vm->ctx = ctx; + thr->ctx = ctx; + thr->backend = ctx->hwt_backend; + thr->thread_id = thread_id; + thr->td = td; + + HWT_CTX_LOCK(ctx); + hwt_thread_insert(ctx, thr, entry); + HWT_CTX_UNLOCK(ctx); + + /* Notify userspace. */ + hwt_record_wakeup(ctx); + + hwt_ctx_put(ctx); + + return (0); +} + +static void +hwt_hook_handler(struct thread *td, int func, void *arg) +{ + struct proc *p; + + p = td->td_proc; + if ((p->p_flag2 & P2_HWT) == 0) + return; + + switch (func) { + case HWT_SWITCH_IN: + hwt_switch_in(td); + break; + case HWT_SWITCH_OUT: + hwt_switch_out(td); + break; + case HWT_THREAD_CREATE: + hwt_hook_thread_create(td); + break; + case HWT_THREAD_SET_NAME: + /* TODO. */ + break; + case HWT_THREAD_EXIT: + hwt_hook_thread_exit(td); + break; + case HWT_EXEC: + case HWT_MMAP: + hwt_record_td(td, arg, M_WAITOK | M_ZERO); + hwt_hook_mmap(td); + break; + case HWT_RECORD: + hwt_record_td(td, arg, M_WAITOK | M_ZERO); + break; + }; +} + +void +hwt_hook_load(void) +{ + + hwt_hook = hwt_hook_handler; +} + +void +hwt_hook_unload(void) +{ + + hwt_hook = NULL; +} diff --git a/sys/dev/hwt/hwt_hook.h b/sys/dev/hwt/hwt_hook.h new file mode 100644 index 000000000000..a8eccba3ec43 --- /dev/null +++ b/sys/dev/hwt/hwt_hook.h @@ -0,0 +1,56 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/hwt_record.h> + +#ifndef _DEV_HWT_HWT_HOOK_H_ +#define _DEV_HWT_HWT_HOOK_H_ + +#define HWT_SWITCH_IN 0 +#define HWT_SWITCH_OUT 1 +#define HWT_THREAD_EXIT 2 +#define HWT_THREAD_CREATE 3 +#define HWT_THREAD_SET_NAME 4 +#define HWT_RECORD 5 +#define HWT_MMAP 6 +#define HWT_EXEC 7 + +#define HWT_CALL_HOOK(td, func, arg) \ +do { \ + if (hwt_hook != NULL) \ + (hwt_hook)((td), (func), (arg)); \ +} while (0) + +#define HWT_HOOK_INSTALLED (hwt_hook != NULL) + +extern void (*hwt_hook)(struct thread *td, int func, void *arg); + +void hwt_hook_load(void); +void hwt_hook_unload(void); + +#endif /* !_DEV_HWT_HWT_HOOK_H_ */ diff --git a/sys/dev/hwt/hwt_intr.h b/sys/dev/hwt/hwt_intr.h new file mode 100644 index 000000000000..e601969f001c --- /dev/null +++ b/sys/dev/hwt/hwt_intr.h @@ -0,0 +1,33 @@ +/*- + * Copyright (c) 2023-2025 Bojan Novković <bnovkov@freebsd.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_INTR_H_ +#define _DEV_HWT_HWT_INTR_H_ + +#include <machine/frame.h> + +extern int (*hwt_intr)(struct trapframe *tf); + +#endif /* !_DEV_HWT_HWT_INTR_H_ */ diff --git a/sys/dev/hwt/hwt_ioctl.c b/sys/dev/hwt/hwt_ioctl.c new file mode 100644 index 000000000000..592db4931bb4 --- /dev/null +++ b/sys/dev/hwt/hwt_ioctl.c @@ -0,0 +1,445 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* Hardware Trace (HWT) framework. */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/ioccom.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/mutex.h>
+#include <sys/refcount.h>
+#include <sys/rwlock.h>
+#include <sys/smp.h>
+#include <sys/hwt.h>
+
+#include <dev/hwt/hwt_hook.h>
+#include <dev/hwt/hwt_context.h>
+#include <dev/hwt/hwt_contexthash.h>
+#include <dev/hwt/hwt_config.h>
+#include <dev/hwt/hwt_cpu.h>
+#include <dev/hwt/hwt_thread.h>
+#include <dev/hwt/hwt_owner.h>
+#include <dev/hwt/hwt_ownerhash.h>
+#include <dev/hwt/hwt_backend.h>
+#include <dev/hwt/hwt_record.h>
+#include <dev/hwt/hwt_ioctl.h>
+#include <dev/hwt/hwt_vm.h>
+
+#define HWT_IOCTL_DEBUG
+#undef HWT_IOCTL_DEBUG
+
+#ifdef HWT_IOCTL_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+/* No real reason for these limitations, just sanity checks. */
+#define HWT_MAXBUFSIZE (32UL * 1024 * 1024 * 1024) /* 32 GB */
+
+static MALLOC_DEFINE(M_HWT_IOCTL, "hwt_ioctl", "Hardware Trace");
+
+/*
+ * Check if owner process *o can trace target process *t.
+ */
+
+static int
+hwt_priv_check(struct proc *o, struct proc *t)
+{
+	struct ucred *oc, *tc;
+	int error;
+	int i;
+
+	PROC_LOCK(o);
+	oc = o->p_ucred;
+	crhold(oc);
+	PROC_UNLOCK(o);
+
+	PROC_LOCK_ASSERT(t, MA_OWNED);
+	tc = t->p_ucred;
+	crhold(tc);
+
+	error = 0;
+
+	/*
+	 * The effective uid of the HWT owner should match at least one
+	 * of the effective / real / saved uids of the target process.
+	 */
+
+	if (oc->cr_uid != tc->cr_uid &&
+	    oc->cr_uid != tc->cr_svuid &&
+	    oc->cr_uid != tc->cr_ruid) {
+		error = EPERM;
+		goto done;
+	}
+
+	/*
+	 * Every one of the target's group ids must be in the owner's
+	 * group list.
+	 */
+	for (i = 0; i < tc->cr_ngroups; i++)
+		if (!groupmember(tc->cr_groups[i], oc)) {
+			error = EPERM;
+			goto done;
+		}
+
+	/* Check the real and saved GIDs too. */
+	if (!groupmember(tc->cr_rgid, oc) ||
+	    !groupmember(tc->cr_svgid, oc)) {
+		error = EPERM;
+		goto done;
+	}
+
+done:
+	crfree(tc);
+	crfree(oc);
+
+	return (error);
+}
+
+static int
+hwt_ioctl_alloc_mode_thread(struct thread *td, struct hwt_owner *ho,
+    struct hwt_backend *backend, struct hwt_alloc *halloc)
+{
+	struct thread **threads, *td1;
+	struct hwt_record_entry *entry;
+	struct hwt_context *ctx, *ctx1;
+	struct hwt_thread *thr;
+	char path[MAXPATHLEN];
+	struct proc *p;
+	int thread_id;
+	int error;
+	int cnt;
+	int i;
+
+	/* Check if the owner has this pid configured already. */
+	ctx = hwt_owner_lookup_ctx(ho, halloc->pid);
+	if (ctx)
+		return (EEXIST);
+
+	/* Allocate a new HWT context.
*/ + error = hwt_ctx_alloc(&ctx); + if (error) + return (error); + ctx->bufsize = halloc->bufsize; + ctx->pid = halloc->pid; + ctx->hwt_backend = backend; + ctx->hwt_owner = ho; + ctx->mode = HWT_MODE_THREAD; + ctx->hwt_td = td; + ctx->kqueue_fd = halloc->kqueue_fd; + + error = copyout(&ctx->ident, halloc->ident, sizeof(int)); + if (error) { + hwt_ctx_free(ctx); + return (error); + } + + /* Now get the victim proc. */ + p = pfind(halloc->pid); + if (p == NULL) { + hwt_ctx_free(ctx); + return (ENXIO); + } + + /* Ensure we can trace it. */ + error = hwt_priv_check(td->td_proc, p); + if (error) { + PROC_UNLOCK(p); + hwt_ctx_free(ctx); + return (error); + } + + /* Ensure it is not being traced already. */ + ctx1 = hwt_contexthash_lookup(p); + if (ctx1) { + refcount_release(&ctx1->refcnt); + PROC_UNLOCK(p); + hwt_ctx_free(ctx); + return (EEXIST); + } + + /* Allocate hwt threads and buffers. */ + + cnt = 0; + + FOREACH_THREAD_IN_PROC(p, td1) { + cnt += 1; + } + + KASSERT(cnt > 0, ("no threads")); + + threads = malloc(sizeof(struct thread *) * cnt, M_HWT_IOCTL, + M_NOWAIT | M_ZERO); + if (threads == NULL) { + PROC_UNLOCK(p); + hwt_ctx_free(ctx); + return (ENOMEM); + } + + i = 0; + + FOREACH_THREAD_IN_PROC(p, td1) { + threads[i++] = td1; + } + + ctx->proc = p; + PROC_UNLOCK(p); + + for (i = 0; i < cnt; i++) { + thread_id = atomic_fetchadd_int(&ctx->thread_counter, 1); + sprintf(path, "hwt_%d_%d", ctx->ident, thread_id); + + error = hwt_thread_alloc(&thr, path, ctx->bufsize, + ctx->hwt_backend->kva_req); + if (error) { + free(threads, M_HWT_IOCTL); + hwt_ctx_free(ctx); + return (error); + } + /* Allocate backend-specific thread data. */ + error = hwt_backend_thread_alloc(ctx, thr); + if (error != 0) { + dprintf("%s: failed to allocate thread backend data\n", + __func__); + free(threads, M_HWT_IOCTL); + hwt_ctx_free(ctx); + return (error); + } + + /* + * Insert a THREAD_CREATE record so userspace picks up + * the thread's tracing buffers. + */ + entry = hwt_record_entry_alloc(); + entry->record_type = HWT_RECORD_THREAD_CREATE; + entry->thread_id = thread_id; + + thr->vm->ctx = ctx; + thr->td = threads[i]; + thr->ctx = ctx; + thr->backend = ctx->hwt_backend; + thr->thread_id = thread_id; + + HWT_CTX_LOCK(ctx); + hwt_thread_insert(ctx, thr, entry); + HWT_CTX_UNLOCK(ctx); + } + + free(threads, M_HWT_IOCTL); + + error = hwt_backend_init(ctx); + if (error) { + hwt_ctx_free(ctx); + return (error); + } + + /* hwt_owner_insert_ctx? */ + mtx_lock(&ho->mtx); + LIST_INSERT_HEAD(&ho->hwts, ctx, next_hwts); + mtx_unlock(&ho->mtx); + + /* + * Hooks are now in action after this, but the ctx is not in RUNNING + * state. + */ + hwt_contexthash_insert(ctx); + + p = pfind(halloc->pid); + if (p) { + p->p_flag2 |= P2_HWT; + PROC_UNLOCK(p); + } + + return (0); +} + +static int +hwt_ioctl_alloc_mode_cpu(struct thread *td, struct hwt_owner *ho, + struct hwt_backend *backend, struct hwt_alloc *halloc) +{ + struct hwt_context *ctx; + struct hwt_cpu *cpu; + struct hwt_vm *vm; + char path[MAXPATHLEN]; + size_t cpusetsize; + cpuset_t cpu_map; + int cpu_count = 0; + int cpu_id; + int error; + + CPU_ZERO(&cpu_map); + cpusetsize = min(halloc->cpusetsize, sizeof(cpuset_t)); + error = copyin(halloc->cpu_map, &cpu_map, cpusetsize); + if (error) + return (error); + + CPU_FOREACH_ISSET(cpu_id, &cpu_map) { +#ifdef SMP + /* Ensure CPU is not halted. */ + if (CPU_ISSET(cpu_id, &hlt_cpus_mask)) + return (ENXIO); +#endif +#if 0 + /* TODO: Check if the owner have this cpu configured already. 
*/ + ctx = hwt_owner_lookup_ctx_by_cpu(ho, halloc->cpu); + if (ctx) + return (EEXIST); +#endif + + cpu_count++; + } + + if (cpu_count == 0) + return (ENODEV); + + /* Allocate a new HWT context. */ + error = hwt_ctx_alloc(&ctx); + if (error) + return (error); + ctx->bufsize = halloc->bufsize; + ctx->hwt_backend = backend; + ctx->hwt_owner = ho; + ctx->mode = HWT_MODE_CPU; + ctx->cpu_map = cpu_map; + ctx->hwt_td = td; + ctx->kqueue_fd = halloc->kqueue_fd; + + error = copyout(&ctx->ident, halloc->ident, sizeof(int)); + if (error) { + hwt_ctx_free(ctx); + return (error); + } + + CPU_FOREACH_ISSET(cpu_id, &cpu_map) { + sprintf(path, "hwt_%d_%d", ctx->ident, cpu_id); + error = hwt_vm_alloc(ctx->bufsize, ctx->hwt_backend->kva_req, + path, &vm); + if (error) { + /* TODO: remove all allocated cpus. */ + hwt_ctx_free(ctx); + return (error); + } + + cpu = hwt_cpu_alloc(); + cpu->cpu_id = cpu_id; + cpu->vm = vm; + + vm->cpu = cpu; + vm->ctx = ctx; + + HWT_CTX_LOCK(ctx); + hwt_cpu_insert(ctx, cpu); + HWT_CTX_UNLOCK(ctx); + } + + error = hwt_backend_init(ctx); + if (error) { + /* TODO: remove all allocated cpus. */ + hwt_ctx_free(ctx); + return (error); + } + + /* hwt_owner_insert_ctx? */ + mtx_lock(&ho->mtx); + LIST_INSERT_HEAD(&ho->hwts, ctx, next_hwts); + mtx_unlock(&ho->mtx); + + hwt_record_kernel_objects(ctx); + + return (0); +} + +static int +hwt_ioctl_alloc(struct thread *td, struct hwt_alloc *halloc) +{ + char backend_name[HWT_BACKEND_MAXNAMELEN]; + struct hwt_backend *backend; + struct hwt_owner *ho; + int error; + + if (halloc->bufsize > HWT_MAXBUFSIZE) + return (EINVAL); + if (halloc->bufsize % PAGE_SIZE) + return (EINVAL); + if (halloc->backend_name == NULL) + return (EINVAL); + + error = copyinstr(halloc->backend_name, (void *)backend_name, + HWT_BACKEND_MAXNAMELEN, NULL); + if (error) + return (error); + + backend = hwt_backend_lookup(backend_name); + if (backend == NULL) + return (ENODEV); + + /* First get the owner. */ + ho = hwt_ownerhash_lookup(td->td_proc); + if (ho == NULL) { + /* Create a new owner. */ + ho = hwt_owner_alloc(td->td_proc); + if (ho == NULL) + return (ENOMEM); + hwt_ownerhash_insert(ho); + } + + switch (halloc->mode) { + case HWT_MODE_THREAD: + error = hwt_ioctl_alloc_mode_thread(td, ho, backend, halloc); + break; + case HWT_MODE_CPU: + error = hwt_ioctl_alloc_mode_cpu(td, ho, backend, halloc); + break; + default: + error = ENXIO; + }; + + return (error); +} + +int +hwt_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, + struct thread *td) +{ + int error; + + switch (cmd) { + case HWT_IOC_ALLOC: + /* Allocate HWT context. */ + error = hwt_ioctl_alloc(td, (struct hwt_alloc *)addr); + return (error); + default: + return (ENXIO); + }; +} diff --git a/sys/dev/hwt/hwt_ioctl.h b/sys/dev/hwt/hwt_ioctl.h new file mode 100644 index 000000000000..ce4270dc0d44 --- /dev/null +++ b/sys/dev/hwt/hwt_ioctl.h @@ -0,0 +1,35 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_IOCTL_H +#define _DEV_HWT_HWT_IOCTL_H + +int hwt_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, + struct thread *td); + +#endif /* !_DEV_HWT_HWT_IOCTL_H */ diff --git a/sys/dev/hwt/hwt_owner.c b/sys/dev/hwt/hwt_owner.c new file mode 100644 index 000000000000..3c82040578de --- /dev/null +++ b/sys/dev/hwt/hwt_owner.c @@ -0,0 +1,157 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/hwt.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_cpu.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_owner.h> +#include <dev/hwt/hwt_ownerhash.h> +#include <dev/hwt/hwt_backend.h> +#include <dev/hwt/hwt_vm.h> +#include <dev/hwt/hwt_record.h> + +#define HWT_DEBUG +#undef HWT_DEBUG + +#ifdef HWT_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +static MALLOC_DEFINE(M_HWT_OWNER, "hwt_owner", "Hardware Trace"); + +struct hwt_context * +hwt_owner_lookup_ctx(struct hwt_owner *ho, pid_t pid) +{ + struct hwt_context *ctx; + + mtx_lock(&ho->mtx); + LIST_FOREACH(ctx, &ho->hwts, next_hwts) { + if (ctx->pid == pid) { + mtx_unlock(&ho->mtx); + return (ctx); + } + } + mtx_unlock(&ho->mtx); + + return (NULL); +} + +#if 0 +struct hwt_context * +hwt_owner_lookup_ctx_by_cpu(struct hwt_owner *ho, int cpu) +{ + struct hwt_context *ctx; + + mtx_lock(&ho->mtx); + LIST_FOREACH(ctx, &ho->hwts, next_hwts) { + if (ctx->cpu == cpu) { + mtx_unlock(&ho->mtx); + return (ctx); + } + } + mtx_unlock(&ho->mtx); + + return (NULL); +} +#endif + +struct hwt_owner * +hwt_owner_alloc(struct proc *p) +{ + struct hwt_owner *ho; + + ho = malloc(sizeof(struct hwt_owner), M_HWT_OWNER, + M_WAITOK | M_ZERO); + ho->p = p; + + LIST_INIT(&ho->hwts); + mtx_init(&ho->mtx, "hwts", NULL, MTX_DEF); + + return (ho); +} + +void +hwt_owner_shutdown(struct hwt_owner *ho) +{ + struct hwt_context *ctx; + + dprintf("%s: stopping hwt owner\n", __func__); + + while (1) { + mtx_lock(&ho->mtx); + ctx = LIST_FIRST(&ho->hwts); + if (ctx) + LIST_REMOVE(ctx, next_hwts); + mtx_unlock(&ho->mtx); + + if (ctx == NULL) + break; + + if (ctx->mode == HWT_MODE_THREAD) + hwt_contexthash_remove(ctx); + + /* + * A hook could be still dealing with this ctx right here. + */ + + HWT_CTX_LOCK(ctx); + ctx->state = 0; + HWT_CTX_UNLOCK(ctx); + + /* Ensure hooks invocation is now completed. */ + while (refcount_load(&ctx->refcnt) > 0) + continue; + + /* + * Note that a thread could be still sleeping on msleep(9). + */ + + hwt_backend_deinit(ctx); + hwt_record_free_all(ctx); + hwt_ctx_free(ctx); + } + + hwt_ownerhash_remove(ho); + free(ho, M_HWT_OWNER); +} diff --git a/sys/dev/hwt/hwt_owner.h b/sys/dev/hwt/hwt_owner.h new file mode 100644 index 000000000000..2ac569a55050 --- /dev/null +++ b/sys/dev/hwt/hwt_owner.h @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_OWNER_H_ +#define _DEV_HWT_HWT_OWNER_H_ + +struct hwt_owner { + struct proc *p; + struct mtx mtx; /* Protects hwts. */ + LIST_HEAD(, hwt_context) hwts; /* Owned HWTs. */ + LIST_ENTRY(hwt_owner) next; /* Entry in hwt owner hash. */ +}; + + +struct hwt_context * hwt_owner_lookup_ctx(struct hwt_owner *ho, pid_t pid); +struct hwt_owner * hwt_owner_alloc(struct proc *p); +void hwt_owner_shutdown(struct hwt_owner *ho); +struct hwt_context * hwt_owner_lookup_ctx_by_cpu(struct hwt_owner *ho, int cpu); + +#endif /* !_DEV_HWT_HWT_OWNER_H_ */ diff --git a/sys/dev/hwt/hwt_ownerhash.c b/sys/dev/hwt/hwt_ownerhash.c new file mode 100644 index 000000000000..7c9e2232bac4 --- /dev/null +++ b/sys/dev/hwt/hwt_ownerhash.c @@ -0,0 +1,141 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/hwt.h> + +#include <dev/hwt/hwt_owner.h> +#include <dev/hwt/hwt_ownerhash.h> + +#define HWT_DEBUG +#undef HWT_DEBUG + +#ifdef HWT_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) 
+#endif + +#define HWT_OWNERHASH_SIZE 1024 + +static MALLOC_DEFINE(M_HWT_OWNERHASH, "hwt_ohash", "Hardware Trace"); + +/* + * Hash function. Discard the lower 2 bits of the pointer since + * these are always zero for our uses. The hash multiplier is + * round((2^LONG_BIT) * ((sqrt(5)-1)/2)). + */ + +#define _HWT_HM 11400714819323198486u /* hash multiplier */ +#define HWT_HASH_PTR(P, M) ((((unsigned long) (P) >> 2) * _HWT_HM) & (M)) + +static struct mtx hwt_ownerhash_mtx; +static u_long hwt_ownerhashmask; +static LIST_HEAD(hwt_ownerhash, hwt_owner) *hwt_ownerhash; + +struct hwt_owner * +hwt_ownerhash_lookup(struct proc *p) +{ + struct hwt_ownerhash *hoh; + struct hwt_owner *ho; + int hindex; + + hindex = HWT_HASH_PTR(p, hwt_ownerhashmask); + hoh = &hwt_ownerhash[hindex]; + + HWT_OWNERHASH_LOCK(); + LIST_FOREACH(ho, hoh, next) { + if (ho->p == p) { + HWT_OWNERHASH_UNLOCK(); + return (ho); + } + } + HWT_OWNERHASH_UNLOCK(); + + return (NULL); +} + +void +hwt_ownerhash_insert(struct hwt_owner *ho) +{ + struct hwt_ownerhash *hoh; + int hindex; + + hindex = HWT_HASH_PTR(ho->p, hwt_ownerhashmask); + hoh = &hwt_ownerhash[hindex]; + + HWT_OWNERHASH_LOCK(); + LIST_INSERT_HEAD(hoh, ho, next); + HWT_OWNERHASH_UNLOCK(); +} + +void +hwt_ownerhash_remove(struct hwt_owner *ho) +{ + + /* Destroy hwt owner. */ + HWT_OWNERHASH_LOCK(); + LIST_REMOVE(ho, next); + HWT_OWNERHASH_UNLOCK(); +} + +void +hwt_ownerhash_load(void) +{ + + hwt_ownerhash = hashinit(HWT_OWNERHASH_SIZE, M_HWT_OWNERHASH, + &hwt_ownerhashmask); + mtx_init(&hwt_ownerhash_mtx, "hwt-owner-hash", "hwt-owner", MTX_DEF); +} + +void +hwt_ownerhash_unload(void) +{ + struct hwt_ownerhash *hoh; + struct hwt_owner *ho, *tmp; + + HWT_OWNERHASH_LOCK(); + for (hoh = hwt_ownerhash; + hoh <= &hwt_ownerhash[hwt_ownerhashmask]; + hoh++) { + LIST_FOREACH_SAFE(ho, hoh, next, tmp) { + /* TODO: module is in use ? */ + } + } + HWT_OWNERHASH_UNLOCK(); + + mtx_destroy(&hwt_ownerhash_mtx); + hashdestroy(hwt_ownerhash, M_HWT_OWNERHASH, hwt_ownerhashmask); +} diff --git a/sys/dev/hwt/hwt_ownerhash.h b/sys/dev/hwt/hwt_ownerhash.h new file mode 100644 index 000000000000..4a7bc958d0f7 --- /dev/null +++ b/sys/dev/hwt/hwt_ownerhash.h @@ -0,0 +1,42 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_OWNERHASH_H_ +#define _DEV_HWT_HWT_OWNERHASH_H_ + +struct hwt_owner * hwt_ownerhash_lookup(struct proc *p); +void hwt_ownerhash_insert(struct hwt_owner *ho); +void hwt_ownerhash_remove(struct hwt_owner *ho); + +void hwt_ownerhash_load(void); +void hwt_ownerhash_unload(void); + +#define HWT_OWNERHASH_LOCK() mtx_lock(&hwt_ownerhash_mtx) +#define HWT_OWNERHASH_UNLOCK() mtx_unlock(&hwt_ownerhash_mtx) + +#endif /* !_DEV_HWT_HWT_OWNERHASH_H_ */ diff --git a/sys/dev/hwt/hwt_record.c b/sys/dev/hwt/hwt_record.c new file mode 100644 index 000000000000..850ea6f8c5be --- /dev/null +++ b/sys/dev/hwt/hwt_record.c @@ -0,0 +1,302 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/hwt.h> +#include <sys/linker.h> +#include <sys/pmckern.h> /* linker_hwpmc_list_objects */ + +#include <vm/vm.h> +#include <vm/uma.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_record.h> + +#define HWT_RECORD_DEBUG +#undef HWT_RECORD_DEBUG + +#ifdef HWT_RECORD_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) 
+#endif + +static MALLOC_DEFINE(M_HWT_RECORD, "hwt_record", "Hardware Trace"); +static uma_zone_t record_zone = NULL; + +static struct hwt_record_entry * +hwt_record_clone(struct hwt_record_entry *ent, int flags) +{ + struct hwt_record_entry *entry; + + entry = uma_zalloc(record_zone, flags); + if (entry == NULL) + return (NULL); + memcpy(entry, ent, sizeof(struct hwt_record_entry)); + switch (ent->record_type) { + case HWT_RECORD_MMAP: + case HWT_RECORD_EXECUTABLE: + case HWT_RECORD_KERNEL: + entry->fullpath = strdup(ent->fullpath, M_HWT_RECORD); + break; + default: + break; + } + + return (entry); +} + +static void +hwt_record_to_user(struct hwt_record_entry *ent, + struct hwt_record_user_entry *usr) +{ + usr->record_type = ent->record_type; + switch (ent->record_type) { + case HWT_RECORD_MMAP: + case HWT_RECORD_EXECUTABLE: + case HWT_RECORD_KERNEL: + usr->addr = ent->addr; + usr->baseaddr = ent->baseaddr; + strncpy(usr->fullpath, ent->fullpath, MAXPATHLEN); + break; + case HWT_RECORD_BUFFER: + usr->buf_id = ent->buf_id; + usr->curpage = ent->curpage; + usr->offset = ent->offset; + break; + case HWT_RECORD_THREAD_CREATE: + case HWT_RECORD_THREAD_SET_NAME: + usr->thread_id = ent->thread_id; + break; + default: + break; + } +} + +void +hwt_record_load(void) +{ + record_zone = uma_zcreate("HWT records", + sizeof(struct hwt_record_entry), NULL, NULL, NULL, NULL, 0, 0); +} + +void +hwt_record_unload(void) +{ + uma_zdestroy(record_zone); +} + +void +hwt_record_ctx(struct hwt_context *ctx, struct hwt_record_entry *ent, int flags) +{ + struct hwt_record_entry *entry; + + KASSERT(ent != NULL, ("ent is NULL")); + entry = hwt_record_clone(ent, flags); + if (entry == NULL) { + /* XXX: Not sure what to do here other than logging an error. */ + return; + } + + HWT_CTX_LOCK(ctx); + TAILQ_INSERT_TAIL(&ctx->records, entry, next); + HWT_CTX_UNLOCK(ctx); + hwt_record_wakeup(ctx); +} + +void +hwt_record_td(struct thread *td, struct hwt_record_entry *ent, int flags) +{ + struct hwt_record_entry *entry; + struct hwt_context *ctx; + struct proc *p; + + p = td->td_proc; + + KASSERT(ent != NULL, ("ent is NULL")); + entry = hwt_record_clone(ent, flags); + if (entry == NULL) { + /* XXX: Not sure what to do here other than logging an error. */ + return; + } + ctx = hwt_contexthash_lookup(p); + if (ctx == NULL) { + hwt_record_entry_free(entry); + return; + } + HWT_CTX_LOCK(ctx); + TAILQ_INSERT_TAIL(&ctx->records, entry, next); + HWT_CTX_UNLOCK(ctx); + hwt_record_wakeup(ctx); + + hwt_ctx_put(ctx); +} + +struct hwt_record_entry * +hwt_record_entry_alloc(void) +{ + return (uma_zalloc(record_zone, M_WAITOK | M_ZERO)); +} + +void +hwt_record_entry_free(struct hwt_record_entry *entry) +{ + + switch (entry->record_type) { + case HWT_RECORD_MMAP: + case HWT_RECORD_EXECUTABLE: + case HWT_RECORD_KERNEL: + free(entry->fullpath, M_HWT_RECORD); + break; + default: + break; + } + + uma_zfree(record_zone, entry); +} + +static int +hwt_record_grab(struct hwt_context *ctx, + struct hwt_record_user_entry *user_entry, int nitems_req, int wait) +{ + struct hwt_record_entry *entry; + int i; + + if (wait) { + mtx_lock(&ctx->rec_mtx); + if (TAILQ_FIRST(&ctx->records) == NULL) { + /* Wait until we have new records. 
*/ + msleep(ctx, &ctx->rec_mtx, PCATCH, "recsnd", 0); + } + mtx_unlock(&ctx->rec_mtx); + } + + for (i = 0; i < nitems_req; i++) { + HWT_CTX_LOCK(ctx); + entry = TAILQ_FIRST(&ctx->records); + if (entry) + TAILQ_REMOVE_HEAD(&ctx->records, next); + HWT_CTX_UNLOCK(ctx); + + if (entry == NULL) + break; + hwt_record_to_user(entry, &user_entry[i]); + hwt_record_entry_free(entry); + } + + return (i); +} + +void +hwt_record_free_all(struct hwt_context *ctx) +{ + struct hwt_record_entry *entry; + + while (1) { + HWT_CTX_LOCK(ctx); + entry = TAILQ_FIRST(&ctx->records); + if (entry) + TAILQ_REMOVE_HEAD(&ctx->records, next); + HWT_CTX_UNLOCK(ctx); + + if (entry == NULL) + break; + + hwt_record_entry_free(entry); + } +} + +int +hwt_record_send(struct hwt_context *ctx, struct hwt_record_get *record_get) +{ + struct hwt_record_user_entry *user_entry; + int nitems_req; + int error; + int i; + + nitems_req = 0; + + error = copyin(record_get->nentries, &nitems_req, sizeof(int)); + if (error) + return (error); + + if (nitems_req < 1 || nitems_req > 1024) + return (ENXIO); + + user_entry = malloc(sizeof(struct hwt_record_user_entry) * nitems_req, + M_HWT_RECORD, M_WAITOK | M_ZERO); + + i = hwt_record_grab(ctx, user_entry, nitems_req, record_get->wait); + if (i > 0) + error = copyout(user_entry, record_get->records, + sizeof(struct hwt_record_user_entry) * i); + + if (error == 0) + error = copyout(&i, record_get->nentries, sizeof(int)); + + free(user_entry, M_HWT_RECORD); + + return (error); +} + +void +hwt_record_kernel_objects(struct hwt_context *ctx) +{ + struct hwt_record_entry *entry; + struct pmckern_map_in *kobase; + int i; + + kobase = linker_hwpmc_list_objects(); + for (i = 0; kobase[i].pm_file != NULL; i++) { + entry = hwt_record_entry_alloc(); + entry->record_type = HWT_RECORD_KERNEL; + entry->fullpath = strdup(kobase[i].pm_file, M_HWT_RECORD); + entry->addr = kobase[i].pm_address; + + HWT_CTX_LOCK(ctx); + TAILQ_INSERT_HEAD(&ctx->records, entry, next); + HWT_CTX_UNLOCK(ctx); + } + free(kobase, M_LINKER); +} + +void +hwt_record_wakeup(struct hwt_context *ctx) +{ + wakeup(ctx); +} diff --git a/sys/dev/hwt/hwt_record.h b/sys/dev/hwt/hwt_record.h new file mode 100644 index 000000000000..3f347ca67d54 --- /dev/null +++ b/sys/dev/hwt/hwt_record.h @@ -0,0 +1,47 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_RECORD_H_ +#define _DEV_HWT_HWT_RECORD_H_ + +struct hwt_record_get; + +void hwt_record_load(void); +void hwt_record_unload(void); + +int hwt_record_send(struct hwt_context *ctx, struct hwt_record_get *record_get); +void hwt_record_td(struct thread *td, struct hwt_record_entry *ent, int flags); +void hwt_record_ctx(struct hwt_context *ctx, struct hwt_record_entry *ent, + int flags); +struct hwt_record_entry * hwt_record_entry_alloc(void); +void hwt_record_entry_free(struct hwt_record_entry *entry); +void hwt_record_kernel_objects(struct hwt_context *ctx); +void hwt_record_free_all(struct hwt_context *ctx); +void hwt_record_wakeup(struct hwt_context *ctx); + +#endif /* !_DEV_HWT_HWT_RECORD_H_ */ diff --git a/sys/dev/hwt/hwt_thread.c b/sys/dev/hwt/hwt_thread.c new file mode 100644 index 000000000000..827c068a681f --- /dev/null +++ b/sys/dev/hwt/hwt_thread.c @@ -0,0 +1,162 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
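Based on the hwt_record_send() handler above, a hypothetical userspace consumer could drain queued trace records as sketched below; the header name, the device path handling and the exact layout of struct hwt_record_get are inferred from the kernel code rather than taken from an installed header:

        #include <sys/types.h>
        #include <sys/ioctl.h>
        #include <sys/hwt.h>    /* assumed to declare HWT_IOC_* and the record structs */
        #include <fcntl.h>

        static int
        drain_records(int fd)
        {
                struct hwt_record_user_entry entries[64];
                struct hwt_record_get rget;
                int n = 64;                     /* kernel accepts 1..1024 */

                rget.records = entries;
                rget.nentries = &n;
                rget.wait = 1;                  /* sleep until at least one record is queued */
                if (ioctl(fd, HWT_IOC_RECORD_GET, &rget) == -1)
                        return (-1);
                return (n);                     /* number of entries copied out */
        }

Here fd is an open descriptor for the per-buffer device node created by hwt_vm_create_cdev() later in this series.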
+ */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/hwt.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_object.h> +#include <vm/vm_pager.h> +#include <vm/vm_pageout.h> +#include <vm/vm_phys.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_owner.h> +#include <dev/hwt/hwt_ownerhash.h> +#include <dev/hwt/hwt_backend.h> +#include <dev/hwt/hwt_vm.h> +#include <dev/hwt/hwt_record.h> + +#define HWT_THREAD_DEBUG +#undef HWT_THREAD_DEBUG + +#ifdef HWT_THREAD_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +static MALLOC_DEFINE(M_HWT_THREAD, "hwt_thread", "Hardware Trace"); + +struct hwt_thread * +hwt_thread_first(struct hwt_context *ctx) +{ + struct hwt_thread *thr; + + HWT_CTX_ASSERT_LOCKED(ctx); + + thr = TAILQ_FIRST(&ctx->threads); + + KASSERT(thr != NULL, ("thr is NULL")); + + return (thr); +} + +/* + * To use by hwt_switch_in/out() only. + */ +struct hwt_thread * +hwt_thread_lookup(struct hwt_context *ctx, struct thread *td) +{ + struct hwt_thread *thr; + + /* Caller of this func holds ctx refcnt right here. */ + + HWT_CTX_LOCK(ctx); + TAILQ_FOREACH(thr, &ctx->threads, next) { + if (thr->td == td) { + HWT_CTX_UNLOCK(ctx); + return (thr); + } + } + HWT_CTX_UNLOCK(ctx); + + /* + * We are here because the hook on thread creation failed to allocate + * a thread. + */ + + return (NULL); +} + +int +hwt_thread_alloc(struct hwt_thread **thr0, char *path, size_t bufsize, + int kva_req) +{ + struct hwt_thread *thr; + struct hwt_vm *vm; + int error; + + error = hwt_vm_alloc(bufsize, kva_req, path, &vm); + if (error) + return (error); + + thr = malloc(sizeof(struct hwt_thread), M_HWT_THREAD, + M_WAITOK | M_ZERO); + thr->vm = vm; + + mtx_init(&thr->mtx, "thr", NULL, MTX_DEF); + + refcount_init(&thr->refcnt, 1); + + vm->thr = thr; + + *thr0 = thr; + + return (0); +} + +void +hwt_thread_free(struct hwt_thread *thr) +{ + + hwt_vm_free(thr->vm); + /* Free private backend data, if any. */ + if (thr->private != NULL) + hwt_backend_thread_free(thr); + free(thr, M_HWT_THREAD); +} + +/* + * Inserts a new thread and a thread creation record into the + * context notifies userspace about the newly created thread. + */ +void +hwt_thread_insert(struct hwt_context *ctx, struct hwt_thread *thr, + struct hwt_record_entry *entry) +{ + + HWT_CTX_ASSERT_LOCKED(ctx); + TAILQ_INSERT_TAIL(&ctx->threads, thr, next); + TAILQ_INSERT_TAIL(&ctx->records, entry, next); +} diff --git a/sys/dev/hwt/hwt_thread.h b/sys/dev/hwt/hwt_thread.h new file mode 100644 index 000000000000..ccc29aeb3494 --- /dev/null +++ b/sys/dev/hwt/hwt_thread.h @@ -0,0 +1,64 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_THREAD_H_ +#define _DEV_HWT_HWT_THREAD_H_ + +struct hwt_record_entry; + +struct hwt_thread { + struct hwt_vm *vm; + struct hwt_context *ctx; + struct hwt_backend *backend; + struct thread *td; + TAILQ_ENTRY(hwt_thread) next; + int thread_id; + int state; +#define HWT_THREAD_STATE_EXITED (1 << 0) + struct mtx mtx; + u_int refcnt; + int cpu_id; /* last cpu_id */ + void *private; /* backend-specific private data */ +}; + +/* Thread allocation. */ +int hwt_thread_alloc(struct hwt_thread **thr0, char *path, size_t bufsize, + int kva_req); +void hwt_thread_free(struct hwt_thread *thr); + +/* Thread list mgt. */ +void hwt_thread_insert(struct hwt_context *ctx, struct hwt_thread *thr, struct hwt_record_entry *entry); +struct hwt_thread * hwt_thread_first(struct hwt_context *ctx); +struct hwt_thread * hwt_thread_lookup(struct hwt_context *ctx, + struct thread *td); + +#define HWT_THR_LOCK(thr) mtx_lock(&(thr)->mtx) +#define HWT_THR_UNLOCK(thr) mtx_unlock(&(thr)->mtx) +#define HWT_THR_ASSERT_LOCKED(thr) mtx_assert(&(thr)->mtx, MA_OWNED) + +#endif /* !_DEV_HWT_HWT_THREAD_H_ */ diff --git a/sys/dev/hwt/hwt_vm.c b/sys/dev/hwt/hwt_vm.c new file mode 100644 index 000000000000..6c55e218dcec --- /dev/null +++ b/sys/dev/hwt/hwt_vm.c @@ -0,0 +1,503 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
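Putting hwt_thread.c and hwt_record.c together, the thread-creation path presumably looks roughly like the sketch below; the real code lives in hwt_hook.c, which is not part of this excerpt, and path, bufsize and the error handling are placeholders:

        struct hwt_record_entry *entry;
        struct hwt_thread *thr;
        int error;

        error = hwt_thread_alloc(&thr, path, bufsize,
            ctx->hwt_backend->kva_req);
        if (error != 0)
                return (error);

        entry = hwt_record_entry_alloc();
        entry->record_type = HWT_RECORD_THREAD_CREATE;
        entry->thread_id = thr->thread_id;

        HWT_CTX_LOCK(ctx);
        hwt_thread_insert(ctx, thr, entry);     /* queues both the thread and its record */
        HWT_CTX_UNLOCK(ctx);
        hwt_record_wakeup(ctx);                 /* lets a sleeping HWT_IOC_RECORD_GET return */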
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/ioccom.h> +#include <sys/conf.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/hwt.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_object.h> +#include <vm/vm_pager.h> +#include <vm/vm_pageout.h> +#include <vm/vm_phys.h> + +#include <dev/hwt/hwt_hook.h> +#include <dev/hwt/hwt_context.h> +#include <dev/hwt/hwt_contexthash.h> +#include <dev/hwt/hwt_config.h> +#include <dev/hwt/hwt_cpu.h> +#include <dev/hwt/hwt_owner.h> +#include <dev/hwt/hwt_ownerhash.h> +#include <dev/hwt/hwt_thread.h> +#include <dev/hwt/hwt_backend.h> +#include <dev/hwt/hwt_vm.h> +#include <dev/hwt/hwt_record.h> + +#define HWT_THREAD_DEBUG +#undef HWT_THREAD_DEBUG + +#ifdef HWT_THREAD_DEBUG +#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__) +#else +#define dprintf(fmt, ...) +#endif + +static MALLOC_DEFINE(M_HWT_VM, "hwt_vm", "Hardware Trace"); + +static int +hwt_vm_fault(vm_object_t vm_obj, vm_ooffset_t offset, + int prot, vm_page_t *mres) +{ + + return (0); +} + +static int +hwt_vm_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, + vm_ooffset_t foff, struct ucred *cred, u_short *color) +{ + + *color = 0; + + return (0); +} + +static void +hwt_vm_dtor(void *handle) +{ + +} + +static struct cdev_pager_ops hwt_vm_pager_ops = { + .cdev_pg_fault = hwt_vm_fault, + .cdev_pg_ctor = hwt_vm_ctor, + .cdev_pg_dtor = hwt_vm_dtor +}; + +static int +hwt_vm_alloc_pages(struct hwt_vm *vm, int kva_req) +{ + vm_paddr_t low, high, boundary; + vm_memattr_t memattr; +#ifdef __aarch64__ + uintptr_t va; +#endif + int alignment; + vm_page_t m; + int pflags; + int tries; + int i; + + alignment = PAGE_SIZE; + low = 0; + high = -1UL; + boundary = 0; + pflags = VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO; + memattr = VM_MEMATTR_DEVICE; + + if (kva_req) { + vm->kvaddr = kva_alloc(vm->npages * PAGE_SIZE); + if (!vm->kvaddr) + return (ENOMEM); + } + + vm->obj = cdev_pager_allocate(vm, OBJT_MGTDEVICE, + &hwt_vm_pager_ops, vm->npages * PAGE_SIZE, PROT_READ, 0, + curthread->td_ucred); + + for (i = 0; i < vm->npages; i++) { + tries = 0; +retry: + m = vm_page_alloc_noobj_contig(pflags, 1, low, high, + alignment, boundary, memattr); + if (m == NULL) { + if (tries < 3) { + if (!vm_page_reclaim_contig(pflags, 1, low, + high, alignment, boundary)) + vm_wait(NULL); + tries++; + goto retry; + } + + return (ENOMEM); + } + +#if 0 + /* TODO: could not clean device memory on arm64. 
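The wired pages allocated here back a read-only mapping that userspace obtains by calling mmap() on the per-buffer device; together with the HWT_IOC_BUFPTR_GET handler further below in this file, a hypothetical reader could poll the buffer position as follows (struct layout and buffer size are inferred, not authoritative; same includes as the earlier record-drain sketch, plus <sys/mman.h>):

        struct hwt_bufptr_get bget;
        vm_offset_t offset;
        uint64_t data;
        void *buf;
        int ident;

        buf = mmap(NULL, bufsize, PROT_READ, MAP_SHARED, fd, 0);        /* offset must be 0 */

        bget.ident = &ident;
        bget.offset = &offset;
        bget.data = &data;
        if (ioctl(fd, HWT_IOC_BUFPTR_GET, &bget) == 0) {
                /* ident/offset/data come from hwt_backend_read(); their meaning is backend-specific. */
        }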
*/ + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); +#endif + +#ifdef __aarch64__ + va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + cpu_dcache_wb_range((void *)va, PAGE_SIZE); +#endif + + m->valid = VM_PAGE_BITS_ALL; + m->oflags &= ~VPO_UNMANAGED; + m->flags |= PG_FICTITIOUS; + vm->pages[i] = m; + + VM_OBJECT_WLOCK(vm->obj); + vm_page_insert(m, vm->obj, i); + if (kva_req) + pmap_qenter(vm->kvaddr + i * PAGE_SIZE, &m, 1); + VM_OBJECT_WUNLOCK(vm->obj); + } + + return (0); +} + +static int +hwt_vm_open(struct cdev *cdev, int oflags, int devtype, struct thread *td) +{ + + dprintf("%s\n", __func__); + + return (0); +} + +static int +hwt_vm_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, + vm_size_t mapsize, struct vm_object **objp, int nprot) +{ + struct hwt_vm *vm; + + vm = cdev->si_drv1; + + if (nprot != PROT_READ || *offset != 0) + return (ENXIO); + + vm_object_reference(vm->obj); + *objp = vm->obj; + + return (0); +} + +static void +hwt_vm_start_cpu_mode(struct hwt_context *ctx) +{ + cpuset_t enable_cpus; + int cpu_id; + + CPU_ZERO(&enable_cpus); + + CPU_FOREACH_ISSET(cpu_id, &ctx->cpu_map) { +#ifdef SMP + /* Ensure CPU is not halted. */ + if (CPU_ISSET(cpu_id, &hlt_cpus_mask)) + continue; +#endif + + hwt_backend_configure(ctx, cpu_id, cpu_id); + + CPU_SET(cpu_id, &enable_cpus); + } + + if (ctx->hwt_backend->ops->hwt_backend_enable_smp == NULL) { + CPU_FOREACH_ISSET(cpu_id, &enable_cpus) + hwt_backend_enable(ctx, cpu_id); + } else { + /* Some backends require enabling all CPUs at once. */ + hwt_backend_enable_smp(ctx); + } +} + +static int +hwt_vm_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, + struct thread *td) +{ + struct hwt_record_get *rget; + struct hwt_set_config *sconf; + struct hwt_bufptr_get *ptr_get; + struct hwt_svc_buf *sbuf; + + struct hwt_context *ctx; + struct hwt_vm *vm; + struct hwt_owner *ho; + + vm_offset_t offset; + int ident; + int error; + uint64_t data = 0; + void *data2; + size_t data_size; + int data_version; + + vm = dev->si_drv1; + KASSERT(vm != NULL, ("si_drv1 is NULL")); + + ctx = vm->ctx; + + /* Ensure process is registered owner of this HWT. */ + ho = hwt_ownerhash_lookup(td->td_proc); + if (ho == NULL) + return (ENXIO); + + if (ctx->hwt_owner != ho) + return (EPERM); + + switch (cmd) { + case HWT_IOC_START: + dprintf("%s: start tracing\n", __func__); + + HWT_CTX_LOCK(ctx); + if (ctx->state == CTX_STATE_RUNNING) { + /* Already running ? */ + HWT_CTX_UNLOCK(ctx); + return (ENXIO); + } + ctx->state = CTX_STATE_RUNNING; + HWT_CTX_UNLOCK(ctx); + + if (ctx->mode == HWT_MODE_CPU) + hwt_vm_start_cpu_mode(ctx); + else { + /* + * Tracing backend will be configured and enabled + * during hook invocation. See hwt_hook.c. + */ + } + + break; + + case HWT_IOC_STOP: + if (ctx->state == CTX_STATE_STOPPED) + return (ENXIO); + hwt_backend_stop(ctx); + ctx->state = CTX_STATE_STOPPED; + break; + + case HWT_IOC_RECORD_GET: + rget = (struct hwt_record_get *)addr; + error = hwt_record_send(ctx, rget); + if (error) + return (error); + break; + + case HWT_IOC_SET_CONFIG: + if (ctx->state == CTX_STATE_RUNNING) { + return (ENXIO); + } + sconf = (struct hwt_set_config *)addr; + error = hwt_config_set(td, ctx, sconf); + if (error) + return (error); + ctx->pause_on_mmap = sconf->pause_on_mmap ? 
1 : 0; + break; + + case HWT_IOC_WAKEUP: + + if (ctx->mode == HWT_MODE_CPU) + return (ENXIO); + + KASSERT(vm->thr != NULL, ("thr is NULL")); + + wakeup(vm->thr); + + break; + + case HWT_IOC_BUFPTR_GET: + ptr_get = (struct hwt_bufptr_get *)addr; + + error = hwt_backend_read(ctx, vm, &ident, &offset, &data); + if (error) + return (error); + + if (ptr_get->ident) + error = copyout(&ident, ptr_get->ident, sizeof(int)); + if (error) + return (error); + + if (ptr_get->offset) + error = copyout(&offset, ptr_get->offset, + sizeof(vm_offset_t)); + if (error) + return (error); + + if (ptr_get->data) + error = copyout(&data, ptr_get->data, sizeof(uint64_t)); + if (error) + return (error); + + break; + + case HWT_IOC_SVC_BUF: + if (ctx->state == CTX_STATE_STOPPED) { + return (ENXIO); + } + + sbuf = (struct hwt_svc_buf *)addr; + data_size = sbuf->data_size; + data_version = sbuf->data_version; + + if (data_size == 0 || data_size > PAGE_SIZE) + return (EINVAL); + + data2 = malloc(data_size, M_HWT_VM, M_WAITOK | M_ZERO); + error = copyin(sbuf->data, data2, data_size); + if (error) { + free(data2, M_HWT_VM); + return (error); + } + + error = hwt_backend_svc_buf(ctx, data2, data_size, data_version); + if (error) { + free(data2, M_HWT_VM); + return (error); + } + + free(data2, M_HWT_VM); + break; + + default: + break; + } + + return (0); +} + +static struct cdevsw hwt_vm_cdevsw = { + .d_version = D_VERSION, + .d_name = "hwt", + .d_open = hwt_vm_open, + .d_mmap_single = hwt_vm_mmap_single, + .d_ioctl = hwt_vm_ioctl, +}; + +static int +hwt_vm_create_cdev(struct hwt_vm *vm, char *path) +{ + struct make_dev_args args; + int error; + + dprintf("%s: path %s\n", __func__, path); + + make_dev_args_init(&args); + args.mda_devsw = &hwt_vm_cdevsw; + args.mda_flags = MAKEDEV_CHECKNAME | MAKEDEV_WAITOK; + args.mda_uid = UID_ROOT; + args.mda_gid = GID_WHEEL; + args.mda_mode = 0660; + args.mda_si_drv1 = vm; + + error = make_dev_s(&args, &vm->cdev, "%s", path); + if (error != 0) + return (error); + + return (0); +} + +static int +hwt_vm_alloc_buffers(struct hwt_vm *vm, int kva_req) +{ + int error; + + vm->pages = malloc(sizeof(struct vm_page *) * vm->npages, + M_HWT_VM, M_WAITOK | M_ZERO); + + error = hwt_vm_alloc_pages(vm, kva_req); + if (error) { + printf("%s: could not alloc pages\n", __func__); + return (error); + } + + return (0); +} + +static void +hwt_vm_destroy_buffers(struct hwt_vm *vm) +{ + vm_page_t m; + int i; + + if (vm->ctx->hwt_backend->kva_req && vm->kvaddr != 0) { + pmap_qremove(vm->kvaddr, vm->npages); + kva_free(vm->kvaddr, vm->npages * PAGE_SIZE); + } + VM_OBJECT_WLOCK(vm->obj); + for (i = 0; i < vm->npages; i++) { + m = vm->pages[i]; + if (m == NULL) + break; + + vm_page_busy_acquire(m, 0); + cdev_pager_free_page(vm->obj, m); + m->flags &= ~PG_FICTITIOUS; + vm_page_unwire_noq(m); + vm_page_free(m); + + } + vm_pager_deallocate(vm->obj); + VM_OBJECT_WUNLOCK(vm->obj); + + free(vm->pages, M_HWT_VM); +} + +void +hwt_vm_free(struct hwt_vm *vm) +{ + + dprintf("%s\n", __func__); + + if (vm->cdev) + destroy_dev_sched(vm->cdev); + hwt_vm_destroy_buffers(vm); + free(vm, M_HWT_VM); +} + +int +hwt_vm_alloc(size_t bufsize, int kva_req, char *path, struct hwt_vm **vm0) +{ + struct hwt_vm *vm; + int error; + + vm = malloc(sizeof(struct hwt_vm), M_HWT_VM, M_WAITOK | M_ZERO); + vm->npages = bufsize / PAGE_SIZE; + + error = hwt_vm_alloc_buffers(vm, kva_req); + if (error) { + free(vm, M_HWT_VM); + return (error); + } + + error = hwt_vm_create_cdev(vm, path); + if (error) { + hwt_vm_free(vm); + return (error); + } + + *vm0 
= vm; + + return (0); +} diff --git a/sys/dev/hwt/hwt_vm.h b/sys/dev/hwt/hwt_vm.h new file mode 100644 index 000000000000..5002bd43e093 --- /dev/null +++ b/sys/dev/hwt/hwt_vm.h @@ -0,0 +1,47 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _DEV_HWT_HWT_VM_H_ +#define _DEV_HWT_HWT_VM_H_ + +struct hwt_vm { + vm_page_t *pages; + int npages; + vm_object_t obj; + vm_offset_t kvaddr; + struct cdev *cdev; + + struct hwt_context *ctx; + struct hwt_cpu *cpu; /* cpu mode only. */ + struct hwt_thread *thr; /* thr mode only. */ +}; + +int hwt_vm_alloc(size_t bufsize, int kva_req, char *path, struct hwt_vm **vm0); +void hwt_vm_free(struct hwt_vm *vm); + +#endif /* !_DEV_HWT_HWT_VM_H_ */ diff --git a/sys/dev/iicbus/gpio/tca64xx.c b/sys/dev/iicbus/gpio/tca64xx.c index 3b3bca9936f1..cd011ae9be75 100644 --- a/sys/dev/iicbus/gpio/tca64xx.c +++ b/sys/dev/iicbus/gpio/tca64xx.c @@ -261,14 +261,13 @@ tca64xx_attach(device_t dev) sc->addr = iicbus_get_addr(dev); mtx_init(&sc->mtx, "tca64xx gpio", "gpio", MTX_DEF); + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { device_printf(dev, "Could not create busdev child\n"); return (ENXIO); } - OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - #ifdef DEBUG switch (sc->chip) { case TCA6416_TYPE: diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index b842d4f2fd8e..741a7c013f7d 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -1559,19 +1559,26 @@ mddestroy(struct md_s *sc, struct thread *td) mtx_destroy(&sc->queue_mtx); switch (sc->type) { case MD_VNODE: - vn_lock(sc->s_vnode.vnode, LK_EXCLUSIVE | LK_RETRY); - sc->s_vnode.vnode->v_vflag &= ~VV_MD; - VOP_UNLOCK(sc->s_vnode.vnode); - (void)vn_close(sc->s_vnode.vnode, sc->flags & MD_READONLY ? 
- FREAD : (FREAD|FWRITE), sc->cred, td); - kva_free(sc->s_vnode.kva, maxphys + PAGE_SIZE); + if (sc->s_vnode.vnode != NULL) { + vn_lock(sc->s_vnode.vnode, LK_EXCLUSIVE | LK_RETRY); + sc->s_vnode.vnode->v_vflag &= ~VV_MD; + VOP_UNLOCK(sc->s_vnode.vnode); + (void)vn_close(sc->s_vnode.vnode, + sc->flags & MD_READONLY ? FREAD : (FREAD|FWRITE), + sc->cred, td); + } + if (sc->s_vnode.kva != 0) + kva_free(sc->s_vnode.kva, maxphys + PAGE_SIZE); break; case MD_SWAP: - vm_object_deallocate(sc->s_swap.object); + if (sc->s_swap.object != NULL) + vm_object_deallocate(sc->s_swap.object); break; case MD_MALLOC: - destroy_indir(sc, sc->s_malloc.indir); - uma_zdestroy(sc->s_malloc.uma); + if (sc->s_malloc.indir != NULL) + destroy_indir(sc, sc->s_malloc.indir); + if (sc->s_malloc.uma != NULL) + uma_zdestroy(sc->s_malloc.uma); break; case MD_PRELOAD: case MD_NULL: diff --git a/sys/dev/mem/memutil.c b/sys/dev/mem/memutil.c index cf9714d6ec8f..20ce337df0ab 100644 --- a/sys/dev/mem/memutil.c +++ b/sys/dev/mem/memutil.c @@ -26,15 +26,14 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include <sys/param.h> +#include <sys/systm.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/memrange.h> -#include <sys/rwlock.h> -#include <sys/systm.h> +#include <sys/sx.h> -static struct rwlock mr_lock; +static struct sx mr_lock; /* * Implementation-neutral, kernel-callable functions for manipulating @@ -46,7 +45,7 @@ mem_range_init(void) if (mem_range_softc.mr_op == NULL) return; - rw_init(&mr_lock, "memrange"); + sx_init(&mr_lock, "memrange"); mem_range_softc.mr_op->init(&mem_range_softc); } @@ -56,7 +55,7 @@ mem_range_destroy(void) if (mem_range_softc.mr_op == NULL) return; - rw_destroy(&mr_lock); + sx_destroy(&mr_lock); } int @@ -67,12 +66,12 @@ mem_range_attr_get(struct mem_range_desc *mrd, int *arg) if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); nd = *arg; - rw_rlock(&mr_lock); + sx_slock(&mr_lock); if (nd == 0) *arg = mem_range_softc.mr_ndesc; else bcopy(mem_range_softc.mr_desc, mrd, nd * sizeof(*mrd)); - rw_runlock(&mr_lock); + sx_sunlock(&mr_lock); return (0); } @@ -83,8 +82,8 @@ mem_range_attr_set(struct mem_range_desc *mrd, int *arg) if (mem_range_softc.mr_op == NULL) return (EOPNOTSUPP); - rw_wlock(&mr_lock); + sx_xlock(&mr_lock); ret = mem_range_softc.mr_op->set(&mem_range_softc, mrd, arg); - rw_wunlock(&mr_lock); + sx_xunlock(&mr_lock); return (ret); } diff --git a/sys/dev/random/random_harvestq.c b/sys/dev/random/random_harvestq.c index 379b64ac15f1..ee37bda36496 100644 --- a/sys/dev/random/random_harvestq.c +++ b/sys/dev/random/random_harvestq.c @@ -161,6 +161,11 @@ static struct harvest_context { } hc_entropy_fast_accumulator; } harvest_context; +#define RANDOM_HARVEST_INIT_LOCK() mtx_init(&harvest_context.hc_mtx, \ + "entropy harvest mutex", NULL, MTX_SPIN) +#define RANDOM_HARVEST_LOCK() mtx_lock_spin(&harvest_context.hc_mtx) +#define RANDOM_HARVEST_UNLOCK() mtx_unlock_spin(&harvest_context.hc_mtx) + static struct kproc_desc random_proc_kp = { "rand_harvestq", random_kthread, @@ -212,9 +217,10 @@ random_kthread(void) kproc_exit(0); /* NOTREACHED */ } -/* This happens well after SI_SUB_RANDOM */ SYSINIT(random_device_h_proc, SI_SUB_KICK_SCHEDULER, SI_ORDER_ANY, kproc_start, &random_proc_kp); +_Static_assert(SI_SUB_KICK_SCHEDULER > SI_SUB_RANDOM, + "random kthread starting before subsystem initialization"); static void rs_epoch_init(void *dummy __unused) @@ -305,7 +311,6 @@ random_sources_feed(void) explicit_bzero(entropy, sizeof(entropy)); 
} -/* ARGSUSED */ static int random_check_uint_harvestmask(SYSCTL_HANDLER_ARGS) { @@ -336,7 +341,6 @@ SYSCTL_PROC(_kern_random_harvest, OID_AUTO, mask, random_check_uint_harvestmask, "IU", "Entropy harvesting mask"); -/* ARGSUSED */ static int random_print_harvestmask(SYSCTL_HANDLER_ARGS) { @@ -390,7 +394,6 @@ static const char *random_source_descr[ENTROPYSOURCE] = { /* "ENTROPYSOURCE" */ }; -/* ARGSUSED */ static int random_print_harvestmask_symbolic(SYSCTL_HANDLER_ARGS) { @@ -423,7 +426,6 @@ SYSCTL_PROC(_kern_random_harvest, OID_AUTO, mask_symbolic, random_print_harvestmask_symbolic, "A", "Entropy harvesting mask (symbolic)"); -/* ARGSUSED */ static void random_harvestq_init(void *unused __unused) { @@ -453,7 +455,7 @@ random_early_prime(char *entropy, size_t len) return (0); for (i = 0; i < len; i += sizeof(event.he_entropy)) { - event.he_somecounter = (uint32_t)get_cyclecount(); + event.he_somecounter = random_get_cyclecount(); event.he_size = sizeof(event.he_entropy); event.he_source = RANDOM_CACHED; event.he_destination = @@ -493,7 +495,6 @@ random_prime_loader_file(const char *type) * known to the kernel, and inserting it directly into the hashing * module, currently Fortuna. */ -/* ARGSUSED */ static void random_harvestq_prime(void *unused __unused) { @@ -522,7 +523,6 @@ random_harvestq_prime(void *unused __unused) } SYSINIT(random_device_prime, SI_SUB_RANDOM, SI_ORDER_MIDDLE, random_harvestq_prime, NULL); -/* ARGSUSED */ static void random_harvestq_deinit(void *unused __unused) { @@ -560,7 +560,7 @@ random_harvest_queue_(const void *entropy, u_int size, enum random_entropy_sourc if (ring_in != harvest_context.hc_entropy_ring.out) { /* The ring is not full */ event = harvest_context.hc_entropy_ring.ring + ring_in; - event->he_somecounter = (uint32_t)get_cyclecount(); + event->he_somecounter = random_get_cyclecount(); event->he_source = origin; event->he_destination = harvest_context.hc_destination[origin]++; if (size <= sizeof(event->he_entropy)) { @@ -589,7 +589,8 @@ random_harvest_fast_(const void *entropy, u_int size) u_int pos; pos = harvest_context.hc_entropy_fast_accumulator.pos; - harvest_context.hc_entropy_fast_accumulator.buf[pos] ^= jenkins_hash(entropy, size, (uint32_t)get_cyclecount()); + harvest_context.hc_entropy_fast_accumulator.buf[pos] ^= + jenkins_hash(entropy, size, random_get_cyclecount()); harvest_context.hc_entropy_fast_accumulator.pos = (pos + 1)%RANDOM_ACCUM_MAX; } @@ -606,7 +607,7 @@ random_harvest_direct_(const void *entropy, u_int size, enum random_entropy_sour KASSERT(origin >= RANDOM_START && origin < ENTROPYSOURCE, ("%s: origin %d invalid\n", __func__, origin)); size = MIN(size, sizeof(event.he_entropy)); - event.he_somecounter = (uint32_t)get_cyclecount(); + event.he_somecounter = random_get_cyclecount(); event.he_size = size; event.he_source = origin; event.he_destination = harvest_context.hc_destination[origin]++; diff --git a/sys/dev/random/random_harvestq.h b/sys/dev/random/random_harvestq.h index 69a9dfabd44a..7804bf52aa4f 100644 --- a/sys/dev/random/random_harvestq.h +++ b/sys/dev/random/random_harvestq.h @@ -27,6 +27,9 @@ #ifndef SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED #define SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED +#include <sys/types.h> +#include <machine/cpu.h> + #define HARVESTSIZE 2 /* Max length in words of each harvested entropy unit */ /* These are used to queue harvested packets of entropy. 
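For these events the he_somecounter timestamp is now taken through the random_get_cyclecount() helper added at the end of this header, which simply narrows get_cyclecount() to 32 bits; a producer fills an event roughly like this (sketch only, mirroring the existing callers):

        struct harvest_event ev;

        ev.he_somecounter = random_get_cyclecount();
        ev.he_size = sizeof(ev.he_entropy);
        ev.he_source = RANDOM_CACHED;   /* or whichever origin applies */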
The entropy @@ -40,8 +43,10 @@ struct harvest_event { uint8_t he_source; /* origin of the entropy */ }; -#define RANDOM_HARVEST_INIT_LOCK(x) mtx_init(&harvest_context.hc_mtx, "entropy harvest mutex", NULL, MTX_SPIN) -#define RANDOM_HARVEST_LOCK(x) mtx_lock_spin(&harvest_context.hc_mtx) -#define RANDOM_HARVEST_UNLOCK(x) mtx_unlock_spin(&harvest_context.hc_mtx) +static inline uint32_t +random_get_cyclecount(void) +{ + return ((uint32_t)get_cyclecount()); +} #endif /* SYS_DEV_RANDOM_RANDOM_HARVESTQ_H_INCLUDED */ diff --git a/sys/dev/random/randomdev.c b/sys/dev/random/randomdev.c index 6d637ab5a53e..9d1c7b1167c8 100644 --- a/sys/dev/random/randomdev.c +++ b/sys/dev/random/randomdev.c @@ -303,14 +303,14 @@ randomdev_accumulate(uint8_t *buf, u_int count) /* Extra timing here is helpful to scrape scheduler jitter entropy */ randomdev_hash_init(&hash); - timestamp = (uint32_t)get_cyclecount(); + timestamp = random_get_cyclecount(); randomdev_hash_iterate(&hash, ×tamp, sizeof(timestamp)); randomdev_hash_iterate(&hash, buf, count); - timestamp = (uint32_t)get_cyclecount(); + timestamp = random_get_cyclecount(); randomdev_hash_iterate(&hash, ×tamp, sizeof(timestamp)); randomdev_hash_finish(&hash, entropy_data); for (i = 0; i < RANDOM_KEYSIZE_WORDS; i += sizeof(event.he_entropy)/sizeof(event.he_entropy[0])) { - event.he_somecounter = (uint32_t)get_cyclecount(); + event.he_somecounter = random_get_cyclecount(); event.he_size = sizeof(event.he_entropy); event.he_source = RANDOM_CACHED; event.he_destination = destination++; /* Harmless cheating */ diff --git a/sys/dev/regulator/regulator_fixed.c b/sys/dev/regulator/regulator_fixed.c index 0a76da7140a0..55cdb5e4aeae 100644 --- a/sys/dev/regulator/regulator_fixed.c +++ b/sys/dev/regulator/regulator_fixed.c @@ -100,12 +100,8 @@ static struct gpio_entry * regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin) { struct gpio_entry *entry, *tmp; - device_t busdev; int rv; - busdev = GPIO_GET_BUS(gpio_pin->dev); - if (busdev == NULL) - return (NULL); entry = malloc(sizeof(struct gpio_entry), M_FIXEDREGULATOR, M_WAITOK | M_ZERO); @@ -122,8 +118,8 @@ regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin) } /* Reserve pin. */ - /* XXX Can we call gpiobus_acquire_pin() with gpio_list_mtx held? */ - rv = gpiobus_acquire_pin(busdev, gpio_pin->pin); + /* XXX Can we call gpio_pin_acquire() with gpio_list_mtx held? */ + rv = gpio_pin_acquire(gpio_pin); if (rv != 0) { mtx_unlock(&gpio_list_mtx); free(entry, M_FIXEDREGULATOR); diff --git a/sys/dev/sound/midi/midi.c b/sys/dev/sound/midi/midi.c index fbfb69de2913..6753f864ba9c 100644 --- a/sys/dev/sound/midi/midi.c +++ b/sys/dev/sound/midi/midi.c @@ -30,12 +30,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ - /* - * Parts of this file started out as NetBSD: midi.c 1.31 - * They are mostly gone. Still the most obvious will be the state - * machine midi_in - */ - #include <sys/param.h> #include <sys/systm.h> #include <sys/queue.h> @@ -66,7 +60,6 @@ #include "mpu_if.h" #include <dev/sound/midi/midiq.h> -#include "synth_if.h" MALLOC_DEFINE(M_MIDI, "midi buffers", "Midi data allocation area"); #ifndef KOBJMETHOD_END @@ -79,17 +72,6 @@ enum midi_states { MIDI_IN_START, MIDI_IN_SYSEX, MIDI_IN_DATA }; -/* - * The MPU interface current has init() uninit() inqsize() outqsize() - * callback() : fiddle with the tx|rx status. - */ - -#include "mpu_if.h" - -/* - * /dev/rmidi Structure definitions - */ - #define MIDI_NAMELEN 16 struct snd_midi { KOBJ_FIELDS; @@ -115,95 +97,13 @@ struct snd_midi { * complete command packets. 
*/ struct proc *async; struct cdev *dev; - struct synth_midi *synth; - int synth_flags; TAILQ_ENTRY(snd_midi) link; }; -struct synth_midi { - KOBJ_FIELDS; - struct snd_midi *m; -}; - -static synth_open_t midisynth_open; -static synth_close_t midisynth_close; -static synth_writeraw_t midisynth_writeraw; -static synth_killnote_t midisynth_killnote; -static synth_startnote_t midisynth_startnote; -static synth_setinstr_t midisynth_setinstr; -static synth_alloc_t midisynth_alloc; -static synth_controller_t midisynth_controller; -static synth_bender_t midisynth_bender; - -static kobj_method_t midisynth_methods[] = { - KOBJMETHOD(synth_open, midisynth_open), - KOBJMETHOD(synth_close, midisynth_close), - KOBJMETHOD(synth_writeraw, midisynth_writeraw), - KOBJMETHOD(synth_setinstr, midisynth_setinstr), - KOBJMETHOD(synth_startnote, midisynth_startnote), - KOBJMETHOD(synth_killnote, midisynth_killnote), - KOBJMETHOD(synth_alloc, midisynth_alloc), - KOBJMETHOD(synth_controller, midisynth_controller), - KOBJMETHOD(synth_bender, midisynth_bender), - KOBJMETHOD_END -}; - -DEFINE_CLASS(midisynth, midisynth_methods, 0); - -/* - * Module Exports & Interface - * - * struct midi_chan *midi_init(MPU_CLASS cls, int unit, int chan, - * void *cookie) - * int midi_uninit(struct snd_midi *) - * - * 0 == no error - * EBUSY or other error - * - * int midi_in(struct snd_midi *, char *buf, int count) - * int midi_out(struct snd_midi *, char *buf, int count) - * - * midi_{in,out} return actual size transfered - * - */ - -/* - * midi_devs tailq, holder of all rmidi instances protected by midistat_lock - */ - TAILQ_HEAD(, snd_midi) midi_devs; -/* - * /dev/midistat variables and declarations, protected by midistat_lock - */ - struct sx mstat_lock; -static int midistat_isopen = 0; -static struct sbuf midistat_sbuf; -static struct cdev *midistat_dev; - -/* - * /dev/midistat dev_t declarations - */ - -static d_open_t midistat_open; -static d_close_t midistat_close; -static d_read_t midistat_read; - -static struct cdevsw midistat_cdevsw = { - .d_version = D_VERSION, - .d_open = midistat_open, - .d_close = midistat_close, - .d_read = midistat_read, - .d_name = "midistat", -}; - -/* - * /dev/rmidi dev_t declarations, struct variable access is protected by - * locks contained within the structure. - */ - static d_open_t midi_open; static d_close_t midi_close; static d_ioctl_t midi_ioctl; @@ -222,41 +122,18 @@ static struct cdevsw midi_cdevsw = { .d_name = "rmidi", }; -/* - * Prototypes of library functions - */ - static int midi_destroy(struct snd_midi *, int); -static int midistat_prepare(struct sbuf * s); static int midi_load(void); static int midi_unload(void); -/* - * Misc declr. - */ SYSCTL_NODE(_hw, OID_AUTO, midi, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Midi driver"); -static SYSCTL_NODE(_hw_midi, OID_AUTO, stat, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, - "Status device"); int midi_debug; /* XXX: should this be moved into debug.midi? */ SYSCTL_INT(_hw_midi, OID_AUTO, debug, CTLFLAG_RW, &midi_debug, 0, ""); -int midi_dumpraw; -SYSCTL_INT(_hw_midi, OID_AUTO, dumpraw, CTLFLAG_RW, &midi_dumpraw, 0, ""); - -int midi_instroff; -SYSCTL_INT(_hw_midi, OID_AUTO, instroff, CTLFLAG_RW, &midi_instroff, 0, ""); - -int midistat_verbose; -SYSCTL_INT(_hw_midi_stat, OID_AUTO, verbose, CTLFLAG_RW, - &midistat_verbose, 0, ""); - #define MIDI_DEBUG(l,a) if(midi_debug>=l) a -/* - * CODE START - */ void midistat_lock(void) @@ -285,9 +162,6 @@ midistat_lockassert(void) * what unit number is used. 
* * It is an error to call midi_init with an already used unit/channel combo. - * - * Returns NULL on error - * */ struct snd_midi * midi_init(kobj_class_t cls, int unit, int channel, void *cookie) @@ -326,9 +200,6 @@ midi_init(kobj_class_t cls, int unit, int channel, void *cookie) MIDI_DEBUG(1, printf("midiinit #2: unit %d/%d.\n", unit, channel)); m = malloc(sizeof(*m), M_MIDI, M_WAITOK | M_ZERO); - m->synth = malloc(sizeof(*m->synth), M_MIDI, M_WAITOK | M_ZERO); - kobj_init((kobj_t)m->synth, &midisynth_class); - m->synth->m = m; kobj_init((kobj_t)m, cls); inqsize = MPU_INQSIZE(m, cookie); outqsize = MPU_OUTQSIZE(m, cookie); @@ -393,7 +264,6 @@ err2: if (MIDIQ_BUF(m->outq)) free(MIDIQ_BUF(m->outq), M_MIDI); err1: - free(m->synth, M_MIDI); free(m, M_MIDI); err0: midistat_unlock(); @@ -405,9 +275,7 @@ err0: * midi_uninit does not call MIDI_UNINIT, as since this is the implementors * entry point. midi_uninit if fact, does not send any methods. A call to * midi_uninit is a defacto promise that you won't manipulate ch anymore - * */ - int midi_uninit(struct snd_midi *m) { @@ -440,13 +308,6 @@ exit: return err; } -/* - * midi_in: process all data until the queue is full, then discards the rest. - * Since midi_in is a state machine, data discards can cause it to get out of - * whack. Process as much as possible. It calls, wakeup, selnotify and - * psignal at most once. - */ - #ifdef notdef static int midi_lengths[] = {2, 2, 2, 2, 1, 1, 2, 0}; @@ -460,6 +321,12 @@ static int midi_lengths[] = {2, 2, 2, 2, 1, 1, 2, 0}; #define MIDI_SYSEX_START 0xF0 #define MIDI_SYSEX_END 0xF7 +/* + * midi_in: process all data until the queue is full, then discards the rest. + * Since midi_in is a state machine, data discards can cause it to get out of + * whack. Process as much as possible. It calls, wakeup, selnotify and + * psignal at most once. + */ int midi_in(struct snd_midi *m, uint8_t *buf, int size) { @@ -627,9 +494,6 @@ midi_out(struct snd_midi *m, uint8_t *buf, int size) return used; } -/* - * /dev/rmidi#.# device access functions - */ int midi_open(struct cdev *i_dev, int flags, int mode, struct thread *td) { @@ -934,434 +798,6 @@ midi_poll(struct cdev *i_dev, int events, struct thread *td) } /* - * /dev/midistat device functions - * - */ -static int -midistat_open(struct cdev *i_dev, int flags, int mode, struct thread *td) -{ - int error; - - MIDI_DEBUG(1, printf("midistat_open\n")); - - midistat_lock(); - if (midistat_isopen) { - midistat_unlock(); - return EBUSY; - } - midistat_isopen = 1; - sbuf_new(&midistat_sbuf, NULL, 4096, SBUF_AUTOEXTEND); - error = (midistat_prepare(&midistat_sbuf) > 0) ? 
0 : ENOMEM; - if (error) - midistat_isopen = 0; - midistat_unlock(); - return error; -} - -static int -midistat_close(struct cdev *i_dev, int flags, int mode, struct thread *td) -{ - MIDI_DEBUG(1, printf("midistat_close\n")); - midistat_lock(); - if (!midistat_isopen) { - midistat_unlock(); - return EBADF; - } - sbuf_delete(&midistat_sbuf); - midistat_isopen = 0; - midistat_unlock(); - return 0; -} - -static int -midistat_read(struct cdev *i_dev, struct uio *uio, int flag) -{ - long l; - int err; - - MIDI_DEBUG(4, printf("midistat_read\n")); - midistat_lock(); - if (!midistat_isopen) { - midistat_unlock(); - return EBADF; - } - if (uio->uio_offset < 0 || uio->uio_offset > sbuf_len(&midistat_sbuf)) { - midistat_unlock(); - return EINVAL; - } - err = 0; - l = lmin(uio->uio_resid, sbuf_len(&midistat_sbuf) - uio->uio_offset); - if (l > 0) { - err = uiomove(sbuf_data(&midistat_sbuf) + uio->uio_offset, l, - uio); - } - midistat_unlock(); - return err; -} - -/* - * Module library functions - */ - -static int -midistat_prepare(struct sbuf *s) -{ - struct snd_midi *m; - - midistat_lockassert(); - - sbuf_printf(s, "FreeBSD Midi Driver (midi2)\n"); - if (TAILQ_EMPTY(&midi_devs)) { - sbuf_printf(s, "No devices installed.\n"); - sbuf_finish(s); - return sbuf_len(s); - } - sbuf_printf(s, "Installed devices:\n"); - - TAILQ_FOREACH(m, &midi_devs, link) { - mtx_lock(&m->lock); - sbuf_printf(s, "%s [%d/%d:%s]", m->name, m->unit, m->channel, - MPU_PROVIDER(m, m->cookie)); - sbuf_printf(s, "%s", MPU_DESCR(m, m->cookie, midistat_verbose)); - sbuf_printf(s, "\n"); - mtx_unlock(&m->lock); - } - - sbuf_finish(s); - return sbuf_len(s); -} - -#ifdef notdef -/* - * Convert IOCTL command to string for debugging - */ - -static char * -midi_cmdname(int cmd) -{ - static struct { - int cmd; - char *name; - } *tab, cmdtab_midiioctl[] = { -#define A(x) {x, ## x} - /* - * Once we have some real IOCTLs define, the following will - * be relavant. - * - * A(SNDCTL_MIDI_PRETIME), A(SNDCTL_MIDI_MPUMODE), - * A(SNDCTL_MIDI_MPUCMD), A(SNDCTL_SYNTH_INFO), - * A(SNDCTL_MIDI_INFO), A(SNDCTL_SYNTH_MEMAVL), - * A(SNDCTL_FM_LOAD_INSTR), A(SNDCTL_FM_4OP_ENABLE), - * A(MIOSPASSTHRU), A(MIOGPASSTHRU), A(AIONWRITE), - * A(AIOGSIZE), A(AIOSSIZE), A(AIOGFMT), A(AIOSFMT), - * A(AIOGMIX), A(AIOSMIX), A(AIOSTOP), A(AIOSYNC), - * A(AIOGCAP), - */ -#undef A - { - -1, "unknown" - }, - }; - - for (tab = cmdtab_midiioctl; tab->cmd != cmd && tab->cmd != -1; tab++); - return tab->name; -} - -#endif /* notdef */ - -/* - * midisynth - */ - -int -midisynth_open(void *n, void *arg, int flags) -{ - struct snd_midi *m = ((struct synth_midi *)n)->m; - int retval; - - MIDI_DEBUG(1, printf("midisynth_open %s %s\n", - flags & FREAD ? "M_RX" : "", flags & FWRITE ? 
"M_TX" : "")); - - if (m == NULL) - return ENXIO; - - mtx_lock(&m->lock); - mtx_lock(&m->qlock); - - retval = 0; - - if (flags & FREAD) { - if (MIDIQ_SIZE(m->inq) == 0) - retval = ENXIO; - else if (m->flags & M_RX) - retval = EBUSY; - if (retval) - goto err; - } - if (flags & FWRITE) { - if (MIDIQ_SIZE(m->outq) == 0) - retval = ENXIO; - else if (m->flags & M_TX) - retval = EBUSY; - if (retval) - goto err; - } - m->busy++; - - /* - * TODO: Consider m->async = 0; - */ - - if (flags & FREAD) { - m->flags |= M_RX | M_RXEN; - /* - * Only clear the inq, the outq might still have data to drain - * from a previous session - */ - MIDIQ_CLEAR(m->inq); - m->rchan = 0; - } - - if (flags & FWRITE) { - m->flags |= M_TX; - m->wchan = 0; - } - m->synth_flags = flags & (FREAD | FWRITE); - - MPU_CALLBACK(m, m->cookie, m->flags); - -err: mtx_unlock(&m->qlock); - mtx_unlock(&m->lock); - MIDI_DEBUG(2, printf("midisynth_open: return %d.\n", retval)); - return retval; -} - -int -midisynth_close(void *n) -{ - struct snd_midi *m = ((struct synth_midi *)n)->m; - int retval; - int oldflags; - - MIDI_DEBUG(1, printf("midisynth_close %s %s\n", - m->synth_flags & FREAD ? "M_RX" : "", - m->synth_flags & FWRITE ? "M_TX" : "")); - - if (m == NULL) - return ENXIO; - - mtx_lock(&m->lock); - mtx_lock(&m->qlock); - - if ((m->synth_flags & FREAD && !(m->flags & M_RX)) || - (m->synth_flags & FWRITE && !(m->flags & M_TX))) { - retval = ENXIO; - goto err; - } - m->busy--; - - oldflags = m->flags; - - if (m->synth_flags & FREAD) - m->flags &= ~(M_RX | M_RXEN); - if (m->synth_flags & FWRITE) - m->flags &= ~M_TX; - - if ((m->flags & (M_TXEN | M_RXEN)) != (oldflags & (M_RXEN | M_TXEN))) - MPU_CALLBACK(m, m->cookie, m->flags); - - MIDI_DEBUG(1, printf("midi_close: closed, busy = %d.\n", m->busy)); - - mtx_unlock(&m->qlock); - mtx_unlock(&m->lock); - retval = 0; -err: return retval; -} - -/* - * Always blocking. 
- */ - -int -midisynth_writeraw(void *n, uint8_t *buf, size_t len) -{ - struct snd_midi *m = ((struct synth_midi *)n)->m; - int retval; - int used; - int i; - - MIDI_DEBUG(4, printf("midisynth_writeraw\n")); - - retval = 0; - - if (m == NULL) - return ENXIO; - - mtx_lock(&m->lock); - mtx_lock(&m->qlock); - - if (!(m->flags & M_TX)) - goto err1; - - if (midi_dumpraw) - printf("midi dump: "); - - while (len > 0) { - while (MIDIQ_AVAIL(m->outq) == 0) { - if (!(m->flags & M_TXEN)) { - m->flags |= M_TXEN; - MPU_CALLBACK(m, m->cookie, m->flags); - } - mtx_unlock(&m->lock); - m->wchan = 1; - MIDI_DEBUG(3, printf("midisynth_writeraw msleep\n")); - retval = msleep(&m->wchan, &m->qlock, - PCATCH | PDROP, "midi TX", 0); - /* - * We slept, maybe things have changed since last - * dying check - */ - if (retval == EINTR) - goto err0; - - if (retval) - goto err0; - mtx_lock(&m->lock); - mtx_lock(&m->qlock); - m->wchan = 0; - if (!m->busy) - goto err1; - } - - /* - * We are certain than data can be placed on the queue - */ - - used = MIN(MIDIQ_AVAIL(m->outq), len); - used = MIN(used, MIDI_WSIZE); - MIDI_DEBUG(5, - printf("midi_synth: resid %zu len %jd avail %jd\n", - len, (intmax_t)MIDIQ_LEN(m->outq), - (intmax_t)MIDIQ_AVAIL(m->outq))); - - if (midi_dumpraw) - for (i = 0; i < used; i++) - printf("%x ", buf[i]); - - MIDIQ_ENQ(m->outq, buf, used); - len -= used; - - /* - * Inform the bottom half that data can be written - */ - if (!(m->flags & M_TXEN)) { - m->flags |= M_TXEN; - MPU_CALLBACK(m, m->cookie, m->flags); - } - } - /* - * If we Made it here then transfer is good - */ - if (midi_dumpraw) - printf("\n"); - - retval = 0; -err1: mtx_unlock(&m->qlock); - mtx_unlock(&m->lock); -err0: return retval; -} - -static int -midisynth_killnote(void *n, uint8_t chn, uint8_t note, uint8_t vel) -{ - u_char c[3]; - - if (note > 127 || chn > 15) - return (EINVAL); - - if (vel > 127) - vel = 127; - - if (vel == 64) { - c[0] = 0x90 | (chn & 0x0f); /* Note on. */ - c[1] = (u_char)note; - c[2] = 0; - } else { - c[0] = 0x80 | (chn & 0x0f); /* Note off. */ - c[1] = (u_char)note; - c[2] = (u_char)vel; - } - - return midisynth_writeraw(n, c, 3); -} - -static int -midisynth_setinstr(void *n, uint8_t chn, uint16_t instr) -{ - u_char c[2]; - - if (instr > 127 || chn > 15) - return EINVAL; - - c[0] = 0xc0 | (chn & 0x0f); /* Progamme change. */ - c[1] = instr + midi_instroff; - - return midisynth_writeraw(n, c, 2); -} - -static int -midisynth_startnote(void *n, uint8_t chn, uint8_t note, uint8_t vel) -{ - u_char c[3]; - - if (note > 127 || chn > 15) - return EINVAL; - - if (vel > 127) - vel = 127; - - c[0] = 0x90 | (chn & 0x0f); /* Note on. */ - c[1] = (u_char)note; - c[2] = (u_char)vel; - - return midisynth_writeraw(n, c, 3); -} -static int -midisynth_alloc(void *n, uint8_t chan, uint8_t note) -{ - return chan; -} - -static int -midisynth_controller(void *n, uint8_t chn, uint8_t ctrlnum, uint16_t val) -{ - u_char c[3]; - - if (ctrlnum > 127 || chn > 15) - return EINVAL; - - c[0] = 0xb0 | (chn & 0x0f); /* Control Message. */ - c[1] = ctrlnum; - c[2] = val; - return midisynth_writeraw(n, c, 3); -} - -static int -midisynth_bender(void *n, uint8_t chn, uint16_t val) -{ - u_char c[3]; - - if (val > 16383 || chn > 15) - return EINVAL; - - c[0] = 0xe0 | (chn & 0x0f); /* Pitch bend. */ - c[1] = (u_char)val & 0x7f; - c[2] = (u_char)(val >> 7) & 0x7f; - - return midisynth_writeraw(n, c, 3); -} - -/* * Single point of midi destructions. 
*/ static int @@ -1381,24 +817,16 @@ midi_destroy(struct snd_midi *m, int midiuninit) free(MIDIQ_BUF(m->outq), M_MIDI); mtx_destroy(&m->qlock); mtx_destroy(&m->lock); - free(m->synth, M_MIDI); free(m, M_MIDI); return 0; } -/* - * Load and unload functions, creates the /dev/midistat device - */ - static int midi_load(void) { sx_init(&mstat_lock, "midistat lock"); TAILQ_INIT(&midi_devs); - midistat_dev = make_dev(&midistat_cdevsw, MIDI_DEV_MIDICTL, UID_ROOT, - GID_WHEEL, 0666, "midistat"); - return 0; } @@ -1411,9 +839,6 @@ midi_unload(void) MIDI_DEBUG(1, printf("midi_unload()\n")); retval = EBUSY; midistat_lock(); - if (midistat_isopen) - goto exit0; - TAILQ_FOREACH_SAFE(m, &midi_devs, link, tmp) { mtx_lock(&m->lock); if (m->busy) @@ -1421,28 +846,21 @@ midi_unload(void) else retval = midi_destroy(m, 1); if (retval) - goto exit1; + goto exit; } midistat_unlock(); - destroy_dev(midistat_dev); - /* - * Made it here then unload is complete - */ sx_destroy(&mstat_lock); return 0; -exit1: +exit: mtx_unlock(&m->lock); -exit0: midistat_unlock(); if (retval) MIDI_DEBUG(2, printf("midi_unload: failed\n")); return retval; } -extern int seq_modevent(module_t mod, int type, void *data); - static int midi_modevent(module_t mod, int type, void *data) { @@ -1453,14 +871,10 @@ midi_modevent(module_t mod, int type, void *data) switch (type) { case MOD_LOAD: retval = midi_load(); - if (retval == 0) - retval = seq_modevent(mod, type, data); break; case MOD_UNLOAD: retval = midi_unload(); - if (retval == 0) - retval = seq_modevent(mod, type, data); break; default: @@ -1470,73 +884,5 @@ midi_modevent(module_t mod, int type, void *data) return retval; } -kobj_t -midimapper_addseq(void *arg1, int *unit, void **cookie) -{ - unit = NULL; - - return (kobj_t)arg1; -} - -int -midimapper_open_locked(void *arg1, void **cookie) -{ - int retval = 0; - struct snd_midi *m; - - midistat_lockassert(); - TAILQ_FOREACH(m, &midi_devs, link) { - retval++; - } - - return retval; -} - -int -midimapper_open(void *arg1, void **cookie) -{ - int retval; - - midistat_lock(); - retval = midimapper_open_locked(arg1, cookie); - midistat_unlock(); - - return retval; -} - -int -midimapper_close(void *arg1, void *cookie) -{ - return 0; -} - -kobj_t -midimapper_fetch_synth_locked(void *arg, void *cookie, int unit) -{ - struct snd_midi *m; - int retval = 0; - - midistat_lockassert(); - TAILQ_FOREACH(m, &midi_devs, link) { - if (unit == retval) - return (kobj_t)m->synth; - retval++; - } - - return NULL; -} - -kobj_t -midimapper_fetch_synth(void *arg, void *cookie, int unit) -{ - kobj_t synth; - - midistat_lock(); - synth = midimapper_fetch_synth_locked(arg, cookie, unit); - midistat_unlock(); - - return synth; -} - DEV_MODULE(midi, midi_modevent, NULL); MODULE_VERSION(midi, 1); diff --git a/sys/dev/sound/midi/midi.h b/sys/dev/sound/midi/midi.h index 2254fab690e9..286e84264ef3 100644 --- a/sys/dev/sound/midi/midi.h +++ b/sys/dev/sound/midi/midi.h @@ -51,11 +51,4 @@ int midi_uninit(struct snd_midi *_m); int midi_out(struct snd_midi *_m, uint8_t *_buf, int _size); int midi_in(struct snd_midi *_m, uint8_t *_buf, int _size); -kobj_t midimapper_addseq(void *arg1, int *unit, void **cookie); -int midimapper_open_locked(void *arg1, void **cookie); -int midimapper_open(void *arg1, void **cookie); -int midimapper_close(void *arg1, void *cookie); -kobj_t midimapper_fetch_synth_locked(void *arg, void *cookie, int unit); -kobj_t midimapper_fetch_synth(void *arg, void *cookie, int unit); - #endif diff --git a/sys/dev/sound/midi/mpu401.c 
b/sys/dev/sound/midi/mpu401.c index 2be285bc0040..224ebb1b01f4 100644 --- a/sys/dev/sound/midi/mpu401.c +++ b/sys/dev/sound/midi/mpu401.c @@ -88,8 +88,6 @@ static int mpu401_minqsize(struct snd_midi *, void *); static int mpu401_moutqsize(struct snd_midi *, void *); static void mpu401_mcallback(struct snd_midi *, void *, int); static void mpu401_mcallbackp(struct snd_midi *, void *, int); -static const char *mpu401_mdescr(struct snd_midi *, void *, int); -static const char *mpu401_mprovider(struct snd_midi *, void *); static kobj_method_t mpu401_methods[] = { KOBJMETHOD(mpu_init, mpu401_minit), @@ -98,8 +96,6 @@ static kobj_method_t mpu401_methods[] = { KOBJMETHOD(mpu_outqsize, mpu401_moutqsize), KOBJMETHOD(mpu_callback, mpu401_mcallback), KOBJMETHOD(mpu_callbackp, mpu401_mcallbackp), - KOBJMETHOD(mpu_descr, mpu401_mdescr), - KOBJMETHOD(mpu_provider, mpu401_mprovider), KOBJMETHOD_END }; @@ -122,24 +118,12 @@ mpu401_intr(struct mpu401 *m) int i; int s; -/* - printf("mpu401_intr\n"); -*/ #define RXRDY(m) ( (STATUS(m) & MPU_INPUTBUSY) == 0) #define TXRDY(m) ( (STATUS(m) & MPU_OUTPUTBUSY) == 0) -#if 0 -#define D(x,l) printf("mpu401_intr %d %x %s %s\n",l, x, x&MPU_INPUTBUSY?"RX":"", x&MPU_OUTPUTBUSY?"TX":"") -#else -#define D(x,l) -#endif i = 0; s = STATUS(m); - D(s, 1); while ((s & MPU_INPUTBUSY) == 0 && i < MPU_INTR_BUF) { b[i] = READ(m); -/* - printf("mpu401_intr in i %d d %d\n", i, b[i]); -*/ i++; s = STATUS(m); } @@ -148,15 +132,9 @@ mpu401_intr(struct mpu401 *m) i = 0; while (!(s & MPU_OUTPUTBUSY) && i < MPU_INTR_BUF) { if (midi_out(m->mid, b, 1)) { -/* - printf("mpu401_intr out i %d d %d\n", i, b[0]); -*/ WRITE(m, *b); } else { -/* - printf("mpu401_intr write: no output\n"); -*/ return 0; } i++; @@ -262,13 +240,7 @@ static void mpu401_mcallback(struct snd_midi *sm, void *arg, int flags) { struct mpu401 *m = arg; -#if 0 - printf("mpu401_callback %s %s %s %s\n", - flags & M_RX ? "M_RX" : "", - flags & M_TX ? "M_TX" : "", - flags & M_RXEN ? "M_RXEN" : "", - flags & M_TXEN ? "M_TXEN" : ""); -#endif + if (flags & M_TXEN && m->si) { callout_reset(&m->timer, 1, mpu401_timeout, m); } @@ -278,19 +250,5 @@ mpu401_mcallback(struct snd_midi *sm, void *arg, int flags) static void mpu401_mcallbackp(struct snd_midi *sm, void *arg, int flags) { -/* printf("mpu401_callbackp\n"); */ mpu401_mcallback(sm, arg, flags); } - -static const char * -mpu401_mdescr(struct snd_midi *sm, void *arg, int verbosity) -{ - - return "descr mpu401"; -} - -static const char * -mpu401_mprovider(struct snd_midi *m, void *arg) -{ - return "provider mpu401"; -} diff --git a/sys/dev/sound/midi/mpu_if.m b/sys/dev/sound/midi/mpu_if.m index b7cb586c5dd0..835d887f703a 100644 --- a/sys/dev/sound/midi/mpu_if.m +++ b/sys/dev/sound/midi/mpu_if.m @@ -56,17 +56,6 @@ METHOD void callback { int _flags; }; -METHOD const char * provider { - struct snd_midi *_kobj; - void *_cookie; -}; - -METHOD const char * descr { - struct snd_midi *_kobj; - void *_cookie; - int _verbosity; -}; - METHOD int uninit { struct snd_midi *_kobj; void *_cookie; diff --git a/sys/dev/sound/midi/sequencer.c b/sys/dev/sound/midi/sequencer.c deleted file mode 100644 index 03b71688175c..000000000000 --- a/sys/dev/sound/midi/sequencer.c +++ /dev/null @@ -1,2107 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2003 Mathew Kanner - * Copyright (c) 1993 Hannu Savolainen - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * The sequencer personality manager. - */ - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/ioccom.h> - -#include <sys/filio.h> -#include <sys/lock.h> -#include <sys/sockio.h> -#include <sys/fcntl.h> -#include <sys/proc.h> -#include <sys/sysctl.h> - -#include <sys/kernel.h> /* for DATA_SET */ - -#include <sys/module.h> -#include <sys/conf.h> -#include <sys/file.h> -#include <sys/uio.h> -#include <sys/syslog.h> -#include <sys/errno.h> -#include <sys/malloc.h> -#include <sys/bus.h> -#include <machine/resource.h> -#include <machine/bus.h> -#include <machine/clock.h> /* for DELAY */ -#include <sys/soundcard.h> -#include <sys/rman.h> -#include <sys/mman.h> -#include <sys/poll.h> -#include <sys/mutex.h> -#include <sys/condvar.h> -#include <sys/kthread.h> -#include <sys/unistd.h> -#include <sys/selinfo.h> -#include <sys/sx.h> - -#ifdef HAVE_KERNEL_OPTION_HEADERS -#include "opt_snd.h" -#endif - -#include <dev/sound/midi/midi.h> -#include <dev/sound/midi/midiq.h> -#include "synth_if.h" - -#include <dev/sound/midi/sequencer.h> - -#define TMR_TIMERBASE 13 - -#define SND_DEV_SEQ 1 /* Sequencer output /dev/sequencer (FM - * synthesizer and MIDI output) */ -#define SND_DEV_MUSIC 8 /* /dev/music, level 2 interface */ - -/* Length of a sequencer event. */ -#define EV_SZ 8 -#define IEV_SZ 8 - -/* Lookup modes */ -#define LOOKUP_EXIST (0) -#define LOOKUP_OPEN (1) -#define LOOKUP_CLOSE (2) - -#define MIDIDEV(y) (dev2unit(y) & 0x0f) - -/* These are the entries to the sequencer driver. 
*/ -static d_open_t mseq_open; -static d_close_t mseq_close; -static d_ioctl_t mseq_ioctl; -static d_read_t mseq_read; -static d_write_t mseq_write; -static d_poll_t mseq_poll; - -static struct cdevsw seq_cdevsw = { - .d_version = D_VERSION, - .d_open = mseq_open, - .d_close = mseq_close, - .d_read = mseq_read, - .d_write = mseq_write, - .d_ioctl = mseq_ioctl, - .d_poll = mseq_poll, - .d_name = "sequencer", -}; - -struct seq_softc { - KOBJ_FIELDS; - - struct mtx seq_lock, q_lock; - struct cv empty_cv, reset_cv, in_cv, out_cv, state_cv, th_cv; - - MIDIQ_HEAD(, u_char) in_q, out_q; - - u_long flags; - /* Flags (protected by flag_mtx of mididev_info) */ - int fflags; /* Access mode */ - int music; - - int out_water; /* Sequence output threshould */ - snd_sync_parm sync_parm; /* AIOSYNC parameter set */ - struct thread *sync_thread; /* AIOSYNCing thread */ - struct selinfo in_sel, out_sel; - int midi_number; - struct cdev *seqdev, *musicdev; - int unit; - int maxunits; - kobj_t *midis; - int *midi_flags; - kobj_t mapper; - void *mapper_cookie; - struct timeval timerstop, timersub; - int timerbase, tempo; - int timerrun; - int done; - int playing; - int recording; - int busy; - int pre_event_timeout; - int waiting; -}; - -/* - * Module specific stuff, including how many sequecers - * we currently own. - */ - -SYSCTL_NODE(_hw_midi, OID_AUTO, seq, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, - "Midi sequencer"); - -int seq_debug; -/* XXX: should this be moved into debug.midi? */ -SYSCTL_INT(_hw_midi_seq, OID_AUTO, debug, CTLFLAG_RW, &seq_debug, 0, ""); - -midi_cmdtab cmdtab_seqevent[] = { - {SEQ_NOTEOFF, "SEQ_NOTEOFF"}, - {SEQ_NOTEON, "SEQ_NOTEON"}, - {SEQ_WAIT, "SEQ_WAIT"}, - {SEQ_PGMCHANGE, "SEQ_PGMCHANGE"}, - {SEQ_SYNCTIMER, "SEQ_SYNCTIMER"}, - {SEQ_MIDIPUTC, "SEQ_MIDIPUTC"}, - {SEQ_DRUMON, "SEQ_DRUMON"}, - {SEQ_DRUMOFF, "SEQ_DRUMOFF"}, - {SEQ_ECHO, "SEQ_ECHO"}, - {SEQ_AFTERTOUCH, "SEQ_AFTERTOUCH"}, - {SEQ_CONTROLLER, "SEQ_CONTROLLER"}, - {SEQ_BALANCE, "SEQ_BALANCE"}, - {SEQ_VOLMODE, "SEQ_VOLMODE"}, - {SEQ_FULLSIZE, "SEQ_FULLSIZE"}, - {SEQ_PRIVATE, "SEQ_PRIVATE"}, - {SEQ_EXTENDED, "SEQ_EXTENDED"}, - {EV_SEQ_LOCAL, "EV_SEQ_LOCAL"}, - {EV_TIMING, "EV_TIMING"}, - {EV_CHN_COMMON, "EV_CHN_COMMON"}, - {EV_CHN_VOICE, "EV_CHN_VOICE"}, - {EV_SYSEX, "EV_SYSEX"}, - {-1, NULL}, -}; - -midi_cmdtab cmdtab_seqioctl[] = { - {SNDCTL_SEQ_RESET, "SNDCTL_SEQ_RESET"}, - {SNDCTL_SEQ_SYNC, "SNDCTL_SEQ_SYNC"}, - {SNDCTL_SYNTH_INFO, "SNDCTL_SYNTH_INFO"}, - {SNDCTL_SEQ_CTRLRATE, "SNDCTL_SEQ_CTRLRATE"}, - {SNDCTL_SEQ_GETOUTCOUNT, "SNDCTL_SEQ_GETOUTCOUNT"}, - {SNDCTL_SEQ_GETINCOUNT, "SNDCTL_SEQ_GETINCOUNT"}, - {SNDCTL_SEQ_PERCMODE, "SNDCTL_SEQ_PERCMODE"}, - {SNDCTL_FM_LOAD_INSTR, "SNDCTL_FM_LOAD_INSTR"}, - {SNDCTL_SEQ_TESTMIDI, "SNDCTL_SEQ_TESTMIDI"}, - {SNDCTL_SEQ_RESETSAMPLES, "SNDCTL_SEQ_RESETSAMPLES"}, - {SNDCTL_SEQ_NRSYNTHS, "SNDCTL_SEQ_NRSYNTHS"}, - {SNDCTL_SEQ_NRMIDIS, "SNDCTL_SEQ_NRMIDIS"}, - {SNDCTL_SEQ_GETTIME, "SNDCTL_SEQ_GETTIME"}, - {SNDCTL_MIDI_INFO, "SNDCTL_MIDI_INFO"}, - {SNDCTL_SEQ_THRESHOLD, "SNDCTL_SEQ_THRESHOLD"}, - {SNDCTL_SYNTH_MEMAVL, "SNDCTL_SYNTH_MEMAVL"}, - {SNDCTL_FM_4OP_ENABLE, "SNDCTL_FM_4OP_ENABLE"}, - {SNDCTL_PMGR_ACCESS, "SNDCTL_PMGR_ACCESS"}, - {SNDCTL_SEQ_PANIC, "SNDCTL_SEQ_PANIC"}, - {SNDCTL_SEQ_OUTOFBAND, "SNDCTL_SEQ_OUTOFBAND"}, - {SNDCTL_TMR_TIMEBASE, "SNDCTL_TMR_TIMEBASE"}, - {SNDCTL_TMR_START, "SNDCTL_TMR_START"}, - {SNDCTL_TMR_STOP, "SNDCTL_TMR_STOP"}, - {SNDCTL_TMR_CONTINUE, "SNDCTL_TMR_CONTINUE"}, - {SNDCTL_TMR_TEMPO, "SNDCTL_TMR_TEMPO"}, - {SNDCTL_TMR_SOURCE, "SNDCTL_TMR_SOURCE"}, - 
{SNDCTL_TMR_METRONOME, "SNDCTL_TMR_METRONOME"}, - {SNDCTL_TMR_SELECT, "SNDCTL_TMR_SELECT"}, - {SNDCTL_MIDI_PRETIME, "SNDCTL_MIDI_PRETIME"}, - {AIONWRITE, "AIONWRITE"}, - {AIOGSIZE, "AIOGSIZE"}, - {AIOSSIZE, "AIOSSIZE"}, - {AIOGFMT, "AIOGFMT"}, - {AIOSFMT, "AIOSFMT"}, - {AIOGMIX, "AIOGMIX"}, - {AIOSMIX, "AIOSMIX"}, - {AIOSTOP, "AIOSTOP"}, - {AIOSYNC, "AIOSYNC"}, - {AIOGCAP, "AIOGCAP"}, - {-1, NULL}, -}; - -midi_cmdtab cmdtab_timer[] = { - {TMR_WAIT_REL, "TMR_WAIT_REL"}, - {TMR_WAIT_ABS, "TMR_WAIT_ABS"}, - {TMR_STOP, "TMR_STOP"}, - {TMR_START, "TMR_START"}, - {TMR_CONTINUE, "TMR_CONTINUE"}, - {TMR_TEMPO, "TMR_TEMPO"}, - {TMR_ECHO, "TMR_ECHO"}, - {TMR_CLOCK, "TMR_CLOCK"}, - {TMR_SPP, "TMR_SPP"}, - {TMR_TIMESIG, "TMR_TIMESIG"}, - {-1, NULL}, -}; - -midi_cmdtab cmdtab_seqcv[] = { - {MIDI_NOTEOFF, "MIDI_NOTEOFF"}, - {MIDI_NOTEON, "MIDI_NOTEON"}, - {MIDI_KEY_PRESSURE, "MIDI_KEY_PRESSURE"}, - {-1, NULL}, -}; - -midi_cmdtab cmdtab_seqccmn[] = { - {MIDI_CTL_CHANGE, "MIDI_CTL_CHANGE"}, - {MIDI_PGM_CHANGE, "MIDI_PGM_CHANGE"}, - {MIDI_CHN_PRESSURE, "MIDI_CHN_PRESSURE"}, - {MIDI_PITCH_BEND, "MIDI_PITCH_BEND"}, - {MIDI_SYSTEM_PREFIX, "MIDI_SYSTEM_PREFIX"}, - {-1, NULL}, -}; - -#ifndef KOBJMETHOD_END -#define KOBJMETHOD_END { NULL, NULL } -#endif - -/* - * static const char *mpu401_mprovider(kobj_t obj, struct mpu401 *m); - */ - -static kobj_method_t seq_methods[] = { - /* KOBJMETHOD(mpu_provider,mpu401_mprovider), */ - KOBJMETHOD_END -}; - -DEFINE_CLASS(sequencer, seq_methods, 0); - -/* The followings are the local function. */ -static int seq_convertold(u_char *event, u_char *out); - -/* - * static void seq_midiinput(struct seq_softc * scp, void *md); - */ -static void seq_reset(struct seq_softc *scp); -static int seq_sync(struct seq_softc *scp); - -static int seq_processevent(struct seq_softc *scp, u_char *event); - -static int seq_timing(struct seq_softc *scp, u_char *event); -static int seq_local(struct seq_softc *scp, u_char *event); - -static int seq_chnvoice(struct seq_softc *scp, kobj_t md, u_char *event); -static int seq_chncommon(struct seq_softc *scp, kobj_t md, u_char *event); -static int seq_sysex(struct seq_softc *scp, kobj_t md, u_char *event); - -static int seq_fetch_mid(struct seq_softc *scp, int unit, kobj_t *md); -void seq_copytoinput(struct seq_softc *scp, u_char *event, int len); -int seq_modevent(module_t mod, int type, void *data); -struct seq_softc *seqs[10]; -static struct mtx seqinfo_mtx; -static u_long nseq = 0; - -static void timer_start(struct seq_softc *t); -static void timer_stop(struct seq_softc *t); -static void timer_setvals(struct seq_softc *t, int tempo, int timerbase); -static void timer_wait(struct seq_softc *t, int ticks, int wait_abs); -static int timer_now(struct seq_softc *t); - -static void -timer_start(struct seq_softc *t) -{ - t->timerrun = 1; - getmicrotime(&t->timersub); -} - -static void -timer_continue(struct seq_softc *t) -{ - struct timeval now; - - if (t->timerrun == 1) - return; - t->timerrun = 1; - getmicrotime(&now); - timevalsub(&now, &t->timerstop); - timevaladd(&t->timersub, &now); -} - -static void -timer_stop(struct seq_softc *t) -{ - t->timerrun = 0; - getmicrotime(&t->timerstop); -} - -static void -timer_setvals(struct seq_softc *t, int tempo, int timerbase) -{ - t->tempo = tempo; - t->timerbase = timerbase; -} - -static void -timer_wait(struct seq_softc *t, int ticks, int wait_abs) -{ - struct timeval now, when; - int ret; - unsigned long long i; - - while (t->timerrun == 0) { - SEQ_DEBUG(2, printf("Timer wait when timer isn't running\n")); - 
/* - * The old sequencer used timeouts that only increased - * the timer when the timer was running. - * Hence the sequencer would stick (?) if the - * timer was disabled. - */ - cv_wait(&t->reset_cv, &t->seq_lock); - if (t->playing == 0) - return; - } - - i = ticks * 60ull * 1000000ull / (t->tempo * t->timerbase); - - when.tv_sec = i / 1000000; - when.tv_usec = i % 1000000; - -#if 0 - printf("timer_wait tempo %d timerbase %d ticks %d abs %d u_sec %llu\n", - t->tempo, t->timerbase, ticks, wait_abs, i); -#endif - - if (wait_abs != 0) { - getmicrotime(&now); - timevalsub(&now, &t->timersub); - timevalsub(&when, &now); - } - if (when.tv_sec < 0 || when.tv_usec < 0) { - SEQ_DEBUG(3, - printf("seq_timer error negative time %lds.%06lds\n", - (long)when.tv_sec, (long)when.tv_usec)); - return; - } - i = when.tv_sec * 1000000ull; - i += when.tv_usec; - i *= hz; - i /= 1000000ull; -#if 0 - printf("seq_timer usec %llu ticks %llu\n", - when.tv_sec * 1000000ull + when.tv_usec, i); -#endif - t->waiting = 1; - ret = cv_timedwait(&t->reset_cv, &t->seq_lock, i + 1); - t->waiting = 0; - - if (ret != EWOULDBLOCK) - SEQ_DEBUG(3, printf("seq_timer didn't timeout\n")); - -} - -static int -timer_now(struct seq_softc *t) -{ - struct timeval now; - unsigned long long i; - int ret; - - if (t->timerrun == 0) - now = t->timerstop; - else - getmicrotime(&now); - - timevalsub(&now, &t->timersub); - - i = now.tv_sec * 1000000ull; - i += now.tv_usec; - i *= t->timerbase; -/* i /= t->tempo; */ - i /= 1000000ull; - - ret = i; - /* - * printf("timer_now: %llu %d\n", i, ret); - */ - - return ret; -} - -static void -seq_eventthread(void *arg) -{ - struct seq_softc *scp = arg; - u_char event[EV_SZ]; - - mtx_lock(&scp->seq_lock); - SEQ_DEBUG(2, printf("seq_eventthread started\n")); - while (scp->done == 0) { -restart: - while (scp->playing == 0) { - cv_wait(&scp->state_cv, &scp->seq_lock); - if (scp->done) - goto done; - } - - while (MIDIQ_EMPTY(scp->out_q)) { - cv_broadcast(&scp->empty_cv); - cv_wait(&scp->out_cv, &scp->seq_lock); - if (scp->playing == 0) - goto restart; - if (scp->done) - goto done; - } - - MIDIQ_DEQ(scp->out_q, event, EV_SZ); - - if (MIDIQ_AVAIL(scp->out_q) < scp->out_water) { - cv_broadcast(&scp->out_cv); - selwakeup(&scp->out_sel); - } - seq_processevent(scp, event); - } - -done: - cv_broadcast(&scp->th_cv); - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(2, printf("seq_eventthread finished\n")); - kproc_exit(0); -} - -/* - * seq_processevent: This maybe called by the event thread or the IOCTL - * handler for queued and out of band events respectively. 
- */ -static int -seq_processevent(struct seq_softc *scp, u_char *event) -{ - int ret; - kobj_t m; - - ret = 0; - - if (event[0] == EV_SEQ_LOCAL) - ret = seq_local(scp, event); - else if (event[0] == EV_TIMING) - ret = seq_timing(scp, event); - else if (event[0] != EV_CHN_VOICE && - event[0] != EV_CHN_COMMON && - event[0] != EV_SYSEX && - event[0] != SEQ_MIDIPUTC) { - ret = 1; - SEQ_DEBUG(2, printf("seq_processevent not known %d\n", - event[0])); - } else if (seq_fetch_mid(scp, event[1], &m) != 0) { - ret = 1; - SEQ_DEBUG(2, printf("seq_processevent midi unit not found %d\n", - event[1])); - } else - switch (event[0]) { - case EV_CHN_VOICE: - ret = seq_chnvoice(scp, m, event); - break; - case EV_CHN_COMMON: - ret = seq_chncommon(scp, m, event); - break; - case EV_SYSEX: - ret = seq_sysex(scp, m, event); - break; - case SEQ_MIDIPUTC: - mtx_unlock(&scp->seq_lock); - ret = SYNTH_WRITERAW(m, &event[2], 1); - mtx_lock(&scp->seq_lock); - break; - } - return ret; -} - -static int -seq_addunit(void) -{ - struct seq_softc *scp; - int ret; - u_char *buf; - - gone_in(15, "Warning! MIDI sequencer to be removed soon: no longer " - "needed or used\n"); - - /* Allocate the softc. */ - ret = ENOMEM; - scp = malloc(sizeof(*scp), M_DEVBUF, M_NOWAIT | M_ZERO); - if (scp == NULL) { - SEQ_DEBUG(1, printf("seq_addunit: softc allocation failed.\n")); - goto err; - } - kobj_init((kobj_t)scp, &sequencer_class); - - buf = malloc(sizeof(*buf) * EV_SZ * 1024, M_TEMP, M_NOWAIT | M_ZERO); - if (buf == NULL) - goto err; - MIDIQ_INIT(scp->in_q, buf, EV_SZ * 1024); - buf = malloc(sizeof(*buf) * EV_SZ * 1024, M_TEMP, M_NOWAIT | M_ZERO); - if (buf == NULL) - goto err; - MIDIQ_INIT(scp->out_q, buf, EV_SZ * 1024); - ret = EINVAL; - - scp->midis = malloc(sizeof(kobj_t) * 32, M_TEMP, M_NOWAIT | M_ZERO); - scp->midi_flags = malloc(sizeof(*scp->midi_flags) * 32, M_TEMP, - M_NOWAIT | M_ZERO); - - if (scp->midis == NULL || scp->midi_flags == NULL) - goto err; - - scp->flags = 0; - - mtx_init(&scp->seq_lock, "seqflq", NULL, 0); - cv_init(&scp->state_cv, "seqstate"); - cv_init(&scp->empty_cv, "seqempty"); - cv_init(&scp->reset_cv, "seqtimer"); - cv_init(&scp->out_cv, "seqqout"); - cv_init(&scp->in_cv, "seqqin"); - cv_init(&scp->th_cv, "seqstart"); - - /* - * Init the damn timer - */ - - scp->mapper = midimapper_addseq(scp, &scp->unit, &scp->mapper_cookie); - if (scp->mapper == NULL) - goto err; - - scp->seqdev = make_dev(&seq_cdevsw, SND_DEV_SEQ, UID_ROOT, GID_WHEEL, - 0666, "sequencer%d", scp->unit); - - scp->musicdev = make_dev(&seq_cdevsw, SND_DEV_MUSIC, UID_ROOT, - GID_WHEEL, 0666, "music%d", scp->unit); - - if (scp->seqdev == NULL || scp->musicdev == NULL) - goto err; - /* - * TODO: Add to list of sequencers this module provides - */ - - ret = - kproc_create - (seq_eventthread, scp, NULL, RFHIGHPID, 0, - "sequencer %02d", scp->unit); - - if (ret) - goto err; - - scp->seqdev->si_drv1 = scp->musicdev->si_drv1 = scp; - - SEQ_DEBUG(2, printf("sequencer %d created scp %p\n", scp->unit, scp)); - - ret = 0; - - mtx_lock(&seqinfo_mtx); - seqs[nseq++] = scp; - mtx_unlock(&seqinfo_mtx); - - goto ok; - -err: - if (scp != NULL) { - if (scp->seqdev != NULL) - destroy_dev(scp->seqdev); - if (scp->musicdev != NULL) - destroy_dev(scp->musicdev); - /* - * TODO: Destroy mutex and cv - */ - if (scp->midis != NULL) - free(scp->midis, M_TEMP); - if (scp->midi_flags != NULL) - free(scp->midi_flags, M_TEMP); - if (scp->out_q.b) - free(scp->out_q.b, M_TEMP); - if (scp->in_q.b) - free(scp->in_q.b, M_TEMP); - free(scp, M_DEVBUF); - } -ok: - return ret; 
-} - -static int -seq_delunit(int unit) -{ - struct seq_softc *scp = seqs[unit]; - int i; - - //SEQ_DEBUG(4, printf("seq_delunit: %d\n", unit)); - SEQ_DEBUG(1, printf("seq_delunit: 1 \n")); - mtx_lock(&scp->seq_lock); - - scp->playing = 0; - scp->done = 1; - cv_broadcast(&scp->out_cv); - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->reset_cv); - SEQ_DEBUG(1, printf("seq_delunit: 2 \n")); - cv_wait(&scp->th_cv, &scp->seq_lock); - SEQ_DEBUG(1, printf("seq_delunit: 3.0 \n")); - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(1, printf("seq_delunit: 3.1 \n")); - - cv_destroy(&scp->state_cv); - SEQ_DEBUG(1, printf("seq_delunit: 4 \n")); - cv_destroy(&scp->empty_cv); - SEQ_DEBUG(1, printf("seq_delunit: 5 \n")); - cv_destroy(&scp->reset_cv); - SEQ_DEBUG(1, printf("seq_delunit: 6 \n")); - cv_destroy(&scp->out_cv); - SEQ_DEBUG(1, printf("seq_delunit: 7 \n")); - cv_destroy(&scp->in_cv); - SEQ_DEBUG(1, printf("seq_delunit: 8 \n")); - cv_destroy(&scp->th_cv); - - SEQ_DEBUG(1, printf("seq_delunit: 10 \n")); - if (scp->seqdev) - destroy_dev(scp->seqdev); - SEQ_DEBUG(1, printf("seq_delunit: 11 \n")); - if (scp->musicdev) - destroy_dev(scp->musicdev); - SEQ_DEBUG(1, printf("seq_delunit: 12 \n")); - scp->seqdev = scp->musicdev = NULL; - if (scp->midis != NULL) - free(scp->midis, M_TEMP); - SEQ_DEBUG(1, printf("seq_delunit: 13 \n")); - if (scp->midi_flags != NULL) - free(scp->midi_flags, M_TEMP); - SEQ_DEBUG(1, printf("seq_delunit: 14 \n")); - free(scp->out_q.b, M_TEMP); - SEQ_DEBUG(1, printf("seq_delunit: 15 \n")); - free(scp->in_q.b, M_TEMP); - - SEQ_DEBUG(1, printf("seq_delunit: 16 \n")); - - mtx_destroy(&scp->seq_lock); - SEQ_DEBUG(1, printf("seq_delunit: 17 \n")); - free(scp, M_DEVBUF); - - mtx_lock(&seqinfo_mtx); - for (i = unit; i < (nseq - 1); i++) - seqs[i] = seqs[i + 1]; - nseq--; - mtx_unlock(&seqinfo_mtx); - - return 0; -} - -int -seq_modevent(module_t mod, int type, void *data) -{ - int retval, r; - - retval = 0; - - switch (type) { - case MOD_LOAD: - mtx_init(&seqinfo_mtx, "seqmod", NULL, 0); - retval = seq_addunit(); - break; - - case MOD_UNLOAD: - while (nseq) { - r = seq_delunit(nseq - 1); - if (r) { - retval = r; - break; - } - } - if (nseq == 0) { - retval = 0; - mtx_destroy(&seqinfo_mtx); - } - break; - - default: - break; - } - - return retval; -} - -static int -seq_fetch_mid(struct seq_softc *scp, int unit, kobj_t *md) -{ - - if (unit >= scp->midi_number || unit < 0) - return EINVAL; - - *md = scp->midis[unit]; - - return 0; -} - -int -mseq_open(struct cdev *i_dev, int flags, int mode, struct thread *td) -{ - struct seq_softc *scp = i_dev->si_drv1; - int i; - - gone_in(15, "Warning! MIDI sequencer to be removed soon: no longer " - "needed or used\n"); - - if (scp == NULL) - return ENXIO; - - SEQ_DEBUG(3, printf("seq_open: scp %p unit %d, flags 0x%x.\n", - scp, scp->unit, flags)); - - /* - * Mark this device busy. 
- */ - - midistat_lock(); - mtx_lock(&scp->seq_lock); - if (scp->busy) { - mtx_unlock(&scp->seq_lock); - midistat_unlock(); - SEQ_DEBUG(2, printf("seq_open: unit %d is busy.\n", scp->unit)); - return EBUSY; - } - scp->fflags = flags; - /* - if ((scp->fflags & O_NONBLOCK) != 0) - scp->flags |= SEQ_F_NBIO; - */ - scp->music = MIDIDEV(i_dev) == SND_DEV_MUSIC; - - /* - * Enumerate the available midi devices - */ - scp->midi_number = 0; - scp->maxunits = midimapper_open_locked(scp->mapper, &scp->mapper_cookie); - - if (scp->maxunits == 0) - SEQ_DEBUG(2, printf("seq_open: no midi devices\n")); - - for (i = 0; i < scp->maxunits; i++) { - scp->midis[scp->midi_number] = - midimapper_fetch_synth_locked(scp->mapper, - scp->mapper_cookie, i); - if (scp->midis[scp->midi_number]) { - if (SYNTH_OPEN(scp->midis[scp->midi_number], scp, - scp->fflags) != 0) - scp->midis[scp->midi_number] = NULL; - else { - scp->midi_flags[scp->midi_number] = - SYNTH_QUERY(scp->midis[scp->midi_number]); - scp->midi_number++; - } - } - } - midistat_unlock(); - - timer_setvals(scp, 60, 100); - - timer_start(scp); - timer_stop(scp); - /* - * actually, if we're in rdonly mode, we should start the timer - */ - /* - * TODO: Handle recording now - */ - - scp->out_water = MIDIQ_SIZE(scp->out_q) / 2; - - scp->busy = 1; - mtx_unlock(&scp->seq_lock); - - SEQ_DEBUG(2, printf("seq_open: opened, mode %s.\n", - scp->music ? "music" : "sequencer")); - SEQ_DEBUG(2, - printf("Sequencer %d %p opened maxunits %d midi_number %d:\n", - scp->unit, scp, scp->maxunits, scp->midi_number)); - for (i = 0; i < scp->midi_number; i++) - SEQ_DEBUG(3, printf(" midi %d %p\n", i, scp->midis[i])); - - return 0; -} - -/* - * mseq_close - */ -int -mseq_close(struct cdev *i_dev, int flags, int mode, struct thread *td) -{ - int i; - struct seq_softc *scp = i_dev->si_drv1; - int ret; - - if (scp == NULL) - return ENXIO; - - SEQ_DEBUG(2, printf("seq_close: unit %d.\n", scp->unit)); - - mtx_lock(&scp->seq_lock); - - ret = ENXIO; - if (scp->busy == 0) - goto err; - - seq_reset(scp); - seq_sync(scp); - - for (i = 0; i < scp->midi_number; i++) - if (scp->midis[i]) - SYNTH_CLOSE(scp->midis[i]); - - midimapper_close(scp->mapper, scp->mapper_cookie); - - timer_stop(scp); - - scp->busy = 0; - ret = 0; - -err: - SEQ_DEBUG(3, printf("seq_close: closed ret = %d.\n", ret)); - mtx_unlock(&scp->seq_lock); - return ret; -} - -int -mseq_read(struct cdev *i_dev, struct uio *uio, int ioflag) -{ - int retval, used; - struct seq_softc *scp = i_dev->si_drv1; - -#define SEQ_RSIZE 32 - u_char buf[SEQ_RSIZE]; - - if (scp == NULL) - return ENXIO; - - SEQ_DEBUG(7, printf("mseq_read: unit %d, resid %zd.\n", - scp->unit, uio->uio_resid)); - - mtx_lock(&scp->seq_lock); - if ((scp->fflags & FREAD) == 0) { - SEQ_DEBUG(2, printf("mseq_read: unit %d is not for reading.\n", - scp->unit)); - retval = EIO; - goto err1; - } - /* - * Begin recording. - */ - /* - * if ((scp->flags & SEQ_F_READING) == 0) - */ - /* - * TODO, start recording if not alread - */ - - /* - * I think the semantics are to return as soon - * as possible. - * Second thought, it doesn't seem like midimoutain - * expects that at all. 
- * TODO: Look up in some sort of spec - */ - - while (uio->uio_resid > 0) { - while (MIDIQ_EMPTY(scp->in_q)) { - retval = EWOULDBLOCK; - /* - * I wish I knew which one to care about - */ - - if (scp->fflags & O_NONBLOCK) - goto err1; - if (ioflag & O_NONBLOCK) - goto err1; - - retval = cv_wait_sig(&scp->in_cv, &scp->seq_lock); - if (retval != 0) - goto err1; - } - - used = MIN(MIDIQ_LEN(scp->in_q), uio->uio_resid); - used = MIN(used, SEQ_RSIZE); - - SEQ_DEBUG(8, printf("midiread: uiomove cc=%d\n", used)); - MIDIQ_DEQ(scp->in_q, buf, used); - mtx_unlock(&scp->seq_lock); - retval = uiomove(buf, used, uio); - mtx_lock(&scp->seq_lock); - if (retval) - goto err1; - } - - retval = 0; -err1: - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(6, printf("mseq_read: ret %d, resid %zd.\n", - retval, uio->uio_resid)); - - return retval; -} - -int -mseq_write(struct cdev *i_dev, struct uio *uio, int ioflag) -{ - u_char event[EV_SZ], newevent[EV_SZ], ev_code; - struct seq_softc *scp = i_dev->si_drv1; - int retval; - int used; - - SEQ_DEBUG(7, printf("seq_write: unit %d, resid %zd.\n", - scp->unit, uio->uio_resid)); - - if (scp == NULL) - return ENXIO; - - mtx_lock(&scp->seq_lock); - - if ((scp->fflags & FWRITE) == 0) { - SEQ_DEBUG(2, printf("seq_write: unit %d is not for writing.\n", - scp->unit)); - retval = EIO; - goto err0; - } - while (uio->uio_resid > 0) { - while (MIDIQ_AVAIL(scp->out_q) == 0) { - retval = EWOULDBLOCK; - if (scp->fflags & O_NONBLOCK) - goto err0; - if (ioflag & O_NONBLOCK) - goto err0; - SEQ_DEBUG(8, printf("seq_write cvwait\n")); - - scp->playing = 1; - cv_broadcast(&scp->out_cv); - cv_broadcast(&scp->state_cv); - - retval = cv_wait_sig(&scp->out_cv, &scp->seq_lock); - /* - * We slept, maybe things have changed since last - * dying check - */ - if (retval != 0) - goto err0; -#if 0 - /* - * Useless test - */ - if (scp != i_dev->si_drv1) - retval = ENXIO; -#endif - } - - used = MIN(uio->uio_resid, 4); - - SEQ_DEBUG(8, printf("seqout: resid %zd len %jd avail %jd\n", - uio->uio_resid, (intmax_t)MIDIQ_LEN(scp->out_q), - (intmax_t)MIDIQ_AVAIL(scp->out_q))); - - if (used != 4) { - retval = ENXIO; - goto err0; - } - mtx_unlock(&scp->seq_lock); - retval = uiomove(event, used, uio); - mtx_lock(&scp->seq_lock); - if (retval) - goto err0; - - ev_code = event[0]; - SEQ_DEBUG(8, printf("seq_write: unit %d, event %s.\n", - scp->unit, midi_cmdname(ev_code, cmdtab_seqevent))); - - /* Have a look at the event code. */ - if (ev_code == SEQ_FULLSIZE) { - /* - * TODO: restore code for SEQ_FULLSIZE - */ -#if 0 - /* - * A long event, these are the patches/samples for a - * synthesizer. - */ - midiunit = *(u_short *)&event[2]; - mtx_lock(&sd->seq_lock); - ret = lookup_mididev(scp, midiunit, LOOKUP_OPEN, &md); - mtx_unlock(&sd->seq_lock); - if (ret != 0) - return (ret); - - SEQ_DEBUG(printf("seq_write: loading a patch to the unit %d.\n", midiunit)); - - ret = md->synth.loadpatch(md, *(short *)&event[0], buf, - p + 4, count, 0); - return (ret); -#else - /* - * For now, just flush the darn buffer - */ - SEQ_DEBUG(2, - printf("seq_write: SEQ_FULLSIZE flusing buffer.\n")); - while (uio->uio_resid > 0) { - mtx_unlock(&scp->seq_lock); - retval = uiomove(event, MIN(EV_SZ, uio->uio_resid), uio); - mtx_lock(&scp->seq_lock); - if (retval) - goto err0; - } - retval = 0; - goto err0; -#endif - } - retval = EINVAL; - if (ev_code >= 128) { - int error; - - /* - * Some sort of an extended event. The size is eight - * bytes. scoop extra info. 
- */ - if (scp->music && ev_code == SEQ_EXTENDED) { - SEQ_DEBUG(2, printf("seq_write: invalid level two event %x.\n", ev_code)); - goto err0; - } - mtx_unlock(&scp->seq_lock); - if (uio->uio_resid < 4) - error = EINVAL; - else - error = uiomove((caddr_t)&event[4], 4, uio); - mtx_lock(&scp->seq_lock); - if (error) { - SEQ_DEBUG(2, - printf("seq_write: user memory mangled?\n")); - goto err0; - } - } else { - /* - * Size four event. - */ - if (scp->music) { - SEQ_DEBUG(2, printf("seq_write: four byte event in music mode.\n")); - goto err0; - } - } - if (ev_code == SEQ_MIDIPUTC) { - /* - * TODO: event[2] is unit number to receive char. - * Range check it. - */ - } - if (scp->music) { -#ifdef not_ever_ever - if (event[0] == EV_TIMING && - (event[1] == TMR_START || event[1] == TMR_STOP)) { - /* - * For now, try to make midimoutain work by - * forcing these events to be processed - * immediately. - */ - seq_processevent(scp, event); - } else - MIDIQ_ENQ(scp->out_q, event, EV_SZ); -#else - MIDIQ_ENQ(scp->out_q, event, EV_SZ); -#endif - } else { - if (seq_convertold(event, newevent) > 0) - MIDIQ_ENQ(scp->out_q, newevent, EV_SZ); -#if 0 - else - goto err0; -#endif - } - } - - scp->playing = 1; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - - retval = 0; - -err0: - SEQ_DEBUG(6, - printf("seq_write done: leftover buffer length %zd retval %d\n", - uio->uio_resid, retval)); - mtx_unlock(&scp->seq_lock); - return retval; -} - -int -mseq_ioctl(struct cdev *i_dev, u_long cmd, caddr_t arg, int mode, - struct thread *td) -{ - int midiunit, ret, tmp; - struct seq_softc *scp = i_dev->si_drv1; - struct synth_info *synthinfo; - struct midi_info *midiinfo; - u_char event[EV_SZ]; - u_char newevent[EV_SZ]; - - kobj_t md; - - /* - * struct snd_size *sndsize; - */ - - if (scp == NULL) - return ENXIO; - - SEQ_DEBUG(6, printf("seq_ioctl: unit %d, cmd %s.\n", - scp->unit, midi_cmdname(cmd, cmdtab_seqioctl))); - - ret = 0; - - switch (cmd) { - case SNDCTL_SEQ_GETTIME: - /* - * ioctl needed by libtse - */ - mtx_lock(&scp->seq_lock); - *(int *)arg = timer_now(scp); - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(6, printf("seq_ioctl: gettime %d.\n", *(int *)arg)); - ret = 0; - break; - case SNDCTL_TMR_METRONOME: - /* fallthrough */ - case SNDCTL_TMR_SOURCE: - /* - * Not implemented - */ - ret = 0; - break; - case SNDCTL_TMR_TEMPO: - event[1] = TMR_TEMPO; - event[4] = *(int *)arg & 0xFF; - event[5] = (*(int *)arg >> 8) & 0xFF; - event[6] = (*(int *)arg >> 16) & 0xFF; - event[7] = (*(int *)arg >> 24) & 0xFF; - goto timerevent; - case SNDCTL_TMR_TIMEBASE: - event[1] = TMR_TIMERBASE; - event[4] = *(int *)arg & 0xFF; - event[5] = (*(int *)arg >> 8) & 0xFF; - event[6] = (*(int *)arg >> 16) & 0xFF; - event[7] = (*(int *)arg >> 24) & 0xFF; - goto timerevent; - case SNDCTL_TMR_START: - event[1] = TMR_START; - goto timerevent; - case SNDCTL_TMR_STOP: - event[1] = TMR_STOP; - goto timerevent; - case SNDCTL_TMR_CONTINUE: - event[1] = TMR_CONTINUE; -timerevent: - event[0] = EV_TIMING; - mtx_lock(&scp->seq_lock); - if (!scp->music) { - ret = EINVAL; - mtx_unlock(&scp->seq_lock); - break; - } - seq_processevent(scp, event); - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_TMR_SELECT: - SEQ_DEBUG(2, - printf("seq_ioctl: SNDCTL_TMR_SELECT not supported\n")); - ret = EINVAL; - break; - case SNDCTL_SEQ_SYNC: - if (mode == O_RDONLY) { - ret = 0; - break; - } - mtx_lock(&scp->seq_lock); - ret = seq_sync(scp); - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_SEQ_PANIC: - /* fallthrough */ - case SNDCTL_SEQ_RESET: - /* - * 
SNDCTL_SEQ_PANIC == SNDCTL_SEQ_RESET - */ - mtx_lock(&scp->seq_lock); - seq_reset(scp); - mtx_unlock(&scp->seq_lock); - ret = 0; - break; - case SNDCTL_SEQ_TESTMIDI: - mtx_lock(&scp->seq_lock); - /* - * TODO: SNDCTL_SEQ_TESTMIDI now means "can I write to the - * device?". - */ - mtx_unlock(&scp->seq_lock); - break; -#if 0 - case SNDCTL_SEQ_GETINCOUNT: - if (mode == O_WRONLY) - *(int *)arg = 0; - else { - mtx_lock(&scp->seq_lock); - *(int *)arg = scp->in_q.rl; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(printf("seq_ioctl: incount %d.\n", - *(int *)arg)); - } - ret = 0; - break; - case SNDCTL_SEQ_GETOUTCOUNT: - if (mode == O_RDONLY) - *(int *)arg = 0; - else { - mtx_lock(&scp->seq_lock); - *(int *)arg = scp->out_q.fl; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(printf("seq_ioctl: outcount %d.\n", - *(int *)arg)); - } - ret = 0; - break; -#endif - case SNDCTL_SEQ_CTRLRATE: - if (*(int *)arg != 0) { - ret = EINVAL; - break; - } - mtx_lock(&scp->seq_lock); - *(int *)arg = scp->timerbase; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: ctrlrate %d.\n", *(int *)arg)); - ret = 0; - break; - /* - * TODO: ioctl SNDCTL_SEQ_RESETSAMPLES - */ -#if 0 - case SNDCTL_SEQ_RESETSAMPLES: - mtx_lock(&scp->seq_lock); - ret = lookup_mididev(scp, *(int *)arg, LOOKUP_OPEN, &md); - mtx_unlock(&scp->seq_lock); - if (ret != 0) - break; - ret = midi_ioctl(MIDIMKDEV(major(i_dev), *(int *)arg, - SND_DEV_MIDIN), cmd, arg, mode, td); - break; -#endif - case SNDCTL_SEQ_NRSYNTHS: - mtx_lock(&scp->seq_lock); - *(int *)arg = scp->midi_number; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: synths %d.\n", *(int *)arg)); - ret = 0; - break; - case SNDCTL_SEQ_NRMIDIS: - mtx_lock(&scp->seq_lock); - if (scp->music) - *(int *)arg = 0; - else { - /* - * TODO: count the numbder of devices that can WRITERAW - */ - *(int *)arg = scp->midi_number; - } - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: midis %d.\n", *(int *)arg)); - ret = 0; - break; - /* - * TODO: ioctl SNDCTL_SYNTH_MEMAVL - */ -#if 0 - case SNDCTL_SYNTH_MEMAVL: - mtx_lock(&scp->seq_lock); - ret = lookup_mididev(scp, *(int *)arg, LOOKUP_OPEN, &md); - mtx_unlock(&scp->seq_lock); - if (ret != 0) - break; - ret = midi_ioctl(MIDIMKDEV(major(i_dev), *(int *)arg, - SND_DEV_MIDIN), cmd, arg, mode, td); - break; -#endif - case SNDCTL_SEQ_OUTOFBAND: - for (ret = 0; ret < EV_SZ; ret++) - event[ret] = (u_char)arg[0]; - - mtx_lock(&scp->seq_lock); - if (scp->music) - ret = seq_processevent(scp, event); - else { - if (seq_convertold(event, newevent) > 0) - ret = seq_processevent(scp, newevent); - else - ret = EINVAL; - } - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_SYNTH_INFO: - synthinfo = (struct synth_info *)arg; - midiunit = synthinfo->device; - mtx_lock(&scp->seq_lock); - if (seq_fetch_mid(scp, midiunit, &md) == 0) { - bzero(synthinfo, sizeof(*synthinfo)); - synthinfo->name[0] = 'f'; - synthinfo->name[1] = 'a'; - synthinfo->name[2] = 'k'; - synthinfo->name[3] = 'e'; - synthinfo->name[4] = 's'; - synthinfo->name[5] = 'y'; - synthinfo->name[6] = 'n'; - synthinfo->name[7] = 't'; - synthinfo->name[8] = 'h'; - synthinfo->device = midiunit; - synthinfo->synth_type = SYNTH_TYPE_MIDI; - synthinfo->capabilities = scp->midi_flags[midiunit]; - ret = 0; - } else - ret = EINVAL; - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_MIDI_INFO: - midiinfo = (struct midi_info *)arg; - midiunit = midiinfo->device; - mtx_lock(&scp->seq_lock); - if (seq_fetch_mid(scp, midiunit, &md) == 0) { - bzero(midiinfo, sizeof(*midiinfo)); - midiinfo->name[0] = 
'f'; - midiinfo->name[1] = 'a'; - midiinfo->name[2] = 'k'; - midiinfo->name[3] = 'e'; - midiinfo->name[4] = 'm'; - midiinfo->name[5] = 'i'; - midiinfo->name[6] = 'd'; - midiinfo->name[7] = 'i'; - midiinfo->device = midiunit; - midiinfo->capabilities = scp->midi_flags[midiunit]; - /* - * TODO: What devtype? - */ - midiinfo->dev_type = 0x01; - ret = 0; - } else - ret = EINVAL; - mtx_unlock(&scp->seq_lock); - break; - case SNDCTL_SEQ_THRESHOLD: - mtx_lock(&scp->seq_lock); - RANGE(*(int *)arg, 1, MIDIQ_SIZE(scp->out_q) - 1); - scp->out_water = *(int *)arg; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: water %d.\n", *(int *)arg)); - ret = 0; - break; - case SNDCTL_MIDI_PRETIME: - tmp = *(int *)arg; - if (tmp < 0) - tmp = 0; - mtx_lock(&scp->seq_lock); - scp->pre_event_timeout = (hz * tmp) / 10; - *(int *)arg = scp->pre_event_timeout; - mtx_unlock(&scp->seq_lock); - SEQ_DEBUG(3, printf("seq_ioctl: pretime %d.\n", *(int *)arg)); - ret = 0; - break; - case SNDCTL_FM_4OP_ENABLE: - case SNDCTL_PMGR_IFACE: - case SNDCTL_PMGR_ACCESS: - /* - * Patch manager and fm are ded, ded, ded. - */ - /* fallthrough */ - default: - /* - * TODO: Consider ioctl default case. - * Old code used to - * if ((scp->fflags & O_ACCMODE) == FREAD) { - * ret = EIO; - * break; - * } - * Then pass on the ioctl to device 0 - */ - SEQ_DEBUG(2, - printf("seq_ioctl: unsupported IOCTL %ld.\n", cmd)); - ret = EINVAL; - break; - } - - return ret; -} - -int -mseq_poll(struct cdev *i_dev, int events, struct thread *td) -{ - int ret, lim; - struct seq_softc *scp = i_dev->si_drv1; - - SEQ_DEBUG(3, printf("seq_poll: unit %d.\n", scp->unit)); - SEQ_DEBUG(1, printf("seq_poll: unit %d.\n", scp->unit)); - - mtx_lock(&scp->seq_lock); - - ret = 0; - - /* Look up the appropriate queue and select it. */ - if ((events & (POLLOUT | POLLWRNORM)) != 0) { - /* Start playing. */ - scp->playing = 1; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - - lim = scp->out_water; - - if (MIDIQ_AVAIL(scp->out_q) < lim) - /* No enough space, record select. */ - selrecord(td, &scp->out_sel); - else - /* We can write now. */ - ret |= events & (POLLOUT | POLLWRNORM); - } - if ((events & (POLLIN | POLLRDNORM)) != 0) { - /* TODO: Start recording. */ - - /* Find out the boundary. */ - lim = 1; - if (MIDIQ_LEN(scp->in_q) < lim) - /* No data ready, record select. */ - selrecord(td, &scp->in_sel); - else - /* We can read now. */ - ret |= events & (POLLIN | POLLRDNORM); - } - mtx_unlock(&scp->seq_lock); - - return (ret); -} - -#if 0 -static void -sein_qtr(void *p, void /* mididev_info */ *md) -{ - struct seq_softc *scp; - - scp = (struct seq_softc *)p; - - mtx_lock(&scp->seq_lock); - - /* Restart playing if we have the data to output. */ - if (scp->queueout_pending) - seq_callback(scp, SEQ_CB_START | SEQ_CB_WR); - /* Check the midi device if we are reading. */ - if ((scp->flags & SEQ_F_READING) != 0) - seq_midiinput(scp, md); - - mtx_unlock(&scp->seq_lock); -} - -#endif -/* - * seq_convertold - * Was the old playevent. 
Use this to convert and old - * style /dev/sequencer event to a /dev/music event - */ -static int -seq_convertold(u_char *event, u_char *out) -{ - int used; - u_char dev, chn, note, vel; - - out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = - out[7] = 0; - - dev = 0; - chn = event[1]; - note = event[2]; - vel = event[3]; - - used = 0; - -restart: - /* - * TODO: Debug statement - */ - switch (event[0]) { - case EV_TIMING: - case EV_CHN_VOICE: - case EV_CHN_COMMON: - case EV_SYSEX: - case EV_SEQ_LOCAL: - out[0] = event[0]; - out[1] = event[1]; - out[2] = event[2]; - out[3] = event[3]; - out[4] = event[4]; - out[5] = event[5]; - out[6] = event[6]; - out[7] = event[7]; - used += 8; - break; - case SEQ_NOTEOFF: - out[0] = EV_CHN_VOICE; - out[1] = dev; - out[2] = MIDI_NOTEOFF; - out[3] = chn; - out[4] = note; - out[5] = 255; - used += 4; - break; - - case SEQ_NOTEON: - out[0] = EV_CHN_VOICE; - out[1] = dev; - out[2] = MIDI_NOTEON; - out[3] = chn; - out[4] = note; - out[5] = vel; - used += 4; - break; - - /* - * wait delay = (event[2] << 16) + (event[3] << 8) + event[4] - */ - - case SEQ_PGMCHANGE: - out[0] = EV_CHN_COMMON; - out[1] = dev; - out[2] = MIDI_PGM_CHANGE; - out[3] = chn; - out[4] = note; - out[5] = vel; - used += 4; - break; -/* - out[0] = EV_TIMING; - out[1] = dev; - out[2] = MIDI_PGM_CHANGE; - out[3] = chn; - out[4] = note; - out[5] = vel; - SEQ_DEBUG(4,printf("seq_playevent: synctimer\n")); - break; -*/ - - case SEQ_MIDIPUTC: - SEQ_DEBUG(4, - printf("seq_playevent: put data 0x%02x, unit %d.\n", - event[1], event[2])); - /* - * Pass through to the midi device. - * device = event[2] - * data = event[1] - */ - out[0] = SEQ_MIDIPUTC; - out[1] = dev; - out[2] = chn; - used += 4; - break; -#ifdef notyet - case SEQ_ECHO: - /* - * This isn't handled here yet because I don't know if I can - * just use four bytes events. There might be consequences - * in the _read routing - */ - if (seq_copytoinput(scp, event, 4) == EAGAIN) { - ret = QUEUEFULL; - break; - } - ret = MORE; - break; -#endif - case SEQ_EXTENDED: - switch (event[1]) { - case SEQ_NOTEOFF: - case SEQ_NOTEON: - case SEQ_PGMCHANGE: - event++; - used = 4; - goto restart; - break; - case SEQ_AFTERTOUCH: - /* - * SYNTH_AFTERTOUCH(md, event[3], event[4]) - */ - case SEQ_BALANCE: - /* - * SYNTH_PANNING(md, event[3], (char)event[4]) - */ - case SEQ_CONTROLLER: - /* - * SYNTH_CONTROLLER(md, event[3], event[4], *(short *)&event[5]) - */ - case SEQ_VOLMODE: - /* - * SYNTH_VOLUMEMETHOD(md, event[3]) - */ - default: - SEQ_DEBUG(2, - printf("seq_convertold: SEQ_EXTENDED type %d" - "not handled\n", event[1])); - break; - } - break; - case SEQ_WAIT: - out[0] = EV_TIMING; - out[1] = TMR_WAIT_REL; - out[4] = event[2]; - out[5] = event[3]; - out[6] = event[4]; - - SEQ_DEBUG(5, printf("SEQ_WAIT %d", - event[2] + (event[3] << 8) + (event[4] << 24))); - - used += 4; - break; - - case SEQ_ECHO: - case SEQ_SYNCTIMER: - case SEQ_PRIVATE: - default: - SEQ_DEBUG(2, - printf("seq_convertold: event type %d not handled %d %d %d\n", - event[0], event[1], event[2], event[3])); - break; - } - return used; -} - -/* - * Writting to the sequencer buffer never blocks and drops - * input which cannot be queued - */ -void -seq_copytoinput(struct seq_softc *scp, u_char *event, int len) -{ - - mtx_assert(&scp->seq_lock, MA_OWNED); - - if (MIDIQ_AVAIL(scp->in_q) < len) { - /* - * ENOROOM? EINPUTDROPPED? ETOUGHLUCK? 
- */ - SEQ_DEBUG(2, printf("seq_copytoinput: queue full\n")); - } else { - MIDIQ_ENQ(scp->in_q, event, len); - selwakeup(&scp->in_sel); - cv_broadcast(&scp->in_cv); - } - -} - -static int -seq_chnvoice(struct seq_softc *scp, kobj_t md, u_char *event) -{ - int ret, voice; - u_char cmd, chn, note, parm; - - ret = 0; - cmd = event[2]; - chn = event[3]; - note = event[4]; - parm = event[5]; - - mtx_assert(&scp->seq_lock, MA_OWNED); - - SEQ_DEBUG(5, printf("seq_chnvoice: unit %d, dev %d, cmd %s," - " chn %d, note %d, parm %d.\n", scp->unit, event[1], - midi_cmdname(cmd, cmdtab_seqcv), chn, note, parm)); - - voice = SYNTH_ALLOC(md, chn, note); - - mtx_unlock(&scp->seq_lock); - - switch (cmd) { - case MIDI_NOTEON: - if (note < 128 || note == 255) { -#if 0 - if (scp->music && chn == 9) { - /* - * This channel is a percussion. The note - * number is the patch number. - */ - /* - mtx_unlock(&scp->seq_lock); - if (SYNTH_SETINSTR(md, voice, 128 + note) - == EAGAIN) { - mtx_lock(&scp->seq_lock); - return (QUEUEFULL); - } - mtx_lock(&scp->seq_lock); - */ - note = 60; /* Middle C. */ - } -#endif - if (scp->music) { - /* - mtx_unlock(&scp->seq_lock); - if (SYNTH_SETUPVOICE(md, voice, chn) - == EAGAIN) { - mtx_lock(&scp->seq_lock); - return (QUEUEFULL); - } - mtx_lock(&scp->seq_lock); - */ - } - SYNTH_STARTNOTE(md, voice, note, parm); - } - break; - case MIDI_NOTEOFF: - SYNTH_KILLNOTE(md, voice, note, parm); - break; - case MIDI_KEY_PRESSURE: - SYNTH_AFTERTOUCH(md, voice, parm); - break; - default: - ret = 1; - SEQ_DEBUG(2, printf("seq_chnvoice event type %d not handled\n", - event[1])); - break; - } - - mtx_lock(&scp->seq_lock); - return ret; -} - -static int -seq_chncommon(struct seq_softc *scp, kobj_t md, u_char *event) -{ - int ret; - u_short w14; - u_char cmd, chn, p1; - - ret = 0; - cmd = event[2]; - chn = event[3]; - p1 = event[4]; - w14 = *(u_short *)&event[6]; - - SEQ_DEBUG(5, printf("seq_chncommon: unit %d, dev %d, cmd %s, chn %d," - " p1 %d, w14 %d.\n", scp->unit, event[1], - midi_cmdname(cmd, cmdtab_seqccmn), chn, p1, w14)); - mtx_unlock(&scp->seq_lock); - switch (cmd) { - case MIDI_PGM_CHANGE: - SEQ_DEBUG(4, printf("seq_chncommon pgmchn chn %d pg %d\n", - chn, p1)); - SYNTH_SETINSTR(md, chn, p1); - break; - case MIDI_CTL_CHANGE: - SEQ_DEBUG(4, printf("seq_chncommon ctlch chn %d pg %d %d\n", - chn, p1, w14)); - SYNTH_CONTROLLER(md, chn, p1, w14); - break; - case MIDI_PITCH_BEND: - if (scp->music) { - /* - * TODO: MIDI_PITCH_BEND - */ -#if 0 - mtx_lock(&md->synth.vc_mtx); - md->synth.chn_info[chn].bender_value = w14; - if (md->midiunit >= 0) { - /* - * Handle all of the notes playing on this - * channel. 
- */ - key = ((int)chn << 8); - for (i = 0; i < md->synth.alloc.max_voice; i++) - if ((md->synth.alloc.map[i] & 0xff00) == key) { - mtx_unlock(&md->synth.vc_mtx); - mtx_unlock(&scp->seq_lock); - if (md->synth.bender(md, i, w14) == EAGAIN) { - mtx_lock(&scp->seq_lock); - return (QUEUEFULL); - } - mtx_lock(&scp->seq_lock); - } - } else { - mtx_unlock(&md->synth.vc_mtx); - mtx_unlock(&scp->seq_lock); - if (md->synth.bender(md, chn, w14) == EAGAIN) { - mtx_lock(&scp->seq_lock); - return (QUEUEFULL); - } - mtx_lock(&scp->seq_lock); - } -#endif - } else - SYNTH_BENDER(md, chn, w14); - break; - default: - ret = 1; - SEQ_DEBUG(2, - printf("seq_chncommon event type %d not handled.\n", - event[1])); - break; - } - mtx_lock(&scp->seq_lock); - return ret; -} - -static int -seq_timing(struct seq_softc *scp, u_char *event) -{ - int param; - int ret; - - ret = 0; - param = event[4] + (event[5] << 8) + - (event[6] << 16) + (event[7] << 24); - - SEQ_DEBUG(5, printf("seq_timing: unit %d, cmd %d, param %d.\n", - scp->unit, event[1], param)); - switch (event[1]) { - case TMR_WAIT_REL: - timer_wait(scp, param, 0); - break; - case TMR_WAIT_ABS: - timer_wait(scp, param, 1); - break; - case TMR_START: - timer_start(scp); - cv_broadcast(&scp->reset_cv); - break; - case TMR_STOP: - timer_stop(scp); - /* - * The following cv_broadcast isn't needed since we only - * wait for 0->1 transitions. It probably won't hurt - */ - cv_broadcast(&scp->reset_cv); - break; - case TMR_CONTINUE: - timer_continue(scp); - cv_broadcast(&scp->reset_cv); - break; - case TMR_TEMPO: - if (param < 8) - param = 8; - if (param > 360) - param = 360; - SEQ_DEBUG(4, printf("Timer set tempo %d\n", param)); - timer_setvals(scp, param, scp->timerbase); - break; - case TMR_TIMERBASE: - if (param < 1) - param = 1; - if (param > 1000) - param = 1000; - SEQ_DEBUG(4, printf("Timer set timerbase %d\n", param)); - timer_setvals(scp, scp->tempo, param); - break; - case TMR_ECHO: - /* - * TODO: Consider making 4-byte events for /dev/sequencer - * PRO: Maybe needed by legacy apps - * CON: soundcard.h has been warning for a while many years - * to expect 8 byte events. 
- */ -#if 0 - if (scp->music) - seq_copytoinput(scp, event, 8); - else { - param = (param << 8 | SEQ_ECHO); - seq_copytoinput(scp, (u_char *)¶m, 4); - } -#else - seq_copytoinput(scp, event, 8); -#endif - break; - default: - SEQ_DEBUG(2, printf("seq_timing event type %d not handled.\n", - event[1])); - ret = 1; - break; - } - return ret; -} - -static int -seq_local(struct seq_softc *scp, u_char *event) -{ - int ret; - - ret = 0; - mtx_assert(&scp->seq_lock, MA_OWNED); - - SEQ_DEBUG(5, printf("seq_local: unit %d, cmd %d\n", scp->unit, - event[1])); - switch (event[1]) { - default: - SEQ_DEBUG(1, printf("seq_local event type %d not handled\n", - event[1])); - ret = 1; - break; - } - return ret; -} - -static int -seq_sysex(struct seq_softc *scp, kobj_t md, u_char *event) -{ - int i, l; - - mtx_assert(&scp->seq_lock, MA_OWNED); - SEQ_DEBUG(5, printf("seq_sysex: unit %d device %d\n", scp->unit, - event[1])); - l = 0; - for (i = 0; i < 6 && event[i + 2] != 0xff; i++) - l = i + 1; - if (l > 0) { - mtx_unlock(&scp->seq_lock); - if (SYNTH_SENDSYSEX(md, &event[2], l) == EAGAIN) { - mtx_lock(&scp->seq_lock); - return 1; - } - mtx_lock(&scp->seq_lock); - } - return 0; -} - -/* - * Reset no longer closes the raw devices nor seq_sync's - * Callers are IOCTL and seq_close - */ -static void -seq_reset(struct seq_softc *scp) -{ - int chn, i; - kobj_t m; - - mtx_assert(&scp->seq_lock, MA_OWNED); - - SEQ_DEBUG(5, printf("seq_reset: unit %d.\n", scp->unit)); - - /* - * Stop reading and writing. - */ - - /* scp->recording = 0; */ - scp->playing = 0; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - cv_broadcast(&scp->reset_cv); - - /* - * For now, don't reset the timers. - */ - MIDIQ_CLEAR(scp->in_q); - MIDIQ_CLEAR(scp->out_q); - - for (i = 0; i < scp->midi_number; i++) { - m = scp->midis[i]; - mtx_unlock(&scp->seq_lock); - SYNTH_RESET(m); - for (chn = 0; chn < 16; chn++) { - SYNTH_CONTROLLER(m, chn, 123, 0); - SYNTH_CONTROLLER(m, chn, 121, 0); - SYNTH_BENDER(m, chn, 1 << 13); - } - mtx_lock(&scp->seq_lock); - } -} - -/* - * seq_sync - * *really* flush the output queue - * flush the event queue, then flush the synthsisers. - * Callers are IOCTL and close - */ - -#define SEQ_SYNC_TIMEOUT 8 -static int -seq_sync(struct seq_softc *scp) -{ - int i, rl, sync[16], done; - - mtx_assert(&scp->seq_lock, MA_OWNED); - - SEQ_DEBUG(4, printf("seq_sync: unit %d.\n", scp->unit)); - - /* - * Wait until output queue is empty. Check every so often to see if - * the queue is moving along. If it isn't just abort. - */ - while (!MIDIQ_EMPTY(scp->out_q)) { - if (!scp->playing) { - scp->playing = 1; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - } - rl = MIDIQ_LEN(scp->out_q); - - i = cv_timedwait_sig(&scp->out_cv, - &scp->seq_lock, SEQ_SYNC_TIMEOUT * hz); - - if (i == EINTR || i == ERESTART) { - if (i == EINTR) { - /* - * XXX: I don't know why we stop playing - */ - scp->playing = 0; - cv_broadcast(&scp->out_cv); - } - return i; - } - if (i == EWOULDBLOCK && rl == MIDIQ_LEN(scp->out_q) && - scp->waiting == 0) { - /* - * A queue seems to be stuck up. Give up and clear - * queues. - */ - MIDIQ_CLEAR(scp->out_q); - scp->playing = 0; - cv_broadcast(&scp->state_cv); - cv_broadcast(&scp->out_cv); - cv_broadcast(&scp->reset_cv); - - /* - * TODO: Consider if the raw devices need to be flushed - */ - - SEQ_DEBUG(1, printf("seq_sync queue stuck, aborting\n")); - - return i; - } - } - - scp->playing = 0; - /* - * Since syncing a midi device might block, unlock scp->seq_lock. 
- */ - - mtx_unlock(&scp->seq_lock); - for (i = 0; i < scp->midi_number; i++) - sync[i] = 1; - - do { - done = 1; - for (i = 0; i < scp->midi_number; i++) - if (sync[i]) { - if (SYNTH_INSYNC(scp->midis[i]) == 0) - sync[i] = 0; - else - done = 0; - } - if (!done) - DELAY(5000); - - } while (!done); - - mtx_lock(&scp->seq_lock); - return 0; -} - -char * -midi_cmdname(int cmd, midi_cmdtab *tab) -{ - while (tab->name != NULL) { - if (cmd == tab->cmd) - return (tab->name); - tab++; - } - - return ("unknown"); -} diff --git a/sys/dev/sound/midi/synth_if.m b/sys/dev/sound/midi/synth_if.m deleted file mode 100644 index a763b3422bc6..000000000000 --- a/sys/dev/sound/midi/synth_if.m +++ /dev/null @@ -1,312 +0,0 @@ -#- -# Copyright (c) 2003 Mathew Kanner -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. 
-# -# - -INTERFACE synth; - -#include <sys/systm.h> - -CODE { - -synth_killnote_t nokillnote; -synth_startnote_t nostartnote; -synth_setinstr_t nosetinstr; -synth_hwcontrol_t nohwcontrol; -synth_aftertouch_t noaftertouch; -synth_panning_t nopanning; -synth_controller_t nocontroller; -synth_volumemethod_t novolumemethod; -synth_bender_t nobender; -synth_setupvoice_t nosetupvoice; -synth_sendsysex_t nosendsysex; -synth_allocvoice_t noallocvoice; -synth_writeraw_t nowriteraw; -synth_reset_t noreset; -synth_shortname_t noshortname; -synth_open_t noopen; -synth_close_t noclose; -synth_query_t noquery; -synth_insync_t noinsync; -synth_alloc_t noalloc; - - int - nokillnote(void *_kobj, uint8_t _chn, uint8_t _note, uint8_t _vel) - { - printf("nokillnote\n"); - return 0; - } - - int - noopen(void *_kobj, void *_arg, int mode) - { - printf("noopen\n"); - return 0; - } - - int - noquery(void *_kboj) - { - printf("noquery\n"); - return 0; - } - - int - nostartnote(void *_kb, uint8_t _voice, uint8_t _note, uint8_t _parm) - { - printf("nostartnote\n"); - return 0; - } - - int - nosetinstr(void *_kb, uint8_t _chn, uint16_t _patchno) - { - printf("nosetinstr\n"); - return 0; - } - - int - nohwcontrol(void *_kb, uint8_t *_event) - { - printf("nohwcontrol\n"); - return 0; - } - - int - noaftertouch ( void /* X */ * _kobj, uint8_t _x1, uint8_t _x2) - { - printf("noaftertouch\n"); - return 0; - } - - int - nopanning ( void /* X */ * _kobj, uint8_t _x1, uint8_t _x2) - { - printf("nopanning\n"); - return 0; - } - - int - nocontroller ( void /* X */ * _kobj, uint8_t _x1, uint8_t _x2, uint16_t _x3) - { - printf("nocontroller\n"); - return 0; - } - - int - novolumemethod ( - void /* X */ * _kobj, - uint8_t _x1) - { - printf("novolumemethod\n"); - return 0; - } - - int - nobender ( void /* X */ * _kobj, uint8_t _voice, uint16_t _bend) - { - printf("nobender\n"); - return 0; - } - - int - nosetupvoice ( void /* X */ * _kobj, uint8_t _voice, uint8_t _chn) - { - - printf("nosetupvoice\n"); - return 0; - } - - int - nosendsysex ( void /* X */ * _kobj, void * _buf, size_t _len) - { - printf("nosendsysex\n"); - return 0; - } - - int - noallocvoice ( void /* X */ * _kobj, uint8_t _chn, uint8_t _note, void *_x) - { - printf("noallocvoice\n"); - return 0; - } - - int - nowriteraw ( void /* X */ * _kobjt, uint8_t * _buf, size_t _len) - { - printf("nowriteraw\n"); - return 1; - } - - int - noreset ( void /* X */ * _kobjt) - { - - printf("noreset\n"); - return 0; - } - - char * - noshortname (void /* X */ * _kobjt) - { - printf("noshortname\n"); - return "noshortname"; - } - - int - noclose ( void /* X */ * _kobjt) - { - - printf("noclose\n"); - return 0; - } - - int - noinsync (void /* X */ * _kobjt) - { - - printf("noinsync\n"); - return 0; - } - - int - noalloc ( void /* x */ * _kbojt, uint8_t _chn, uint8_t _note) - { - printf("noalloc\n"); - return 0; - } -} - -METHOD int killnote { - void /* X */ *_kobj; - uint8_t _chan; - uint8_t _note; - uint8_t _vel; -} DEFAULT nokillnote; - -METHOD int startnote { - void /* X */ *_kobj; - uint8_t _voice; - uint8_t _note; - uint8_t _parm; -} DEFAULT nostartnote; - -METHOD int setinstr { - void /* X */ *_kobj; - uint8_t _chn; - uint16_t _patchno; -} DEFAULT nosetinstr; - -METHOD int hwcontrol { - void /* X */ *_kobj; - uint8_t *_event; -} DEFAULT nohwcontrol; - -METHOD int aftertouch { - void /* X */ *_kobj; - uint8_t _x1; - uint8_t _x2; -} DEFAULT noaftertouch; - -METHOD int panning { - void /* X */ *_kobj; - uint8_t _x1; - uint8_t _x2; -} DEFAULT nopanning; - -METHOD int controller { 
- void /* X */ *_kobj; - uint8_t _x1; - uint8_t _x2; - uint16_t _x3; -} DEFAULT nocontroller; - -METHOD int volumemethod { - void /* X */ *_kobj; - uint8_t _x1; -} DEFAULT novolumemethod; - -METHOD int bender { - void /* X */ *_kobj; - uint8_t _voice; - uint16_t _bend; -} DEFAULT nobender; - -METHOD int setupvoice { - void /* X */ *_kobj; - uint8_t _voice; - uint8_t _chn; -} DEFAULT nosetupvoice; - -METHOD int sendsysex { - void /* X */ *_kobj; - void *_buf; - size_t _len; -} DEFAULT nosendsysex; - -METHOD int allocvoice { - void /* X */ *_kobj; - uint8_t _chn; - uint8_t _note; - void *_x; -} DEFAULT noallocvoice; - -METHOD int writeraw { - void /* X */ *_kobjt; - uint8_t *_buf; - size_t _len; -} DEFAULT nowriteraw; - -METHOD int reset { - void /* X */ *_kobjt; -} DEFAULT noreset; - -METHOD char * shortname { - void /* X */ *_kobjt; -} DEFAULT noshortname; - -METHOD int open { - void /* X */ *_kobjt; - void *_sythn; - int _mode; -} DEFAULT noopen; - -METHOD int close { - void /* X */ *_kobjt; -} DEFAULT noclose; - -METHOD int query { - void /* X */ *_kobjt; -} DEFAULT noquery; - -METHOD int insync { - void /* X */ *_kobjt; -} DEFAULT noinsync; - -METHOD int alloc { - void /* x */ *_kbojt; - uint8_t _chn; - uint8_t _note; -} DEFAULT noalloc; diff --git a/sys/dev/sound/pcm/mixer.c b/sys/dev/sound/pcm/mixer.c index 092af3298f0e..f281dff36248 100644 --- a/sys/dev/sound/pcm/mixer.c +++ b/sys/dev/sound/pcm/mixer.c @@ -750,8 +750,8 @@ mixer_init(device_t dev, kobj_class_t cls, void *devinfo) mixer_setrecsrc(m, 0); /* Set default input. */ - pdev = make_dev(&mixer_cdevsw, SND_DEV_CTL, UID_ROOT, GID_WHEEL, 0666, - "mixer%d", unit); + pdev = make_dev(&mixer_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "mixer%d", + unit); pdev->si_drv1 = m; snddev->mixer_dev = pdev; diff --git a/sys/dev/sound/pcm/sndstat.c b/sys/dev/sound/pcm/sndstat.c index 509a35c5a038..51d0fb3bb686 100644 --- a/sys/dev/sound/pcm/sndstat.c +++ b/sys/dev/sound/pcm/sndstat.c @@ -52,7 +52,6 @@ #define SS_TYPE_PCM 1 #define SS_TYPE_MIDI 2 -#define SS_TYPE_SEQUENCER 3 static d_open_t sndstat_open; static void sndstat_close(void *); @@ -1165,8 +1164,6 @@ sndstat_register(device_t dev, char *str) type = SS_TYPE_PCM; else if (!strcmp(devtype, "midi")) type = SS_TYPE_MIDI; - else if (!strcmp(devtype, "sequencer")) - type = SS_TYPE_SEQUENCER; else return (EINVAL); @@ -1441,8 +1438,8 @@ static void sndstat_sysinit(void *p) { sx_init(&sndstat_lock, "sndstat lock"); - sndstat_dev = make_dev(&sndstat_cdevsw, SND_DEV_STATUS, - UID_ROOT, GID_WHEEL, 0644, "sndstat"); + sndstat_dev = make_dev(&sndstat_cdevsw, 0, UID_ROOT, GID_WHEEL, 0644, + "sndstat"); } SYSINIT(sndstat_sysinit, SI_SUB_DRIVERS, SI_ORDER_FIRST, sndstat_sysinit, NULL); diff --git a/sys/dev/sound/pcm/sound.h b/sys/dev/sound/pcm/sound.h index 315452e294d1..6bd435d0ea25 100644 --- a/sys/dev/sound/pcm/sound.h +++ b/sys/dev/sound/pcm/sound.h @@ -148,14 +148,6 @@ struct snd_mixer; #define RANGE(var, low, high) (var) = \ (((var)<(low))? (low) : ((var)>(high))? 
(high) : (var)) -enum { - SND_DEV_CTL = 0, /* Control port /dev/mixer */ - SND_DEV_SEQ, /* Sequencer /dev/sequencer */ - SND_DEV_MIDIN, /* Raw midi access */ - SND_DEV_DSP, /* Digitized voice /dev/dsp */ - SND_DEV_STATUS, /* /dev/sndstat */ -}; - #define DSP_DEFAULT_SPEED 8000 extern int snd_unit; diff --git a/sys/dev/vmm/vmm_dev.c b/sys/dev/vmm/vmm_dev.c index 1ffa15dd157b..819debadd1ac 100644 --- a/sys/dev/vmm/vmm_dev.c +++ b/sys/dev/vmm/vmm_dev.c @@ -351,6 +351,7 @@ static const struct vmmdev_ioctl vmmdev_ioctls[] = { VMMDEV_IOCTL(VM_ACTIVATE_CPU, VMMDEV_IOCTL_LOCK_ONE_VCPU), VMMDEV_IOCTL(VM_INJECT_EXCEPTION, VMMDEV_IOCTL_LOCK_ONE_VCPU), VMMDEV_IOCTL(VM_STATS, VMMDEV_IOCTL_LOCK_ONE_VCPU), + VMMDEV_IOCTL(VM_STAT_DESC, 0), #if defined(__amd64__) && defined(COMPAT_FREEBSD12) VMMDEV_IOCTL(VM_ALLOC_MEMSEG_12, diff --git a/sys/fs/fuse/fuse_file.h b/sys/fs/fuse/fuse_file.h index 2a90e66d1b23..232132473953 100644 --- a/sys/fs/fuse/fuse_file.h +++ b/sys/fs/fuse/fuse_file.h @@ -139,7 +139,7 @@ struct fuse_filehandle { /* * flags returned by FUSE_OPEN - * Supported flags: FOPEN_DIRECT_IO, FOPEN_KEEP_CACHE + * Supported flags: FOPEN_DIRECT_IO, FOPEN_KEEP_CACHE, FOPEN_NOFLUSH * Unsupported: * FOPEN_NONSEEKABLE: Adding support would require a new per-file * or per-vnode attribute, which would have to be checked by diff --git a/sys/fs/fuse/fuse_kernel.h b/sys/fs/fuse/fuse_kernel.h index c95caf898ad8..942448b47365 100644 --- a/sys/fs/fuse/fuse_kernel.h +++ b/sys/fs/fuse/fuse_kernel.h @@ -182,6 +182,12 @@ * - add FUSE_OPEN_KILL_SUIDGID * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT * - add FUSE_SETXATTR_ACL_KILL_SGID + * + * 7.34 + * - add FUSE_SYNCFS + * + * 7.35 + * - add FOPEN_NOFLUSH */ #ifndef _FUSE_FUSE_KERNEL_H @@ -217,7 +223,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 33 +#define FUSE_KERNEL_MINOR_VERSION 35 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -288,12 +294,14 @@ struct fuse_file_lock { * FOPEN_NONSEEKABLE: the file is not seekable * FOPEN_CACHE_DIR: allow caching this directory * FOPEN_STREAM: the file is stream-like (no file position at all) + * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) #define FOPEN_NONSEEKABLE (1 << 2) #define FOPEN_CACHE_DIR (1 << 3) #define FOPEN_STREAM (1 << 4) +#define FOPEN_NOFLUSH (1 << 5) /** * INIT request/reply flags @@ -518,6 +526,7 @@ enum fuse_opcode { FUSE_COPY_FILE_RANGE = 47, FUSE_SETUPMAPPING = 48, FUSE_REMOVEMAPPING = 49, + FUSE_SYNCFS = 50, #ifdef linux /* CUSE specific operations */ @@ -939,7 +948,8 @@ struct fuse_notify_retrieve_in { }; /* Device ioctls: */ -#define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) +#define FUSE_DEV_IOC_MAGIC 229 +#define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) struct fuse_lseek_in { uint64_t fh; @@ -992,4 +1002,8 @@ struct fuse_removemapping_one { #define FUSE_REMOVEMAPPING_MAX_ENTRY \ (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) +struct fuse_syncfs_in { + uint64_t padding; +}; + #endif /* _FUSE_FUSE_KERNEL_H */ diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 107e6db299e0..ae28617537fd 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -89,6 +89,8 @@ #include <sys/buf.h> #include <sys/sysctl.h> #include <sys/vmmeter.h> +#define EXTERR_CATEGORY EXTERR_CAT_FUSE +#include <sys/exterrvar.h> #include <vm/vm.h> #include <vm/vm_extern.h> @@ -289,6 +291,10 @@ 
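/*
 * Illustrative sketch (editor's addition, not part of this change):
 * a FUSE daemon opts into FOPEN_NOFLUSH by setting the bit in the
 * open_flags word of its FUSE_OPEN reply; the hunk below is the kernel
 * side, which then skips the FUSE_FLUSH round trip on close unless
 * writeback caching is in effect.  Assumes the struct fuse_open_out
 * layout (fh, open_flags, padding) from fuse_kernel.h; the function
 * name is hypothetical.
 */
static void
example_fill_open_reply(struct fuse_open_out *foo, uint64_t fh)
{
	foo->fh = fh;
	foo->padding = 0;
	/* Serve I/O directly and do not expect a flush on close. */
	foo->open_flags = FOPEN_DIRECT_IO | FOPEN_NOFLUSH;
}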
fuse_flush(struct vnode *vp, struct ucred *cred, pid_t pid, int fflag) if (err) return err; + if (fufh->fuse_open_flags & FOPEN_NOFLUSH && + (!fsess_opt_writeback(vnode_mount(vp)))) + return (0); + fdisp_init(&fdi, sizeof(*ffi)); fdisp_make_vp(&fdi, FUSE_FLUSH, vp, td, cred); ffi = fdi.indata; @@ -435,7 +441,8 @@ fuse_vnop_access(struct vop_access_args *ap) if (vnode_isvroot(vp)) { return 0; } - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!(data->dataflags & FSESS_INITED)) { if (vnode_isvroot(vp)) { @@ -444,7 +451,8 @@ fuse_vnop_access(struct vop_access_args *ap) return 0; } } - return EBADF; + return (EXTERROR(EBADF, "Access denied until FUSE session " + "is initialized")); } if (vnode_islnk(vp)) { return 0; @@ -485,7 +493,8 @@ fuse_vnop_advlock(struct vop_advlock_args *ap) dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } switch(ap->a_op) { @@ -502,7 +511,7 @@ fuse_vnop_advlock(struct vop_advlock_args *ap) op = FUSE_SETLK; break; default: - return EINVAL; + return (EXTERROR(EINVAL, "Unsupported lock flags")); } if (!(dataflags & FSESS_POSIX_LOCKS)) @@ -530,14 +539,14 @@ fuse_vnop_advlock(struct vop_advlock_args *ap) size = vattr.va_size; if (size > OFF_MAX || (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) { - err = EOVERFLOW; + err = EXTERROR(EOVERFLOW, "Offset is too large"); goto out; } start = size + fl->l_start; break; default: - return (EINVAL); + return (EXTERROR(EINVAL, "Unsupported offset type")); } err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid); @@ -599,15 +608,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); switch (vp->v_type) { case VFIFO: return (ESPIPE); case VLNK: case VREG: - if (vfs_isrdonly(mp)) - return (EROFS); break; default: return (ENODEV); @@ -617,7 +625,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) return (EROFS); if (fsess_not_impl(mp, FUSE_FALLOCATE)) - return (EINVAL); + return (EXTERROR(EINVAL, "This server does not implement " + "FUSE_FALLOCATE")); io.uio_offset = *offset; io.uio_resid = *len; @@ -647,13 +656,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_FALLOCATE); - err = EINVAL; + err = EXTERROR(EINVAL, "This server does not implement " + "FUSE_ALLOCATE"); } else if (err == EOPNOTSUPP) { /* * The file system server does not support FUSE_FALLOCATE with * the supplied mode for this particular file. 
*/ - err = EINVAL; + err = EXTERROR(EINVAL, "This file can't be pre-allocated"); } else if (!err) { *offset += *len; *len = 0; @@ -699,7 +709,8 @@ fuse_vnop_bmap(struct vop_bmap_args *ap) int maxrun; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } mp = vnode_mount(vp); @@ -866,19 +877,21 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) pid_t pid; int err; - err = ENOSYS; if (mp == NULL || mp != vnode_mount(outvp)) - goto fallback; + return (EXTERROR(ENOSYS, "Mount points do not match")); if (incred->cr_uid != outcred->cr_uid) - goto fallback; + return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not " + "support different credentials for infd and outfd")); if (incred->cr_groups[0] != outcred->cr_groups[0]) - goto fallback; + return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not " + "support different credentials for infd and outfd")); /* Caller busied mp, mnt_data can be safely accessed. */ if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE)) - goto fallback; + return (EXTERROR(ENOSYS, "This daemon does not " + "implement COPY_FILE_RANGE")); if (ap->a_fsizetd == NULL) td = curthread; @@ -888,7 +901,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE); if (invp->v_data == NULL || outvp->v_data == NULL) { - err = EBADF; + err = EXTERROR(EBADF, "vnode got reclaimed"); goto unlock; } @@ -952,7 +965,6 @@ unlock: if (err == ENOSYS) fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE); -fallback: /* * No need to call vn_rlimit_fsizex_res before return, since the uio is @@ -1020,7 +1032,8 @@ fuse_vnop_create(struct vop_create_args *ap) int flags; if (fuse_isdeadfs(dvp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); /* FUSE expects sockets to be created with FUSE_MKNOD */ if (vap->va_type == VSOCK) @@ -1036,7 +1049,7 @@ fuse_vnop_create(struct vop_create_args *ap) bzero(&fdi, sizeof(fdi)); if (vap->va_type != VREG) - return (EINVAL); + return (EXTERROR(EINVAL, "Only regular files can be created")); if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) { /* Fallback to FUSE_MKNOD/FUSE_OPEN */ @@ -1217,8 +1230,8 @@ fuse_vnop_getattr(struct vop_getattr_args *ap) if (!(dataflags & FSESS_INITED)) { if (!vnode_isvroot(vp)) { fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); - err = ENOTCONN; - return err; + return (EXTERROR(ENOTCONN, "FUSE daemon is not " + "initialized")); } else { goto fake; } @@ -1347,10 +1360,11 @@ fuse_vnop_link(struct vop_link_args *ap) int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vnode_mount(tdvp) != vnode_mount(vp)) { - return EXDEV; + return (EXDEV); } /* @@ -1360,7 +1374,7 @@ fuse_vnop_link(struct vop_link_args *ap) * validating that nlink does not overflow. 
*/ if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX) - return EMLINK; + return (EMLINK); fli.oldnodeid = VTOI(vp); fdisp_init(&fdi, 0); @@ -1372,12 +1386,13 @@ fuse_vnop_link(struct vop_link_args *ap) feo = fdi.answ; if (fli.oldnodeid != feo->nodeid) { + static const char exterr[] = "Server assigned wrong inode " + "for a hard link."; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); - fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, - "Assigned wrong inode for a hard link."); + fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, exterr); fuse_vnode_clear_attr_cache(vp); fuse_vnode_clear_attr_cache(tdvp); - err = EIO; + err = EXTERROR(EIO, exterr); goto out; } @@ -1454,7 +1469,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) if (fuse_isdeadfs(dvp)) { *vpp = NULL; - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!vnode_isdir(dvp)) return ENOTDIR; @@ -1474,7 +1490,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) * Since the file system doesn't support ".." lookups, * we have no way to find this entry. */ - return ESTALE; + return (EXTERROR(ESTALE, "This server does not support " + "'..' lookups")); } nid = VTOFUD(dvp)->parent_nid; if (nid == 0) @@ -1597,11 +1614,11 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) vref(dvp); *vpp = dvp; } else { + static const char exterr[] = "Server assigned " + "same inode to both parent and child."; fuse_warn(fuse_get_mpdata(mp), - FSESS_WARN_ILLEGAL_INODE, - "Assigned same inode to both parent and " - "child."); - err = EIO; + FSESS_WARN_ILLEGAL_INODE, exterr); + err = EXTERROR(EIO, exterr); } } else { @@ -1689,7 +1706,8 @@ fuse_vnop_mkdir(struct vop_mkdir_args *ap) struct fuse_mkdir_in fmdi; if (fuse_isdeadfs(dvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); fmdi.umask = curthread->td_proc->p_pd->pd_cmask; @@ -1716,7 +1734,8 @@ fuse_vnop_mknod(struct vop_mknod_args *ap) struct vattr *vap = ap->a_vap; if (fuse_isdeadfs(dvp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); return fuse_internal_mknod(dvp, vpp, cnp, vap); } @@ -1740,11 +1759,13 @@ fuse_vnop_open(struct vop_open_args *ap) pid_t pid = td->td_proc->p_pid; if (fuse_isdeadfs(vp)) - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO) - return (EOPNOTSUPP); + return (EXTERROR(EOPNOTSUPP, "Unsupported vnode type", + vp->v_type)); if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0) - return EINVAL; + return (EXTERROR(EINVAL, "Illegal mode", a_mode)); if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) { fuse_vnode_open(vp, 0, td); @@ -1826,7 +1847,8 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap) return (0); } else if (fsess_not_impl(mp, FUSE_LSEEK)) { /* FUSE_LSEEK is not implemented */ - return (EINVAL); + return (EXTERROR(EINVAL, "This server does not " + "implement FUSE_LSEEK")); } else { return (err); } @@ -1860,7 +1882,8 @@ fuse_vnop_read(struct vop_read_args *ap) MPASS(vp->v_type == VREG || vp->v_type == VDIR); if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp)->flag & FN_DIRECTIO) { @@ -1937,10 +1960,11 @@ fuse_vnop_readdir(struct vop_readdir_args *ap) if (ap->a_eofflag) *ap->a_eofflag = 0; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if 
(uio_resid(uio) < sizeof(struct dirent)) - return EINVAL; + return (EXTERROR(EINVAL, "Buffer is too small")); tresid = uio->uio_resid; err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); @@ -2010,7 +2034,8 @@ fuse_vnop_readlink(struct vop_readlink_args *ap) int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (!vnode_islnk(vp)) { return EINVAL; @@ -2021,10 +2046,11 @@ fuse_vnop_readlink(struct vop_readlink_args *ap) goto out; } if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) { + static const char exterr[] = "Server returned an embedded NUL " + "from FUSE_READLINK."; struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); - fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, - "Returned an embedded NUL from FUSE_READLINK."); - err = EIO; + fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, exterr); + err = EXTERROR(EIO, exterr); goto out; } if (((char *)fdi.answ)[0] == '/' && @@ -2108,10 +2134,11 @@ fuse_vnop_remove(struct vop_remove_args *ap) int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vnode_isdir(vp)) { - return EPERM; + return (EXTERROR(EPERM, "vnode is a directory")); } err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); @@ -2144,12 +2171,13 @@ fuse_vnop_rename(struct vop_rename_args *ap) int err = 0; if (fuse_isdeadfs(fdvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (fvp->v_mount != tdvp->v_mount || (tvp && fvp->v_mount != tvp->v_mount)) { SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename"); - err = EXDEV; + err = EXTERROR(EXDEV, "Cross-device rename"); goto out; } cache_purge(fvp); @@ -2220,10 +2248,12 @@ fuse_vnop_rmdir(struct vop_rmdir_args *ap) int err; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp) == VTOFUD(dvp)) { - return EINVAL; + return (EXTERROR(EINVAL, "Directory to be removed " + "contains itself")); } err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); @@ -2260,7 +2290,8 @@ fuse_vnop_setattr(struct vop_setattr_args *ap) checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS; if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (vap->va_uid != (uid_t)VNOVAL) { @@ -2425,7 +2456,8 @@ fuse_vnop_symlink(struct vop_symlink_args *ap) size_t len; if (fuse_isdeadfs(dvp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } /* * Unlike the other creator type calls, here we have to create a message @@ -2471,7 +2503,8 @@ fuse_vnop_write(struct vop_write_args *ap) MPASS(vp->v_type == VREG || vp->v_type == VDIR); if (fuse_isdeadfs(vp)) { - return ENXIO; + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); } if (VTOFUD(vp)->flag & FN_DIRECTIO) { @@ -2624,10 +2657,12 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_GETXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "extended attributes")); err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) @@ -2665,7 +2700,8 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap) if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_GETXATTR); - err = 
EOPNOTSUPP; + err = (EXTERROR(EOPNOTSUPP, "This server does not " + "implement extended attributes")); } goto out; } @@ -2711,10 +2747,12 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_SETXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "setting extended attributes")); if (vfs_isrdonly(mp)) return EROFS; @@ -2726,9 +2764,11 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) * return EOPNOTSUPP. */ if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) - return (EOPNOTSUPP); + return (EXTERROR(EOPNOTSUPP, "This server does not " + "implement removing extended attributess")); else - return (EINVAL); + return (EXTERROR(EINVAL, "DELETEEXTATTR should be used " + "to remove extattrs")); } err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, @@ -2774,7 +2814,8 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap) if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_SETXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not implement " + "setting extended attributes"); } if (err == ERESTART) { /* Can't restart after calling uiomove */ @@ -2885,10 +2926,12 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_LISTXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "extended attributes")); err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD); if (err) @@ -2916,7 +2959,8 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap) if (err != 0) { if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_LISTXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not " + "implement extended attributes"); } goto out; } @@ -3016,7 +3060,8 @@ fuse_vnop_deallocate(struct vop_deallocate_args *ap) bool closefufh = false; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (vfs_isrdonly(mp)) return (EROFS); @@ -3122,10 +3167,12 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) int err; if (fuse_isdeadfs(vp)) - return (ENXIO); + return (EXTERROR(ENXIO, "This FUSE session is about " + "to be closed")); if (fsess_not_impl(mp, FUSE_REMOVEXATTR)) - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server does not implement " + "removing extended attributes")); if (vfs_isrdonly(mp)) return EROFS; @@ -3154,7 +3201,8 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap) err = fdisp_wait_answ(&fdi); if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_REMOVEXATTR); - err = EOPNOTSUPP; + err = EXTERROR(EOPNOTSUPP, "This server does not implement " + "removing extended attributes"); } fdisp_destroy(&fdi); @@ -3208,7 +3256,8 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap) /* NFS requires lookups for "." and ".." 
*/ SDT_PROBE2(fusefs, , vnops, trace, 1, "VOP_VPTOFH without FUSE_EXPORT_SUPPORT"); - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server is " + "missing FUSE_EXPORT_SUPPORT")); } if ((mp->mnt_flag & MNT_EXPORTED) && fsess_is_impl(mp, FUSE_OPENDIR)) @@ -3226,7 +3275,8 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap) */ SDT_PROBE2(fusefs, , vnops, trace, 1, "VOP_VPTOFH with FUSE_OPENDIR"); - return EOPNOTSUPP; + return (EXTERROR(EOPNOTSUPP, "This server implements " + "FUSE_OPENDIR so is not compatible with getfh")); } err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); @@ -3240,6 +3290,7 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap) if (fvdat->generation <= UINT32_MAX) fhp->gen = fvdat->generation; else - return EOVERFLOW; + return (EXTERROR(EOVERFLOW, "inode generation " + "number overflow")); return (0); } diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c index e799a5ce05f6..8ab6d35a2685 100644 --- a/sys/fs/msdosfs/msdosfs_lookup.c +++ b/sys/fs/msdosfs/msdosfs_lookup.c @@ -845,7 +845,6 @@ doscheckpath(struct denode *source, struct denode *target, daddr_t *wait_scn) *wait_scn = 0; pmp = target->de_pmp; - lockmgr_assert(&pmp->pm_checkpath_lock, KA_XLOCKED); KASSERT(pmp == source->de_pmp, ("doscheckpath: source and target on different filesystems")); diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c index adcffe45df82..4431d36c8a8e 100644 --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -575,7 +575,6 @@ mountmsdosfs(struct vnode *odevvp, struct mount *mp) pmp->pm_bo = bo; lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0); - lockinit(&pmp->pm_checkpath_lock, 0, "msdoscp", 0, 0); TASK_INIT(&pmp->pm_rw2ro_task, 0, msdosfs_remount_ro, pmp); @@ -871,7 +870,6 @@ error_exit: } if (pmp != NULL) { lockdestroy(&pmp->pm_fatlock); - lockdestroy(&pmp->pm_checkpath_lock); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; @@ -971,7 +969,6 @@ msdosfs_unmount(struct mount *mp, int mntflags) dev_rel(pmp->pm_dev); free(pmp->pm_inusemap, M_MSDOSFSFAT); lockdestroy(&pmp->pm_fatlock); - lockdestroy(&pmp->pm_checkpath_lock); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; return (error); diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c index 6417b7dac16b..5db61c8951f6 100644 --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -945,7 +945,7 @@ msdosfs_rename(struct vop_rename_args *ap) struct denode *fdip, *fip, *tdip, *tip, *nip; u_char toname[12], oldname[11]; u_long to_diroffset; - bool checkpath_locked, doingdirectory, newparent; + bool doingdirectory, newparent; int error; u_long cn, pcl, blkoff; daddr_t bn, wait_scn, scn; @@ -986,8 +986,6 @@ msdosfs_rename(struct vop_rename_args *ap) if (tvp != NULL && tvp != tdvp) VOP_UNLOCK(tvp); - checkpath_locked = false; - relock: doingdirectory = newparent = false; @@ -1108,12 +1106,8 @@ relock: if (doingdirectory && newparent) { if (error != 0) /* write access check above */ goto unlock; - lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL); - checkpath_locked = true; error = doscheckpath(fip, tdip, &wait_scn); if (wait_scn != 0) { - lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); @@ -1276,8 +1270,6 @@ relock: cache_purge(fvp); unlock: - if (checkpath_locked) - lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL); vput(fdvp); vput(fvp); if (tvp != NULL) { @@ -1289,7 +1281,6 @@ 
unlock: vput(tdvp); return (error); releout: - MPASS(!checkpath_locked); vrele(tdvp); if (tvp != NULL) vrele(tvp); @@ -1951,6 +1942,9 @@ msdosfs_pathconf(struct vop_pathconf_args *ap) case _PC_NO_TRUNC: *ap->a_retval = 0; return (0); + case _PC_HAS_HIDDENSYSTEM: + *ap->a_retval = 1; + return (0); default: return (vop_stdpathconf(ap)); } diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h index fcaac544a74d..04e6b75bea2a 100644 --- a/sys/fs/msdosfs/msdosfsmount.h +++ b/sys/fs/msdosfs/msdosfsmount.h @@ -118,7 +118,6 @@ struct msdosfsmount { void *pm_u2d; /* Unicode->DOS iconv handle */ void *pm_d2u; /* DOS->Local iconv handle */ struct lock pm_fatlock; /* lockmgr protecting allocations */ - struct lock pm_checkpath_lock; /* protects doscheckpath result */ struct task pm_rw2ro_task; /* context for emergency remount ro */ }; diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c index 0356877eaf05..7dcc83880bb9 100644 --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -245,6 +245,10 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp) vp->v_object = lowervp->v_object; vn_irflag_set(vp, VIRF_PGREAD); } + if ((vn_irflag_read(lowervp) & VIRF_INOTIFY) != 0) + vn_irflag_set(vp, VIRF_INOTIFY); + if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0) + vn_irflag_set(vp, VIRF_INOTIFY_PARENT); if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp) vp->v_vflag |= VV_ROOT; diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index 8608216e10e5..74c1a8f3acb6 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -190,6 +190,26 @@ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, &null_bug_bypass, 0, ""); /* + * Synchronize inotify flags with the lower vnode: + * - If the upper vnode has the flag set and the lower does not, then the lower + * vnode is unwatched and the upper vnode does not need to go through + * VOP_INOTIFY. + * - If the lower vnode is watched, then the upper vnode should go through + * VOP_INOTIFY, so copy the flag up. + */ +static void +null_copy_inotify(struct vnode *vp, struct vnode *lvp, short flag) +{ + if ((vn_irflag_read(vp) & flag) != 0) { + if (__predict_false((vn_irflag_read(lvp) & flag) == 0)) + vn_irflag_unset(vp, flag); + } else if ((vn_irflag_read(lvp) & flag) != 0) { + if (__predict_false((vn_irflag_read(vp) & flag) == 0)) + vn_irflag_set(vp, flag); + } +} + +/* * This is the 10-Apr-92 bypass routine. * This version has been optimized for speed, throwing away some * safety checks. It should still always work, but it's not as @@ -305,7 +325,10 @@ null_bypass(struct vop_generic_args *ap) lvp = *(vps_p[i]); /* - * Get rid of the transient hold on lvp. + * Get rid of the transient hold on lvp. Copy inotify + * flags up in case something is watching the lower + * layer. + * * If lowervp was unlocked during VOP * operation, nullfs upper vnode could have * been reclaimed, which changes its v_vnlock @@ -314,6 +337,10 @@ null_bypass(struct vop_generic_args *ap) * upper (reclaimed) vnode. 
*/ if (lvp != NULLVP) { + null_copy_inotify(old_vps[i], lvp, + VIRF_INOTIFY); + null_copy_inotify(old_vps[i], lvp, + VIRF_INOTIFY_PARENT); if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE && old_vps[i]->v_vnlock != lvp->v_vnlock) { VOP_UNLOCK(lvp); diff --git a/sys/fs/smbfs/smbfs_vnops.c b/sys/fs/smbfs/smbfs_vnops.c index c30995508c00..5d412cabadb8 100644 --- a/sys/fs/smbfs/smbfs_vnops.c +++ b/sys/fs/smbfs/smbfs_vnops.c @@ -810,6 +810,9 @@ smbfs_pathconf(struct vop_pathconf_args *ap) case _PC_NO_TRUNC: *retval = 1; break; + case _PC_HAS_HIDDENSYSTEM: + *retval = 1; + break; default: error = vop_stdpathconf(ap); } diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index c99d0732be50..9d2a587b177a 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -1691,6 +1691,10 @@ tmpfs_pathconf(struct vop_pathconf_args *v) *retval = PAGE_SIZE; break; + case _PC_HAS_HIDDENSYSTEM: + *retval = 1; + break; + default: error = vop_stdpathconf(v); } diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h index aa2dfbb68745..49f002a633d2 100644 --- a/sys/i386/linux/linux_proto.h +++ b/sys/i386/linux/linux_proto.h @@ -981,10 +981,13 @@ struct linux_inotify_init_args { syscallarg_t dummy; }; struct linux_inotify_add_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; }; struct linux_inotify_rm_watch_args { - syscallarg_t dummy; + char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)]; + char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)]; }; struct linux_migrate_pages_args { syscallarg_t dummy; @@ -1178,7 +1181,7 @@ struct linux_pipe2_args { char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_inotify_init1_args { - syscallarg_t dummy; + char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)]; }; struct linux_preadv_args { char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)]; diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c index 7be646f34144..b8893008944b 100644 --- a/sys/i386/linux/linux_sysent.c +++ b/sys/i386/linux/linux_sysent.c @@ -306,8 +306,8 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */ { .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ + { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */ + { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t 
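For userland, the practical effect of the new _PC_HAS_HIDDENSYSTEM pathconf(2) name (returned as 1 by msdosfs, smbfs and tmpfs here, and defaulted to 0 by vop_stdpathconf() later in this change) is a per-filesystem probe for DOS-style hidden/system attribute support. The exact semantics are inferred from the name and from which filesystems opt in, so treat the description as an assumption; a minimal sketch of the probe:

	#include <stdio.h>
	#include <unistd.h>

	/*
	 * Sketch: ask whether the filesystem backing 'path' advertises
	 * hidden/system attribute support via the new pathconf(2) name.
	 */
	static void
	probe_hiddensystem(const char *path)
	{
		long r = pathconf(path, _PC_HAS_HIDDENSYSTEM);

		if (r == -1)
			perror("pathconf");
		else
			printf("%s: hidden/system attributes %ssupported\n",
			    path, r != 0 ? "" : "not ");
	}
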
*)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */ { .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */ { .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */ { .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */ @@ -346,7 +346,7 @@ struct sysent linux_sysent[] = { { .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */ { .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */ { .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */ - { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ + { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */ { .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */ { .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */ { .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */ diff --git a/sys/i386/linux/linux_systrace_args.c b/sys/i386/linux/linux_systrace_args.c index f3e3c32a2bbf..563d1a795ae1 100644 --- a/sys/i386/linux/linux_systrace_args.c +++ b/sys/i386/linux/linux_systrace_args.c @@ -2071,12 +2071,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_add_watch */ case 292: { - *n_args = 0; + struct linux_inotify_add_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = (intptr_t)p->pathname; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 3; break; } /* linux_inotify_rm_watch */ case 293: { - *n_args = 0; + struct linux_inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* l_int */ + uarg[a++] = p->wd; /* uint32_t */ + *n_args = 2; break; } /* linux_migrate_pages */ @@ -2410,7 +2417,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) } /* linux_inotify_init1 */ case 332: { - *n_args = 0; + struct linux_inotify_init1_args *p = params; + iarg[a++] = p->flags; /* l_int */ + *n_args = 1; break; } /* linux_preadv */ @@ -6604,9 +6613,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_add_watch */ case 292: + switch (ndx) { + case 0: + p = "l_int"; + break; + case 1: + p = "userland const char *"; + break; + case 2: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_inotify_rm_watch */ case 293: + switch (ndx) { + case 0: + p = 
"l_int"; + break; + case 1: + p = "uint32_t"; + break; + default: + break; + }; break; /* linux_migrate_pages */ case 294: @@ -7172,6 +7204,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + switch (ndx) { + case 0: + p = "l_int"; + break; + default: + break; + }; break; /* linux_preadv */ case 333: @@ -9889,8 +9928,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 291: /* linux_inotify_add_watch */ case 292: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_inotify_rm_watch */ case 293: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_migrate_pages */ case 294: /* linux_openat */ @@ -10062,6 +10107,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* linux_inotify_init1 */ case 332: + if (ndx == 0 || ndx == 1) + p = "int"; + break; /* linux_preadv */ case 333: if (ndx == 0 || ndx == 1) diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index 958336be0f08..2113ea51ac5d 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -1605,10 +1605,17 @@ int linux_inotify_init(void); } 292 AUE_NULL STD { - int linux_inotify_add_watch(void); + int linux_inotify_add_watch( + l_int fd, + const char *pathname, + uint32_t mask + ); } 293 AUE_NULL STD { - int linux_inotify_rm_watch(void); + int linux_inotify_rm_watch( + l_int fd, + uint32_t wd + ); } ; Linux 2.6.16: 294 AUE_NULL STD { @@ -1872,7 +1879,9 @@ ); } 332 AUE_NULL STD { - int linux_inotify_init1(void); + int linux_inotify_init1( + l_int flags + ); } ; Linux 2.6.30: 333 AUE_NULL STD { diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index a48a513aa3b5..34e71a0665ed 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -659,4 +659,6 @@ struct sysent sysent[] = { { .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */ { .sy_narg = AS(setcred_args), .sy_call = (sy_call_t *)sys_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = setcred */ { .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */ + { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */ + { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */ }; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index cf067527237e..03268365891e 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -29,6 +29,7 @@ #include <sys/cdefs.h> #include "opt_capsicum.h" #include "opt_hwpmc_hooks.h" +#include "opt_hwt_hooks.h" #include "opt_ktrace.h" #include "opt_vm.h" @@ -90,6 +91,10 @@ #include <sys/pmckern.h> #endif +#ifdef HWT_HOOKS +#include <dev/hwt/hwt_hook.h> +#endif + #include <security/audit/audit.h> #include <security/mac/mac_framework.h> @@ -936,6 +941,20 @@ interpret: } #endif +#ifdef HWT_HOOKS + if ((td->td_proc->p_flag2 & P2_HWT) != 0) { + struct hwt_record_entry ent; + + VOP_UNLOCK(imgp->vp); + ent.fullpath = imgp->execpath; + ent.addr = imgp->et_dyn_addr; + ent.baseaddr = imgp->reloc_base; 
+ ent.record_type = HWT_RECORD_EXECUTABLE; + HWT_CALL_HOOK(td, HWT_EXEC, &ent); + vn_lock(imgp->vp, LK_SHARED | LK_RETRY); + } +#endif + /* Set values passed into the program in registers. */ (*p->p_sysent->sv_setregs)(td, imgp, stack_base); diff --git a/sys/kern/kern_linker.c b/sys/kern/kern_linker.c index f388ac8a583a..d566bc01bc5e 100644 --- a/sys/kern/kern_linker.c +++ b/sys/kern/kern_linker.c @@ -30,6 +30,7 @@ #include "opt_ddb.h" #include "opt_kld.h" #include "opt_hwpmc_hooks.h" +#include "opt_hwt_hooks.h" #include <sys/param.h> #include <sys/systm.h> @@ -64,7 +65,7 @@ #include "linker_if.h" -#ifdef HWPMC_HOOKS +#if defined(HWPMC_HOOKS) || defined(HWT_HOOKS) #include <sys/pmckern.h> #endif @@ -2184,7 +2185,7 @@ linker_basename(const char *path) return (filename); } -#ifdef HWPMC_HOOKS +#if defined(HWPMC_HOOKS) || defined(HWT_HOOKS) /* * Inform hwpmc about the set of kernel modules currently loaded. */ diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c index a3b572976fbf..15afe1a46d07 100644 --- a/sys/kern/kern_pmc.c +++ b/sys/kern/kern_pmc.c @@ -72,6 +72,10 @@ int __read_mostly (*pmc_hook)(struct thread *td, int function, void *arg) = NULL /* Interrupt handler */ int __read_mostly (*pmc_intr)(struct trapframe *tf) = NULL; +/* HWT hooks */ +void __read_mostly (*hwt_hook)(struct thread *td, int func, void *arg) = NULL; +int __read_mostly (*hwt_intr)(struct trapframe *tf) = NULL; + DPCPU_DEFINE(uint8_t, pmc_sampled); /* diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index c8b01afeab4f..dcd38c6e6fbe 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1637,6 +1637,12 @@ uifree(struct uidinfo *uip) if (uip->ui_pipecnt != 0) printf("freeing uidinfo: uid = %d, pipecnt = %ld\n", uip->ui_uid, uip->ui_pipecnt); + if (uip->ui_inotifycnt != 0) + printf("freeing uidinfo: uid = %d, inotifycnt = %ld\n", + uip->ui_uid, uip->ui_inotifycnt); + if (uip->ui_inotifywatchcnt != 0) + printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n", + uip->ui_uid, uip->ui_inotifywatchcnt); free(uip, M_UIDINFO); } @@ -1742,6 +1748,21 @@ chgpipecnt(struct uidinfo *uip, int diff, rlim_t max) return (chglimit(uip, &uip->ui_pipecnt, diff, max, "pipecnt")); } +int +chginotifycnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + return (chglimit(uip, &uip->ui_inotifycnt, diff, max, "inotifycnt")); +} + +int +chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t max) +{ + + return (chglimit(uip, &uip->ui_inotifywatchcnt, diff, max, + "inotifywatchcnt")); +} + static int sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS) { diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index 17b53208157a..35b258e68701 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -27,12 +27,12 @@ * SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> #include "opt_kern_tls.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/capsicum.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/ktls.h> @@ -1246,6 +1246,8 @@ out: */ if (error == 0) { td->td_retval[0] = 0; + if (sbytes > 0 && vp != NULL) + INOTIFY(vp, IN_ACCESS); } if (sent != NULL) { (*sent) = sbytes; diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 4565abc4b540..a61ebfc5c7c8 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -4139,7 +4139,7 @@ coredump(struct thread *td) struct flock lf; struct vattr vattr; size_t fullpathsize; - int error, error1, locked; + int error, error1, jid, locked, ppid, sig; char *name; /* name of corefile */ void *rl_cookie; off_t limit; @@ -4168,6 +4168,10 @@ coredump(struct thread *td) PROC_UNLOCK(p); return (EFBIG); } + + ppid = p->p_oppid; + sig = p->p_sig; + jid = p->p_ucred->cr_prison->pr_id; PROC_UNLOCK(p); error = corefile_open(p->p_comm, cred->cr_uid, p->p_pid, td, @@ -4253,6 +4257,9 @@ coredump(struct thread *td) } devctl_safe_quote_sb(sb, name); sbuf_putc(sb, '"'); + + sbuf_printf(sb, " jid=%d pid=%d ppid=%d signo=%d", + jid, p->p_pid, ppid, sig); if (sbuf_finish(sb) == 0) devctl_notify("kernel", "signal", "coredump", sbuf_data(sb)); out2: diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index 8ad885b42ebe..0e8c2b9f362e 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -29,7 +29,7 @@ #include "opt_ktrace.h" #include "opt_posix.h" #include "opt_hwpmc_hooks.h" - +#include "opt_hwt_hooks.h" #include <sys/systm.h> #include <sys/kernel.h> #ifdef KTRACE @@ -60,6 +60,9 @@ #ifdef HWPMC_HOOKS #include <sys/pmckern.h> #endif +#ifdef HWT_HOOKS +#include <dev/hwt/hwt_hook.h> +#endif #include <machine/frame.h> @@ -280,6 +283,10 @@ thread_create(struct thread *td, struct rtprio *rtp, PMC_CALL_HOOK_UNLOCKED(newtd, PMC_FN_THR_CREATE_LOG, NULL); #endif +#ifdef HWT_HOOKS + HWT_CALL_HOOK(newtd, HWT_THREAD_CREATE, NULL); +#endif + tidhash_add(newtd); /* ignore timesharing class */ @@ -613,6 +620,9 @@ sys_thr_set_name(struct thread *td, struct thr_set_name_args *uap) if (PMC_PROC_IS_USING_PMCS(p) || PMC_SYSTEM_SAMPLING_ACTIVE()) PMC_CALL_HOOK_UNLOCKED(ttd, PMC_FN_THR_CREATE_LOG, NULL); #endif +#ifdef HWT_HOOKS + HWT_CALL_HOOK(ttd, HWT_THREAD_SET_NAME, NULL); +#endif #ifdef KTR sched_clear_tdname(ttd); #endif diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 2dff461e932a..f853af193016 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -30,6 +30,7 @@ #include "opt_witness.h" #include "opt_hwpmc_hooks.h" +#include "opt_hwt_hooks.h" #include <sys/systm.h> #include <sys/asan.h> @@ -60,6 +61,9 @@ #ifdef HWPMC_HOOKS #include <sys/pmckern.h> #endif +#ifdef HWT_HOOKS +#include <dev/hwt/hwt_hook.h> +#endif #include <sys/priv.h> #include <security/audit/audit.h> @@ -1002,6 +1006,11 @@ thread_exit(void) } else if (PMC_SYSTEM_SAMPLING_ACTIVE()) PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_THR_EXIT_LOG, NULL); #endif + +#ifdef HWT_HOOKS + HWT_CALL_HOOK(td, HWT_THREAD_EXIT, NULL); +#endif + PROC_UNLOCK(p); PROC_STATLOCK(p); thread_lock(td); diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index 753494983416..504f9a2338ef 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -36,6 +36,7 @@ #include <sys/cdefs.h> #include "opt_hwpmc_hooks.h" +#include "opt_hwt_hooks.h" #include "opt_sched.h" #include <sys/param.h> @@ -63,6 +64,10 @@ #include <sys/pmckern.h> #endif +#ifdef HWT_HOOKS +#include <dev/hwt/hwt_hook.h> +#endif + 
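The HWT_CALL_HOOK() invocations added to kern_exec.c, kern_thr.c, kern_thread.c and the schedulers dispatch through the hwt_hook function pointer declared in kern_pmc.c above. The macro itself lives in dev/hwt/hwt_hook.h, which is not part of this excerpt, so the following is only a sketch of the expected guarded-dispatch shape, not the actual definition:

	/*
	 * Hypothetical sketch of the dispatch pattern; the real macro is in
	 * <dev/hwt/hwt_hook.h> and may differ (for example, it may also test
	 * a per-process P2_HWT flag, as kern_exec.c does explicitly).
	 */
	extern void (*hwt_hook)(struct thread *td, int func, void *arg);

	#define	HWT_CALL_HOOK(td, func, arg) do {			\
		if (__predict_false(hwt_hook != NULL))			\
			(*hwt_hook)((td), (func), (arg));		\
	} while (0)
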
#ifdef KDTRACE_HOOKS #include <sys/dtrace_bsd.h> int __read_mostly dtrace_vtime_active; @@ -1075,6 +1080,11 @@ sched_switch(struct thread *td, int flags) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif +#ifdef HWT_HOOKS + HWT_CALL_HOOK(td, HWT_SWITCH_OUT, NULL); + HWT_CALL_HOOK(newtd, HWT_SWITCH_IN, NULL); +#endif + SDT_PROBE2(sched, , , off__cpu, newtd, newtd->td_proc); /* I feel sleepy */ @@ -1696,10 +1706,20 @@ sched_idletd(void *dummy) static void sched_throw_tail(struct thread *td) { + struct thread *newtd; mtx_assert(&sched_lock, MA_OWNED); KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); - cpu_throw(td, choosethread()); /* doesn't return */ + + newtd = choosethread(); + +#ifdef HWT_HOOKS + if (td) + HWT_CALL_HOOK(td, HWT_SWITCH_OUT, NULL); + HWT_CALL_HOOK(newtd, HWT_SWITCH_IN, NULL); +#endif + + cpu_throw(td, newtd); /* doesn't return */ } /* diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 508ec0ab97ec..409439ca34da 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -39,6 +39,7 @@ #include <sys/cdefs.h> #include "opt_hwpmc_hooks.h" +#include "opt_hwt_hooks.h" #include "opt_sched.h" #include <sys/param.h> @@ -69,6 +70,10 @@ #include <sys/pmckern.h> #endif +#ifdef HWT_HOOKS +#include <dev/hwt/hwt_hook.h> +#endif + #ifdef KDTRACE_HOOKS #include <sys/dtrace_bsd.h> int __read_mostly dtrace_vtime_active; @@ -2432,6 +2437,12 @@ sched_switch(struct thread *td, int flags) if (dtrace_vtime_active) (*dtrace_vtime_switch_func)(newtd); #endif + +#ifdef HWT_HOOKS + HWT_CALL_HOOK(td, HWT_SWITCH_OUT, NULL); + HWT_CALL_HOOK(newtd, HWT_SWITCH_IN, NULL); +#endif + td->td_oncpu = NOCPU; cpu_switch(td, newtd, mtx); cpuid = td->td_oncpu = PCPU_GET(cpuid); @@ -3252,6 +3263,10 @@ sched_ap_entry(void) newtd = sched_throw_grab(tdq); +#ifdef HWT_HOOKS + HWT_CALL_HOOK(newtd, HWT_SWITCH_IN, NULL); +#endif + /* doesn't return */ cpu_throw(NULL, newtd); } @@ -3278,6 +3293,10 @@ sched_throw(struct thread *td) newtd = sched_throw_grab(tdq); +#ifdef HWT_HOOKS + HWT_CALL_HOOK(newtd, HWT_SWITCH_IN, NULL); +#endif + /* doesn't return */ cpu_switch(td, newtd, TDQ_LOCKPTR(tdq)); } diff --git a/sys/kern/subr_capability.c b/sys/kern/subr_capability.c index 7cc6fb593697..5ad5b0af1681 100644 --- a/sys/kern/subr_capability.c +++ b/sys/kern/subr_capability.c @@ -74,6 +74,10 @@ const cap_rights_t cap_getsockopt_rights = CAP_RIGHTS_INITIALIZER(CAP_GETSOCKOPT); const cap_rights_t cap_getsockname_rights = CAP_RIGHTS_INITIALIZER(CAP_GETSOCKNAME); +const cap_rights_t cap_inotify_add_rights = + CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_ADD); +const cap_rights_t cap_inotify_rm_rights = + CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_RM); const cap_rights_t cap_ioctl_rights = CAP_RIGHTS_INITIALIZER(CAP_IOCTL); const cap_rights_t cap_listen_rights = CAP_RIGHTS_INITIALIZER(CAP_LISTEN); const cap_rights_t cap_linkat_source_rights = diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index d31ff3b939cc..94e44d888181 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -37,16 +37,17 @@ #include "opt_capsicum.h" #include "opt_ktrace.h" -#define EXTERR_CATEGORY EXTERR_CAT_FILEDESC +#define EXTERR_CATEGORY EXTERR_CAT_GENIO #include <sys/param.h> #include <sys/systm.h> #include <sys/sysproto.h> #include <sys/capsicum.h> +#include <sys/exterrvar.h> #include <sys/filedesc.h> #include <sys/filio.h> #include <sys/fcntl.h> #include <sys/file.h> -#include <sys/exterrvar.h> +#include <sys/inotify.h> #include <sys/lock.h> #include <sys/proc.h> #include <sys/signalvar.h> @@ -195,7 +196,7 @@ 
sys_read(struct thread *td, struct read_args *uap) int error; if (uap->nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; @@ -233,7 +234,7 @@ kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset) int error; if (nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = buf; aiov.iov_len = nbyte; auio.uio_iov = &aiov; @@ -329,7 +330,7 @@ kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset) error = ESPIPE; else if (offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) - error = EINVAL; + error = EXTERROR(EINVAL, "neg offset"); else error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); fdrop(fp, td); @@ -396,7 +397,7 @@ sys_write(struct thread *td, struct write_args *uap) int error; if (uap->nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = (void *)(uintptr_t)uap->buf; aiov.iov_len = uap->nbyte; auio.uio_iov = &aiov; @@ -435,7 +436,7 @@ kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, int error; if (nbyte > IOSIZE_MAX) - return (EINVAL); + return (EXTERROR(EINVAL, "length > iosize_max")); aiov.iov_base = (void *)(uintptr_t)buf; aiov.iov_len = nbyte; auio.uio_iov = &aiov; @@ -531,7 +532,7 @@ kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset) error = ESPIPE; else if (offset < 0 && (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) - error = EINVAL; + error = EXTERROR(EINVAL, "neg offset"); else error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); fdrop(fp, td); @@ -602,14 +603,14 @@ kern_ftruncate(struct thread *td, int fd, off_t length) AUDIT_ARG_FD(fd); if (length < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "negative length")); error = fget(td, fd, &cap_ftruncate_rights, &fp); if (error) return (error); AUDIT_ARG_FILE(td->td_proc, fp); if (!(fp->f_flag & FWRITE)) { fdrop(fp, td); - return (EINVAL); + return (EXTERROR(EINVAL, "non-writable")); } error = fo_truncate(fp, length, td->td_ucred, td); fdrop(fp, td); @@ -840,8 +841,10 @@ kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) int error; AUDIT_ARG_FD(fd); - if (offset < 0 || len <= 0) - return (EINVAL); + if (offset < 0) + return (EXTERROR(EINVAL, "negative offset")); + if (len <= 0) + return (EXTERROR(EINVAL, "negative length")); /* Check for wrap. 
*/ if (offset > OFF_MAX - len) return (EFBIG); @@ -898,16 +901,21 @@ kern_fspacectl(struct thread *td, int fd, int cmd, AUDIT_ARG_FFLAGS(flags); if (rqsr == NULL) - return (EINVAL); + return (EXTERROR(EINVAL, "no range")); rmsr = *rqsr; if (rmsrp != NULL) *rmsrp = rmsr; - if (cmd != SPACECTL_DEALLOC || - rqsr->r_offset < 0 || rqsr->r_len <= 0 || - rqsr->r_offset > OFF_MAX - rqsr->r_len || - (flags & ~SPACECTL_F_SUPPORTED) != 0) - return (EINVAL); + if (cmd != SPACECTL_DEALLOC) + return (EXTERROR(EINVAL, "cmd", cmd)); + if (rqsr->r_offset < 0) + return (EXTERROR(EINVAL, "neg offset")); + if (rqsr->r_len <= 0) + return (EXTERROR(EINVAL, "neg len")); + if (rqsr->r_offset > OFF_MAX - rqsr->r_len) + return (EXTERROR(EINVAL, "offset too large")); + if ((flags & ~SPACECTL_F_SUPPORTED) != 0) + return (EXTERROR(EINVAL, "reserved flags", flags)); error = fget_write(td, fd, &cap_pwrite_rights, &fp); if (error != 0) @@ -939,7 +947,6 @@ int kern_specialfd(struct thread *td, int type, void *arg) { struct file *fp; - struct specialfd_eventfd *ae; int error, fd, fflags; fflags = 0; @@ -948,14 +955,24 @@ kern_specialfd(struct thread *td, int type, void *arg) return (error); switch (type) { - case SPECIALFD_EVENTFD: + case SPECIALFD_EVENTFD: { + struct specialfd_eventfd *ae; + ae = arg; if ((ae->flags & EFD_CLOEXEC) != 0) fflags |= O_CLOEXEC; error = eventfd_create_file(td, fp, ae->initval, ae->flags); break; + } + case SPECIALFD_INOTIFY: { + struct specialfd_inotify *si; + + si = arg; + error = inotify_create_file(td, fp, si->flags, &fflags); + break; + } default: - error = EINVAL; + error = EXTERROR(EINVAL, "invalid type", type); break; } @@ -970,13 +987,14 @@ kern_specialfd(struct thread *td, int type, void *arg) int sys___specialfd(struct thread *td, struct __specialfd_args *args) { - struct specialfd_eventfd ae; int error; switch (args->type) { - case SPECIALFD_EVENTFD: + case SPECIALFD_EVENTFD: { + struct specialfd_eventfd ae; + if (args->len != sizeof(struct specialfd_eventfd)) { - error = EINVAL; + error = EXTERROR(EINVAL, "eventfd params ABI"); break; } error = copyin(args->req, &ae, sizeof(ae)); @@ -984,13 +1002,27 @@ sys___specialfd(struct thread *td, struct __specialfd_args *args) break; if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE)) != 0) { - error = EINVAL; + error = EXTERROR(EINVAL, "reserved flag"); break; } error = kern_specialfd(td, args->type, &ae); break; + } + case SPECIALFD_INOTIFY: { + struct specialfd_inotify si; + + if (args->len != sizeof(si)) { + error = EINVAL; + break; + } + error = copyin(args->req, &si, sizeof(si)); + if (error != 0) + break; + error = kern_specialfd(td, args->type, &si); + break; + } default: - error = EINVAL; + error = EXTERROR(EINVAL, "unknown type", args->type); break; } return (error); @@ -1166,7 +1198,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, int error, lf, ndu; if (nd < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "negative ndescs")); fdp = td->td_proc->p_fd; ndu = nd; lf = fdp->fd_nfiles; @@ -1259,7 +1291,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, rtv = *tvp; if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || rtv.tv_usec >= 1000000) { - error = EINVAL; + error = EXTERROR(EINVAL, "invalid timeval"); goto done; } if (!timevalisset(&rtv)) @@ -1491,7 +1523,7 @@ sys_poll(struct thread *td, struct poll_args *uap) if (uap->timeout != INFTIM) { if (uap->timeout < 0) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timeout")); ts.tv_sec = uap->timeout / 1000; ts.tv_nsec = 
(uap->timeout % 1000) * 1000000; tsp = &ts; @@ -1516,7 +1548,7 @@ kern_poll_kfds(struct thread *td, struct pollfd *kfds, u_int nfds, precision = 0; if (tsp != NULL) { if (!timespecvalid_interval(tsp)) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timespec")); if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) sbt = 0; else { @@ -1619,7 +1651,7 @@ kern_poll(struct thread *td, struct pollfd *ufds, u_int nfds, int error; if (kern_poll_maxfds(nfds)) - return (EINVAL); + return (EXTERROR(EINVAL, "too large nfds")); if (nfds > nitems(stackfds)) kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); else @@ -1796,7 +1828,7 @@ selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) rtv = *tvp; if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || rtv.tv_usec >= 1000000) - return (EINVAL); + return (EXTERROR(EINVAL, "invalid timeval")); if (!timevalisset(&rtv)) asbt = 0; else if (rtv.tv_sec <= INT32_MAX) { @@ -2173,7 +2205,7 @@ kern_kcmp(struct thread *td, pid_t pid1, pid_t pid2, int type, (uintptr_t)p2->p_vmspace); break; default: - error = EINVAL; + error = EXTERROR(EINVAL, "unknown op"); break; } diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index fa36cc824078..90a4f3a7dad8 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -598,4 +598,6 @@ const char *syscallnames[] = { "fchroot", /* 590 = fchroot */ "setcred", /* 591 = setcred */ "exterrctl", /* 592 = exterrctl */ + "inotify_add_watch_at", /* 593 = inotify_add_watch_at */ + "inotify_rm_watch", /* 594 = inotify_rm_watch */ }; diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 08b557a7a540..2ab17e036d5c 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -3356,4 +3356,19 @@ _In_reads_bytes_(4) void *ptr ); } +593 AUE_INOTIFY STD|CAPENABLED { + int inotify_add_watch_at( + int fd, + int dfd, + _In_z_ const char *path, + uint32_t mask + ); + } +594 AUE_INOTIFY STD|CAPENABLED { + int inotify_rm_watch( + int fd, + int wd + ); + } + ; vim: syntax=off diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c index 15789d3eb5fa..90b21616a558 100644 --- a/sys/kern/systrace_args.c +++ b/sys/kern/systrace_args.c @@ -3482,6 +3482,24 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } + /* inotify_add_watch_at */ + case 593: { + struct inotify_add_watch_at_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->dfd; /* int */ + uarg[a++] = (intptr_t)p->path; /* const char * */ + uarg[a++] = p->mask; /* uint32_t */ + *n_args = 4; + break; + } + /* inotify_rm_watch */ + case 594: { + struct inotify_rm_watch_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->wd; /* int */ + *n_args = 2; + break; + } default: *n_args = 0; break; @@ -9317,6 +9335,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* inotify_add_watch_at */ + case 593: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const char *"; + break; + case 3: + p = "uint32_t"; + break; + default: + break; + }; + break; + /* inotify_rm_watch */ + case 594: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -11305,6 +11355,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* inotify_add_watch_at */ + case 593: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* inotify_rm_watch */ + case 594: 
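Both new syscalls are marked CAPENABLED and are governed by the CAP_INOTIFY_ADD/CAP_INOTIFY_RM rights added in subr_capability.c, and inotify_add_watch_at() takes a directory descriptor, so a sandboxed process can keep adding watches relative to a pre-opened directory after entering capability mode. A hedged sketch (it assumes the inotify_add_watch_at() prototype is exposed via <sys/inotify.h>, and the rights chosen for the two descriptors are illustrative, not a recommendation):

	#include <sys/capsicum.h>
	#include <sys/inotify.h>
	#include <err.h>
	#include <fcntl.h>
	#include <unistd.h>

	int
	main(void)
	{
		cap_rights_t r;
		int ifd, dirfd;

		ifd = inotify_init1(IN_CLOEXEC);
		dirfd = open("/var/log", O_DIRECTORY | O_CLOEXEC);
		if (ifd < 0 || dirfd < 0)
			err(1, "setup");

		/* Drop to the minimum rights each descriptor needs. */
		cap_rights_limit(ifd, cap_rights_init(&r, CAP_READ, CAP_EVENT,
		    CAP_INOTIFY_ADD, CAP_INOTIFY_RM));
		cap_rights_limit(dirfd, cap_rights_init(&r, CAP_LOOKUP,
		    CAP_FSTAT));
		if (cap_enter() != 0)
			err(1, "cap_enter");

		/* Watch /var/log/messages relative to the pre-opened dirfd. */
		if (inotify_add_watch_at(ifd, dirfd, "messages", IN_MODIFY) < 0)
			err(1, "inotify_add_watch_at");
		return (0);
	}
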
+ if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 97dc854c9386..66ea50eee77b 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -301,7 +301,7 @@ static TAILQ_HEAD(,kaiocb) aio_jobs; /* (c) Async job list */ static struct unrhdr *aiod_unr; static void aio_biocleanup(struct bio *bp); -void aio_init_aioinfo(struct proc *p); +static int aio_init_aioinfo(struct proc *p); static int aio_onceonly(void); static int aio_free_entry(struct kaiocb *job); static void aio_process_rw(struct kaiocb *job); @@ -309,7 +309,7 @@ static void aio_process_sync(struct kaiocb *job); static void aio_process_mlock(struct kaiocb *job); static void aio_schedule_fsync(void *context, int pending); static int aio_newproc(int *); -int aio_aqueue(struct thread *td, struct aiocb *ujob, +static int aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lio, int type, struct aiocb_ops *ops); static int aio_queue_file(struct file *fp, struct kaiocb *job); static void aio_biowakeup(struct bio *bp); @@ -422,10 +422,11 @@ aio_onceonly(void) * Init the per-process aioinfo structure. The aioinfo limits are set * per-process for user limit (resource) management. */ -void +static int aio_init_aioinfo(struct proc *p) { struct kaioinfo *ki; + int error; ki = uma_zalloc(kaio_zone, M_WAITOK); mtx_init(&ki->kaio_mtx, "aiomtx", NULL, MTX_DEF | MTX_NEW); @@ -451,8 +452,13 @@ aio_init_aioinfo(struct proc *p) uma_zfree(kaio_zone, ki); } - while (num_aio_procs < MIN(target_aio_procs, max_aio_procs)) - aio_newproc(NULL); + error = 0; + while (num_aio_procs < MIN(target_aio_procs, max_aio_procs)) { + error = aio_newproc(NULL); + if (error != 0) + break; + } + return (error); } static int @@ -1476,7 +1482,7 @@ static struct aiocb_ops aiocb_ops_osigevent = { * Queue a new AIO request. Choosing either the threaded or direct bio VCHR * technique is done in this code. */ -int +static int aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, int type, struct aiocb_ops *ops) { @@ -1490,8 +1496,11 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj, int fd, kqfd; u_short evflags; - if (p->p_aioinfo == NULL) - aio_init_aioinfo(p); + if (p->p_aioinfo == NULL) { + error = aio_init_aioinfo(p); + if (error != 0) + goto err1; + } ki = p->p_aioinfo; @@ -2213,8 +2222,11 @@ kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list, if (nent < 0 || nent > max_aio_queue_per_proc) return (EINVAL); - if (p->p_aioinfo == NULL) - aio_init_aioinfo(p); + if (p->p_aioinfo == NULL) { + error = aio_init_aioinfo(p); + if (error != 0) + return (error); + } ki = p->p_aioinfo; @@ -2503,8 +2515,11 @@ kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp, timo = tvtohz(&atv); } - if (p->p_aioinfo == NULL) - aio_init_aioinfo(p); + if (p->p_aioinfo == NULL) { + error = aio_init_aioinfo(p); + if (error != 0) + return (error); + } ki = p->p_aioinfo; error = 0; diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 883beaf6d1da..3d455b3874cc 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -41,6 +41,7 @@ #include <sys/counter.h> #include <sys/filedesc.h> #include <sys/fnv_hash.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> @@ -2629,6 +2630,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, atomic_store_ptr(&dvp->v_cache_dd, ncp); } else if (vp != NULL) { /* + * Take the slow path in INOTIFY(). 
This flag will be lazily + * cleared by cache_vop_inotify() once all directories referring + * to vp are unwatched. + */ + if (__predict_false((vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) + vn_irflag_set_cond(vp, VIRF_INOTIFY_PARENT); + + /* * For this case, the cache entry maps both the * directory name in it and the name ".." for the * directory's parent. @@ -4008,6 +4017,56 @@ out: return (error); } +void +cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie) +{ + struct mtx *vlp; + struct namecache *ncp; + int isdir; + bool logged, self; + + isdir = vp->v_type == VDIR ? IN_ISDIR : 0; + self = (vn_irflag_read(vp) & VIRF_INOTIFY) != 0 && + (vp->v_type != VDIR || (event & ~_IN_DIR_EVENTS) != 0); + + if (self) { + int selfevent; + + if (event == _IN_ATTRIB_LINKCOUNT) + selfevent = IN_ATTRIB; + else + selfevent = event; + inotify_log(vp, NULL, 0, selfevent | isdir, cookie); + } + if ((event & IN_ALL_EVENTS) == 0) + return; + + logged = false; + vlp = VP2VNODELOCK(vp); + mtx_lock(vlp); + TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) { + if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) + continue; + if ((vn_irflag_read(ncp->nc_dvp) & VIRF_INOTIFY) != 0) { + /* + * XXX-MJ if the vnode has two links in the same + * dir, we'll log the same event twice. + */ + inotify_log(ncp->nc_dvp, ncp->nc_name, ncp->nc_nlen, + event | isdir, cookie); + logged = true; + } + } + if (!logged && (vn_irflag_read(vp) & VIRF_INOTIFY_PARENT) != 0) { + /* + * We didn't find a watched directory that contains this vnode, + * so stop calling VOP_INOTIFY for operations on the vnode. + */ + vn_irflag_unset(vp, VIRF_INOTIFY_PARENT); + } + mtx_unlock(vlp); +} + #ifdef DDB static void db_print_vpath(struct vnode *vp) diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index be49c0887609..fd6202a1424c 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -39,6 +39,7 @@ #include <sys/conf.h> #include <sys/event.h> #include <sys/filio.h> +#include <sys/inotify.h> #include <sys/kernel.h> #include <sys/limits.h> #include <sys/lock.h> @@ -119,6 +120,8 @@ struct vop_vector default_vnodeops = { .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, .vop_need_inactive = vop_stdneed_inactive, + .vop_inotify = vop_stdinotify, + .vop_inotify_add_watch = vop_stdinotify_add_watch, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, @@ -453,6 +456,7 @@ vop_stdpathconf(struct vop_pathconf_args *ap) case _PC_MAC_PRESENT: case _PC_NAMEDATTR_ENABLED: case _PC_HAS_NAMEDATTR: + case _PC_HAS_HIDDENSYSTEM: *ap->a_retval = 0; return (0); default: @@ -1306,6 +1310,20 @@ vop_stdneed_inactive(struct vop_need_inactive_args *ap) } int +vop_stdinotify(struct vop_inotify_args *ap) +{ + vn_inotify(ap->a_vp, ap->a_dvp, ap->a_cnp, ap->a_event, ap->a_cookie); + return (0); +} + +int +vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *ap) +{ + return (vn_inotify_add_watch(ap->a_vp, ap->a_sc, ap->a_mask, + ap->a_wdp, ap->a_td)); +} + +int vop_stdioctl(struct vop_ioctl_args *ap) { struct vnode *vp; diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c new file mode 100644 index 000000000000..9562350c897f --- /dev/null +++ b/sys/kern/vfs_inotify.c @@ -0,0 +1,1008 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. 
+ */ + +#include "opt_ktrace.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/caprights.h> +#include <sys/counter.h> +#include <sys/dirent.h> +#define EXTERR_CATEGORY EXTERR_CAT_INOTIFY +#include <sys/exterrvar.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/filio.h> +#include <sys/inotify.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/ktrace.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/resourcevar.h> +#include <sys/selinfo.h> +#include <sys/stat.h> +#include <sys/syscallsubr.h> +#include <sys/sysctl.h> +#include <sys/sysent.h> +#include <sys/syslimits.h> +#include <sys/sysproto.h> +#include <sys/tree.h> +#include <sys/user.h> +#include <sys/vnode.h> + +uint32_t inotify_rename_cookie; + +static SYSCTL_NODE(_vfs, OID_AUTO, inotify, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "inotify configuration"); + +static int inotify_max_queued_events = 16384; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_queued_events, CTLFLAG_RWTUN, + &inotify_max_queued_events, 0, + "Maximum number of events to queue on an inotify descriptor"); + +static int inotify_max_user_instances = 256; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_instances, CTLFLAG_RWTUN, + &inotify_max_user_instances, 0, + "Maximum number of inotify descriptors per user"); + +static int inotify_max_user_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_watches, CTLFLAG_RWTUN, + &inotify_max_user_watches, 0, + "Maximum number of inotify watches per user"); + +static int inotify_max_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, max_watches, CTLFLAG_RWTUN, + &inotify_max_watches, 0, + "Maximum number of inotify watches system-wide"); + +static int inotify_watches; +SYSCTL_INT(_vfs_inotify, OID_AUTO, watches, CTLFLAG_RD, + &inotify_watches, 0, + "Total number of inotify watches currently in use"); + +static int inotify_coalesce = 1; +SYSCTL_INT(_vfs_inotify, OID_AUTO, coalesce, CTLFLAG_RWTUN, + &inotify_coalesce, 0, + "Coalesce inotify events when possible"); + +static COUNTER_U64_DEFINE_EARLY(inotify_event_drops); +SYSCTL_COUNTER_U64(_vfs_inotify, OID_AUTO, event_drops, CTLFLAG_RD, + &inotify_event_drops, + "Number of inotify events dropped due to limits or allocation failures"); + +static fo_rdwr_t inotify_read; +static fo_ioctl_t inotify_ioctl; +static fo_poll_t inotify_poll; +static fo_kqfilter_t inotify_kqfilter; +static fo_stat_t inotify_stat; +static fo_close_t inotify_close; +static fo_fill_kinfo_t inotify_fill_kinfo; + +static const struct fileops inotifyfdops = { + .fo_read = inotify_read, + .fo_write = invfo_rdwr, + .fo_truncate = invfo_truncate, + .fo_ioctl = inotify_ioctl, + .fo_poll = inotify_poll, + .fo_kqfilter = inotify_kqfilter, + .fo_stat = inotify_stat, + .fo_close = inotify_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_fill_kinfo = inotify_fill_kinfo, + .fo_cmp = file_kcmp_generic, + .fo_flags = DFLAG_PASSABLE, +}; + +static void filt_inotifydetach(struct knote *kn); +static int filt_inotifyevent(struct knote *kn, long hint); + +static const struct filterops inotify_rfiltops = { + .f_isfd = 1, + .f_detach = filt_inotifydetach, + .f_event = filt_inotifyevent, +}; + +static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures"); + +struct inotify_record { + STAILQ_ENTRY(inotify_record) link; + struct inotify_event ev; +}; + +static uint64_t inotify_ino = 1; + +/* + * On LP64 systems this occupies 64 bytes, so we 
don't get internal + * fragmentation by allocating watches with malloc(9). If the size changes, + * consider using a UMA zone to improve memory efficiency. + */ +struct inotify_watch { + struct inotify_softc *sc; /* back-pointer */ + int wd; /* unique ID */ + uint32_t mask; /* event mask */ + struct vnode *vp; /* vnode being watched, refed */ + RB_ENTRY(inotify_watch) ilink; /* inotify linkage */ + TAILQ_ENTRY(inotify_watch) vlink; /* vnode linkage */ +}; + +static void +inotify_init(void *arg __unused) +{ + /* Don't let a user hold too many vnodes. */ + inotify_max_user_watches = desiredvnodes / 3; + /* Don't let the system hold too many vnodes. */ + inotify_max_watches = desiredvnodes / 2; +} +SYSINIT(inotify, SI_SUB_VFS, SI_ORDER_ANY, inotify_init, NULL); + +static int +inotify_watch_cmp(const struct inotify_watch *a, + const struct inotify_watch *b) +{ + if (a->wd < b->wd) + return (-1); + else if (a->wd > b->wd) + return (1); + else + return (0); +} +RB_HEAD(inotify_watch_tree, inotify_watch); +RB_GENERATE_STATIC(inotify_watch_tree, inotify_watch, ilink, inotify_watch_cmp); + +struct inotify_softc { + struct mtx lock; /* serialize all softc writes */ + STAILQ_HEAD(, inotify_record) pending; /* events waiting to be read */ + struct inotify_record overflow; /* preallocated record */ + int nextwatch; /* next watch ID to try */ + int npending; /* number of pending events */ + size_t nbpending; /* bytes available to read */ + uint64_t ino; /* unique identifier */ + struct inotify_watch_tree watches; /* active watches */ + struct selinfo sel; /* select/poll/kevent info */ + struct ucred *cred; /* credential ref */ +}; + +static struct inotify_record * +inotify_dequeue(struct inotify_softc *sc) +{ + struct inotify_record *rec; + + mtx_assert(&sc->lock, MA_OWNED); + KASSERT(!STAILQ_EMPTY(&sc->pending), + ("%s: queue for %p is empty", __func__, sc)); + + rec = STAILQ_FIRST(&sc->pending); + STAILQ_REMOVE_HEAD(&sc->pending, link); + sc->npending--; + sc->nbpending -= sizeof(rec->ev) + rec->ev.len; + return (rec); +} + +static void +inotify_enqueue(struct inotify_softc *sc, struct inotify_record *rec, bool head) +{ + mtx_assert(&sc->lock, MA_OWNED); + + if (head) + STAILQ_INSERT_HEAD(&sc->pending, rec, link); + else + STAILQ_INSERT_TAIL(&sc->pending, rec, link); + sc->npending++; + sc->nbpending += sizeof(rec->ev) + rec->ev.len; +} + +static int +inotify_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags, + struct thread *td) +{ + struct inotify_softc *sc; + struct inotify_record *rec; + int error; + bool first; + + sc = fp->f_data; + error = 0; + + mtx_lock(&sc->lock); + while (STAILQ_EMPTY(&sc->pending)) { + if ((flags & IO_NDELAY) != 0 || (fp->f_flag & FNONBLOCK) != 0) { + mtx_unlock(&sc->lock); + return (EWOULDBLOCK); + } + error = msleep(&sc->pending, &sc->lock, PCATCH, "inotify", 0); + if (error != 0) { + mtx_unlock(&sc->lock); + return (error); + } + } + for (first = true; !STAILQ_EMPTY(&sc->pending); first = false) { + size_t len; + + rec = inotify_dequeue(sc); + len = sizeof(rec->ev) + rec->ev.len; + if (uio->uio_resid < (ssize_t)len) { + inotify_enqueue(sc, rec, true); + if (first) { + error = EXTERROR(EINVAL, + "read buffer is too small"); + } + break; + } + mtx_unlock(&sc->lock); + error = uiomove(&rec->ev, len, uio); +#ifdef KTRACE + if (error == 0 && KTRPOINT(td, KTR_STRUCT)) + ktrstruct("inotify", &rec->ev, len); +#endif + mtx_lock(&sc->lock); + if (error != 0) { + inotify_enqueue(sc, rec, true); + mtx_unlock(&sc->lock); + return (error); + } + if (rec == 
&sc->overflow) { + /* + * Signal to inotify_queue_record() that the overflow + * record can be reused. + */ + memset(rec, 0, sizeof(*rec)); + } else { + free(rec, M_INOTIFY); + } + } + mtx_unlock(&sc->lock); + return (error); +} + +static int +inotify_ioctl(struct file *fp, u_long com, void *data, struct ucred *cred, + struct thread *td) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + switch (com) { + case FIONREAD: + *(int *)data = (int)sc->nbpending; + return (0); + case FIONBIO: + case FIOASYNC: + return (0); + default: + return (ENOTTY); + } + + return (0); +} + +static int +inotify_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) +{ + struct inotify_softc *sc; + int revents; + + sc = fp->f_data; + revents = 0; + + mtx_lock(&sc->lock); + if ((events & (POLLIN | POLLRDNORM)) != 0 && sc->npending > 0) + revents |= events & (POLLIN | POLLRDNORM); + else + selrecord(td, &sc->sel); + mtx_unlock(&sc->lock); + return (revents); +} + +static void +filt_inotifydetach(struct knote *kn) +{ + struct inotify_softc *sc; + + sc = kn->kn_hook; + knlist_remove(&sc->sel.si_note, kn, 0); +} + +static int +filt_inotifyevent(struct knote *kn, long hint) +{ + struct inotify_softc *sc; + + sc = kn->kn_hook; + mtx_assert(&sc->lock, MA_OWNED); + kn->kn_data = sc->nbpending; + return (kn->kn_data > 0); +} + +static int +inotify_kqfilter(struct file *fp, struct knote *kn) +{ + struct inotify_softc *sc; + + if (kn->kn_filter != EVFILT_READ) + return (EINVAL); + sc = fp->f_data; + kn->kn_fop = &inotify_rfiltops; + kn->kn_hook = sc; + knlist_add(&sc->sel.si_note, kn, 0); + return (0); +} + +static int +inotify_stat(struct file *fp, struct stat *sb, struct ucred *cred) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + memset(sb, 0, sizeof(*sb)); + sb->st_mode = S_IFREG | S_IRUSR; + sb->st_blksize = sizeof(struct inotify_event) + _IN_NAMESIZE(NAME_MAX); + mtx_lock(&sc->lock); + sb->st_size = sc->nbpending; + sb->st_blocks = sc->npending; + sb->st_uid = sc->cred->cr_ruid; + sb->st_gid = sc->cred->cr_rgid; + sb->st_ino = sc->ino; + mtx_unlock(&sc->lock); + return (0); +} + +static void +inotify_unlink_watch_locked(struct inotify_softc *sc, struct inotify_watch *watch) +{ + struct vnode *vp; + + vp = watch->vp; + mtx_assert(&vp->v_pollinfo->vpi_lock, MA_OWNED); + + atomic_subtract_int(&inotify_watches, 1); + (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0); + + TAILQ_REMOVE(&vp->v_pollinfo->vpi_inotify, watch, vlink); + if (TAILQ_EMPTY(&vp->v_pollinfo->vpi_inotify)) + vn_irflag_unset_locked(vp, VIRF_INOTIFY); +} + +/* + * Assumes that the watch has already been removed from its softc. 
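[Editorial aside, not part of the patch: inotify_read() above copies out variable-length records and fails with EINVAL when the caller's buffer cannot hold even the first pending record, so a consumer must size its buffer for at least one maximal event and then walk the returned bytes record by record. A minimal userspace sketch, assuming Linux-style libc wrappers for inotify_init1() and inotify_add_watch() and the <sys/inotify.h> names used in this file:]

#include <sys/inotify.h>
#include <err.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* Room for one maximal record; align it like the kernel's struct. */
	_Alignas(struct inotify_event)
	    char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
	ssize_t n;
	int fd, wd;

	fd = inotify_init1(IN_CLOEXEC);
	if (fd == -1)
		err(1, "inotify_init1");
	wd = inotify_add_watch(fd, "/tmp",
	    IN_CREATE | IN_DELETE | IN_CLOSE_WRITE);
	if (wd == -1)
		err(1, "inotify_add_watch");

	for (;;) {
		/* A read shorter than one record fails with EINVAL. */
		n = read(fd, buf, sizeof(buf));
		if (n == -1)
			err(1, "read");
		for (char *p = buf; p < buf + n; ) {
			struct inotify_event *ev = (struct inotify_event *)p;

			printf("wd %d mask %#x name %s\n", ev->wd, ev->mask,
			    ev->len > 0 ? ev->name : "");
			p += sizeof(*ev) + ev->len;
		}
	}
}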
+ */ +static void +inotify_remove_watch(struct inotify_watch *watch) +{ + struct inotify_softc *sc; + struct vnode *vp; + + sc = watch->sc; + + vp = watch->vp; + mtx_lock(&vp->v_pollinfo->vpi_lock); + inotify_unlink_watch_locked(sc, watch); + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + vrele(vp); + free(watch, M_INOTIFY); +} + +static int +inotify_close(struct file *fp, struct thread *td) +{ + struct inotify_softc *sc; + struct inotify_record *rec; + struct inotify_watch *watch; + + sc = fp->f_data; + + mtx_lock(&sc->lock); + (void)chginotifycnt(sc->cred->cr_ruidinfo, -1, 0); + while ((watch = RB_MIN(inotify_watch_tree, &sc->watches)) != NULL) { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + mtx_unlock(&sc->lock); + inotify_remove_watch(watch); + mtx_lock(&sc->lock); + } + while (!STAILQ_EMPTY(&sc->pending)) { + rec = inotify_dequeue(sc); + if (rec != &sc->overflow) + free(rec, M_INOTIFY); + } + mtx_unlock(&sc->lock); + seldrain(&sc->sel); + knlist_destroy(&sc->sel.si_note); + mtx_destroy(&sc->lock); + crfree(sc->cred); + free(sc, M_INOTIFY); + return (0); +} + +static int +inotify_fill_kinfo(struct file *fp, struct kinfo_file *kif, + struct filedesc *fdp) +{ + struct inotify_softc *sc; + + sc = fp->f_data; + + kif->kf_type = KF_TYPE_INOTIFY; + kif->kf_un.kf_inotify.kf_inotify_npending = sc->npending; + kif->kf_un.kf_inotify.kf_inotify_nbpending = sc->nbpending; + return (0); +} + +int +inotify_create_file(struct thread *td, struct file *fp, int flags, int *fflagsp) +{ + struct inotify_softc *sc; + int fflags; + + if ((flags & ~(IN_NONBLOCK | IN_CLOEXEC)) != 0) + return (EINVAL); + + if (!chginotifycnt(td->td_ucred->cr_ruidinfo, 1, + inotify_max_user_instances)) + return (EMFILE); + + sc = malloc(sizeof(*sc), M_INOTIFY, M_WAITOK | M_ZERO); + sc->nextwatch = 1; /* Required for compatibility. */ + STAILQ_INIT(&sc->pending); + RB_INIT(&sc->watches); + mtx_init(&sc->lock, "inotify", NULL, MTX_DEF); + knlist_init_mtx(&sc->sel.si_note, &sc->lock); + sc->cred = crhold(td->td_ucred); + sc->ino = atomic_fetchadd_64(&inotify_ino, 1); + + fflags = FREAD; + if ((flags & IN_NONBLOCK) != 0) + fflags |= FNONBLOCK; + if ((flags & IN_CLOEXEC) != 0) + *fflagsp |= O_CLOEXEC; + finit(fp, fflags, DTYPE_INOTIFY, sc, &inotifyfdops); + + return (0); +} + +static struct inotify_record * +inotify_alloc_record(uint32_t wd, const char *name, size_t namelen, int event, + uint32_t cookie, int waitok) +{ + struct inotify_event *evp; + struct inotify_record *rec; + + rec = malloc(sizeof(*rec) + _IN_NAMESIZE(namelen), M_INOTIFY, + waitok | M_ZERO); + if (rec == NULL) + return (NULL); + evp = &rec->ev; + evp->wd = wd; + evp->mask = event; + evp->cookie = cookie; + evp->len = _IN_NAMESIZE(namelen); + if (name != NULL) + memcpy(evp->name, name, namelen); + return (rec); +} + +static bool +inotify_can_coalesce(struct inotify_softc *sc, struct inotify_event *evp) +{ + struct inotify_record *prev; + + mtx_assert(&sc->lock, MA_OWNED); + + prev = STAILQ_LAST(&sc->pending, inotify_record, link); + return (prev != NULL && prev->ev.mask == evp->mask && + prev->ev.wd == evp->wd && prev->ev.cookie == evp->cookie && + prev->ev.len == evp->len && + (evp->len == 0 || strcmp(prev->ev.name, evp->name) == 0)); +} + +static void +inotify_overflow_event(struct inotify_event *evp) +{ + evp->mask = IN_Q_OVERFLOW; + evp->wd = -1; + evp->cookie = 0; + evp->len = 0; +} + +/* + * Put an event record on the queue for an inotify desscriptor. Return false if + * the record was not enqueued for some reason, true otherwise. 
+ */ +static bool +inotify_queue_record(struct inotify_softc *sc, struct inotify_record *rec) +{ + struct inotify_event *evp; + + mtx_assert(&sc->lock, MA_OWNED); + + evp = &rec->ev; + if (__predict_false(rec == &sc->overflow)) { + /* + * Is the overflow record already in the queue? If so, there's + * not much else we can do: we're here because a kernel memory + * shortage prevented new record allocations. + */ + counter_u64_add(inotify_event_drops, 1); + if (evp->mask == IN_Q_OVERFLOW) + return (false); + inotify_overflow_event(evp); + } else { + /* Try to coalesce duplicate events. */ + if (inotify_coalesce && inotify_can_coalesce(sc, evp)) + return (false); + + /* + * Would this one overflow the queue? If so, convert it to an + * overflow event and try again to coalesce. + */ + if (sc->npending >= inotify_max_queued_events) { + counter_u64_add(inotify_event_drops, 1); + inotify_overflow_event(evp); + if (inotify_can_coalesce(sc, evp)) + return (false); + } + } + inotify_enqueue(sc, rec, false); + selwakeup(&sc->sel); + KNOTE_LOCKED(&sc->sel.si_note, 0); + wakeup(&sc->pending); + return (true); +} + +static int +inotify_log_one(struct inotify_watch *watch, const char *name, size_t namelen, + int event, uint32_t cookie) +{ + struct inotify_watch key; + struct inotify_softc *sc; + struct inotify_record *rec; + int relecount; + bool allocfail; + + relecount = 0; + + sc = watch->sc; + rec = inotify_alloc_record(watch->wd, name, namelen, event, cookie, + M_NOWAIT); + if (rec == NULL) { + rec = &sc->overflow; + allocfail = true; + } else { + allocfail = false; + } + + mtx_lock(&sc->lock); + if (!inotify_queue_record(sc, rec) && rec != &sc->overflow) + free(rec, M_INOTIFY); + if ((watch->mask & IN_ONESHOT) != 0 || + (event & (IN_DELETE_SELF | IN_UNMOUNT)) != 0) { + if (!allocfail) { + rec = inotify_alloc_record(watch->wd, NULL, 0, + IN_IGNORED, 0, M_NOWAIT); + if (rec == NULL) + rec = &sc->overflow; + if (!inotify_queue_record(sc, rec) && + rec != &sc->overflow) + free(rec, M_INOTIFY); + } + + /* + * Remove the watch, taking care to handle races with + * inotify_close(). + */ + key.wd = watch->wd; + if (RB_FIND(inotify_watch_tree, &sc->watches, &key) != NULL) { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + inotify_unlink_watch_locked(sc, watch); + free(watch, M_INOTIFY); + + /* Defer vrele() to until locks are dropped. */ + relecount++; + } + } + mtx_unlock(&sc->lock); + return (relecount); +} + +void +inotify_log(struct vnode *vp, const char *name, size_t namelen, int event, + uint32_t cookie) +{ + struct inotify_watch *watch, *tmp; + int relecount; + + KASSERT((event & ~(IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT)) == 0, + ("inotify_log: invalid event %#x", event)); + + relecount = 0; + mtx_lock(&vp->v_pollinfo->vpi_lock); + TAILQ_FOREACH_SAFE(watch, &vp->v_pollinfo->vpi_inotify, vlink, tmp) { + KASSERT(watch->vp == vp, + ("inotify_log: watch %p vp != vp", watch)); + if ((watch->mask & event) != 0 || event == IN_UNMOUNT) { + relecount += inotify_log_one(watch, name, namelen, event, + cookie); + } + } + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + for (int i = 0; i < relecount; i++) + vrele(vp); +} + +/* + * An inotify event occurred on a watched vnode. + */ +void +vn_inotify(struct vnode *vp, struct vnode *dvp, struct componentname *cnp, + int event, uint32_t cookie) +{ + int isdir; + + VNPASS(vp->v_holdcnt > 0, vp); + + isdir = vp->v_type == VDIR ? IN_ISDIR : 0; + + if (dvp != NULL) { + VNPASS(dvp->v_holdcnt > 0, dvp); + + /* + * Should we log an event for the vnode itself? 
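[Usage note, illustrative only: inotify_log_one() above queues IN_IGNORED and drops the watch for IN_ONESHOT, IN_DELETE_SELF and IN_UNMOUNT, and inotify_queue_record() degrades to a single IN_Q_OVERFLOW record when the queue limit is hit or record allocation fails, so a consumer's event loop should treat both specially. A hedged fragment; rescan_watches(), forget_watch() and handle_event() are hypothetical helpers:]

static void
process_event(const struct inotify_event *ev)
{
	if ((ev->mask & IN_Q_OVERFLOW) != 0) {
		/* Events were dropped; cached state may be stale. */
		rescan_watches();
		return;
	}
	if ((ev->mask & IN_IGNORED) != 0) {
		/*
		 * The kernel already removed this watch (one-shot,
		 * deletion or unmount); just forget the descriptor.
		 */
		forget_watch(ev->wd);
		return;
	}
	handle_event(ev);
}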
+ */ + if ((vn_irflag_read(vp) & VIRF_INOTIFY) != 0) { + int selfevent; + + switch (event) { + case _IN_MOVE_DELETE: + case IN_DELETE: + /* + * IN_DELETE_SELF is only generated when the + * last hard link of a file is removed. + */ + selfevent = IN_DELETE_SELF; + if (vp->v_type != VDIR) { + struct vattr va; + int error; + + error = VOP_GETATTR(vp, &va, cnp->cn_cred); + if (error == 0 && va.va_nlink != 0) + selfevent = 0; + } + break; + case IN_MOVED_FROM: + cookie = 0; + selfevent = IN_MOVE_SELF; + break; + case _IN_ATTRIB_LINKCOUNT: + selfevent = IN_ATTRIB; + break; + default: + selfevent = event; + break; + } + + if ((selfevent & ~_IN_DIR_EVENTS) != 0) { + inotify_log(vp, NULL, 0, selfevent | isdir, + cookie); + } + } + + /* + * Something is watching the directory through which this vnode + * was referenced, so we may need to log the event. + */ + if ((event & IN_ALL_EVENTS) != 0 && + (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0) { + inotify_log(dvp, cnp->cn_nameptr, + cnp->cn_namelen, event | isdir, cookie); + } + } else { + /* + * We don't know which watched directory might contain the + * vnode, so we have to fall back to searching the name cache. + */ + cache_vop_inotify(vp, event, cookie); + } +} + +int +vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask, + uint32_t *wdp, struct thread *td) +{ + struct inotify_watch *watch, *watch1; + uint32_t wd; + + /* + * If this is a directory, make sure all of its entries are present in + * the name cache so that we're able to look them up if an event occurs. + * The persistent reference on the directory prevents the outgoing name + * cache entries from being reclaimed. + */ + if (vp->v_type == VDIR) { + struct dirent *dp; + char *buf; + off_t off; + size_t buflen, len; + int eof, error; + + buflen = 128 * sizeof(struct dirent); + buf = malloc(buflen, M_TEMP, M_WAITOK); + + error = 0; + len = off = eof = 0; + for (;;) { + struct nameidata nd; + + error = vn_dir_next_dirent(vp, td, buf, buflen, &dp, + &len, &off, &eof); + if (error != 0) + break; + if (len == 0) + /* Finished reading. */ + break; + if (strcmp(dp->d_name, ".") == 0 || + strcmp(dp->d_name, "..") == 0) + continue; + + /* + * namei() consumes a reference on the starting + * directory if it's specified as a vnode. + */ + vrefact(vp); + NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, + dp->d_name, vp); + error = namei(&nd); + if (error != 0) + break; + vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT); + vrele(nd.ni_vp); + } + free(buf, M_TEMP); + if (error != 0) + return (error); + } + + /* + * The vnode referenced in kern_inotify_add_watch() might be different + * than this one if nullfs is in the picture. + */ + vrefact(vp); + watch = malloc(sizeof(*watch), M_INOTIFY, M_WAITOK | M_ZERO); + watch->sc = sc; + watch->vp = vp; + watch->mask = mask; + + /* + * Are we updating an existing watch? Search the vnode's list rather + * than that of the softc, as the former is likely to be shorter. + */ + v_addpollinfo(vp); + mtx_lock(&vp->v_pollinfo->vpi_lock); + TAILQ_FOREACH(watch1, &vp->v_pollinfo->vpi_inotify, vlink) { + if (watch1->sc == sc) + break; + } + mtx_lock(&sc->lock); + if (watch1 != NULL) { + mtx_unlock(&vp->v_pollinfo->vpi_lock); + + /* + * We found an existing watch, update it based on our flags. 
+ */ + if ((mask & IN_MASK_CREATE) != 0) { + mtx_unlock(&sc->lock); + vrele(vp); + free(watch, M_INOTIFY); + return (EEXIST); + } + if ((mask & IN_MASK_ADD) != 0) + watch1->mask |= mask; + else + watch1->mask = mask; + *wdp = watch1->wd; + mtx_unlock(&sc->lock); + vrele(vp); + free(watch, M_INOTIFY); + return (EJUSTRETURN); + } + + /* + * We're creating a new watch. Add it to the softc and vnode watch + * lists. + */ + do { + struct inotify_watch key; + + /* + * Search for the next available watch descriptor. This is + * implemented so as to avoid reusing watch descriptors for as + * long as possible. + */ + key.wd = wd = sc->nextwatch++; + watch1 = RB_FIND(inotify_watch_tree, &sc->watches, &key); + } while (watch1 != NULL || wd == 0); + watch->wd = wd; + RB_INSERT(inotify_watch_tree, &sc->watches, watch); + TAILQ_INSERT_TAIL(&vp->v_pollinfo->vpi_inotify, watch, vlink); + mtx_unlock(&sc->lock); + mtx_unlock(&vp->v_pollinfo->vpi_lock); + vn_irflag_set_cond(vp, VIRF_INOTIFY); + + *wdp = wd; + + return (0); +} + +void +vn_inotify_revoke(struct vnode *vp) +{ + if (vp->v_pollinfo == NULL) { + /* This is a nullfs vnode which shadows a watched vnode. */ + return; + } + inotify_log(vp, NULL, 0, IN_UNMOUNT, 0); +} + +static int +fget_inotify(struct thread *td, int fd, const cap_rights_t *needrightsp, + struct file **fpp) +{ + struct file *fp; + int error; + + error = fget(td, fd, needrightsp, &fp); + if (error != 0) + return (error); + if (fp->f_type != DTYPE_INOTIFY) { + fdrop(fp, td); + return (EINVAL); + } + *fpp = fp; + return (0); +} + +int +kern_inotify_add_watch(int fd, int dfd, const char *path, uint32_t mask, + struct thread *td) +{ + struct nameidata nd; + struct file *fp; + struct inotify_softc *sc; + struct vnode *vp; + uint32_t wd; + int count, error; + + fp = NULL; + vp = NULL; + + if ((mask & IN_ALL_EVENTS) == 0) + return (EXTERROR(EINVAL, "no events specified")); + if ((mask & (IN_MASK_ADD | IN_MASK_CREATE)) == + (IN_MASK_ADD | IN_MASK_CREATE)) + return (EXTERROR(EINVAL, + "IN_MASK_ADD and IN_MASK_CREATE are mutually exclusive")); + if ((mask & ~(IN_ALL_EVENTS | _IN_ALL_FLAGS | IN_UNMOUNT)) != 0) + return (EXTERROR(EINVAL, "unrecognized flag")); + + error = fget_inotify(td, fd, &cap_inotify_add_rights, &fp); + if (error != 0) + return (error); + sc = fp->f_data; + + NDINIT_AT(&nd, LOOKUP, + ((mask & IN_DONT_FOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF | + LOCKSHARED | AUDITVNODE1, UIO_USERSPACE, path, dfd); + error = namei(&nd); + if (error != 0) + goto out; + NDFREE_PNBUF(&nd); + vp = nd.ni_vp; + + error = VOP_ACCESS(vp, VREAD, td->td_ucred, td); + if (error != 0) + goto out; + + if ((mask & IN_ONLYDIR) != 0 && vp->v_type != VDIR) { + error = ENOTDIR; + goto out; + } + + count = atomic_fetchadd_int(&inotify_watches, 1); + if (count > inotify_max_watches) { + atomic_subtract_int(&inotify_watches, 1); + error = ENOSPC; + goto out; + } + if (!chginotifywatchcnt(sc->cred->cr_ruidinfo, 1, + inotify_max_user_watches)) { + atomic_subtract_int(&inotify_watches, 1); + error = ENOSPC; + goto out; + } + error = VOP_INOTIFY_ADD_WATCH(vp, sc, mask, &wd, td); + if (error != 0) { + atomic_subtract_int(&inotify_watches, 1); + (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0); + if (error == EJUSTRETURN) { + /* We updated an existing watch, everything is ok. 
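[For illustration, not part of the patch: the update path above gives inotify_add_watch_at() Linux-compatible flag semantics — re-adding a watch on the same vnode normally replaces its mask, IN_MASK_ADD merges the new mask into the existing one, and IN_MASK_CREATE turns the re-add into an EEXIST failure. A hedged userspace sketch against an already-open inotify descriptor:]

#include <sys/inotify.h>
#include <err.h>

static void
watch_mask_demo(int fd)
{
	/* Replaces any existing mask for /var/log on this descriptor. */
	if (inotify_add_watch(fd, "/var/log", IN_DELETE) == -1)
		err(1, "replace mask");

	/* Merges into the existing mask: now IN_DELETE | IN_ATTRIB. */
	if (inotify_add_watch(fd, "/var/log", IN_ATTRIB | IN_MASK_ADD) == -1)
		err(1, "merge mask");

	/* The watch already exists, so IN_MASK_CREATE fails with EEXIST. */
	if (inotify_add_watch(fd, "/var/log",
	    IN_MODIFY | IN_MASK_CREATE) == -1)
		warn("IN_MASK_CREATE on an existing watch");
}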
*/ + error = 0; + } else { + goto out; + } + } + td->td_retval[0] = wd; + +out: + if (vp != NULL) + vput(vp); + fdrop(fp, td); + return (error); +} + +int +sys_inotify_add_watch_at(struct thread *td, + struct inotify_add_watch_at_args *uap) +{ + return (kern_inotify_add_watch(uap->fd, uap->dfd, uap->path, + uap->mask, td)); +} + +int +kern_inotify_rm_watch(int fd, uint32_t wd, struct thread *td) +{ + struct file *fp; + struct inotify_softc *sc; + struct inotify_record *rec; + struct inotify_watch key, *watch; + int error; + + error = fget_inotify(td, fd, &cap_inotify_rm_rights, &fp); + if (error != 0) + return (error); + sc = fp->f_data; + + rec = inotify_alloc_record(wd, NULL, 0, IN_IGNORED, 0, M_WAITOK); + + /* + * For compatibility with Linux, we do not remove pending events + * associated with the watch. Watch descriptors are implemented so as + * to avoid being reused for as long as possible, so one hopes that any + * pending events from the removed watch descriptor will be removed + * before the watch descriptor is recycled. + */ + key.wd = wd; + mtx_lock(&sc->lock); + watch = RB_FIND(inotify_watch_tree, &sc->watches, &key); + if (watch == NULL) { + free(rec, M_INOTIFY); + error = EINVAL; + } else { + RB_REMOVE(inotify_watch_tree, &sc->watches, watch); + if (!inotify_queue_record(sc, rec)) { + free(rec, M_INOTIFY); + error = 0; + } + } + mtx_unlock(&sc->lock); + if (watch != NULL) + inotify_remove_watch(watch); + fdrop(fp, td); + return (error); +} + +int +sys_inotify_rm_watch(struct thread *td, struct inotify_rm_watch_args *uap) +{ + return (kern_inotify_rm_watch(uap->fd, uap->wd, td)); +} diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 86c7bdaa02c0..fb3e6a7a2534 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -75,14 +75,20 @@ static void NDVALIDATE_impl(struct nameidata *, int); #endif /* + * Reset ndp to its original state. + */ +#define NDRESET(ndp) do { \ + NDREINIT_DBG(ndp); \ + ndp->ni_resflags = 0; \ + ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \ +} while (0) +/* * Prepare namei() to restart. Reset components to its original state and set * ISRESTARTED flag which signals the underlying lookup code to change the root * from ABI root to actual root and prevents a further restarts. */ #define NDRESTART(ndp) do { \ - NDREINIT_DBG(ndp); \ - ndp->ni_resflags = 0; \ - ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \ + NDRESET(ndp); \ ndp->ni_cnd.cn_flags |= ISRESTARTED; \ } while (0) @@ -162,8 +168,8 @@ static struct vop_vector crossmp_vnodeops = { */ struct nameicap_tracker { - struct vnode *dp; TAILQ_ENTRY(nameicap_tracker) nm_link; + struct mount *mp; }; /* Zone for cap mode tracker elements used for dotdot capability checks. 
*/ @@ -192,49 +198,75 @@ SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN, "enables \"..\" components in path lookup in capability mode " "on non-local mount"); -static void +static int nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp) { struct nameicap_tracker *nt; + struct mount *mp; + int error; if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR) - return; + return (0); + mp = NULL; + error = VOP_GETWRITEMOUNT(dp, &mp); + if (error != 0) + return (error); nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head); - if (nt != NULL && nt->dp == dp) - return; + if (nt != NULL && nt->mp == mp) { + vfs_rel(mp); + return (0); + } nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK); - vhold(dp); - nt->dp = dp; - TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); + nt->mp = mp; + error = lockmgr(&mp->mnt_renamelock, LK_SHARED | LK_NOWAIT, 0); + if (error != 0) { + MPASS(ndp->ni_nctrack_mnt == NULL); + ndp->ni_nctrack_mnt = mp; + free(nt, M_NAMEITRACKER); + error = ERESTART; + } else { + TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link); + } + return (error); } static void -nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first) +nameicap_cleanup(struct nameidata *ndp, int error) { struct nameicap_tracker *nt, *nt1; + struct mount *mp; + + KASSERT((ndp->ni_nctrack_mnt == NULL && + TAILQ_EMPTY(&ndp->ni_cap_tracker)) || + (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, + ("tracker active and not strictrelative")); - nt = first; - TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) { + mp = nt->mp; + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(mp); TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link); - vdrop(nt->dp); free(nt, M_NAMEITRACKER); } -} -static void -nameicap_cleanup(struct nameidata *ndp) -{ - KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) || - (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative")); - nameicap_cleanup_from(ndp, NULL); + mp = ndp->ni_nctrack_mnt; + if (mp != NULL) { + if (error == ERESTART) { + lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0); + lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0); + } + vfs_rel(mp); + ndp->ni_nctrack_mnt = NULL; + } } /* - * For dotdot lookups in capability mode, only allow the component - * lookup to succeed if the resulting directory was already traversed - * during the operation. This catches situations where already - * traversed directory is moved to different parent, and then we walk - * over it with dotdots. + * For dotdot lookups in capability mode, disallow walking over the + * directory no_rbeneath_dpp that was used as the starting point of + * the lookup. Since we take the mnt_renamelocks of all mounts we + * ever walked over during lookup, parallel renames are disabled. + * This prevents the situation where we circumvent walk over + * ni_rbeneath_dpp following dotdots. 
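[Editorial aside, illustrative only: the tracker now records mounts and relies on a try-then-restart idiom — mnt_renamelock is taken shared with LK_NOWAIT while lookup locks are held, and if that fails the lookup unwinds, nameicap_cleanup() briefly takes the lock exclusively to wait out the concurrent rename, and namei() restarts. Reduced to its skeleton, with do_locked_work() as a placeholder:]

	int error;

	for (;;) {
		error = do_locked_work(ndp);	/* may fail with ERESTART */
		if (error != ERESTART)
			break;
		/*
		 * Everything has been unwound; block until the conflicting
		 * rename drains, then retry the whole lookup.
		 */
		lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, NULL);
		lockmgr(&mp->mnt_renamelock, LK_RELEASE, NULL);
	}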
* * Also allow to force failure of dotdot lookups for non-local * filesystems, where external agents might assist local lookups to @@ -243,7 +275,6 @@ nameicap_cleanup(struct nameidata *ndp) static int nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) { - struct nameicap_tracker *nt; struct mount *mp; if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf & @@ -253,22 +284,16 @@ nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp) NI_LCF_CAP_DOTDOT_KTR)) == NI_LCF_STRICTREL_KTR)) NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf); if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0) - return (ENOTCAPABLE); + goto violation; + if (dp == ndp->ni_rbeneath_dpp) + goto violation; mp = dp->v_mount; if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL && (mp->mnt_flag & MNT_LOCAL) == 0) - goto capfail; - TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head, - nm_link) { - if (dp == nt->dp) { - nt = TAILQ_NEXT(nt, nm_link); - if (nt != NULL) - nameicap_cleanup_from(ndp, nt); - return (0); - } - } + goto violation; + return (0); -capfail: +violation: if (__predict_false((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0)) NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf); return (ENOTCAPABLE); @@ -394,6 +419,8 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) NI_LCF_CAP_DOTDOT; } } + if (error == 0 && (ndp->ni_lcf & NI_LCF_STRICTREL) != 0) + ndp->ni_rbeneath_dpp = *dpp; /* * If we are auditing the kernel pathname, save the user pathname. @@ -631,6 +658,7 @@ restart: error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); return (error); @@ -661,12 +689,12 @@ restart: else if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir && (cnp->cn_flags & ISRESTARTED) == 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, ERESTART); NDRESTART(ndp); goto restart; } return (error); case CACHE_FPL_STATUS_PARTIAL: - TAILQ_INIT(&ndp->ni_cap_tracker); dp = ndp->ni_startdir; break; case CACHE_FPL_STATUS_DESTROYED: @@ -674,18 +702,21 @@ restart: error = namei_getpath(ndp); if (__predict_false(error != 0)) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); return (error); } cnp->cn_nameptr = cnp->cn_pnbuf; /* FALLTHROUGH */ case CACHE_FPL_STATUS_ABORTED: - TAILQ_INIT(&ndp->ni_cap_tracker); MPASS(ndp->ni_lcf == 0); if (*cnp->cn_pnbuf == '\0') { if ((cnp->cn_flags & EMPTYPATH) != 0) { - return (namei_emptypath(ndp)); + error = namei_emptypath(ndp); + nameicap_cleanup(ndp, error); + return (error); } namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, ENOENT); SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL, false, ndp); return (ENOENT); @@ -693,6 +724,7 @@ restart: error = namei_setup(ndp, &dp, &pwd); if (error != 0) { namei_cleanup_cnp(cnp); + nameicap_cleanup(ndp, error); return (error); } break; @@ -705,16 +737,23 @@ restart: ndp->ni_startdir = dp; error = vfs_lookup(ndp); if (error != 0) { - if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir && - error == ENOENT && - (cnp->cn_flags & ISRESTARTED) == 0)) { - nameicap_cleanup(ndp); - pwd_drop(pwd); - namei_cleanup_cnp(cnp); - NDRESTART(ndp); - goto restart; - } else + uint64_t was_restarted; + bool abi_restart; + + was_restarted = ndp->ni_cnd.cn_flags & + ISRESTARTED; + abi_restart = pwd->pwd_adir != pwd->pwd_rdir && + error == ENOENT && was_restarted == 0; + if (error != ERESTART && !abi_restart) goto out; + nameicap_cleanup(ndp, error); + pwd_drop(pwd); + namei_cleanup_cnp(cnp); + NDRESET(ndp); + if 
(abi_restart) + was_restarted = ISRESTARTED; + ndp->ni_cnd.cn_flags |= was_restarted; + goto restart; } /* @@ -723,7 +762,7 @@ restart: if ((cnp->cn_flags & ISSYMLINK) == 0) { SDT_PROBE4(vfs, namei, lookup, return, error, ndp->ni_vp, false, ndp); - nameicap_cleanup(ndp); + nameicap_cleanup(ndp, 0); pwd_drop(pwd); NDVALIDATE(ndp); return (0); @@ -756,10 +795,10 @@ restart: ndp->ni_vp = NULL; vrele(ndp->ni_dvp); out: - MPASS(error != 0); + MPASS(error != 0 && error != ERESTART); SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp); namei_cleanup_cnp(cnp); - nameicap_cleanup(ndp); + nameicap_cleanup(ndp, error); pwd_drop(pwd); return (error); } @@ -1185,7 +1224,9 @@ dirloop: } } - nameicap_tracker_add(ndp, dp); + error = nameicap_tracker_add(ndp, dp); + if (error != 0) + goto bad; /* * Make sure degenerate names don't get here, their handling was @@ -1210,9 +1251,7 @@ dirloop: * the jail or chroot, don't let them out. * 5. If doing a capability lookup and lookup_cap_dotdot is * enabled, return ENOTCAPABLE if the lookup would escape - * from the initial file descriptor directory. Checks are - * done by ensuring that namei() already traversed the - * result of dotdot lookup. + * from the initial file descriptor directory. */ if (cnp->cn_flags & ISDOTDOT) { if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL_KTR | @@ -1238,7 +1277,7 @@ dirloop: NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf); if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0) { error = ENOTCAPABLE; - goto capdotdot; + goto bad; } } if (isroot || ((dp->v_vflag & VV_ROOT) != 0 && @@ -1261,11 +1300,6 @@ dirloop: vn_lock(dp, enforce_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); - error = nameicap_check_dotdot(ndp, dp); - if (error != 0) { -capdotdot: - goto bad; - } } } @@ -1314,7 +1348,9 @@ unionlookup: vn_lock(dp, enforce_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); - nameicap_tracker_add(ndp, dp); + error = nameicap_tracker_add(ndp, dp); + if (error != 0) + goto bad; goto unionlookup; } @@ -1415,7 +1451,7 @@ nextname: goto dirloop; } if (cnp->cn_flags & ISDOTDOT) { - error = nameicap_check_dotdot(ndp, ndp->ni_vp); + error = nameicap_check_dotdot(ndp, ndp->ni_dvp); if (error != 0) goto bad2; } @@ -1485,8 +1521,11 @@ success: } success_right_lock: if (ndp->ni_vp != NULL) { - if ((cnp->cn_flags & ISDOTDOT) == 0) - nameicap_tracker_add(ndp, ndp->ni_vp); + if ((cnp->cn_flags & ISDOTDOT) == 0) { + error = nameicap_tracker_add(ndp, ndp->ni_vp); + if (error != 0) + goto bad2; + } if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS) return (vfs_lookup_failifexists(ndp)); } diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index cb18468d28bc..8e64a7fe966b 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -156,6 +156,7 @@ mount_init(void *mem, int size, int flags) mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF); mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF); lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0); + lockinit(&mp->mnt_renamelock, PVFS, "rename", 0, 0); mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO); mp->mnt_ref = 0; mp->mnt_vfs_ops = 1; @@ -170,6 +171,7 @@ mount_fini(void *mem, int size) mp = (struct mount *)mem; uma_zfree_pcpu(pcpu_zone_16, mp->mnt_pcpu); + lockdestroy(&mp->mnt_renamelock); lockdestroy(&mp->mnt_explock); mtx_destroy(&mp->mnt_listmtx); mtx_destroy(&mp->mnt_mtx); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index dc2fb59fb81c..918b256e6c59 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -38,7 +38,6 @@ * 
External virtual filesystem routines */ -#include <sys/cdefs.h> #include "opt_ddb.h" #include "opt_watchdog.h" @@ -57,6 +56,7 @@ #include <sys/extattr.h> #include <sys/file.h> #include <sys/fcntl.h> +#include <sys/inotify.h> #include <sys/jail.h> #include <sys/kdb.h> #include <sys/kernel.h> @@ -5246,7 +5246,8 @@ destroy_vpollinfo_free(struct vpollinfo *vi) static void destroy_vpollinfo(struct vpollinfo *vi) { - + KASSERT(TAILQ_EMPTY(&vi->vpi_inotify), + ("%s: pollinfo %p has lingering watches", __func__, vi)); knlist_clear(&vi->vpi_selinfo.si_note, 1); seldrain(&vi->vpi_selinfo); destroy_vpollinfo_free(vi); @@ -5260,12 +5261,13 @@ v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; - if (vp->v_pollinfo != NULL) + if (atomic_load_ptr(&vp->v_pollinfo) != NULL) return; vi = malloc(sizeof(*vi), M_VNODEPOLL, M_WAITOK | M_ZERO); mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knl_assert_lock); + TAILQ_INIT(&vi->vpi_inotify); VI_LOCK(vp); if (vp->v_pollinfo != NULL) { VI_UNLOCK(vp); @@ -5851,6 +5853,8 @@ vop_rename_pre(void *ap) struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS + struct mount *tmp; + if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); @@ -5868,6 +5872,11 @@ vop_rename_pre(void *ap) if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); + + tmp = NULL; + VOP_GETWRITEMOUNT(a->a_tdvp, &tmp); + lockmgr_assert(&tmp->mnt_renamelock, KA_XLOCKED); + vfs_rel(tmp); #endif /* * It may be tempting to add vn_seqc_write_begin/end calls here and @@ -6057,6 +6066,28 @@ vop_need_inactive_debugpost(void *ap, int rc) #endif void +vop_allocate_post(void *ap, int rc) +{ + struct vop_allocate_args *a; + + a = ap; + if (rc == 0) + INOTIFY(a->a_vp, IN_MODIFY); +} + +void +vop_copy_file_range_post(void *ap, int rc) +{ + struct vop_copy_file_range_args *a; + + a = ap; + if (rc == 0) { + INOTIFY(a->a_invp, IN_ACCESS); + INOTIFY(a->a_outvp, IN_MODIFY); + } +} + +void vop_create_pre(void *ap) { struct vop_create_args *a; @@ -6076,8 +6107,20 @@ vop_create_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } +} + +void +vop_deallocate_post(void *ap, int rc) +{ + struct vop_deallocate_args *a; + + a = ap; + if (rc == 0) + INOTIFY(a->a_vp, IN_MODIFY); } void @@ -6122,8 +6165,10 @@ vop_deleteextattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6153,6 +6198,8 @@ vop_link_post(void *ap, int rc) if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_LINK); VFS_KNOTE_LOCKED(tdvp, NOTE_WRITE); + INOTIFY_NAME(vp, tdvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT); + INOTIFY_NAME(vp, tdvp, a->a_cnp, IN_CREATE); } } @@ -6176,8 +6223,10 @@ vop_mkdir_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } #ifdef DEBUG_VFS_LOCKS @@ -6212,8 +6261,10 @@ vop_mknod_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } void @@ -6225,8 +6276,10 @@ vop_reclaim_post(void *ap, int rc) a = ap; vp = a->a_vp; 
ASSERT_VOP_IN_SEQC(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_REVOKE); + INOTIFY_REVOKE(vp); + } } void @@ -6257,6 +6310,8 @@ vop_remove_post(void *ap, int rc) if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); + INOTIFY_NAME(vp, dvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT); + INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE); } } @@ -6288,6 +6343,8 @@ vop_rename_post(void *ap, int rc) VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); + INOTIFY_MOVE(a->a_fvp, a->a_fdvp, a->a_fcnp, a->a_tvp, + a->a_tdvp, a->a_tcnp); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); @@ -6327,6 +6384,7 @@ vop_rmdir_post(void *ap, int rc) vp->v_vflag |= VV_UNLINKED; VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(vp, NOTE_DELETE); + INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE); } } @@ -6350,8 +6408,10 @@ vop_setattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6396,8 +6456,10 @@ vop_setextattr_post(void *ap, int rc) a = ap; vp = a->a_vp; vn_seqc_write_end(vp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); + INOTIFY(vp, IN_ATTRIB); + } } void @@ -6420,8 +6482,10 @@ vop_symlink_post(void *ap, int rc) a = ap; dvp = a->a_dvp; vn_seqc_write_end(dvp); - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE); + } } void @@ -6429,8 +6493,10 @@ vop_open_post(void *ap, int rc) { struct vop_open_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN); + INOTIFY(a->a_vp, IN_OPEN); + } } void @@ -6442,6 +6508,8 @@ vop_close_post(void *ap, int rc) !VN_IS_DOOMED(a->a_vp))) { VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ? NOTE_CLOSE_WRITE : NOTE_CLOSE); + INOTIFY(a->a_vp, (a->a_fflag & FWRITE) != 0 ? 
+ IN_CLOSE_WRITE : IN_CLOSE_NOWRITE); } } @@ -6450,8 +6518,10 @@ vop_read_post(void *ap, int rc) { struct vop_read_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + INOTIFY(a->a_vp, IN_ACCESS); + } } void @@ -6468,8 +6538,10 @@ vop_readdir_post(void *ap, int rc) { struct vop_readdir_args *a = ap; - if (!rc) + if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ); + INOTIFY(a->a_vp, IN_ACCESS); + } } static struct knlist fs_knlist; diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index c236f241bf20..d880733cbfe7 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3766,7 +3766,7 @@ int kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, const char *new, enum uio_seg pathseg) { - struct mount *mp = NULL; + struct mount *mp, *tmp; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; uint64_t tondflags; @@ -3774,6 +3774,7 @@ kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, short irflag; again: + tmp = mp = NULL; bwillwrite(); #ifdef MAC if (mac_vnode_check_rename_from_enabled()) { @@ -3809,6 +3810,7 @@ again: tvp = tond.ni_vp; error = vn_start_write(fvp, &mp, V_NOWAIT); if (error != 0) { +again1: NDFREE_PNBUF(&fromnd); NDFREE_PNBUF(&tond); if (tvp != NULL) @@ -3819,11 +3821,25 @@ again: vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); + if (tmp != NULL) { + lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); + lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); + vfs_rel(tmp); + tmp = NULL; + } error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); if (error != 0) return (error); goto again; } + error = VOP_GETWRITEMOUNT(tdvp, &tmp); + if (error != 0 || tmp == NULL) + goto again1; + error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); + if (error != 0) { + vn_finished_write(mp); + goto again1; + } irflag = vn_irflag_read(fvp); if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || (irflag & VIRF_NAMEDDIR) != 0) { @@ -3884,6 +3900,8 @@ out: vrele(fromnd.ni_dvp); vrele(fvp); } + lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); + vfs_rel(tmp); vn_finished_write(mp); out1: if (error == ERESTART) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index b29286654f60..6451c9e07a60 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -41,6 +41,7 @@ */ #include "opt_hwpmc_hooks.h" +#include "opt_hwt_hooks.h" #include <sys/param.h> #include <sys/systm.h> @@ -51,6 +52,7 @@ #include <sys/fcntl.h> #include <sys/file.h> #include <sys/filio.h> +#include <sys/inotify.h> #include <sys/ktr.h> #include <sys/ktrace.h> #include <sys/limits.h> @@ -86,6 +88,10 @@ #include <sys/pmckern.h> #endif +#ifdef HWT_HOOKS +#include <dev/hwt/hwt_hook.h> +#endif + static fo_rdwr_t vn_read; static fo_rdwr_t vn_write; static fo_rdwr_t vn_io_fault; @@ -303,7 +309,8 @@ restart: NDREINIT(ndp); goto restart; } - if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0) + if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0 || + (vn_irflag_read(ndp->ni_dvp) & VIRF_INOTIFY) != 0) ndp->ni_cnd.cn_flags |= MAKEENTRY; #ifdef MAC error = mac_vnode_check_create(cred, ndp->ni_dvp, @@ -479,6 +486,7 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, if (vp->v_type != VFIFO && vp->v_type != VSOCK && VOP_ACCESS(vp, VREAD, cred, td) == 0) fp->f_flag |= FKQALLOWED; + INOTIFY(vp, IN_OPEN); return (0); } @@ -1741,6 +1749,8 @@ vn_truncate_locked(struct vnode *vp, off_t length, bool sync, vattr.va_vaflags |= VA_SYNC; error = VOP_SETATTR(vp, &vattr, cred); VOP_ADD_WRITECOUNT_CHECKED(vp, -1); + if (error == 0) + 
INOTIFY(vp, IN_MODIFY); } return (error); } @@ -3005,6 +3015,24 @@ vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, } } #endif + +#ifdef HWT_HOOKS + if (HWT_HOOK_INSTALLED && (prot & VM_PROT_EXECUTE) != 0 && + error == 0) { + struct hwt_record_entry ent; + char *fullpath; + char *freepath; + + if (vn_fullpath(vp, &fullpath, &freepath) == 0) { + ent.fullpath = fullpath; + ent.addr = (uintptr_t) *addr; + ent.record_type = HWT_RECORD_MMAP; + HWT_CALL_HOOK(td, HWT_MMAP, &ent); + free(freepath, M_TEMP); + } + } +#endif + return (error); } diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index a2b6a7c8ff9f..38138a4af921 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -702,6 +702,7 @@ vop_vptocnp { %% allocate vp E E E +%! allocate post vop_allocate_post vop_allocate { IN struct vnode *vp; @@ -786,6 +787,7 @@ vop_fdatasync { %% copy_file_range invp U U U %% copy_file_range outvp U U U +%! copy_file_range post vop_copy_file_range_post vop_copy_file_range { IN struct vnode *invp; @@ -810,6 +812,7 @@ vop_vput_pair { %% deallocate vp L L L +%! deallocate post vop_deallocate_post vop_deallocate { IN struct vnode *vp; @@ -821,6 +824,27 @@ vop_deallocate { }; +%% inotify vp - - - + +vop_inotify { + IN struct vnode *vp; + IN struct vnode *dvp; + IN struct componentname *cnp; + IN int event; + IN uint32_t cookie; +}; + + +%% inotify_add_watch vp L L L + +vop_inotify_add_watch { + IN struct vnode *vp; + IN struct inotify_softc *sc; + IN uint32_t mask; + OUT uint32_t *wdp; + IN struct thread *td; +}; + # The VOPs below are spares at the end of the table to allow new VOPs to be # added in stable branches without breaking the KBI. New VOPs in HEAD should # be added above these spares. When merging a new VOP to a stable branch, diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 35cf17be109f..9922796f8a1d 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -141,6 +141,7 @@ SUBDIR= \ ${_hptnr} \ ${_hptrr} \ hwpmc \ + ${_hwt} \ ${_hyperv} \ i2c \ ${_iavf} \ @@ -859,6 +860,10 @@ _smartpqi= smartpqi _p2sb= p2sb .endif +.if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" +_hwt= hwt +.endif + .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "riscv" .if ${MK_BHYVE} != "no" || defined(ALL_MODULES) diff --git a/sys/modules/hwt/Makefile b/sys/modules/hwt/Makefile new file mode 100644 index 000000000000..6704e22422d1 --- /dev/null +++ b/sys/modules/hwt/Makefile @@ -0,0 +1,21 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/dev/hwt + +KMOD = hwt +SRCS = \ + hwt.c \ + hwt_backend.c \ + hwt_config.c \ + hwt_context.c \ + hwt_contexthash.c \ + hwt_cpu.c \ + hwt_hook.c \ + hwt_ioctl.c \ + hwt_owner.c \ + hwt_ownerhash.c \ + hwt_record.c \ + hwt_thread.c \ + hwt_vm.c + +.include <bsd.kmod.mk> diff --git a/sys/modules/iwlwifi/Makefile b/sys/modules/iwlwifi/Makefile index 6e0fea6efc3a..9774c3da61ee 100644 --- a/sys/modules/iwlwifi/Makefile +++ b/sys/modules/iwlwifi/Makefile @@ -4,6 +4,7 @@ DEVIWLWIFIDIR= ${SRCTOP}/sys/contrib/dev/iwlwifi WITH_CONFIG_PM= 0 WITH_DEBUGFS= 1 +WITH_CONFIG_ACPI= 1 KMOD= if_iwlwifi @@ -40,6 +41,12 @@ CFLAGS+= -DCONFIG_PM CFLAGS+= -DCONFIG_PM_SLEEP .endif +.if defined(WITH_CONFIG_ACPI) && ${WITH_CONFIG_ACPI} > 0 +SRCS+= fw/acpi.c +CFLAGS+= -DCONFIG_ACPI +CFLAGS+= -DLINUXKPI_WANT_LINUX_ACPI +.endif + SRCS+= iwl-devtrace.c # Other @@ -56,7 +63,6 @@ CFLAGS+= -DCONFIG_IWLMVM=1 # Helpful after fresh imports. 
#CFLAGS+= -ferror-limit=0 -#CFLAGS+= -DCONFIG_ACPI=1 #CFLAGS+= -DCONFIG_INET=1 # Need LKPI TSO implementation. #CFLAGS+= -DCONFIG_IPV6=1 CFLAGS+= -DCONFIG_IWLWIFI_DEBUG=1 diff --git a/sys/modules/rtw89/Makefile b/sys/modules/rtw89/Makefile index 73945591826c..09580f288c62 100644 --- a/sys/modules/rtw89/Makefile +++ b/sys/modules/rtw89/Makefile @@ -39,6 +39,7 @@ SRCS+= ${LINUXKPI_GENSRCS} SRCS+= opt_wlan.h opt_inet6.h opt_inet.h opt_acpi.h CFLAGS+= -DKBUILD_MODNAME='"rtw89"' +CFLAGS+= -DLINUXKPI_WANT_LINUX_ACPI CFLAGS+= -I${DEVRTW89DIR} CFLAGS+= ${LINUXKPI_INCLUDES} diff --git a/sys/modules/sound/sound/Makefile b/sys/modules/sound/sound/Makefile index d2cfed2f4b6a..f3978e9bd9cc 100644 --- a/sys/modules/sound/sound/Makefile +++ b/sys/modules/sound/sound/Makefile @@ -13,11 +13,11 @@ SRCS+= feeder.c feeder_rate.c feeder_volume.c SRCS+= feeder_chain.c feeder_eq.c feeder_format.c SRCS+= feeder_matrix.c feeder_mixer.c SRCS+= feeder_eq_gen.h feeder_rate_gen.h snd_fxdiv_gen.h -SRCS+= mpu_if.h mpufoi_if.h synth_if.h -SRCS+= mpu_if.c mpufoi_if.c synth_if.c +SRCS+= mpu_if.h mpufoi_if.h +SRCS+= mpu_if.c mpufoi_if.c SRCS+= ac97.c buffer.c channel.c dsp.c SRCS+= mixer.c sndstat.c sound.c vchan.c -SRCS+= midi.c mpu401.c sequencer.c +SRCS+= midi.c mpu401.c feeder_eq_gen.h: ${SYSDIR}/tools/sound/feeder_eq_mkfilter.awk ${AWK} -f ${SYSDIR}/tools/sound/feeder_eq_mkfilter.awk -- ${FEEDER_EQ_PRESETS} > ${.TARGET} diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index 6eefedba8775..cf4f75bd0b6c 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -81,6 +81,23 @@ struct ether_addr { (addr)[3] | (addr)[4] | (addr)[5]) == 0x00) /* + * 802.1q VID constants from IEEE 802.1Q-2014, table 9-2. + */ + +/* Null VID: The tag contains only PCP (priority) and DEI information. */ +#define DOT1Q_VID_NULL 0x0 +/* The default PVID for a bridge port. NB: bridge(4) does not honor this. */ +#define DOT1Q_VID_DEF_PVID 0x1 +/* The default SR_PVID for SRP Stream related traffic. */ +#define DOT1Q_VID_DEF_SR_PVID 0x2 +/* A VID reserved for implementation use, not permitted on the wire. */ +#define DOT1Q_VID_RSVD_IMPL 0xfff +/* The lowest valid VID. */ +#define DOT1Q_VID_MIN 0x1 +/* The highest valid VID. */ +#define DOT1Q_VID_MAX 0xffe + +/* * This is the type of the VLAN ID inside the tag, not the tag itself. */ typedef uint16_t ether_vlanid_t; diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c index bc421a8e156d..5b3ee740d75e 100644 --- a/sys/net/if_bridge.c +++ b/sys/net/if_bridge.c @@ -254,6 +254,8 @@ struct bridge_iflist { uint32_t bif_addrcnt; /* cur. 
# of addresses */ uint32_t bif_addrexceeded;/* # of address violations */ struct epoch_context bif_epoch_ctx; + ether_vlanid_t bif_untagged; /* untagged vlan id */ + ifbvlan_set_t bif_vlan_set; /* allowed tagged vlans */ }; /* @@ -331,13 +333,12 @@ static void bridge_inject(struct ifnet *, struct mbuf *); static int bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); static int bridge_enqueue(struct bridge_softc *, struct ifnet *, - struct mbuf *); + struct mbuf *, struct bridge_iflist *); static void bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp, int); static void bridge_forward(struct bridge_softc *, struct bridge_iflist *, struct mbuf *m); static bool bridge_member_ifaddrs(void); - static void bridge_timer(void *); static void bridge_broadcast(struct bridge_softc *, struct ifnet *, @@ -353,6 +354,9 @@ static void bridge_rtage(struct bridge_softc *); static void bridge_rtflush(struct bridge_softc *, int); static int bridge_rtdaddr(struct bridge_softc *, const uint8_t *, ether_vlanid_t); +static bool bridge_vfilter_in(const struct bridge_iflist *, struct mbuf *); +static bool bridge_vfilter_out(const struct bridge_iflist *, + const struct mbuf *); static void bridge_rtable_init(struct bridge_softc *); static void bridge_rtable_fini(struct bridge_softc *); @@ -400,6 +404,9 @@ static int bridge_ioctl_sma(struct bridge_softc *, void *); static int bridge_ioctl_sifprio(struct bridge_softc *, void *); static int bridge_ioctl_sifcost(struct bridge_softc *, void *); static int bridge_ioctl_sifmaxaddr(struct bridge_softc *, void *); +static int bridge_ioctl_sifuntagged(struct bridge_softc *, void *); +static int bridge_ioctl_sifvlanset(struct bridge_softc *, void *); +static int bridge_ioctl_gifvlanset(struct bridge_softc *, void *); static int bridge_ioctl_addspan(struct bridge_softc *, void *); static int bridge_ioctl_delspan(struct bridge_softc *, void *); static int bridge_ioctl_gbparam(struct bridge_softc *, void *); @@ -618,6 +625,14 @@ static const struct bridge_control bridge_control_table[] = { { bridge_ioctl_sifmaxaddr, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER }, + { bridge_ioctl_sifuntagged, sizeof(struct ifbreq), + BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_sifvlanset, sizeof(struct ifbif_vlan_req), + BC_F_COPYIN|BC_F_SUSER }, + + { bridge_ioctl_gifvlanset, sizeof(struct ifbif_vlan_req), + BC_F_COPYIN|BC_F_COPYOUT }, }; static const int bridge_control_table_size = nitems(bridge_control_table); @@ -832,6 +847,7 @@ bridge_clone_create(struct if_clone *ifc, char *name, size_t len, ifp->if_softc = sc; if_initname(ifp, bridge_name, ifd->unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_capabilities = ifp->if_capenable = IFCAP_VLAN_HWTAGGING; ifp->if_ioctl = bridge_ioctl; #ifdef ALTQ ifp->if_start = bridge_altq_start; @@ -954,6 +970,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) struct ifbaconf ifbaconf; struct ifbrparam ifbrparam; struct ifbropreq ifbropreq; + struct ifbif_vlan_req ifvlanreq; } args; struct ifdrv *ifd = (struct ifdrv *) data; const struct bridge_control *bc; @@ -1495,6 +1512,7 @@ bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg) req->ifbr_addrcnt = bif->bif_addrcnt; req->ifbr_addrmax = bif->bif_addrmax; req->ifbr_addrexceeded = bif->bif_addrexceeded; + req->ifbr_untagged = bif->bif_untagged; /* Copy STP state options as flags */ if (bp->bp_operedge) @@ -1873,6 +1891,84 @@ bridge_ioctl_sifmaxaddr(struct bridge_softc *sc, void *arg) } static int 
+bridge_ioctl_sifuntagged(struct bridge_softc *sc, void *arg) +{ + struct ifbreq *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->ifbr_ifsname); + if (bif == NULL) + return (ENOENT); + + if (req->ifbr_untagged > DOT1Q_VID_MAX) + return (EINVAL); + + if (req->ifbr_untagged != DOT1Q_VID_NULL) + bif->bif_flags |= IFBIF_VLANFILTER; + bif->bif_untagged = req->ifbr_untagged; + return (0); +} + +static int +bridge_ioctl_sifvlanset(struct bridge_softc *sc, void *arg) +{ + struct ifbif_vlan_req *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->bv_ifname); + if (bif == NULL) + return (ENOENT); + + /* Reject invalid VIDs. */ + if (BRVLAN_TEST(&req->bv_set, DOT1Q_VID_NULL) || + BRVLAN_TEST(&req->bv_set, DOT1Q_VID_RSVD_IMPL)) + return (EINVAL); + + switch (req->bv_op) { + /* Replace the existing vlan set with the new set */ + case BRDG_VLAN_OP_SET: + BIT_COPY(BRVLAN_SETSIZE, &req->bv_set, &bif->bif_vlan_set); + break; + + /* Modify the existing vlan set to add the given vlans */ + case BRDG_VLAN_OP_ADD: + BIT_OR(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + break; + + /* Modify the existing vlan set to remove the given vlans */ + case BRDG_VLAN_OP_DEL: + BIT_ANDNOT(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + break; + + /* Invalid or unknown operation */ + default: + return (EINVAL); + } + + /* + * The only reason to modify the VLAN access list is to use VLAN + * filtering on this interface, so enable it automatically. + */ + bif->bif_flags |= IFBIF_VLANFILTER; + + return (0); +} + +static int +bridge_ioctl_gifvlanset(struct bridge_softc *sc, void *arg) +{ + struct ifbif_vlan_req *req = arg; + struct bridge_iflist *bif; + + bif = bridge_lookup_member(sc, req->bv_ifname); + if (bif == NULL) + return (ENOENT); + + BIT_COPY(BRVLAN_SETSIZE, &bif->bif_vlan_set, &req->bv_set); + return (0); +} + +static int bridge_ioctl_addspan(struct bridge_softc *sc, void *arg) { struct ifbreq *req = arg; @@ -2150,12 +2246,25 @@ bridge_stop(struct ifnet *ifp, int disable) * */ static int -bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) +bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m, + struct bridge_iflist *bif) { int len, err = 0; short mflags; struct mbuf *m0; + /* + * Find the bridge member port this packet is being sent on, if the + * caller didn't already provide it. + */ + if (bif == NULL) + bif = bridge_lookup_member_if(sc, dst_ifp); + if (bif == NULL) { + /* Perhaps the interface was removed from the bridge */ + m_freem(m); + return (EINVAL); + } + /* We may be sending a fragment so traverse the mbuf */ for (; m; m = m0) { m0 = m->m_nextpkt; @@ -2164,6 +2273,18 @@ bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m) mflags = m->m_flags; /* + * If VLAN filtering is enabled, and the native VLAN ID of the + * outgoing interface matches the VLAN ID of the frame, remove + * the VLAN header. + */ + if ((bif->bif_flags & IFBIF_VLANFILTER) && + bif->bif_untagged != DOT1Q_VID_NULL && + VLANTAGOF(m) == bif->bif_untagged) { + m->m_flags &= ~M_VLANTAG; + m->m_pkthdr.ether_vtag = 0; + } + + /* * If underlying interface can not do VLAN tag insertion itself * then attach a packet tag that holds it. 
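[For context, illustrative and not part of the change: the new vlan-set requests are driven through the usual bridge SIOCSDRVSPEC plumbing. A hedged sketch of a userspace helper that adds tagged VLANs 10-20 to a member port, assuming the standard struct ifdrv wrapper and the BIT_*() macros from <sys/bitset.h>; field names follow the struct ifbif_vlan_req usage shown above:]

#include <sys/types.h>
#include <sys/bitset.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/if.h>
#include <net/if_bridgevar.h>
#include <err.h>
#include <string.h>

static void
bridge_add_tagged(int s, const char *bridge, const char *member)
{
	struct ifbif_vlan_req req;
	struct ifdrv ifd;

	memset(&req, 0, sizeof(req));
	strlcpy(req.bv_ifname, member, sizeof(req.bv_ifname));
	req.bv_op = BRDG_VLAN_OP_ADD;
	for (int vid = 10; vid <= 20; vid++)
		BIT_SET(BRVLAN_SETSIZE, vid, &req.bv_set);

	memset(&ifd, 0, sizeof(ifd));
	strlcpy(ifd.ifd_name, bridge, sizeof(ifd.ifd_name));
	ifd.ifd_cmd = BRDGSIFVLANSET;
	ifd.ifd_len = sizeof(req);
	ifd.ifd_data = &req;
	if (ioctl(s, SIOCSDRVSPEC, &ifd) == -1)
		err(1, "BRDGSIFVLANSET");
}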
*/ @@ -2234,7 +2355,7 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp) return; } - bridge_enqueue(sc, ifp, m); + bridge_enqueue(sc, ifp, m, NULL); } /* @@ -2329,7 +2450,7 @@ bridge_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, } } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, bif); } if (used == 0) m_freem(m); @@ -2347,7 +2468,7 @@ sendunicast: return (0); } - bridge_enqueue(sc, dst_if, m); + bridge_enqueue(sc, dst_if, m, NULL); return (0); } @@ -2364,17 +2485,18 @@ bridge_transmit(struct ifnet *ifp, struct mbuf *m) struct ether_header *eh; struct ifnet *dst_if; int error = 0; + ether_vlanid_t vlan; sc = ifp->if_softc; ETHER_BPF_MTAP(ifp, m); eh = mtod(m, struct ether_header *); + vlan = VLANTAGOF(m); if (((m->m_flags & (M_BCAST|M_MCAST)) == 0) && - (dst_if = bridge_rtlookup(sc, eh->ether_dhost, DOT1Q_VID_NULL)) != - NULL) { - error = bridge_enqueue(sc, dst_if, m); + (dst_if = bridge_rtlookup(sc, eh->ether_dhost, vlan)) != NULL) { + error = bridge_enqueue(sc, dst_if, m, NULL); } else bridge_broadcast(sc, ifp, m, 0); @@ -2435,18 +2557,18 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, struct bridge_iflist *dbif; struct ifnet *src_if, *dst_if, *ifp; struct ether_header *eh; - uint16_t vlan; uint8_t *dst; int error; + ether_vlanid_t vlan; NET_EPOCH_ASSERT(); src_if = m->m_pkthdr.rcvif; ifp = sc->sc_ifp; + vlan = VLANTAGOF(m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); - vlan = VLANTAGOF(m); if ((sbif->bif_flags & IFBIF_STP) && sbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) @@ -2555,6 +2677,10 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, if (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE) goto drop; + /* Do VLAN filtering. */ + if (!bridge_vfilter_out(dbif, m)) + goto drop; + if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) goto drop; @@ -2566,7 +2692,7 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif, return; } - bridge_enqueue(sc, dst_if, m); + bridge_enqueue(sc, dst_if, m, dbif); return; drop: @@ -2636,6 +2762,15 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) return (NULL); } + /* Do VLAN filtering. */ + if (!bridge_vfilter_in(bif, m)) { + if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + return (NULL); + } + /* bridge_vfilter_in() may add a tag */ + vlan = VLANTAGOF(m); + bridge_span(sc, m); if (m->m_flags & (M_BCAST|M_MCAST)) { @@ -2761,6 +2896,15 @@ bridge_input(struct ifnet *ifp, struct mbuf *m) } \ if ((iface) != bifp) \ ETHER_BPF_MTAP(iface, m); \ + /* Pass tagged packets to if_vlan, if it's loaded */ \ + if (VLANTAGOF(m) != 0) { \ + if (bifp->if_vlantrunk == NULL) { \ + m_freem(m); \ + return (NULL); \ + } \ + (*vlan_input_p)(bifp, m); \ + return (NULL); \ + } \ return (m); \ } \ \ @@ -2817,6 +2961,30 @@ bridge_inject(struct ifnet *ifp, struct mbuf *m) { struct bridge_softc *sc; + if (ifp->if_type == IFT_L2VLAN) { + /* + * vlan(4) gives us the vlan ifnet, so we need to get the + * bridge softc to get a pointer to ether_input to send the + * packet to. 
+ */ + struct ifnet *bifp = NULL; + + if (vlan_trunkdev_p == NULL) { + m_freem(m); + return; + } + + bifp = vlan_trunkdev_p(ifp); + if (bifp == NULL) { + m_freem(m); + return; + } + + sc = if_getsoftc(bifp); + sc->sc_if_input(ifp, m); + return; + } + KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0, ("%s: iface %s is not running in netmap mode", __func__, if_name(ifp))); @@ -2867,6 +3035,10 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, if (sbif && (sbif->bif_flags & dbif->bif_flags & IFBIF_PRIVATE)) continue; + /* Do VLAN filtering. */ + if (!bridge_vfilter_out(dbif, m)) + continue; + if ((dbif->bif_flags & IFBIF_STP) && dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING) continue; @@ -2910,7 +3082,7 @@ bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if, continue; } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, dbif); } if (used == 0) m_freem(m); @@ -2946,11 +3118,116 @@ bridge_span(struct bridge_softc *sc, struct mbuf *m) continue; } - bridge_enqueue(sc, dst_if, mc); + bridge_enqueue(sc, dst_if, mc, bif); } } /* + * Incoming VLAN filtering. Given a frame and the member interface it was + * received on, decide whether the port configuration allows it. + */ +static bool +bridge_vfilter_in(const struct bridge_iflist *sbif, struct mbuf *m) +{ + ether_vlanid_t vlan; + + vlan = VLANTAGOF(m); + /* Make sure the vlan id is reasonable. */ + if (vlan > DOT1Q_VID_MAX) + return (false); + + /* If VLAN filtering isn't enabled, pass everything. */ + if ((sbif->bif_flags & IFBIF_VLANFILTER) == 0) + return (true); + + if (vlan == DOT1Q_VID_NULL) { + /* + * The frame doesn't have a tag. If the interface does not + * have an untagged vlan configured, drop the frame. + */ + if (sbif->bif_untagged == DOT1Q_VID_NULL) + return (false); + + /* + * Otherwise, insert a new tag based on the interface's + * untagged vlan id. + */ + m->m_pkthdr.ether_vtag = sbif->bif_untagged; + m->m_flags |= M_VLANTAG; + } else { + /* + * The frame has a tag, so check it matches the interface's + * vlan access list. We explicitly do not accept tagged + * frames for the untagged vlan id here (unless it's also + * in the access list). + */ + if (!BRVLAN_TEST(&sbif->bif_vlan_set, vlan)) + return (false); + } + + /* Accept the frame. */ + return (true); +} + +/* + * Outgoing VLAN filtering. Given a frame, its vlan, and the member interface + * we intend to send it to, decide whether the port configuration allows it to + * be sent. + */ +static bool +bridge_vfilter_out(const struct bridge_iflist *dbif, const struct mbuf *m) +{ + struct ether_header *eh; + ether_vlanid_t vlan; + + NET_EPOCH_ASSERT(); + + /* If VLAN filtering isn't enabled, pass everything. */ + if ((dbif->bif_flags & IFBIF_VLANFILTER) == 0) + return (true); + + vlan = VLANTAGOF(m); + + /* + * Always allow untagged 802.1D STP frames, even if they would + * otherwise be dropped. This is required for STP to work on + * a filtering bridge. + * + * Tagged STP (Cisco PVST+) is a non-standard extension, so + * handle those frames via the normal filtering path. + */ + eh = mtod(m, struct ether_header *); + if (vlan == DOT1Q_VID_NULL && + memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) + return (true); + + /* + * If the frame wasn't assigned to a vlan at ingress, drop it. + * We can't forward these frames to filtering ports because we + * don't know what VLAN they're supposed to be in. 
+ */ + if (vlan == DOT1Q_VID_NULL) + return (false); + + /* + * If the frame's vlan matches the interfaces's untagged vlan, + * allow it. + */ + if (vlan == dbif->bif_untagged) + return (true); + + /* + * If the frame's vlan is on the interface's tagged access list, + * allow it. + */ + if (BRVLAN_TEST(&dbif->bif_vlan_set, vlan)) + return (true); + + /* The frame was not permitted, so drop it. */ + return (false); +} + +/* * bridge_rtupdate: * * Add a bridge routing entry. diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h index 90beb6c96d82..97b63e3d4416 100644 --- a/sys/net/if_bridgevar.h +++ b/sys/net/if_bridgevar.h @@ -78,6 +78,8 @@ #define _NET_IF_BRIDGEVAR_H_ #include <sys/types.h> +#include <sys/_bitset.h> +#include <sys/bitset.h> #include <sys/callout.h> #include <sys/queue.h> #include <sys/condvar.h> @@ -122,6 +124,9 @@ #define BRDGSPROTO 28 /* set protocol (ifbrparam) */ #define BRDGSTXHC 29 /* set tx hold count (ifbrparam) */ #define BRDGSIFAMAX 30 /* set max interface addrs (ifbreq) */ +#define BRDGSIFUNTAGGED 31 /* set if untagged vlan */ +#define BRDGSIFVLANSET 32 /* set if vlan set */ +#define BRDGGIFVLANSET 33 /* get if vlan set */ /* * Generic bridge control request. @@ -139,6 +144,7 @@ struct ifbreq { uint32_t ifbr_addrcnt; /* member if addr number */ uint32_t ifbr_addrmax; /* member if addr max */ uint32_t ifbr_addrexceeded; /* member if addr violations */ + ether_vlanid_t ifbr_untagged; /* member if untagged vlan */ uint8_t pad[32]; }; @@ -155,10 +161,11 @@ struct ifbreq { #define IFBIF_BSTP_ADMEDGE 0x0200 /* member stp admin edge enabled */ #define IFBIF_BSTP_ADMCOST 0x0400 /* member stp admin path cost */ #define IFBIF_PRIVATE 0x0800 /* if is a private segment */ +#define IFBIF_VLANFILTER 0x1000 /* if does vlan filtering */ #define IFBIFBITS "\020\001LEARNING\002DISCOVER\003STP\004SPAN" \ "\005STICKY\014PRIVATE\006EDGE\007AUTOEDGE\010PTP" \ - "\011AUTOPTP" + "\011AUTOPTP\015VLANFILTER" #define IFBIFMASK ~(IFBIF_BSTP_EDGE|IFBIF_BSTP_AUTOEDGE|IFBIF_BSTP_PTP| \ IFBIF_BSTP_AUTOPTP|IFBIF_BSTP_ADMEDGE| \ IFBIF_BSTP_ADMCOST) /* not saved */ @@ -304,6 +311,26 @@ struct ifbpstpconf { eaddr[5] = pv >> 0; \ } while (0) +/* + * Bridge VLAN access request. 
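
The two new set commands are ordinary bridge ioctls, so userspace can be expected to drive them the same way the existing BRDG* commands are driven: wrap the request in a struct ifdrv and issue SIOCSDRVSPEC against the bridge interface. A minimal sketch under that assumption, using struct ifbreq's new ifbr_untagged field together with the ifbif_vlan_req structure and BRDG_VLAN_OP_* constants declared just below ("bridge0" and "em0" are placeholder interface names):

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_bridgevar.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

/* Issue one bridge driver command via SIOCSDRVSPEC (sketch, minimal error handling). */
static void
bridge_cmd(int s, const char *bridge, unsigned long cmd, void *arg, size_t len)
{
	struct ifdrv ifd;

	memset(&ifd, 0, sizeof(ifd));
	strlcpy(ifd.ifd_name, bridge, sizeof(ifd.ifd_name));
	ifd.ifd_cmd = cmd;
	ifd.ifd_len = len;
	ifd.ifd_data = arg;
	if (ioctl(s, SIOCSDRVSPEC, &ifd) == -1)
		err(1, "SIOCSDRVSPEC cmd %lu", cmd);
}

int
main(void)
{
	struct ifbreq req;
	struct ifbif_vlan_req vreq;
	int s;

	s = socket(AF_LOCAL, SOCK_DGRAM, 0);
	if (s == -1)
		err(1, "socket");

	/* Untagged (native) VLAN: frames arriving untagged on em0 join VLAN 100. */
	memset(&req, 0, sizeof(req));
	strlcpy(req.ifbr_ifsname, "em0", sizeof(req.ifbr_ifsname));
	req.ifbr_untagged = 100;
	bridge_cmd(s, "bridge0", BRDGSIFUNTAGGED, &req, sizeof(req));

	/* Tagged VLANs: replace em0's access list with VIDs 200..205. */
	memset(&vreq, 0, sizeof(vreq));
	strlcpy(vreq.bv_ifname, "em0", sizeof(vreq.bv_ifname));
	vreq.bv_op = BRDG_VLAN_OP_SET;
	for (int vid = 200; vid <= 205; vid++)
		BRVLAN_SET(&vreq.bv_set, vid);
	bridge_cmd(s, "bridge0", BRDGSIFVLANSET, &vreq, sizeof(vreq));

	close(s);
	return (0);
}

Reading the current access list back would use BRDGGIFVLANSET through SIOCGDRVSPEC in the same fashion.
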
+ */ +#define BRVLAN_SETSIZE 4096 +typedef __BITSET_DEFINE(ifbvlan_set, BRVLAN_SETSIZE) ifbvlan_set_t; + +#define BRVLAN_SET(set, bit) __BIT_SET(BRVLAN_SETSIZE, (bit), set) +#define BRVLAN_CLR(set, bit) __BIT_CLR(BRVLAN_SETSIZE, (bit), set) +#define BRVLAN_TEST(set, bit) __BIT_ISSET(BRVLAN_SETSIZE, (bit), set) + +#define BRDG_VLAN_OP_SET 1 /* replace current vlan set */ +#define BRDG_VLAN_OP_ADD 2 /* add vlans to current set */ +#define BRDG_VLAN_OP_DEL 3 /* remove vlans from current set */ + +struct ifbif_vlan_req { + char bv_ifname[IFNAMSIZ]; + uint8_t bv_op; + ifbvlan_set_t bv_set; +}; + #ifdef _KERNEL #define BRIDGE_INPUT(_ifp, _m) do { \ diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c index e9e1c82cb688..22fcb7bf7c64 100644 --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -1673,6 +1673,7 @@ vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, */ if (p->if_type != IFT_ETHER && p->if_type != IFT_L2VLAN && + p->if_type != IFT_BRIDGE && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) diff --git a/sys/net/if_vlan_var.h b/sys/net/if_vlan_var.h index f0b09445d04b..695bb81f77b3 100644 --- a/sys/net/if_vlan_var.h +++ b/sys/net/if_vlan_var.h @@ -126,13 +126,6 @@ struct vlanreq { #define VLAN_PCP_MAX 7 -#define DOT1Q_VID_NULL 0x0 -#define DOT1Q_VID_DEF_PVID 0x1 -#define DOT1Q_VID_DEF_SR_PVID 0x2 -#define DOT1Q_VID_RSVD_IMPL 0xfff -#define DOT1Q_VID_MIN 1 /* minimum valid vlan id */ -#define DOT1Q_VID_MAX 4094 /* maximum valid vlan id */ - /* * 802.1q full tag. Proto and vid are stored in host byte order. */ diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index 71cb1862aabf..1416f0c2cdbe 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -508,18 +508,6 @@ extern struct sx pf_end_lock; (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \ !(a)->addr32[2] && !(a)->addr32[3] )) \ -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - pf_addrcpy(a, b, f) - -#define PF_AINC(a, f) \ - pf_addr_inc(a, f) - -#define PF_POOLMASK(a, b, c, d, f) \ - pf_poolmask(a, b, c, d, f) - #else /* Just IPv6 */ @@ -544,18 +532,6 @@ extern struct sx pf_end_lock; !(a)->addr32[2] && \ !(a)->addr32[3] ) \ -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - pf_addrcpy(a, b, f) - -#define PF_AINC(a, f) \ - pf_addr_inc(a, f) - -#define PF_POOLMASK(a, b, c, d, f) \ - pf_poolmask(a, b, c, d, f) - #else /* Just IPv4 */ @@ -570,29 +546,11 @@ extern struct sx pf_end_lock; #define PF_AZERO(a, c) \ (!(a)->addr32[0]) -#define PF_MATCHA(n, a, m, b, f) \ - pf_match_addr(n, a, m, b, f) - -#define PF_ACPY(a, b, f) \ - (a)->v4.s_addr = (b)->v4.s_addr - -#define PF_AINC(a, f) \ - do { \ - (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \ - } while (0) - -#define PF_POOLMASK(a, b, c, d, f) \ - do { \ - (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \ - (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \ - } while (0) - #endif /* PF_INET_ONLY */ #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ #ifdef _KERNEL -#ifdef INET6 static void inline pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) { @@ -602,12 +560,13 @@ pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) memcpy(&dst->v4, &src->v4, sizeof(dst->v4)); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: memcpy(&dst->v6, &src->v6, sizeof(dst->v6)); break; +#endif /* INET6 */ } } -#endif /* INET6 */ #endif /* @@ -629,7 +588,7 @@ 
pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ - !PF_MATCHA(0, &(aw)->v.a.addr, \ + !pf_match_addr(0, &(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))))) != \ (neg) \ ) @@ -2477,11 +2436,11 @@ int pf_test(sa_family_t, int, int, struct ifnet *, struct mbuf **, struct inpcb int pf_normalize_ip(u_short *, struct pf_pdesc *); #endif /* INET */ -#ifdef INET6 -int pf_normalize_ip6(int, u_short *, struct pf_pdesc *); void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, sa_family_t); void pf_addr_inc(struct pf_addr *, sa_family_t); +#ifdef INET6 +int pf_normalize_ip6(int, u_short *, struct pf_pdesc *); int pf_max_frag_size(struct mbuf *); int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *, struct ifnet *, bool); @@ -2674,11 +2633,10 @@ int pf_kanchor_copyout(const struct pf_kruleset *, const struct pf_krule *, char *, size_t); int pf_kanchor_nvcopyout(const struct pf_kruleset *, const struct pf_krule *, nvlist_t *); -void pf_kanchor_remove(struct pf_krule *); +void pf_remove_kanchor(struct pf_krule *); void pf_remove_if_empty_kruleset(struct pf_kruleset *); struct pf_kruleset *pf_find_kruleset(const char *); struct pf_kruleset *pf_get_leaf_kruleset(char *, char **); -struct pf_kanchor *pf_create_kanchor(struct pf_kanchor *, const char *); struct pf_kruleset *pf_find_or_create_kruleset(const char *); void pf_rs_initialize(void); diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c index b24bbe036141..127b29320acb 100644 --- a/sys/netpfil/pf/pf.c +++ b/sys/netpfil/pf/pf.c @@ -682,7 +682,8 @@ pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk) 0); break; case AF_INET6: - PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + pf_addrcpy(pd->src, &nk->addr[pd->sidx], + pd->af); break; default: unhandled_af(pd->af); @@ -696,7 +697,8 @@ pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk) 0); break; case AF_INET6: - PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + pf_addrcpy(pd->dst, &nk->addr[pd->didx], + pd->af); break; default: unhandled_af(pd->af); @@ -1084,9 +1086,9 @@ pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX], (*sn)->af = af; (*sn)->rule = r_track; - PF_ACPY(&(*sn)->addr, src, af); + pf_addrcpy(&(*sn)->addr, src, af); if (raddr != NULL) - PF_ACPY(&(*sn)->raddr, raddr, af); + pf_addrcpy(&(*sn)->raddr, raddr, af); (*sn)->rkif = rkif; LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry); (*sn)->creation = time_uptime; @@ -1687,9 +1689,9 @@ pf_state_key_addr_setup(struct pf_pdesc *pd, copy: #endif /* INET6 */ if (saddr) - PF_ACPY(&key->addr[pd->sidx], saddr, pd->af); + pf_addrcpy(&key->addr[pd->sidx], saddr, pd->af); if (daddr) - PF_ACPY(&key->addr[pd->didx], daddr, pd->af); + pf_addrcpy(&key->addr[pd->didx], daddr, pd->af); return (0); } @@ -1734,13 +1736,17 @@ pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport, bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0])); bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1])); if (pd->dir == PF_IN) { - PF_ACPY(&(*nk)->addr[pd->didx], &pd->nsaddr, pd->naf); - PF_ACPY(&(*nk)->addr[pd->sidx], &pd->ndaddr, pd->naf); + pf_addrcpy(&(*nk)->addr[pd->didx], &pd->nsaddr, + pd->naf); + pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->ndaddr, + pd->naf); (*nk)->port[pd->didx] = pd->nsport; (*nk)->port[pd->sidx] = pd->ndport; } else { - PF_ACPY(&(*nk)->addr[pd->sidx], &pd->nsaddr, pd->naf); - PF_ACPY(&(*nk)->addr[pd->didx], &pd->ndaddr, 
pd->naf); + pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->nsaddr, + pd->naf); + pf_addrcpy(&(*nk)->addr[pd->didx], &pd->ndaddr, + pd->naf); (*nk)->port[pd->sidx] = pd->nsport; (*nk)->port[pd->didx] = pd->ndport; } @@ -2053,11 +2059,11 @@ pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_por mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO); if (mapping == NULL) return (NULL); - PF_ACPY(&mapping->endpoints[0].addr, src_addr, af); + pf_addrcpy(&mapping->endpoints[0].addr, src_addr, af); mapping->endpoints[0].port = src_port; mapping->endpoints[0].af = af; mapping->endpoints[0].mapping = mapping; - PF_ACPY(&mapping->endpoints[1].addr, nat_addr, af); + pf_addrcpy(&mapping->endpoints[1].addr, nat_addr, af); mapping->endpoints[1].port = nat_port; mapping->endpoints[1].af = af; mapping->endpoints[1].mapping = mapping; @@ -3295,9 +3301,9 @@ pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p, MPASS(pd->ip_sum); } - PF_ACPY(&ao, a, pd->af); + pf_addrcpy(&ao, a, pd->af); if (pd->af == pd->naf) - PF_ACPY(a, an, pd->af); + pf_addrcpy(a, an, pd->af); if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) *pd->pcksum = ~*pd->pcksum; @@ -3426,8 +3432,8 @@ pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u) { struct pf_addr ao; - PF_ACPY(&ao, a, AF_INET6); - PF_ACPY(a, an, AF_INET6); + pf_addrcpy(&ao, a, AF_INET6); + pf_addrcpy(a, an, AF_INET6); *c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup( @@ -3450,9 +3456,9 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, { struct pf_addr oia, ooa; - PF_ACPY(&oia, ia, af); + pf_addrcpy(&oia, ia, af); if (oa) - PF_ACPY(&ooa, oa, af); + pf_addrcpy(&ooa, oa, af); /* Change inner protocol port, fix inner protocol checksum. */ if (ip != NULL) { @@ -3469,7 +3475,7 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, *ic = pf_cksum_fixup(*ic, opc, *pc, 0); } /* Change inner ip address, fix inner ip and icmp checksums. */ - PF_ACPY(ia, na, af); + pf_addrcpy(ia, na, af); switch (af) { #ifdef INET case AF_INET: { @@ -3503,7 +3509,7 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa, } /* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */ if (oa) { - PF_ACPY(oa, na, af); + pf_addrcpy(oa, na, af); switch (af) { #ifdef INET case AF_INET: @@ -4299,8 +4305,8 @@ pf_undo_nat(struct pf_krule *nr, struct pf_pdesc *pd, uint16_t bip_sum) { /* undo NAT changes, if they have taken place */ if (nr != NULL) { - PF_ACPY(pd->src, &pd->osrc, pd->af); - PF_ACPY(pd->dst, &pd->odst, pd->af); + pf_addrcpy(pd->src, &pd->osrc, pd->af); + pf_addrcpy(pd->dst, &pd->odst, pd->af); if (pd->sport) *pd->sport = pd->osport; if (pd->dport) @@ -4675,6 +4681,13 @@ pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r) } } else { rv = pf_match_rule(ctx, &r->anchor->ruleset); + /* + * Unless errors occured, stop iff any rule matched + * within quick anchors. 
+ */ + if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK && + *ctx->am == r) + rv = PF_TEST_QUICK; } ctx->depth--; @@ -4784,7 +4797,6 @@ pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth, return (quick); } -#ifdef INET6 void pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) @@ -4796,6 +4808,7 @@ pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); @@ -4806,6 +4819,7 @@ pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | ((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]); break; +#endif /* INET6 */ } } @@ -4818,6 +4832,7 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); break; #endif /* INET */ +#ifdef INET6 case AF_INET6: if (addr->addr32[3] == 0xffffffff) { addr->addr32[3] = 0; @@ -4837,9 +4852,9 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af) addr->addr32[3] = htonl(ntohl(addr->addr32[3]) + 1); break; +#endif /* INET6 */ } } -#endif /* INET6 */ void pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a) @@ -5738,8 +5753,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, ctx.reason = *reason; SLIST_INIT(&ctx.rules); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); if (inp != NULL) { INP_LOCK_ASSERT(inp); @@ -6357,7 +6372,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) &nk->addr[pd->sidx], nk->port[pd->sidx]); pd->sport = &th->th_sport; pd->nsport = th->th_sport; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || @@ -6366,7 +6381,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) &nk->addr[pd->didx], nk->port[pd->didx]); pd->dport = &th->th_dport; pd->ndport = th->th_dport; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6379,7 +6394,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) nk->port[pd->sidx]); pd->sport = &pd->hdr.udp.uh_sport; pd->nsport = pd->hdr.udp.uh_sport; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || @@ -6390,7 +6405,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) nk->port[pd->didx]); pd->dport = &pd->hdr.udp.uh_dport; pd->ndport = pd->hdr.udp.uh_dport; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6403,7 +6418,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) nk->port[pd->sidx]); pd->sport = &pd->hdr.sctp.src_port; pd->nsport = pd->hdr.sctp.src_port; - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) || nk->port[pd->didx] != pd->ndport) { @@ -6413,7 +6428,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) nk->port[pd->didx]); pd->dport = &pd->hdr.sctp.dest_port; pd->ndport = pd->hdr.sctp.dest_port; - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } break; } @@ -6422,13 +6437,13 @@ pf_translate_compat(struct pf_test_ctx *ctx) if 
(PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) { pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) { pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } if (ctx->virtual_type == htons(ICMP_ECHO) && @@ -6447,13 +6462,13 @@ pf_translate_compat(struct pf_test_ctx *ctx) if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) { pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->sidx], 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) { pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum, &nk->addr[pd->didx], 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } rewrite++; break; @@ -6467,7 +6482,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) pf_change_a(&pd->src->v4.s_addr, pd->ip_sum, nk->addr[pd->sidx].v4.s_addr, 0); - PF_ACPY(&pd->nsaddr, pd->src, pd->af); + pf_addrcpy(&pd->nsaddr, pd->src, pd->af); } if (PF_ANEQ(&pd->ndaddr, @@ -6475,7 +6490,7 @@ pf_translate_compat(struct pf_test_ctx *ctx) pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum, nk->addr[pd->didx].v4.s_addr, 0); - PF_ACPY(&pd->ndaddr, pd->dst, pd->af); + pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); } break; #endif /* INET */ @@ -6483,14 +6498,17 @@ pf_translate_compat(struct pf_test_ctx *ctx) case AF_INET6: if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) { - PF_ACPY(&pd->nsaddr, &nk->addr[pd->sidx], pd->af); - PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[pd->sidx], + pd->af); + pf_addrcpy(pd->src, &nk->addr[pd->sidx], pd->af); } if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) { - PF_ACPY(&pd->ndaddr, &nk->addr[pd->didx], pd->af); - PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[pd->didx], + pd->af); + pf_addrcpy(pd->dst, &nk->addr[pd->didx], + pd->af); } break; #endif /* INET6 */ @@ -7009,8 +7027,8 @@ pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason) bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->virtual_proto; - PF_ACPY(&key.addr[pd->sidx], pd->src, key.af); - PF_ACPY(&key.addr[pd->didx], pd->dst, key.af); + pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af); + pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af); key.port[pd->sidx] = pd->osport; key.port[pd->didx] = pd->odport; @@ -7201,8 +7219,8 @@ pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason) } if (afto) { - PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); - PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); pd->naf = nk->af; action = PF_AFRT; } @@ -7496,13 +7514,13 @@ again: key.af = j->pd.af; key.proto = IPPROTO_SCTP; if (j->pd.dir == PF_IN) { /* wire side, straight */ - PF_ACPY(&key.addr[0], j->pd.src, key.af); - PF_ACPY(&key.addr[1], j->pd.dst, key.af); + pf_addrcpy(&key.addr[0], j->pd.src, key.af); + pf_addrcpy(&key.addr[1], j->pd.dst, key.af); key.port[0] = j->pd.hdr.sctp.src_port; key.port[1] = j->pd.hdr.sctp.dest_port; } else { /* stack side, reverse */ - PF_ACPY(&key.addr[1], j->pd.src, key.af); - PF_ACPY(&key.addr[0], j->pd.dst, key.af); + pf_addrcpy(&key.addr[1], j->pd.src, key.af); + 
pf_addrcpy(&key.addr[0], j->pd.dst, key.af); key.port[1] = j->pd.hdr.sctp.src_port; key.port[0] = j->pd.hdr.sctp.dest_port; } @@ -7898,8 +7916,10 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, #endif /* INET6 */ } if (afto) { - PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af); - PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af); + pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], + nk->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[didx], + nk->af); pd->naf = nk->af; return (PF_AFRT); } @@ -8031,8 +8051,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, key.af = pd2.af; key.proto = IPPROTO_TCP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = th->th_sport; key.port[pd2.didx] = th->th_dport; @@ -8135,9 +8155,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, &nk->addr[didx], pd->af, nk->af)) return (PF_DROP); - PF_ACPY(&pd->nsaddr, &nk->addr[pd2.sidx], - nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->nsaddr, + &nk->addr[pd2.sidx], nk->af); + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8226,8 +8246,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, key.af = pd2.af; key.proto = IPPROTO_UDP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = uh->uh_sport; key.port[pd2.didx] = uh->uh_dport; @@ -8270,9 +8290,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, &nk->addr[didx], pd->af, nk->af)) return (PF_DROP); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8358,8 +8378,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, key.af = pd2.af; key.proto = IPPROTO_SCTP; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[pd2.sidx] = sh->src_port; key.port[pd2.didx] = sh->dest_port; @@ -8425,9 +8445,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, sh->src_port = nk->port[sidx]; sh->dest_port = nk->port[didx]; m_copyback(pd2.m, pd2.off, sizeof(*sh), (c_caddr_t)sh); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); if (nk->af == AF_INET) { pd->proto = IPPROTO_ICMP; @@ -8568,9 +8588,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, iih->icmp_id = nk->port[iidx]; m_copyback(pd2.m, pd2.off, ICMP_MINLEN, (c_caddr_t)iih); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, &nk->addr[pd2.didx], nk->af); /* * IPv4 becomes IPv6 so we must copy @@ -8696,9 +8716,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, iih->icmp6_id = nk->port[iidx]; m_copyback(pd2.m, pd2.off, sizeof(struct icmp6_hdr), (c_caddr_t)iih); - PF_ACPY(&pd->nsaddr, + pf_addrcpy(&pd->nsaddr, &nk->addr[pd2.sidx], nk->af); - PF_ACPY(&pd->ndaddr, + pf_addrcpy(&pd->ndaddr, 
&nk->addr[pd2.didx], nk->af); pd->naf = nk->af; return (PF_AFRT); @@ -8740,8 +8760,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd, key.af = pd2.af; key.proto = pd2.proto; - PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af); - PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af); + pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); + pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); key.port[0] = key.port[1] = 0; action = pf_find_state(&pd2, &key, state); @@ -9277,7 +9297,8 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp, bzero(&dst, sizeof(dst)); dst.sin6_family = AF_INET6; dst.sin6_len = sizeof(dst); - PF_ACPY((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, AF_INET6); + pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, + AF_INET6); if (pd->dir == PF_IN) { if (ip6->ip6_hlim <= IPV6_HLIMDEC) { @@ -10077,8 +10098,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, pd->src = (struct pf_addr *)&h->ip_src; pd->dst = (struct pf_addr *)&h->ip_dst; - PF_ACPY(&pd->osrc, pd->src, af); - PF_ACPY(&pd->odst, pd->dst, af); + pf_addrcpy(&pd->osrc, pd->src, af); + pf_addrcpy(&pd->odst, pd->dst, af); pd->ip_sum = &h->ip_sum; pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; pd->ttl = h->ip_ttl; @@ -10115,8 +10136,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0, h = mtod(pd->m, struct ip6_hdr *); pd->src = (struct pf_addr *)&h->ip6_src; pd->dst = (struct pf_addr *)&h->ip6_dst; - PF_ACPY(&pd->osrc, pd->src, af); - PF_ACPY(&pd->odst, pd->dst, af); + pf_addrcpy(&pd->osrc, pd->src, af); + pf_addrcpy(&pd->odst, pd->dst, af); pd->ip_sum = NULL; pd->tos = IPV6_DSCP(h); pd->ttl = h->ip6_hlim; diff --git a/sys/netpfil/pf/pf_if.c b/sys/netpfil/pf/pf_if.c index 389b74d09d37..e2200c15c704 100644 --- a/sys/netpfil/pf/pf_if.c +++ b/sys/netpfil/pf/pf_if.c @@ -522,7 +522,7 @@ pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) case 0: return (0); case 1: - return (PF_MATCHA(0, &dyn->pfid_addr4, + return (pf_match_addr(0, &dyn->pfid_addr4, &dyn->pfid_mask4, a, AF_INET)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET)); @@ -535,7 +535,7 @@ pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af) case 0: return (0); case 1: - return (PF_MATCHA(0, &dyn->pfid_addr6, + return (pf_match_addr(0, &dyn->pfid_addr6, &dyn->pfid_mask6, a, AF_INET6)); default: return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6)); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index 05a7e1311ad8..357b2be194a5 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -615,7 +615,7 @@ pf_free_rule(struct pf_krule *rule) pfi_kkif_unref(rule->kif); if (rule->rcv_kif) pfi_kkif_unref(rule->rcv_kif); - pf_kanchor_remove(rule); + pf_remove_kanchor(rule); pf_empty_kpool(&rule->rdr.list); pf_empty_kpool(&rule->nat.list); pf_empty_kpool(&rule->route.list); @@ -2350,15 +2350,17 @@ relock_DIOCKILLSTATES: if (psk->psk_proto && psk->psk_proto != sk->proto) continue; - if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, + if (! pf_match_addr(psk->psk_src.neg, + &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, srcaddr, sk->af)) continue; - if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, + if (! pf_match_addr(psk->psk_dst.neg, + &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af)) continue; - if (! PF_MATCHA(psk->psk_rt_addr.neg, + if (! 
pf_match_addr(psk->psk_rt_addr.neg, &psk->psk_rt_addr.addr.v.a.addr, &psk->psk_rt_addr.addr.v.a.mask, &s->act.rt_addr, sk->af)) @@ -2398,10 +2400,10 @@ relock_DIOCKILLSTATES: match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; - PF_ACPY(&match_key.addr[0], + pf_addrcpy(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; - PF_ACPY(&match_key.addr[1], + pf_addrcpy(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } @@ -2738,7 +2740,7 @@ pf_ioctl_get_rulesets(struct pfioc_ruleset *pr) return (ENOENT); } pr->nr = 0; - if (ruleset->anchor == NULL) { + if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL) @@ -2770,7 +2772,7 @@ pf_ioctl_get_ruleset(struct pfioc_ruleset *pr) } pr->name[0] = 0; - if (ruleset->anchor == NULL) { + if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL && nr++ == pr->nr) { @@ -4152,9 +4154,9 @@ DIOCGETSTATESV2_full: bzero(&key, sizeof(key)); key.af = pnl->af; key.proto = pnl->proto; - PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af); + pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af); key.port[sidx] = pnl->sport; - PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af); + pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af); key.port[didx] = pnl->dport; state = pf_find_state_all(&key, direction, &m); @@ -4166,9 +4168,11 @@ DIOCGETSTATESV2_full: error = E2BIG; /* more than one state */ } else { sk = state->key[sidx]; - PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af); + pf_addrcpy(&pnl->rsaddr, + &sk->addr[sidx], sk->af); pnl->rsport = sk->port[sidx]; - PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af); + pf_addrcpy(&pnl->rdaddr, + &sk->addr[didx], sk->af); pnl->rdport = sk->port[didx]; PF_STATE_UNLOCK(state); } @@ -4606,7 +4610,7 @@ DIOCGETSTATESV2_full: } pool->cur = TAILQ_FIRST(&pool->list); - PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); + pf_addrcpy(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); PF_RULES_WUNLOCK(); break; @@ -6024,11 +6028,11 @@ pf_kill_srcnodes(struct pfioc_src_node_kill *psnk) PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp) if (psnk == NULL || - (PF_MATCHA(psnk->psnk_src.neg, + (pf_match_addr(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, &sn->addr, sn->af) && - PF_MATCHA(psnk->psnk_dst.neg, + pf_match_addr(psnk->psnk_dst.neg, &psnk->psnk_dst.addr.v.a.addr, &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af))) { @@ -6132,10 +6136,10 @@ relock_DIOCCLRSTATES: match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; - PF_ACPY(&match_key.addr[0], + pf_addrcpy(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; - PF_ACPY(&match_key.addr[1], + pf_addrcpy(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 5e7865e4fac5..308d76c46e5b 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -319,12 +319,14 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, bzero(&udp_source, sizeof(udp_source)); udp_source.af = pd->af; - PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af); + pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); udp_source.port = pd->nsport; if (udp_mapping) { *udp_mapping = 
pf_udp_mapping_find(&udp_source); if (*udp_mapping) { - PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); + pf_addrcpy(naddr, + &(*udp_mapping)->endpoints[1].addr, + pd->af); *nport = (*udp_mapping)->endpoints[1].port; /* Try to find a src_node as per pf_map_addr(). */ if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && @@ -369,12 +371,13 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, key.proto = pd->proto; do { - PF_ACPY(&key.addr[didx], &pd->ndaddr, key.af); - PF_ACPY(&key.addr[sidx], naddr, key.af); + pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af); + pf_addrcpy(&key.addr[sidx], naddr, key.af); key.port[didx] = pd->ndport; if (udp_mapping && *udp_mapping) - PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af); + pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr, + pd->af); /* * port search; start random, step; @@ -591,10 +594,10 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, switch (rpool->opts & PF_POOL_TYPEMASK) { case PF_POOL_NONE: - PF_ACPY(naddr, raddr, af); + pf_addrcpy(naddr, raddr, af); break; case PF_POOL_BITMASK: - PF_POOLMASK(naddr, raddr, rmask, saddr, af); + pf_poolmask(naddr, raddr, rmask, saddr, af); break; case PF_POOL_RANDOM: if (rpool->cur->addr.type == PF_ADDR_TABLE) { @@ -609,7 +612,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; if (cnt == 0) @@ -623,7 +626,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { switch (af) { #ifdef INET @@ -654,12 +657,12 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, break; #endif /* INET6 */ } - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); - PF_ACPY(init_addr, naddr, af); + pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); + pf_addrcpy(init_addr, naddr, af); } else { - PF_AINC(&rpool->counter, af); - PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); + pf_addr_inc(&rpool->counter, af); + pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); } break; case PF_POOL_SRCHASH: @@ -680,7 +683,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt; if (cnt == 0) @@ -694,9 +697,9 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); } else { - PF_POOLMASK(naddr, raddr, rmask, + pf_poolmask(naddr, raddr, rmask, (struct pf_addr *)&hash, af); } break; @@ -743,14 +746,14 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; - PF_ACPY(&rpool->counter, raddr, af); + pf_addrcpy(&rpool->counter, raddr, af); } get_addr: - PF_ACPY(naddr, &rpool->counter, af); + pf_addrcpy(naddr, &rpool->counter, af); if (init_addr != 
NULL && PF_AZERO(init_addr, af)) - PF_ACPY(init_addr, naddr, af); - PF_AINC(&rpool->counter, af); + pf_addrcpy(init_addr, naddr, af); + pf_addr_inc(&rpool->counter, af); break; } } @@ -798,7 +801,7 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr, goto done; } - PF_ACPY(naddr, &(*sn)->raddr, af); + pf_addrcpy(naddr, &(*sn)->raddr, af); if (nkif) *nkif = (*sn)->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { @@ -948,7 +951,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.p.dyn->pfid_addr4, &rpool->cur->addr.p.dyn->pfid_mask4, &pd->nsaddr, AF_INET); @@ -961,7 +964,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.p.dyn->pfid_addr6, &rpool->cur->addr.p.dyn->pfid_mask6, &pd->nsaddr, AF_INET6); @@ -969,7 +972,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, #endif /* INET6 */ } } else - PF_POOLMASK(naddr, + pf_poolmask(naddr, &rpool->cur->addr.v.a.addr, &rpool->cur->addr.v.a.mask, &pd->nsaddr, pd->af); @@ -983,7 +986,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &r->src.addr.p.dyn->pfid_addr4, &r->src.addr.p.dyn->pfid_mask4, &pd->ndaddr, AF_INET); @@ -995,7 +998,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, reason = PFRES_MAPFAILED; goto notrans; } - PF_POOLMASK(naddr, + pf_poolmask(naddr, &r->src.addr.p.dyn->pfid_addr6, &r->src.addr.p.dyn->pfid_mask6, &pd->ndaddr, AF_INET6); @@ -1003,7 +1006,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, #endif /* INET6 */ } } else - PF_POOLMASK(naddr, &r->src.addr.v.a.addr, + pf_poolmask(naddr, &r->src.addr.v.a.addr, &r->src.addr.v.a.mask, &pd->ndaddr, pd->af); break; } @@ -1018,7 +1021,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, if (reason != 0) goto notrans; if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) - PF_POOLMASK(naddr, naddr, &rpool->cur->addr.v.a.mask, + pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask, &pd->ndaddr, pd->af); /* Do not change SCTP ports. 
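
Aside from the mechanical PF_ACPY/PF_POOLMASK-to-function conversions above, the bitmask pool arithmetic itself is easy to lose in the churn: for PF_POOL_BITMASK, pf_poolmask() computes each 32-bit word of the translated address as (raddr & rmask) | (~rmask & saddr), so the network part comes from the pool address while the host bits of the original source address are preserved. A self-contained illustration with made-up addresses (this is not pf code, just the formula):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* One 32-bit word of the pf_poolmask() formula: (raddr & rmask) | (~rmask & saddr). */
static uint32_t
poolmask32(uint32_t raddr, uint32_t rmask, uint32_t saddr)
{
	return ((raddr & rmask) | (~rmask & saddr));
}

int
main(void)
{
	struct in_addr r, m, s, n;
	char buf[INET_ADDRSTRLEN];

	inet_pton(AF_INET, "198.51.100.0", &r);		/* pool address */
	inet_pton(AF_INET, "255.255.255.0", &m);	/* pool mask */
	inet_pton(AF_INET, "192.0.2.77", &s);		/* packet source address */

	n.s_addr = poolmask32(r.s_addr, m.s_addr, s.s_addr);
	/* Prints 198.51.100.77: only the host octet survives from the source. */
	printf("%s\n", inet_ntop(AF_INET, &n, buf, sizeof(buf)));
	return (0);
}

With a /24 pool mask, 192.0.2.77 maps onto 198.51.100.77 — the network bits are rewritten, the host bits carried over.
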
*/ @@ -1056,9 +1059,9 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, key.af = pd->af; key.proto = pd->proto; key.port[0] = pd->nsport; - PF_ACPY(&key.addr[0], &pd->nsaddr, key.af); + pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af); key.port[1] = nport; - PF_ACPY(&key.addr[1], naddr, key.af); + pf_addrcpy(&key.addr[1], naddr, key.af); if (!pf_find_state_all_exists(&key, PF_OUT)) break; @@ -1220,8 +1223,8 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) } } - PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf); - PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf); + pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); + pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: af-to %s done, prefixlen %d, ", diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 381e966eacf1..d5d6dc70255e 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -1308,9 +1308,9 @@ pf_handle_natlook(struct nlmsghdr *hdr, struct nl_pstate *npt) key.af = attrs.af; key.proto = attrs.proto; - PF_ACPY(&key.addr[sidx], &attrs.src, attrs.af); + pf_addrcpy(&key.addr[sidx], &attrs.src, attrs.af); key.port[sidx] = attrs.sport; - PF_ACPY(&key.addr[didx], &attrs.dst, attrs.af); + pf_addrcpy(&key.addr[didx], &attrs.dst, attrs.af); key.port[didx] = attrs.dport; state = pf_find_state_all(&key, attrs.direction, &m); diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c index 865c5ecd72d9..2e5165a9900c 100644 --- a/sys/netpfil/pf/pf_ruleset.c +++ b/sys/netpfil/pf/pf_ruleset.c @@ -232,7 +232,7 @@ pf_get_leaf_kruleset(char *path, char **path_remainder) return (ruleset); } -struct pf_kanchor * +static struct pf_kanchor * pf_create_kanchor(struct pf_kanchor *parent, const char *aname) { struct pf_kanchor *anchor, *dup; @@ -259,8 +259,8 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname) if ((dup = RB_INSERT(pf_kanchor_global, &V_pf_anchors, anchor)) != NULL) { - printf("pf_find_or_create_ruleset: RB_INSERT1 " - "'%s' '%s' collides with '%s' '%s'\n", + printf("%s: RB_INSERT1 " + "'%s' '%s' collides with '%s' '%s'\n", __func__, anchor->path, anchor->name, dup->path, dup->name); rs_free(anchor); return (NULL); @@ -270,10 +270,10 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname) anchor->parent = parent; if ((dup = RB_INSERT(pf_kanchor_node, &parent->children, anchor)) != NULL) { - printf("pf_find_or_create_ruleset: " + printf("%s: " "RB_INSERT2 '%s' '%s' collides with " - "'%s' '%s'\n", anchor->path, anchor->name, - dup->path, dup->name); + "'%s' '%s'\n", __func__, anchor->path, + anchor->name, dup->path, dup->name); RB_REMOVE(pf_kanchor_global, &V_pf_anchors, anchor); rs_free(anchor); @@ -339,7 +339,7 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset) int i; while (ruleset != NULL) { - if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL || + if (ruleset == &pf_main_ruleset || !RB_EMPTY(&ruleset->anchor->children) || ruleset->anchor->refcnt > 0 || ruleset->tables > 0 || ruleset->topen) @@ -407,7 +407,7 @@ pf_kanchor_setup(struct pf_krule *r, const struct pf_kruleset *s, } ruleset = pf_find_or_create_kruleset(path); rs_free(path); - if (ruleset == NULL || ruleset->anchor == NULL) { + if (ruleset == NULL || ruleset == &pf_main_ruleset) { DPFPRINTF("%s: ruleset\n", __func__); return (1); } @@ -432,7 +432,7 @@ pf_kanchor_copyout(const struct pf_kruleset *rs, const struct pf_krule *r, char a[MAXPATHLEN]; char *p; int i; - if (rs->anchor == NULL) + if (rs == &pf_main_ruleset) a[0] = 0; else strlcpy(a, rs->anchor->path, 
MAXPATHLEN); @@ -444,7 +444,7 @@ pf_kanchor_copyout(const struct pf_kruleset *rs, const struct pf_krule *r, anchor_call_len); } if (strncmp(a, r->anchor->path, strlen(a))) { - printf("pf_anchor_copyout: '%s' '%s'\n", a, + printf("%s: '%s' '%s'\n", __func__, a, r->anchor->path); return (1); } @@ -525,16 +525,13 @@ done: } void -pf_kanchor_remove(struct pf_krule *r) +pf_remove_kanchor(struct pf_krule *r) { if (r->anchor == NULL) return; - if (r->anchor->refcnt <= 0) { - printf("pf_anchor_remove: broken refcount\n"); - r->anchor = NULL; - return; - } - if (!--r->anchor->refcnt) + if (r->anchor->refcnt <= 0) + printf("%s: broken refcount\n", __func__); + else if (!--r->anchor->refcnt) pf_remove_if_empty_kruleset(&r->anchor->ruleset); r->anchor = NULL; } diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c index d5874df3df66..43e4366845a2 100644 --- a/sys/netpfil/pf/pf_table.c +++ b/sys/netpfil/pf/pf_table.c @@ -704,7 +704,7 @@ pfr_validate_addr(struct pfr_addr *ad) return (-1); if (ad->pfra_not && ad->pfra_not != 1) return (-1); - if (ad->pfra_fback) + if (ad->pfra_fback != PFR_FB_NONE) return (-1); return (0); } @@ -2340,16 +2340,16 @@ _next_block: if (use_counter && !PF_AZERO(counter, af)) { /* is supplied address within block? */ - if (!PF_MATCHA(0, &cur, &mask, counter, af)) { + if (!pf_match_addr(0, &cur, &mask, counter, af)) { /* no, go to next block in table */ idx++; use_counter = 0; goto _next_block; } - PF_ACPY(addr, counter, af); + pf_addrcpy(addr, counter, af); } else { /* use first address of block */ - PF_ACPY(addr, &cur, af); + pf_addrcpy(addr, &cur, af); } if (!KENTRY_NETWORK(ke)) { @@ -2358,7 +2358,7 @@ _next_block: idx++; goto _next_block; } - PF_ACPY(counter, addr, af); + pf_addrcpy(counter, addr, af); *pidx = idx; pfr_kstate_counter_add(&kt->pfrkt_match, 1); return (0); @@ -2382,7 +2382,7 @@ _next_block: /* lookup return the same block - perfect */ if (filter && filter(af, addr)) goto _next_entry; - PF_ACPY(counter, addr, af); + pf_addrcpy(counter, addr, af); *pidx = idx; pfr_kstate_counter_add(&kt->pfrkt_match, 1); return (0); @@ -2392,9 +2392,9 @@ _next_entry: /* we need to increase the counter past the nested block */ pfr_prepare_network(&umask, AF_INET, ke2->pfrke_net); pfr_sockaddr_to_pf_addr(&umask, &umask_addr); - PF_POOLMASK(addr, addr, &umask_addr, &pfr_ffaddr, af); - PF_AINC(addr, af); - if (!PF_MATCHA(0, &cur, &mask, addr, af)) { + pf_poolmask(addr, addr, &umask_addr, &pfr_ffaddr, af); + pf_addr_inc(addr, af); + if (!pf_match_addr(0, &cur, &mask, addr, af)) { /* ok, we reached the end of our main block */ /* go to next block in table */ idx++; diff --git a/sys/powerpc/mpc85xx/mpc85xx_gpio.c b/sys/powerpc/mpc85xx/mpc85xx_gpio.c index 0f333feb747f..cb96d768adef 100644 --- a/sys/powerpc/mpc85xx/mpc85xx_gpio.c +++ b/sys/powerpc/mpc85xx/mpc85xx_gpio.c @@ -226,14 +226,14 @@ mpc85xx_gpio_attach(device_t dev) return (ENOMEM); } + OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); + sc->busdev = gpiobus_attach_bus(dev); if (sc->busdev == NULL) { mpc85xx_gpio_detach(dev); return (ENOMEM); } - OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev); - return (0); } diff --git a/sys/riscv/allwinner/files.allwinner b/sys/riscv/allwinner/files.allwinner index 423a89c10c78..73fa9660e2d2 100644 --- a/sys/riscv/allwinner/files.allwinner +++ b/sys/riscv/allwinner/files.allwinner @@ -1,5 +1,6 @@ arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt +arm/allwinner/aw_rtc.c optional aw_rtc fdt arm/allwinner/aw_syscon.c optional syscon 
arm/allwinner/aw_sid.c optional aw_sid nvmem arm/allwinner/aw_timer.c optional aw_timer fdt diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner index 1bf6b027a4cb..2b1e0d4e09dc 100644 --- a/sys/riscv/conf/std.allwinner +++ b/sys/riscv/conf/std.allwinner @@ -7,6 +7,7 @@ options SOC_ALLWINNER_D1 device aw_ccu # Allwinner clock controller device aw_gpio # Allwinner GPIO controller +device aw_rtc # Allwinner Real-time Clock device aw_sid # Allwinner Secure ID EFUSE device aw_timer # Allwinner Timer device aw_usbphy # Allwinner USB PHY diff --git a/sys/sys/caprights.h b/sys/sys/caprights.h index 48c75afc62a0..6a5a17eda5ee 100644 --- a/sys/sys/caprights.h +++ b/sys/sys/caprights.h @@ -79,6 +79,8 @@ extern const cap_rights_t cap_futimes_rights; extern const cap_rights_t cap_getpeername_rights; extern const cap_rights_t cap_getsockopt_rights; extern const cap_rights_t cap_getsockname_rights; +extern const cap_rights_t cap_inotify_add_rights; +extern const cap_rights_t cap_inotify_rm_rights; extern const cap_rights_t cap_ioctl_rights; extern const cap_rights_t cap_linkat_source_rights; extern const cap_rights_t cap_linkat_target_rights; diff --git a/sys/sys/capsicum.h b/sys/sys/capsicum.h index d493535454e9..3847c4c73e75 100644 --- a/sys/sys/capsicum.h +++ b/sys/sys/capsicum.h @@ -279,11 +279,15 @@ #define CAP_KQUEUE (CAP_KQUEUE_EVENT | CAP_KQUEUE_CHANGE) +/* Allows operations on inotify descriptors. */ +#define CAP_INOTIFY_ADD CAPRIGHT(1, 0x0000000000200000ULL) +#define CAP_INOTIFY_RM CAPRIGHT(1, 0x0000000000400000ULL) + /* All used bits for index 1. */ -#define CAP_ALL1 CAPRIGHT(1, 0x00000000001FFFFFULL) +#define CAP_ALL1 CAPRIGHT(1, 0x00000000007FFFFFULL) /* Available bits for index 1. */ -#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000200000ULL) +#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000800000ULL) /* ... */ #define CAP_UNUSED1_57 CAPRIGHT(1, 0x0100000000000000ULL) diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h index d770c274d7b7..cab94ac511a5 100644 --- a/sys/sys/exterr_cat.h +++ b/sys/sys/exterr_cat.h @@ -16,6 +16,8 @@ #define EXTERR_KTRACE 3 /* To allow inclusion of this file into kern_ktrace.c */ #define EXTERR_CAT_FUSE 4 +#define EXTERR_CAT_INOTIFY 5 +#define EXTERR_CAT_GENIO 6 #endif diff --git a/sys/sys/file.h b/sys/sys/file.h index 284d523147b6..63313926c4f0 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -71,6 +71,7 @@ struct nameidata; #define DTYPE_PROCDESC 12 /* process descriptor */ #define DTYPE_EVENTFD 13 /* eventfd */ #define DTYPE_TIMERFD 14 /* timerfd */ +#define DTYPE_INOTIFY 15 /* inotify descriptor */ #ifdef _KERNEL diff --git a/sys/sys/hwt.h b/sys/sys/hwt.h new file mode 100644 index 000000000000..78b774a70f9f --- /dev/null +++ b/sys/sys/hwt.h @@ -0,0 +1,129 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* User-visible header. */ + +#include <sys/param.h> +#include <sys/cpuset.h> +#include <sys/types.h> +#include <sys/hwt_record.h> + +#ifndef _SYS_HWT_H_ +#define _SYS_HWT_H_ + +#define HWT_MAGIC 0x42 +#define HWT_IOC_ALLOC _IOW(HWT_MAGIC, 0x00, struct hwt_alloc) +#define HWT_IOC_START _IOW(HWT_MAGIC, 0x01, struct hwt_start) +#define HWT_IOC_STOP _IOW(HWT_MAGIC, 0x02, struct hwt_stop) +#define HWT_IOC_RECORD_GET _IOW(HWT_MAGIC, 0x03, struct hwt_record_get) +#define HWT_IOC_BUFPTR_GET _IOW(HWT_MAGIC, 0x04, struct hwt_bufptr_get) +#define HWT_IOC_SET_CONFIG _IOW(HWT_MAGIC, 0x05, struct hwt_set_config) +#define HWT_IOC_WAKEUP _IOW(HWT_MAGIC, 0x06, struct hwt_wakeup) +#define HWT_IOC_SVC_BUF _IOW(HWT_MAGIC, 0x07, struct hwt_svc_buf) + +#define HWT_BACKEND_MAXNAMELEN 256 + +#define HWT_MODE_THREAD 1 +#define HWT_MODE_CPU 2 + +struct hwt_alloc { + size_t bufsize; + int mode; + pid_t pid; /* thread mode */ + cpuset_t *cpu_map; /* cpu mode only */ + size_t cpusetsize; + const char *backend_name; + int *ident; + int kqueue_fd; +} __aligned(16); + +struct hwt_start { + int reserved; +} __aligned(16); + +struct hwt_stop { + int reserved; +} __aligned(16); + +struct hwt_wakeup { + int reserved; +} __aligned(16); + +struct hwt_record_user_entry { + enum hwt_record_type record_type; + union { + /* + * Used for MMAP, EXECUTABLE, INTERP, + * and KERNEL records. + */ + struct { + char fullpath[MAXPATHLEN]; + uintptr_t addr; + uintptr_t baseaddr; + }; + /* Used for BUFFER records. */ + struct { + int buf_id; + int curpage; + vm_offset_t offset; + }; + /* Used for THREAD_* records. */ + int thread_id; + }; +} __aligned(16); + +struct hwt_record_get { + struct hwt_record_user_entry *records; + int *nentries; + int wait; +} __aligned(16); + +struct hwt_bufptr_get { + int *ident; + vm_offset_t *offset; + uint64_t *data; +} __aligned(16); + +struct hwt_set_config { + /* Configuration of ctx. */ + int pause_on_mmap; + + /* The following passed to backend as is. */ + void *config; + size_t config_size; + int config_version; +} __aligned(16); + +struct hwt_svc_buf { + /* The following passed to backend as is. */ + void *data; + size_t data_size; + int data_version; +} __aligned(16); + +#endif /* !_SYS_HWT_H_ */ diff --git a/sys/sys/hwt_record.h b/sys/sys/hwt_record.h new file mode 100644 index 000000000000..8336723f9396 --- /dev/null +++ b/sys/sys/hwt_record.h @@ -0,0 +1,70 @@ +/*- + * Copyright (c) 2023-2025 Ruslan Bukin <br@bsdpad.com> + * + * This work was supported by Innovate UK project 105694, "Digital Security + * by Design (DSbD) Technology Platform Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* User-visible header. */ + +#ifndef _SYS_HWT_RECORD_H_ +#define _SYS_HWT_RECORD_H_ + +enum hwt_record_type { + HWT_RECORD_MMAP, + HWT_RECORD_MUNMAP, + HWT_RECORD_EXECUTABLE, + HWT_RECORD_KERNEL, + HWT_RECORD_THREAD_CREATE, + HWT_RECORD_THREAD_SET_NAME, + HWT_RECORD_BUFFER +}; + +#ifdef _KERNEL +struct hwt_record_entry { + TAILQ_ENTRY(hwt_record_entry) next; + enum hwt_record_type record_type; + union { + /* + * Used for MMAP, EXECUTABLE, INTERP, + * and KERNEL records. + */ + struct { + char *fullpath; + uintptr_t addr; + uintptr_t baseaddr; + }; + /* Used for BUFFER records. */ + struct { + int buf_id; + int curpage; + vm_offset_t offset; + }; + /* Used for THREAD_* records. */ + int thread_id; + }; +}; +#endif + +#endif /* !_SYS_HWT_RECORD_H_ */ diff --git a/sys/sys/inotify.h b/sys/sys/inotify.h new file mode 100644 index 000000000000..65dc5dba43f3 --- /dev/null +++ b/sys/sys/inotify.h @@ -0,0 +1,150 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +#ifndef _INOTIFY_H_ +#define _INOTIFY_H_ + +#include <sys/_types.h> + +/* Flags for inotify_init1(). */ +#define IN_NONBLOCK 0x00000004 /* O_NONBLOCK */ +#define IN_CLOEXEC 0x00100000 /* O_CLOEXEC */ + +struct inotify_event { + int wd; + __uint32_t mask; + __uint32_t cookie; + __uint32_t len; + char name[0]; +}; + +/* Events, set in the mask field. */ +#define IN_ACCESS 0x00000001 +#define IN_MODIFY 0x00000002 +#define IN_ATTRIB 0x00000004 +#define IN_CLOSE_WRITE 0x00000008 +#define IN_CLOSE_NOWRITE 0x00000010 +#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) +#define IN_OPEN 0x00000020 +#define IN_MOVED_FROM 0x00000040 +#define IN_MOVED_TO 0x00000080 +#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) +#define IN_CREATE 0x00000100 +#define IN_DELETE 0x00000200 +#define IN_DELETE_SELF 0x00000400 +#define IN_MOVE_SELF 0x00000800 +#define IN_ALL_EVENTS 0x00000fff + +/* Events report only for entries in a watched dir, not the dir itself. */ +#define _IN_DIR_EVENTS (IN_CLOSE_WRITE | IN_DELETE | IN_MODIFY | \ + IN_MOVED_FROM | IN_MOVED_TO) + +#ifdef _KERNEL +/* + * An unlink that's done as part of a rename only records IN_DELETE if the + * unlinked vnode itself is watched, and not when the containing directory is + * watched. 
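
Returning to the hwt(4) ioctl interface declared in sys/sys/hwt.h above: a tracing session is described by struct hwt_alloc and created with HWT_IOC_ALLOC. The sketch below exercises only what the header itself shows; the control-device path, the backend name, and the meaning of kqueue_fd = -1 are assumptions, and the subsequent start/buffer/record-collection steps are omitted:

#include <sys/types.h>
#include <sys/cpuset.h>
#include <sys/ioctl.h>
#include <sys/hwt.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct hwt_alloc al;
	int fd, ident;

	/* "/dev/hwt" is an assumed control-device path, not taken from this diff. */
	fd = open("/dev/hwt", O_RDWR);
	if (fd == -1)
		err(1, "open");

	memset(&al, 0, sizeof(al));
	al.bufsize = 16 * 1024 * 1024;	/* size of the trace buffer to allocate */
	al.mode = HWT_MODE_THREAD;	/* trace a single process rather than CPUs */
	al.pid = 1234;			/* placeholder: pid of the process to trace */
	al.backend_name = "spe";	/* hypothetical backend name */
	al.ident = &ident;		/* context identifier written back by the kernel */
	al.kqueue_fd = -1;		/* assumption: no kqueue notification requested */
	if (ioctl(fd, HWT_IOC_ALLOC, &al) == -1)
		err(1, "HWT_IOC_ALLOC");

	/* HWT_IOC_START, HWT_IOC_RECORD_GET, HWT_IOC_STOP, etc. would follow. */
	close(fd);
	return (0);
}
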
+ */ +#define _IN_MOVE_DELETE 0x40000000 +/* + * Inode link count changes only trigger IN_ATTRIB events if the inode itself is + * watched, and not when the containing directory is watched. + */ +#define _IN_ATTRIB_LINKCOUNT 0x80000000 +#endif + +/* Flags, set in the mask field. */ +#define IN_ONLYDIR 0x01000000 +#define IN_DONT_FOLLOW 0x02000000 +#define IN_EXCL_UNLINK 0x04000000 +#define IN_MASK_CREATE 0x10000000 +#define IN_MASK_ADD 0x20000000 +#define IN_ONESHOT 0x80000000 +#define _IN_ALL_FLAGS (IN_ONLYDIR | IN_DONT_FOLLOW | \ + IN_EXCL_UNLINK | IN_MASK_CREATE | \ + IN_MASK_ADD | IN_ONESHOT) + +/* Flags returned by the kernel. */ +#define IN_UNMOUNT 0x00002000 +#define IN_Q_OVERFLOW 0x00004000 +#define IN_IGNORED 0x00008000 +#define IN_ISDIR 0x40000000 +#define _IN_ALL_RETFLAGS (IN_Q_OVERFLOW | IN_UNMOUNT | IN_IGNORED | \ + IN_ISDIR) + +#define _IN_ALIGN _Alignof(struct inotify_event) +#define _IN_NAMESIZE(namelen) \ + ((namelen) == 0 ? 0 : __align_up((namelen) + 1, _IN_ALIGN)) + +#ifdef _KERNEL +struct componentname; +struct file; +struct inotify_softc; +struct thread; +struct vnode; + +int inotify_create_file(struct thread *, struct file *, int, int *); +void inotify_log(struct vnode *, const char *, size_t, int, __uint32_t); + +int kern_inotify_rm_watch(int, uint32_t, struct thread *); +int kern_inotify_add_watch(int, int, const char *, uint32_t, + struct thread *); + +void vn_inotify(struct vnode *, struct vnode *, struct componentname *, int, + uint32_t); +int vn_inotify_add_watch(struct vnode *, struct inotify_softc *, + __uint32_t, __uint32_t *, struct thread *); +void vn_inotify_revoke(struct vnode *); + +/* Log an inotify event. */ +#define INOTIFY(vp, ev) do { \ + if (__predict_false((vn_irflag_read(vp) & (VIRF_INOTIFY | \ + VIRF_INOTIFY_PARENT)) != 0)) \ + VOP_INOTIFY((vp), NULL, NULL, (ev), 0); \ +} while (0) + +/* Log an inotify event using a specific name for the vnode. 
*/ +#define INOTIFY_NAME(vp, dvp, cnp, ev) do { \ + if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) \ + VOP_INOTIFY((vp), (dvp), (cnp), (ev), 0); \ +} while (0) + +extern __uint32_t inotify_rename_cookie; + +#define INOTIFY_MOVE(vp, fdvp, fcnp, tvp, tdvp, tcnp) do { \ + if (__predict_false((vn_irflag_read(fdvp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(tdvp) & VIRF_INOTIFY) != 0 || \ + (vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) { \ + __uint32_t cookie; \ + \ + cookie = atomic_fetchadd_32(&inotify_rename_cookie, 1); \ + VOP_INOTIFY((vp), (fdvp), (fcnp), IN_MOVED_FROM, cookie); \ + VOP_INOTIFY((vp), (tdvp), (tcnp), IN_MOVED_TO, cookie); \ + } \ + if ((tvp) != NULL) \ + INOTIFY_NAME((tvp), (tdvp), (tcnp), _IN_MOVE_DELETE); \ +} while (0) + +#define INOTIFY_REVOKE(vp) do { \ + if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) \ + vn_inotify_revoke((vp)); \ +} while (0) + +#else +#include <sys/cdefs.h> + +__BEGIN_DECLS +int inotify_init(void); +int inotify_init1(int flags); +int inotify_add_watch(int fd, const char *pathname, __uint32_t mask); +int inotify_add_watch_at(int fd, int dfd, const char *pathname, + __uint32_t mask); +int inotify_rm_watch(int fd, int wd); +__END_DECLS +#endif /* !_KERNEL */ + +#endif /* !_INOTIFY_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index a6f858e02395..f6480b173a5c 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -267,6 +267,7 @@ struct mount { int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */ int mnt_upper_pending; /* (i) # of pending ops on mnt_uppers */ struct lock mnt_explock; /* vfs_export walkers lock */ + struct lock mnt_renamelock; /* renames and O_RESOLVE_BENEATH */ TAILQ_HEAD(, mount_upper_node) mnt_uppers; /* (i) upper mounts over us */ TAILQ_HEAD(, mount_upper_node) mnt_notify; /* (i) upper mounts for notification */ STAILQ_ENTRY(mount) mnt_taskqueue_link; /* (d) our place in deferred unmount list */ diff --git a/sys/sys/namei.h b/sys/sys/namei.h index 5c245235ace5..6008d83f729d 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -108,7 +108,12 @@ struct nameidata { * through the VOP interface. */ struct componentname ni_cnd; + + /* Serving RBENEATH. */ struct nameicap_tracker_head ni_cap_tracker; + struct vnode *ni_rbeneath_dpp; + struct mount *ni_nctrack_mnt; + /* * Private helper data for UFS, must be at the end. See * NDINIT_PREFILL(). @@ -235,6 +240,10 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, panic("namei data not inited"); \ if (((arg)->ni_debugflags & NAMEI_DBG_HADSTARTDIR) != 0) \ panic("NDREINIT on namei data with NAMEI_DBG_HADSTARTDIR"); \ + if ((arg)->ni_nctrack_mnt != NULL) \ + panic("NDREINIT on namei data with leaked ni_nctrack_mnt"); \ + if (!TAILQ_EMPTY(&(arg)->ni_cap_tracker)) \ + panic("NDREINIT on namei data with leaked ni_cap_tracker"); \ (arg)->ni_debugflags = NAMEI_DBG_INITED; \ } #else @@ -259,6 +268,9 @@ do { \ _ndp->ni_resflags = 0; \ filecaps_init(&_ndp->ni_filecaps); \ _ndp->ni_rightsneeded = _rightsp; \ + _ndp->ni_rbeneath_dpp = NULL; \ + _ndp->ni_nctrack_mnt = NULL; \ + TAILQ_INIT(&_ndp->ni_cap_tracker); \ } while (0) #define NDREINIT(ndp) do { \ diff --git a/sys/sys/param.h b/sys/sys/param.h index f1bf874cb5fd..af116d6e3f7a 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -74,7 +74,7 @@ * cannot include sys/param.h and should only be updated here. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 1500049 +#define __FreeBSD_version 1500051 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/proc.h b/sys/sys/proc.h index c7e1a1f51cb4..af9cafa99dd0 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -893,6 +893,8 @@ struct proc { #define P2_LOGSIGEXIT_ENABLE 0x00800000 /* Disable logging on sigexit */ #define P2_LOGSIGEXIT_CTL 0x01000000 /* Override kern.logsigexit */ +#define P2_HWT 0x02000000 /* Process is using HWT. */ + /* Flags protected by proctree_lock, kept in p_treeflags. */ #define P_TREE_ORPHANED 0x00000001 /* Reparented, on orphan list */ #define P_TREE_FIRST_ORPHAN 0x00000002 /* First element of orphan diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index b15dace8cfa0..61411890c85b 100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -122,6 +122,8 @@ struct uidinfo { long ui_kqcnt; /* (b) number of kqueues */ long ui_umtxcnt; /* (b) number of shared umtxs */ long ui_pipecnt; /* (b) consumption of pipe buffers */ + long ui_inotifycnt; /* (b) number of inotify descriptors */ + long ui_inotifywatchcnt; /* (b) number of inotify watches */ uid_t ui_uid; /* (a) uid */ u_int ui_ref; /* (b) reference count */ #ifdef RACCT @@ -144,6 +146,8 @@ int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to, int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval); int chgpipecnt(struct uidinfo *uip, int diff, rlim_t max); +int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval); +int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval); int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which, struct rlimit *limp); struct plimit diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h index dc4d88ce689f..0b79c841d149 100644 --- a/sys/sys/specialfd.h +++ b/sys/sys/specialfd.h @@ -30,6 +30,7 @@ enum specialfd_type { SPECIALFD_EVENTFD = 1, + SPECIALFD_INOTIFY = 2, }; struct specialfd_eventfd { @@ -37,4 +38,8 @@ struct specialfd_eventfd { int flags; }; +struct specialfd_inotify { + int flags; +}; + #endif /* !_SYS_SPECIALFD_H_ */ diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h index 68406a2dfc29..eec923d0b82e 100644 --- a/sys/sys/syscall.h +++ b/sys/sys/syscall.h @@ -529,4 +529,6 @@ #define SYS_fchroot 590 #define SYS_setcred 591 #define SYS_exterrctl 592 -#define SYS_MAXSYSCALL 593 +#define SYS_inotify_add_watch_at 593 +#define SYS_inotify_rm_watch 594 +#define SYS_MAXSYSCALL 595 diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk index 9a90a63f35a3..547242a73277 100644 --- a/sys/sys/syscall.mk +++ b/sys/sys/syscall.mk @@ -434,4 +434,6 @@ MIASM = \ getrlimitusage.o \ fchroot.o \ setcred.o \ - exterrctl.o + exterrctl.o \ + inotify_add_watch_at.o \ + inotify_rm_watch.o diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h index 94da81c84d25..94b5a0a7a95e 100644 --- a/sys/sys/sysproto.h +++ b/sys/sys/sysproto.h @@ -1891,6 +1891,16 @@ struct exterrctl_args { char flags_l_[PADL_(u_int)]; u_int flags; char flags_r_[PADR_(u_int)]; char ptr_l_[PADL_(void *)]; void * ptr; char ptr_r_[PADR_(void *)]; }; +struct inotify_add_watch_at_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char dfd_l_[PADL_(int)]; int dfd; char dfd_r_[PADR_(int)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)]; +}; +struct inotify_rm_watch_args { + char 
fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char wd_l_[PADL_(int)]; int wd; char wd_r_[PADR_(int)]; +}; int sys_exit(struct thread *, struct exit_args *); int sys_fork(struct thread *, struct fork_args *); int sys_read(struct thread *, struct read_args *); @@ -2293,6 +2303,8 @@ int sys_getrlimitusage(struct thread *, struct getrlimitusage_args *); int sys_fchroot(struct thread *, struct fchroot_args *); int sys_setcred(struct thread *, struct setcred_args *); int sys_exterrctl(struct thread *, struct exterrctl_args *); +int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *); +int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *); #ifdef COMPAT_43 @@ -3275,6 +3287,8 @@ int freebsd13_swapoff(struct thread *, struct freebsd13_swapoff_args *); #define SYS_AUE_fchroot AUE_NULL #define SYS_AUE_setcred AUE_SETCRED #define SYS_AUE_exterrctl AUE_NULL +#define SYS_AUE_inotify_add_watch_at AUE_INOTIFY +#define SYS_AUE_inotify_rm_watch AUE_INOTIFY #undef PAD_ #undef PADL_ diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h index f5caea2e3919..c12343e5d0fd 100644 --- a/sys/sys/unistd.h +++ b/sys/sys/unistd.h @@ -156,6 +156,7 @@ #define _PC_DEALLOC_PRESENT 65 #define _PC_NAMEDATTR_ENABLED 66 #define _PC_HAS_NAMEDATTR 67 +#define _PC_HAS_HIDDENSYSTEM 68 #endif /* From OpenSolaris, used by SEEK_DATA/SEEK_HOLE. */ diff --git a/sys/sys/user.h b/sys/sys/user.h index f94a91ca1238..103236b6ed1b 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -265,6 +265,7 @@ struct user { #define KF_TYPE_DEV 12 #define KF_TYPE_EVENTFD 13 #define KF_TYPE_TIMERFD 14 +#define KF_TYPE_INOTIFY 15 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -456,6 +457,10 @@ struct kinfo_file { int32_t kf_kqueue_count; int32_t kf_kqueue_state; } kf_kqueue; + struct { + uint64_t kf_inotify_npending; + uint64_t kf_inotify_nbpending; + } kf_inotify; } kf_un; }; uint16_t kf_status; /* Status flags. */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index bed20f607339..3ed469bdce6d 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -86,11 +86,13 @@ enum vgetstate { * it from v_data. If non-null, this area is freed in getnewvnode(). 
*/ -struct namecache; struct cache_fpl; +struct inotify_watch; +struct namecache; struct vpollinfo { struct mtx vpi_lock; /* lock to protect below */ + TAILQ_HEAD(, inotify_watch) vpi_inotify; /* list of inotify watchers */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ @@ -248,6 +250,9 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes"); #define VIRF_CROSSMP 0x0010 /* Cross-mp vnode, no locking */ #define VIRF_NAMEDDIR 0x0020 /* Named attribute directory */ #define VIRF_NAMEDATTR 0x0040 /* Named attribute */ +#define VIRF_INOTIFY 0x0080 /* This vnode is being watched */ +#define VIRF_INOTIFY_PARENT 0x0100 /* A parent of this vnode may be being + watched */ #define VI_UNUSED0 0x0001 /* unused */ #define VI_MOUNT 0x0002 /* Mount in progress */ @@ -667,6 +672,7 @@ char *cache_symlink_alloc(size_t size, int flags); void cache_symlink_free(char *string, size_t size); int cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len); +void cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie); void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp); void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp); @@ -869,8 +875,10 @@ int vop_stdfsync(struct vop_fsync_args *); int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); -int vop_stdioctl(struct vop_ioctl_args *); int vop_stdneed_inactive(struct vop_need_inactive_args *); +int vop_stdinotify(struct vop_inotify_args *); +int vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *); +int vop_stdioctl(struct vop_ioctl_args *); int vop_stdkqfilter(struct vop_kqfilter_args *); int vop_stdlock(struct vop_lock1_args *); int vop_stdunlock(struct vop_unlock_args *); @@ -910,9 +918,12 @@ int dead_read(struct vop_read_args *ap); int dead_write(struct vop_write_args *ap); /* These are called from within the actual VOPS. */ +void vop_allocate_post(void *a, int rc); +void vop_copy_file_range_post(void *ap, int rc); void vop_close_post(void *a, int rc); void vop_create_pre(void *a); void vop_create_post(void *a, int rc); +void vop_deallocate_post(void *a, int rc); void vop_whiteout_pre(void *a); void vop_whiteout_post(void *a, int rc); void vop_deleteextattr_pre(void *a); @@ -1020,9 +1031,12 @@ void vop_rename_fail(struct vop_rename_args *ap); #define VOP_WRITE_POST(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ - if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \ - VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \ - | (noffset > osize ? NOTE_EXTEND : 0)); \ + if (noffset > ooffset) { \ + if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \ + VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE | \ + (noffset > osize ? 
NOTE_EXTEND : 0)); \ + } \ + INOTIFY((ap)->a_vp, IN_MODIFY); \ } #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__) diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk index d23c2af9bd9a..e829105197cc 100644 --- a/sys/tools/vnode_if.awk +++ b/sys/tools/vnode_if.awk @@ -193,6 +193,7 @@ if (cfile) { printc(common_head \ "#include <sys/param.h>\n" \ "#include <sys/event.h>\n" \ + "#include <sys/inotify.h>\n" \ "#include <sys/kernel.h>\n" \ "#include <sys/mount.h>\n" \ "#include <sys/sdt.h>\n" \ diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 891e490a7031..75f5fe716c31 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1012,7 +1012,6 @@ ffs_mountfs(struct vnode *odevvp, struct mount *mp, struct thread *td) else ump->um_check_blkno = NULL; mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); - sx_init(&ump->um_checkpath_lock, "uchpth"); fs->fs_ronly = ronly; fs->fs_active = NULL; mp->mnt_data = ump; @@ -1182,7 +1181,6 @@ out: } if (ump != NULL) { mtx_destroy(UFS_MTX(ump)); - sx_destroy(&ump->um_checkpath_lock); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; @@ -1306,7 +1304,6 @@ ffs_unmount(struct mount *mp, int mntflags) vrele(ump->um_odevvp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); - sx_destroy(&ump->um_checkpath_lock); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index eaf37c58756b..3f9c95e934fc 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1412,7 +1412,6 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, vp = tvp = ITOV(target); mp = vp->v_mount; *wait_ino = 0; - sx_assert(&VFSTOUFS(mp)->um_checkpath_lock, SA_XLOCKED); if (target->i_number == source_ino) return (EEXIST); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 9aea01e70951..53fac4b0665e 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1273,9 +1273,9 @@ ufs_rename( struct mount *mp; ino_t ino; seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s; - bool checkpath_locked, want_seqc_end; + bool want_seqc_end; - checkpath_locked = want_seqc_end = false; + want_seqc_end = false; endoff = 0; mp = tdvp->v_mount; @@ -1427,10 +1427,6 @@ relock: } vfs_ref(mp); MPASS(!want_seqc_end); - if (checkpath_locked) { - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; - } VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); @@ -1484,8 +1480,6 @@ relock: if (error) goto unlockout; - sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = true; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* @@ -1493,8 +1487,6 @@ relock: * everything else and VGET before restarting. 
*/ if (ino) { - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); VOP_UNLOCK(tdvp); @@ -1574,9 +1566,6 @@ relock: vn_seqc_write_end(fdvp); want_seqc_end = false; vfs_ref(mp); - MPASS(checkpath_locked); - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); @@ -1763,9 +1752,6 @@ unlockout: vn_seqc_write_end(fdvp); } - if (checkpath_locked) - sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); - vput(fdvp); vput(fvp); @@ -2734,6 +2720,9 @@ ufs_pathconf( case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; break; + case _PC_HAS_HIDDENSYSTEM: + *ap->a_retval = 1; + break; default: error = vop_stdpathconf(ap); diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index 5c7fa11dae6a..d33b01e4425e 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -97,8 +97,6 @@ struct ufsmount { uint64_t um_maxsymlinklen; /* (c) max size of short symlink */ struct mtx um_lock; /* (c) Protects ufsmount & fs */ - struct sx um_checkpath_lock; /* (c) Protects ufs_checkpath() - result */ struct mount_softdeps *um_softdep; /* (c) softdep mgmt structure */ struct vnode *um_quotas[MAXQUOTAS]; /* (q) pointer to quota files */ struct ucred *um_cred[MAXQUOTAS]; /* (q) quota file access cred */ diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 46fd212df299..501ace32bd11 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -41,6 +41,7 @@ */ #include "opt_hwpmc_hooks.h" +#include "opt_hwt_hooks.h" #include "opt_vm.h" #define EXTERR_CATEGORY EXTERR_CAT_MMAP @@ -95,6 +96,10 @@ #include <sys/pmckern.h> #endif +#ifdef HWT_HOOKS +#include <dev/hwt/hwt_hook.h> +#endif + int old_mlock = 0; SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0, "Do not apply RLIMIT_MEMLOCK on mlockall"); @@ -613,6 +618,17 @@ kern_munmap(struct thread *td, uintptr_t addr0, size_t size) #endif rv = vm_map_delete(map, addr, end); +#ifdef HWT_HOOKS + if (HWT_HOOK_INSTALLED && rv == KERN_SUCCESS) { + struct hwt_record_entry ent; + + ent.addr = (uintptr_t) addr; + ent.fullpath = NULL; + ent.record_type = HWT_RECORD_MUNMAP; + HWT_CALL_HOOK(td, HWT_RECORD, &ent); + } +#endif + #ifdef HWPMC_HOOKS if (rv == KERN_SUCCESS && __predict_false(pmc_handled)) { /* downgrade the lock to prevent a LOR with the pmc-sx lock */ diff --git a/sys/x86/linux/linux_dummy_x86.c b/sys/x86/linux/linux_dummy_x86.c index ae1d23e811e7..221f5dbf5ba3 100644 --- a/sys/x86/linux/linux_dummy_x86.c +++ b/sys/x86/linux/linux_dummy_x86.c @@ -46,7 +46,5 @@ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); DUMMY(sysfs); DUMMY(quotactl); -/* Linux 2.6.13: */ -DUMMY(inotify_init); /* Linux 2.6.22: */ DUMMY(signalfd); diff --git a/tests/ci/tools/freebsdci b/tests/ci/tools/freebsdci index f0030fe00aba..7b4ce9669ab2 100755 --- a/tests/ci/tools/freebsdci +++ b/tests/ci/tools/freebsdci @@ -34,6 +34,7 @@ rcvar=freebsdci_enable start_cmd="firstboot_ci_run" stop_cmd=":" os_arch=$(uname -p) +parallelism=$(nproc) tardev=/dev/vtbd1 metadir=/meta istar=$(file -s ${tardev} | grep "POSIX tar archive" | wc -l) @@ -74,7 +75,7 @@ full_tests() tar xvf ${tardev} -C ${metadir} cd /usr/tests set +e - kyua test + kyua -v parallelism=${parallelism} test rc=$? 
set -e if [ ${rc} -ne 0 ] && [ ${rc} -ne 1 ]; then diff --git a/tests/sys/fs/fusefs/flush.cc b/tests/sys/fs/fusefs/flush.cc index 474cdbdb2203..7ba1218b3287 100644 --- a/tests/sys/fs/fusefs/flush.cc +++ b/tests/sys/fs/fusefs/flush.cc @@ -109,6 +109,36 @@ TEST_F(Flush, open_twice) EXPECT_EQ(0, close(fd)) << strerror(errno); } +/** + * Test for FOPEN_NOFLUSH: we expect that zero flush calls will be performed. + */ +TEST_F(Flush, open_noflush) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + uint64_t ino = 42; + uint64_t pid = (uint64_t)getpid(); + int fd; + + expect_lookup(RELPATH, ino, 1); + expect_open(ino, FOPEN_NOFLUSH, 1); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_FLUSH && + in.header.nodeid == ino && + in.body.flush.lock_owner == pid && + in.body.flush.fh == FH); + }, Eq(true)), + _) + ).Times(0); + expect_release(); + + fd = open(FULLPATH, O_WRONLY); + ASSERT_LE(0, fd) << strerror(errno); + // close MUST not flush + EXPECT_EQ(0, close(fd)) << strerror(errno); +} + /* * Some FUSE filesystem cache data internally and flush it on release. Such * filesystems may generate errors during release. On Linux, these get diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile index 8cc7beade3f3..26c0013696c7 100644 --- a/tests/sys/kern/Makefile +++ b/tests/sys/kern/Makefile @@ -19,6 +19,7 @@ ATF_TESTS_C+= kern_descrip_test TEST_METADATA.kern_descrip_test+= is_exclusive="true" ATF_TESTS_C+= fdgrowtable_test ATF_TESTS_C+= jail_lookup_root +ATF_TESTS_C+= inotify_test ATF_TESTS_C+= kill_zombie .if ${MK_OPENSSL} != "no" ATF_TESTS_C+= ktls_test @@ -85,6 +86,7 @@ LIBADD.sys_getrandom+= c LIBADD.sys_getrandom+= pthread LIBADD.ptrace_test+= pthread LIBADD.unix_seqpacket_test+= pthread +LIBADD.inotify_test+= util LIBADD.kcov+= pthread CFLAGS.ktls_test+= -DOPENSSL_API_COMPAT=0x10100000L LIBADD.ktls_test+= crypto util diff --git a/tests/sys/kern/inotify_test.c b/tests/sys/kern/inotify_test.c new file mode 100644 index 000000000000..ed7cef5d148c --- /dev/null +++ b/tests/sys/kern/inotify_test.c @@ -0,0 +1,862 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2025 Klara, Inc. + */ + +#include <sys/capsicum.h> +#include <sys/filio.h> +#include <sys/inotify.h> +#include <sys/ioccom.h> +#include <sys/mount.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysctl.h> +#include <sys/un.h> + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <mntopts.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <atf-c.h> + +static const char * +ev2name(int event) +{ + switch (event) { + case IN_ACCESS: + return ("IN_ACCESS"); + case IN_ATTRIB: + return ("IN_ATTRIB"); + case IN_CLOSE_WRITE: + return ("IN_CLOSE_WRITE"); + case IN_CLOSE_NOWRITE: + return ("IN_CLOSE_NOWRITE"); + case IN_CREATE: + return ("IN_CREATE"); + case IN_DELETE: + return ("IN_DELETE"); + case IN_DELETE_SELF: + return ("IN_DELETE_SELF"); + case IN_MODIFY: + return ("IN_MODIFY"); + case IN_MOVE_SELF: + return ("IN_MOVE_SELF"); + case IN_MOVED_FROM: + return ("IN_MOVED_FROM"); + case IN_MOVED_TO: + return ("IN_MOVED_TO"); + case IN_OPEN: + return ("IN_OPEN"); + default: + return (NULL); + } +} + +static void +close_checked(int fd) +{ + ATF_REQUIRE(close(fd) == 0); +} + +/* + * Make sure that no other events are pending, and close the inotify descriptor. 
+ */ +static void +close_inotify(int fd) +{ + int n; + + ATF_REQUIRE(ioctl(fd, FIONREAD, &n) == 0); + ATF_REQUIRE(n == 0); + close_checked(fd); +} + +static uint32_t +consume_event_cookie(int ifd, int wd, int event, int flags, const char *name) +{ + struct inotify_event *ev; + size_t evsz, namelen; + ssize_t n; + uint32_t cookie; + + /* Only read one record. */ + namelen = name == NULL ? 0 : strlen(name); + evsz = sizeof(*ev) + _IN_NAMESIZE(namelen); + ev = malloc(evsz); + ATF_REQUIRE(ev != NULL); + + n = read(ifd, ev, evsz); + ATF_REQUIRE_MSG(n >= 0, "failed to read event %s", ev2name(event)); + ATF_REQUIRE((size_t)n >= sizeof(*ev)); + ATF_REQUIRE((size_t)n == sizeof(*ev) + ev->len); + ATF_REQUIRE((size_t)n == evsz); + + ATF_REQUIRE_MSG((ev->mask & IN_ALL_EVENTS) == event, + "expected event %#x, got %#x", event, ev->mask); + ATF_REQUIRE_MSG((ev->mask & _IN_ALL_RETFLAGS) == flags, + "expected flags %#x, got %#x", flags, ev->mask); + ATF_REQUIRE_MSG(ev->wd == wd, + "expected wd %d, got %d", wd, ev->wd); + ATF_REQUIRE_MSG(name == NULL || strcmp(name, ev->name) == 0, + "expected name '%s', got '%s'", name, ev->name); + cookie = ev->cookie; + if ((ev->mask & (IN_MOVED_FROM | IN_MOVED_TO)) == 0) + ATF_REQUIRE(cookie == 0); + free(ev); + return (cookie); +} + +/* + * Read an event from the inotify file descriptor and check that it + * matches the expected values. + */ +static void +consume_event(int ifd, int wd, int event, int flags, const char *name) +{ + (void)consume_event_cookie(ifd, wd, event, flags, name); +} + +static int +inotify(int flags) +{ + int ifd; + + ifd = inotify_init1(flags); + ATF_REQUIRE(ifd != -1); + return (ifd); +} + +static void +mount_nullfs(char *dir, char *src) +{ + struct iovec *iov; + char errmsg[1024]; + int error, iovlen; + + iov = NULL; + iovlen = 0; + + build_iovec(&iov, &iovlen, "fstype", "nullfs", (size_t)-1); + build_iovec(&iov, &iovlen, "fspath", dir, (size_t)-1); + build_iovec(&iov, &iovlen, "target", src, (size_t)-1); + build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); + + errmsg[0] = '\0'; + error = nmount(iov, iovlen, 0); + ATF_REQUIRE_MSG(error == 0, + "mount nullfs %s %s: %s", src, dir, + errmsg[0] == '\0' ? strerror(errno) : errmsg); + + free_iovec(&iov, &iovlen); +} + +static void +mount_tmpfs(const char *dir) +{ + struct iovec *iov; + char errmsg[1024]; + int error, iovlen; + + iov = NULL; + iovlen = 0; + + build_iovec(&iov, &iovlen, "fstype", "tmpfs", (size_t)-1); + build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, dir), + (size_t)-1); + build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); + + errmsg[0] = '\0'; + error = nmount(iov, iovlen, 0); + ATF_REQUIRE_MSG(error == 0, + "mount tmpfs %s: %s", dir, + errmsg[0] == '\0' ? strerror(errno) : errmsg); + + free_iovec(&iov, &iovlen); +} + +static int +watch_file(int ifd, int events, char *path) +{ + int fd, wd; + + strncpy(path, "test.XXXXXX", PATH_MAX); + fd = mkstemp(path); + ATF_REQUIRE(fd != -1); + close_checked(fd); + + wd = inotify_add_watch(ifd, path, events); + ATF_REQUIRE(wd != -1); + + return (wd); +} + +static int +watch_dir(int ifd, int events, char *path) +{ + char *p; + int wd; + + strlcpy(path, "test.XXXXXX", PATH_MAX); + p = mkdtemp(path); + ATF_REQUIRE(p == path); + + wd = inotify_add_watch(ifd, path, events); + ATF_REQUIRE(wd != -1); + + return (wd); +} + +/* + * Verify that Capsicum restrictions are applied as expected. 
+ */ +ATF_TC_WITHOUT_HEAD(inotify_capsicum); +ATF_TC_BODY(inotify_capsicum, tc) +{ + int error, dfd, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + ATF_REQUIRE(ifd != -1); + + dfd = open(".", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + + error = mkdirat(dfd, "testdir", 0755); + ATF_REQUIRE(error == 0); + + error = cap_enter(); + ATF_REQUIRE(error == 0); + + /* + * Plain inotify_add_watch() is disallowed. + */ + wd = inotify_add_watch(ifd, ".", IN_DELETE_SELF); + ATF_REQUIRE_ERRNO(ECAPMODE, wd == -1); + wd = inotify_add_watch_at(ifd, dfd, "testdir", IN_DELETE_SELF); + ATF_REQUIRE(wd >= 0); + + /* + * Generate a record and consume it. + */ + error = unlinkat(dfd, "testdir", AT_REMOVEDIR); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_checked(dfd); + close_inotify(ifd); +} + +/* + * Make sure that duplicate, back-to-back events are coalesced. + */ +ATF_TC_WITHOUT_HEAD(inotify_coalesce); +ATF_TC_BODY(inotify_coalesce, tc) +{ + char file[PATH_MAX], path[PATH_MAX]; + int fd, fd1, ifd, n, wd; + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it. */ + wd = watch_dir(ifd, IN_OPEN, path); + /* Create a file in the directory and open it. */ + snprintf(file, sizeof(file), "%s/file", path); + fd = open(file, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + fd = open(file, O_RDWR); + ATF_REQUIRE(fd != -1); + fd1 = open(file, O_RDONLY); + ATF_REQUIRE(fd1 != -1); + close_checked(fd1); + close_checked(fd); + + consume_event(ifd, wd, IN_OPEN, 0, "file"); + ATF_REQUIRE(ioctl(ifd, FIONREAD, &n) == 0); + ATF_REQUIRE(n == 0); + + close_inotify(ifd); +} + +/* + * Check handling of IN_MASK_CREATE. + */ +ATF_TC_WITHOUT_HEAD(inotify_mask_create); +ATF_TC_BODY(inotify_mask_create, tc) +{ + char path[PATH_MAX]; + int ifd, wd, wd1; + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it. */ + wd = watch_dir(ifd, IN_CREATE, path); + /* Updating the watch with IN_MASK_CREATE should result in an error. */ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_CREATE); + ATF_REQUIRE_ERRNO(EEXIST, wd1 == -1); + /* It's an error to specify IN_MASK_ADD with IN_MASK_CREATE. */ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_ADD | + IN_MASK_CREATE); + ATF_REQUIRE_ERRNO(EINVAL, wd1 == -1); + /* Updating the watch without IN_MASK_CREATE should work. */ + wd1 = inotify_add_watch(ifd, path, IN_MODIFY); + ATF_REQUIRE(wd1 != -1); + ATF_REQUIRE_EQ(wd, wd1); + + close_inotify(ifd); +} + +/* + * Make sure that inotify cooperates with nullfs: if a lower vnode is the + * subject of an event, the upper vnode should be notified, and if the upper + * vnode is the subject of an event, the lower vnode should be notified. + */ +ATF_TC_WITH_CLEANUP(inotify_nullfs); +ATF_TC_HEAD(inotify_nullfs, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} +ATF_TC_BODY(inotify_nullfs, tc) +{ + char path[PATH_MAX], *p; + int dfd, error, fd, ifd, mask, wd; + + mask = IN_CREATE | IN_OPEN; + + ifd = inotify(IN_NONBLOCK); + + strlcpy(path, "./test.XXXXXX", sizeof(path)); + p = mkdtemp(path); + ATF_REQUIRE(p == path); + + error = mkdir("./mnt", 0755); + ATF_REQUIRE(error == 0); + + /* Mount the testdir onto ./mnt. */ + mount_nullfs("./mnt", path); + + wd = inotify_add_watch(ifd, "./mnt", mask); + ATF_REQUIRE(wd != -1); + + /* Create a file in the lower directory and open it. 
*/ + dfd = open(path, O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + fd = openat(dfd, "file", O_RDWR | O_CREAT, 0644); + close_checked(fd); + close_checked(dfd); + + /* We should see events via the nullfs mount. */ + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + consume_event(ifd, wd, IN_CREATE, 0, "file"); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + error = inotify_rm_watch(ifd, wd); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + /* Watch the lower directory. */ + wd = inotify_add_watch(ifd, path, mask); + ATF_REQUIRE(wd != -1); + /* ... and create a file in the upper directory and open it. */ + dfd = open("./mnt", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + fd = openat(dfd, "file2", O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + close_checked(dfd); + + /* We should see events via the lower directory. */ + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + consume_event(ifd, wd, IN_CREATE, 0, "file2"); + consume_event(ifd, wd, IN_OPEN, 0, "file2"); + + close_inotify(ifd); +} +ATF_TC_CLEANUP(inotify_nullfs, tc) +{ + int error; + + error = unmount("./mnt", 0); + if (error != 0) { + perror("unmount"); + exit(1); + } +} + +/* + * Make sure that exceeding max_events pending events results in an overflow + * event. + */ +ATF_TC_WITHOUT_HEAD(inotify_queue_overflow); +ATF_TC_BODY(inotify_queue_overflow, tc) +{ + char path[PATH_MAX]; + size_t size; + int error, dfd, ifd, max, wd; + + size = sizeof(max); + error = sysctlbyname("vfs.inotify.max_queued_events", &max, &size, NULL, + 0); + ATF_REQUIRE(error == 0); + + ifd = inotify(IN_NONBLOCK); + + /* Create a directory and watch it for file creation events. */ + wd = watch_dir(ifd, IN_CREATE, path); + dfd = open(path, O_DIRECTORY); + ATF_REQUIRE(dfd != -1); + /* Generate max+1 file creation events. */ + for (int i = 0; i < max + 1; i++) { + char name[NAME_MAX]; + int fd; + + (void)snprintf(name, sizeof(name), "file%d", i); + fd = openat(dfd, name, O_CREAT | O_RDWR, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + } + + /* + * Read our events. We should see files 0..max-1 and then an overflow + * event. + */ + for (int i = 0; i < max; i++) { + char name[NAME_MAX]; + + (void)snprintf(name, sizeof(name), "file%d", i); + consume_event(ifd, wd, IN_CREATE, 0, name); + } + + /* Look for an overflow event. */ + consume_event(ifd, -1, 0, IN_Q_OVERFLOW, NULL); + + close_checked(dfd); + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_access_file); +ATF_TC_BODY(inotify_event_access_file, tc) +{ + char path[PATH_MAX], buf[16]; + off_t nb; + ssize_t n; + int error, fd, fd1, ifd, s[2], wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_ACCESS, path); + + fd = open(path, O_RDWR); + n = write(fd, "test", 4); + ATF_REQUIRE(n == 4); + + /* A simple read(2) should generate an access. */ + ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0); + n = read(fd, buf, sizeof(buf)); + ATF_REQUIRE(n == 4); + ATF_REQUIRE(memcmp(buf, "test", 4) == 0); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + + /* copy_file_range(2) should as well. */ + ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0); + fd1 = open("sink", O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd1 != -1); + n = copy_file_range(fd, NULL, fd1, NULL, 4, 0); + ATF_REQUIRE(n == 4); + close_checked(fd1); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + + /* As should sendfile(2). 
*/ + error = socketpair(AF_UNIX, SOCK_STREAM, 0, s); + ATF_REQUIRE(error == 0); + error = sendfile(fd, s[0], 0, 4, NULL, &nb, 0); + ATF_REQUIRE(error == 0); + ATF_REQUIRE(nb == 4); + consume_event(ifd, wd, IN_ACCESS, 0, NULL); + close_checked(s[0]); + close_checked(s[1]); + + close_checked(fd); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_access_dir); +ATF_TC_BODY(inotify_event_access_dir, tc) +{ + char root[PATH_MAX], path[PATH_MAX]; + struct dirent *ent; + DIR *dir; + int error, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_ACCESS, root); + snprintf(path, sizeof(path), "%s/dir", root); + error = mkdir(path, 0755); + ATF_REQUIRE(error == 0); + + /* Read an entry and generate an access. */ + dir = opendir(path); + ATF_REQUIRE(dir != NULL); + ent = readdir(dir); + ATF_REQUIRE(ent != NULL); + ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 || + strcmp(ent->d_name, "..") == 0); + ATF_REQUIRE(closedir(dir) == 0); + consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, "dir"); + + /* + * Reading the watched directory should generate an access event. + * This is contrary to Linux's inotify man page, which states that + * IN_ACCESS is only generated for accesses to objects in a watched + * directory. + */ + dir = opendir(root); + ATF_REQUIRE(dir != NULL); + ent = readdir(dir); + ATF_REQUIRE(ent != NULL); + ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 || + strcmp(ent->d_name, "..") == 0); + ATF_REQUIRE(closedir(dir) == 0); + consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_attrib); +ATF_TC_BODY(inotify_event_attrib, tc) +{ + char path[PATH_MAX]; + int error, ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_ATTRIB, path); + + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + error = fchmod(fd, 0600); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_ATTRIB, 0, NULL); + + error = fchown(fd, getuid(), getgid()); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_ATTRIB, 0, NULL); + + close_checked(fd); + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_close_nowrite); +ATF_TC_BODY(inotify_event_close_nowrite, tc) +{ + char file[PATH_MAX], file1[PATH_MAX], dir[PATH_MAX]; + int ifd, fd, wd1, wd2; + + ifd = inotify(IN_NONBLOCK); + + wd1 = watch_dir(ifd, IN_CLOSE_NOWRITE, dir); + wd2 = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, file); + + fd = open(dir, O_DIRECTORY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd1, IN_CLOSE_NOWRITE, IN_ISDIR, NULL); + + fd = open(file, O_RDONLY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd2, IN_CLOSE_NOWRITE, 0, NULL); + + snprintf(file1, sizeof(file1), "%s/file", dir); + fd = open(file1, O_RDONLY | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd1, IN_CLOSE_NOWRITE, 0, "file"); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_close_write); +ATF_TC_BODY(inotify_event_close_write, tc) +{ + char path[PATH_MAX]; + int ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, path); + + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_CLOSE_WRITE, 0, NULL); + + close_inotify(ifd); +} + +/* Verify that various operations in a directory generate IN_CREATE events. 
*/ +ATF_TC_WITHOUT_HEAD(inotify_event_create); +ATF_TC_BODY(inotify_event_create, tc) +{ + struct sockaddr_un sun; + char path[PATH_MAX], path1[PATH_MAX], root[PATH_MAX]; + ssize_t n; + int error, ifd, ifd1, fd, s, wd, wd1; + char b; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_CREATE, root); + + /* Regular file. */ + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + /* + * Make sure we get an event triggered by the fd used to create the + * file. + */ + ifd1 = inotify(IN_NONBLOCK); + wd1 = inotify_add_watch(ifd1, root, IN_MODIFY); + b = 42; + n = write(fd, &b, sizeof(b)); + ATF_REQUIRE(n == sizeof(b)); + close_checked(fd); + consume_event(ifd, wd, IN_CREATE, 0, "file"); + consume_event(ifd1, wd1, IN_MODIFY, 0, "file"); + close_inotify(ifd1); + + /* Hard link. */ + snprintf(path1, sizeof(path1), "%s/link", root); + error = link(path, path1); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "link"); + + /* Directory. */ + snprintf(path, sizeof(path), "%s/dir", root); + error = mkdir(path, 0755); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, IN_ISDIR, "dir"); + + /* Symbolic link. */ + snprintf(path1, sizeof(path1), "%s/symlink", root); + error = symlink(path, path1); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "symlink"); + + /* FIFO. */ + snprintf(path, sizeof(path), "%s/fifo", root); + error = mkfifo(path, 0644); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_CREATE, 0, "fifo"); + + /* Binding a socket. */ + s = socket(AF_UNIX, SOCK_STREAM, 0); + memset(&sun, 0, sizeof(sun)); + sun.sun_family = AF_UNIX; + sun.sun_len = sizeof(sun); + snprintf(sun.sun_path, sizeof(sun.sun_path), "%s/socket", root); + error = bind(s, (struct sockaddr *)&sun, sizeof(sun)); + ATF_REQUIRE(error == 0); + close_checked(s); + consume_event(ifd, wd, IN_CREATE, 0, "socket"); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_delete); +ATF_TC_BODY(inotify_event_delete, tc) +{ + char root[PATH_MAX], path[PATH_MAX], file[PATH_MAX]; + int error, fd, ifd, wd, wd2; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_DELETE | IN_DELETE_SELF, root); + + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + error = unlink(path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE, 0, "file"); + close_checked(fd); + + /* + * Make sure that renaming over a file generates a delete event when and + * only when that file is watched. 
+ */ + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + wd2 = inotify_add_watch(ifd, path, IN_DELETE | IN_DELETE_SELF); + ATF_REQUIRE(wd2 != -1); + snprintf(file, sizeof(file), "%s/file2", root); + fd = open(file, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + error = rename(file, path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd2, IN_DELETE_SELF, 0, NULL); + consume_event(ifd, wd2, 0, IN_IGNORED, NULL); + + error = unlink(path); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE, 0, "file"); + error = rmdir(root); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_move); +ATF_TC_BODY(inotify_event_move, tc) +{ + char dir1[PATH_MAX], dir2[PATH_MAX], path1[PATH_MAX], path2[PATH_MAX]; + char path3[PATH_MAX]; + int error, ifd, fd, wd1, wd2, wd3; + uint32_t cookie1, cookie2; + + ifd = inotify(IN_NONBLOCK); + + wd1 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir1); + wd2 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir2); + + snprintf(path1, sizeof(path1), "%s/file", dir1); + fd = open(path1, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + snprintf(path2, sizeof(path2), "%s/file2", dir2); + error = rename(path1, path2); + ATF_REQUIRE(error == 0); + cookie1 = consume_event_cookie(ifd, wd1, IN_MOVED_FROM, 0, "file"); + cookie2 = consume_event_cookie(ifd, wd2, IN_MOVED_TO, 0, "file2"); + ATF_REQUIRE_MSG(cookie1 == cookie2, + "expected cookie %u, got %u", cookie1, cookie2); + + snprintf(path2, sizeof(path2), "%s/dir", dir2); + error = rename(dir1, path2); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd1, IN_MOVE_SELF, IN_ISDIR, NULL); + consume_event(ifd, wd2, IN_MOVED_TO, IN_ISDIR, "dir"); + + wd3 = watch_file(ifd, IN_MOVE_SELF, path3); + error = rename(path3, "foo"); + ATF_REQUIRE(error == 0); + consume_event(ifd, wd3, IN_MOVE_SELF, 0, NULL); + + close_inotify(ifd); +} + +ATF_TC_WITHOUT_HEAD(inotify_event_open); +ATF_TC_BODY(inotify_event_open, tc) +{ + char root[PATH_MAX], path[PATH_MAX]; + int error, ifd, fd, wd; + + ifd = inotify(IN_NONBLOCK); + + wd = watch_dir(ifd, IN_OPEN, root); + + snprintf(path, sizeof(path), "%s/file", root); + fd = open(path, O_RDWR | O_CREAT, 0644); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + fd = open(path, O_PATH); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "file"); + + fd = open(root, O_DIRECTORY); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL); + + snprintf(path, sizeof(path), "%s/fifo", root); + error = mkfifo(path, 0644); + ATF_REQUIRE(error == 0); + fd = open(path, O_RDWR); + ATF_REQUIRE(fd != -1); + close_checked(fd); + consume_event(ifd, wd, IN_OPEN, 0, "fifo"); + + close_inotify(ifd); +} + +ATF_TC_WITH_CLEANUP(inotify_event_unmount); +ATF_TC_HEAD(inotify_event_unmount, tc) +{ + atf_tc_set_md_var(tc, "require.user", "root"); +} +ATF_TC_BODY(inotify_event_unmount, tc) +{ + int error, fd, ifd, wd; + + ifd = inotify(IN_NONBLOCK); + + error = mkdir("./root", 0755); + ATF_REQUIRE(error == 0); + + mount_tmpfs("./root"); + + error = mkdir("./root/dir", 0755); + ATF_REQUIRE(error == 0); + wd = inotify_add_watch(ifd, "./root/dir", IN_OPEN); + ATF_REQUIRE(wd >= 0); + + fd = open("./root/dir", O_RDONLY | O_DIRECTORY); + ATF_REQUIRE(fd != -1); + consume_event(ifd, wd, IN_OPEN, IN_ISDIR, 
NULL); + close_checked(fd); + + /* A regular unmount should fail, as inotify holds a vnode reference. */ + error = unmount("./root", 0); + ATF_REQUIRE_ERRNO(EBUSY, error == -1); + error = unmount("./root", MNT_FORCE); + ATF_REQUIRE_MSG(error == 0, + "unmounting ./root failed: %s", strerror(errno)); + + consume_event(ifd, wd, 0, IN_UNMOUNT, NULL); + consume_event(ifd, wd, 0, IN_IGNORED, NULL); + + close_inotify(ifd); +} +ATF_TC_CLEANUP(inotify_event_unmount, tc) +{ + (void)unmount("./root", MNT_FORCE); +} + +ATF_TP_ADD_TCS(tp) +{ + /* Tests for the inotify syscalls. */ + ATF_TP_ADD_TC(tp, inotify_capsicum); + ATF_TP_ADD_TC(tp, inotify_coalesce); + ATF_TP_ADD_TC(tp, inotify_mask_create); + ATF_TP_ADD_TC(tp, inotify_nullfs); + ATF_TP_ADD_TC(tp, inotify_queue_overflow); + /* Tests for the various inotify event types. */ + ATF_TP_ADD_TC(tp, inotify_event_access_file); + ATF_TP_ADD_TC(tp, inotify_event_access_dir); + ATF_TP_ADD_TC(tp, inotify_event_attrib); + ATF_TP_ADD_TC(tp, inotify_event_close_nowrite); + ATF_TP_ADD_TC(tp, inotify_event_close_write); + ATF_TP_ADD_TC(tp, inotify_event_create); + ATF_TP_ADD_TC(tp, inotify_event_delete); + ATF_TP_ADD_TC(tp, inotify_event_move); + ATF_TP_ADD_TC(tp, inotify_event_open); + ATF_TP_ADD_TC(tp, inotify_event_unmount); + return (atf_no_error()); +} diff --git a/tests/sys/net/if_bridge_test.sh b/tests/sys/net/if_bridge_test.sh index 2c6b039048e3..cc0b212aebd2 100755 --- a/tests/sys/net/if_bridge_test.sh +++ b/tests/sys/net/if_bridge_test.sh @@ -829,6 +829,398 @@ member_ifaddrs_vlan_cleanup() vnet_cleanup } +atf_test_case "vlan_pvid" "cleanup" +vlan_pvid_head() +{ + atf_set descr 'bridge with two ports with pvid set' + atf_set require.user root +} + +vlan_pvid_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 20 + + # With VLAN filtering enabled, traffic should be passed. + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Removed the untagged VLAN on one port; traffic should not be passed. 
+ ifconfig ${bridge} -untagged ${epone}a + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_filtered" "cleanup" +vlan_pvid_filtered_head() +{ + atf_set descr 'bridge with two ports with different pvids' + atf_set require.user root +} + +vlan_pvid_filtered_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 30 + + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_filtered_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_tagged" "cleanup" +vlan_pvid_tagged_head() +{ + atf_set descr 'bridge pvid with tagged frames for pvid' + atf_set require.user root +} + +vlan_pvid_tagged_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Create two tagged interfaces on the appropriate VLANs + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a untagged ${eptwo}a 20 + + # Tagged frames should not be passed. + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_tagged_cleanup() +{ + vnet_cleanup +} + +atf_test_case "vlan_pvid_1q" "cleanup" +vlan_pvid_1q_head() +{ + atf_set descr '802.1q tag addition and removal' + atf_set require.user root +} + +vlan_pvid_1q_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Set up one jail with an access port, and the other with a trunk port. + # This forces the bridge to add and remove .1q tags to bridge the + # traffic. + + jexec one ifconfig ${epone}b 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} addm ${epone}a untagged ${epone}a 20 + ifconfig ${bridge} addm ${eptwo}a + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_pvid_1q_cleanup() +{ + vnet_cleanup +} + +# +# Test vlan filtering. 
+# +atf_test_case "vlan_filtering" "cleanup" +vlan_filtering_head() +{ + atf_set descr 'tagged traffic with filtering' + atf_set require.user root +} + +vlan_filtering_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.20 create 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a vlanfilter ${epone}a + ifconfig ${bridge} addm ${eptwo}a vlanfilter ${eptwo}a + + # Right now there are no VLANs on the access list, so everything + # should be blocked. + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Set the untagged vlan on both ports to 20 and make sure traffic is + # still blocked. We intentionally do not pass tagged traffic for the + # untagged vlan. + atf_check -s exit:0 ifconfig ${bridge} untagged ${epone}a 20 + atf_check -s exit:0 ifconfig ${bridge} untagged ${eptwo}a 20 + + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + atf_check -s exit:0 ifconfig ${bridge} -untagged ${epone}a + atf_check -s exit:0 ifconfig ${bridge} -untagged ${eptwo}a + + # Add VLANs 10-30 to the access list; now access should be allowed. + ifconfig ${bridge} +tagged ${epone}a 10-30 + ifconfig ${bridge} +tagged ${eptwo}a 10-30 + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Remove vlan 20 from the access list, now access should be blocked + # again. + ifconfig ${bridge} -tagged ${epone}a 20 + ifconfig ${bridge} -tagged ${eptwo}a 20 + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_filtering_cleanup() +{ + vnet_cleanup +} + +# +# Test the ifconfig 'tagged' option. +# +atf_test_case "vlan_ifconfig_tagged" "cleanup" +vlan_ifconfig_tagged_head() +{ + atf_set descr 'test the ifconfig tagged option' + atf_set require.user root +} + +vlan_ifconfig_tagged_body() +{ + vnet_init + vnet_init_bridge + + ep=$(vnet_mkepair) + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} addm ${ep}a vlanfilter ${ep}a up + ifconfig ${ep}a up + + # To start with, no vlans should be configured. + atf_check -s exit:0 -o not-match:"tagged" ifconfig ${bridge} + + # Add vlans 100-149. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a 100-149 + atf_check -s exit:0 -o match:"tagged 100-149" ifconfig ${bridge} + + # Replace the vlan list with 139-199. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a 139-199 + atf_check -s exit:0 -o match:"tagged 139-199" ifconfig ${bridge} + + # Add vlans 100-170. + atf_check -s exit:0 ifconfig ${bridge} +tagged ${ep}a 100-170 + atf_check -s exit:0 -o match:"tagged 100-199" ifconfig ${bridge} + + # Remove vlans 104, 105, and 150-159 + atf_check -s exit:0 ifconfig ${bridge} -tagged ${ep}a 104,105,150-159 + atf_check -s exit:0 -o match:"tagged 100-103,106-149,160-199" \ + ifconfig ${bridge} + + # Remove the entire vlan list. + atf_check -s exit:0 ifconfig ${bridge} tagged ${ep}a none + atf_check -s exit:0 -o not-match:"tagged" ifconfig ${bridge} + + # Test some invalid vlans sets. 
+ for bad_vlan in -1 0 4096 4097 foo 0-10 4000-5000 foo-40 40-foo; do + atf_check -s exit:1 -e ignore \ + ifconfig ${bridge} tagged "$bad_vlan" + done +} + +vlan_ifconfig_tagged_cleanup() +{ + vnet_cleanup +} + +# +# Test a vlan(4) "SVI" interface on top of a bridge. +# +atf_test_case "vlan_svi" "cleanup" +vlan_svi_head() +{ + atf_set descr 'vlan bridge with an SVI' + atf_set require.user root +} + +vlan_svi_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.20 create 192.0.2.1/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${bridge} addm ${epone}a tagged ${epone}a 20 + + svi=$(vnet_mkvlan) + ifconfig ${svi} vlan 20 vlandev ${bridge} + ifconfig ${svi} inet 192.0.2.2/24 up + + atf_check -s exit:0 -o ignore ping -c 3 -t 1 192.0.2.1 +} + +vlan_svi_cleanup() +{ + vnet_cleanup +} + +# +# Test QinQ (802.1ad). +# +atf_test_case "vlan_qinq" "cleanup" +vlan_qinq_head() +{ + atf_set descr 'vlan filtering with QinQ traffic' + atf_set require.user root +} + +vlan_qinq_body() +{ + vnet_init + vnet_init_bridge + + epone=$(vnet_mkepair) + eptwo=$(vnet_mkepair) + + vnet_mkjail one ${epone}b + vnet_mkjail two ${eptwo}b + + # Create a QinQ trunk between the two jails. The outer (provider) tag + # is 5, and the inner tag is 10. + + jexec one ifconfig ${epone}b up + jexec one ifconfig ${epone}b.5 create vlanproto 802.1ad up + jexec one ifconfig ${epone}b.5.10 create inet 192.0.2.1/24 up + + jexec two ifconfig ${eptwo}b up + jexec two ifconfig ${eptwo}b.5 create vlanproto 802.1ad up + jexec two ifconfig ${eptwo}b.5.10 create inet 192.0.2.2/24 up + + bridge=$(vnet_mkbridge) + + ifconfig ${bridge} up + ifconfig ${epone}a up + ifconfig ${eptwo}a up + ifconfig ${bridge} addm ${epone}a vlanfilter ${epone}a + ifconfig ${bridge} addm ${eptwo}a vlanfilter ${eptwo}a + + # Right now there are no VLANs on the access list, so everything + # should be blocked. + atf_check -s exit:2 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:2 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 + + # Add the provider tag to the access list; now traffic should be passed. 
+ ifconfig ${bridge} +tagged ${epone}a 5 + ifconfig ${bridge} +tagged ${eptwo}a 5 + atf_check -s exit:0 -o ignore jexec one ping -c 3 -t 1 192.0.2.2 + atf_check -s exit:0 -o ignore jexec two ping -c 3 -t 1 192.0.2.1 +} + +vlan_qinq_cleanup() +{ + vnet_cleanup +} + atf_init_test_cases() { atf_add_test_case "bridge_transmit_ipv4_unicast" @@ -847,4 +1239,12 @@ atf_init_test_cases() atf_add_test_case "member_ifaddrs_enabled" atf_add_test_case "member_ifaddrs_disabled" atf_add_test_case "member_ifaddrs_vlan" + atf_add_test_case "vlan_pvid" + atf_add_test_case "vlan_pvid_1q" + atf_add_test_case "vlan_pvid_filtered" + atf_add_test_case "vlan_pvid_tagged" + atf_add_test_case "vlan_filtering" + atf_add_test_case "vlan_ifconfig_tagged" + atf_add_test_case "vlan_svi" + atf_add_test_case "vlan_qinq" } diff --git a/tests/sys/netpfil/pf/anchor.sh b/tests/sys/netpfil/pf/anchor.sh index fbac10240d8d..b4b52d7a24d6 100644 --- a/tests/sys/netpfil/pf/anchor.sh +++ b/tests/sys/netpfil/pf/anchor.sh @@ -401,6 +401,42 @@ include_cleanup() pft_cleanup } +atf_test_case "quick" "cleanup" +quick_head() +{ + atf_set descr 'Test quick on anchors' + atf_set require.user root +} + +quick_body() +{ + pft_init + + epair=$(vnet_mkepair) + vnet_mkjail alcatraz ${epair}a + + ifconfig ${epair}b 192.0.2.2/24 up + jexec alcatraz ifconfig ${epair}a 192.0.2.1/24 up + + # Sanity check + atf_check -s exit:0 -o ignore ping -c 1 192.0.2.1 + + jexec alcatraz pfctl -e + pft_set_rules alcatraz \ + "anchor quick {\n\ + pass\n\ + }" \ + "block" + + atf_check -s exit:0 -o ignore ping -c 1 192.0.2.1 + jexec alcatraz pfctl -sr -vv -a "*" +} + +quick_cleanup() +{ + pft_cleanup +} + atf_init_test_cases() { atf_add_test_case "pr183198" @@ -413,4 +449,5 @@ atf_init_test_cases() atf_add_test_case "counter" atf_add_test_case "nat" atf_add_test_case "include" + atf_add_test_case "quick" } diff --git a/tools/build/options/WITH_RUN_TESTS b/tools/build/options/WITH_RUN_TESTS new file mode 100644 index 000000000000..91b30522a3d3 --- /dev/null +++ b/tools/build/options/WITH_RUN_TESTS @@ -0,0 +1 @@ +Run tests as part of the build. 
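For reference, the sketch below shows a minimal userspace consumer of the inotify(2) interface declared in sys/sys/inotify.h earlier in this change. It follows the same read-and-iterate pattern as the new tests in tests/sys/kern/inotify_test.c; the watched path, event mask, and buffer size are illustrative assumptions rather than anything taken from the change itself.

/*
 * Minimal inotify(2) consumer sketch.  The watched directory, event
 * mask, and buffer size are illustrative.
 */
#include <sys/inotify.h>

#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	_Alignas(struct inotify_event) char buf[4096];
	struct inotify_event *ev;
	ssize_t n, off;
	int fd, wd;

	fd = inotify_init1(IN_CLOEXEC);
	if (fd == -1)
		err(1, "inotify_init1");

	/* Watch for creations and deletions in /tmp. */
	wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);
	if (wd == -1)
		err(1, "inotify_add_watch");

	/* A single read(2) may return several variable-length records. */
	n = read(fd, buf, sizeof(buf));
	if (n == -1)
		err(1, "read");
	for (off = 0; off < n; off += sizeof(*ev) + ev->len) {
		ev = (struct inotify_event *)(void *)(buf + off);
		printf("wd %d mask %#x name '%s'\n", ev->wd, ev->mask,
		    ev->len > 0 ? ev->name : "");
	}

	(void)inotify_rm_watch(fd, wd);
	close(fd);
	return (0);
}

The offset is advanced by sizeof(struct inotify_event) plus the per-record len field, which already accounts for the name's NUL terminator and alignment padding (_IN_NAMESIZE in the header), so consecutive records in one read are walked without any additional rounding.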
diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 9cc22d382de5..17ed43b55c5a 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -46,6 +46,7 @@ #include <sys/ktrace.h> #include <sys/mman.h> #include <sys/ioctl.h> +#include <sys/inotify.h> #include <sys/poll.h> #include <sys/socket.h> #include <sys/stat.h> @@ -105,6 +106,7 @@ static void ktrcsw(struct ktr_csw *); static void ktrcsw_old(struct ktr_csw_old *); static void ktruser(int, void *); static void ktrcaprights(cap_rights_t *); +static void ktrinotify(struct inotify_event *); static void ktritimerval(struct itimerval *it); static void ktrsockaddr(struct sockaddr *); static void ktrsplice(struct splice *); @@ -1861,6 +1863,14 @@ ktrtimeval(struct timeval *tv) } static void +ktrinotify(struct inotify_event *ev) +{ + printf( + "inotify { .wd = %d, .mask = %#x, .cookie = %u, .len = %u, .name = %s }\n", + ev->wd, ev->mask, ev->cookie, ev->len, ev->name); +} + +static void ktritimerval(struct itimerval *it) { @@ -2128,6 +2138,17 @@ ktrstruct(char *buf, size_t buflen) goto invalid; memcpy(&rights, data, datalen); ktrcaprights(&rights); + } else if (strcmp(name, "inotify") == 0) { + struct inotify_event *ev; + + if (datalen < sizeof(struct inotify_event) || + datalen > sizeof(struct inotify_event) + NAME_MAX + 1) + goto invalid; + ev = malloc(datalen); + if (ev == NULL) + err(1, "malloc"); + memcpy(ev, data, datalen); + ktrinotify(ev); } else if (strcmp(name, "itimerval") == 0) { if (datalen != sizeof(struct itimerval)) goto invalid; diff --git a/usr.bin/procstat/procstat.1 b/usr.bin/procstat/procstat.1 index cc775ffe133b..1e05e235e619 100644 --- a/usr.bin/procstat/procstat.1 +++ b/usr.bin/procstat/procstat.1 @@ -377,6 +377,8 @@ eventfd fifo .It h shared memory +.It i +inotify descriptor .It k kqueue .It m @@ -862,6 +864,7 @@ procstat: procstat_getprocs() .Xr sockstat 1 , .Xr cap_enter 2 , .Xr cap_rights_limit 2 , +.Xr inotify 2 , .Xr mlock 2 , .Xr mlockall 2 , .Xr libprocstat 3 , diff --git a/usr.bin/procstat/procstat_files.c b/usr.bin/procstat/procstat_files.c index d61cf1693053..aa4850632aa7 100644 --- a/usr.bin/procstat/procstat_files.c +++ b/usr.bin/procstat/procstat_files.c @@ -226,6 +226,10 @@ static struct cap_desc { { CAP_BINDAT, "ba" }, { CAP_CONNECTAT, "ca" }, + /* Inotify descriptor rights. */ + { CAP_INOTIFY_ADD, "ina" }, + { CAP_INOTIFY_RM, "inr" }, + /* Aliases and defines that combine multiple rights. 
*/ { CAP_PREAD, "prd" }, { CAP_PWRITE, "pwr" }, @@ -416,6 +420,11 @@ procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) xo_emit("{eq:fd_type/eventfd}"); break; + case PS_FST_TYPE_INOTIFY: + str = "i"; + xo_emit("{eq:fd_type/inotify}"); + break; + case PS_FST_TYPE_NONE: str = "?"; xo_emit("{eq:fd_type/none}"); diff --git a/usr.bin/sockstat/sockstat.c b/usr.bin/sockstat/sockstat.c index 52243910a31c..1a24ff67c321 100644 --- a/usr.bin/sockstat/sockstat.c +++ b/usr.bin/sockstat/sockstat.c @@ -951,7 +951,7 @@ formataddr(struct sockaddr_storage *ss, char *buf, size_t bufsize) } if (addrstr[0] == '\0') { error = cap_getnameinfo(capnet, sstosa(ss), ss->ss_len, - addrstr, sizeof(addrstr), buf, bufsize, NI_NUMERICHOST); + addrstr, sizeof(addrstr), NULL, 0, NI_NUMERICHOST); if (error) errx(1, "cap_getnameinfo()"); } diff --git a/usr.bin/truss/syscall.h b/usr.bin/truss/syscall.h index d79ef882cff0..47d973326dfb 100644 --- a/usr.bin/truss/syscall.h +++ b/usr.bin/truss/syscall.h @@ -99,6 +99,7 @@ enum Argtype { Getfsstatmode, Idtype, Ioctl, + Inotifyflags, Itimerwhich, Kldsymcmd, Kldunloadflags, diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index 47d6aef8f6ff..656d642e1f19 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -31,7 +31,6 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> /* * This file has routines used to print out system calls and their * arguments. @@ -316,6 +315,9 @@ static const struct syscall_decode decoded_syscalls[] = { { Ptr | OUT, 3 }, { Ptr | OUT, 4 } } }, { .name = "gettimeofday", .ret_type = 1, .nargs = 2, .args = { { Timeval | OUT, 0 }, { Ptr, 1 } } }, + { .name = "inotify_add_watch_at", .ret_type = 1, .nargs = 4, + .args = { { Int, 0 }, { Atfd, 1 }, { Name | IN, 2 }, + { Inotifyflags, 3 } } }, { .name = "ioctl", .ret_type = 1, .nargs = 3, .args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } }, { .name = "kevent", .ret_type = 1, .nargs = 6, @@ -2447,6 +2449,9 @@ print_arg(struct syscall_arg *sc, syscallarg_t *args, syscallarg_t *retval, print_integer_arg(sysdecode_getfsstat_mode, fp, args[sc->offset]); break; + case Inotifyflags: + print_mask_arg(sysdecode_inotifyflags, fp, args[sc->offset]); + break; case Itimerwhich: print_integer_arg(sysdecode_itimer, fp, args[sc->offset]); break; |
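The kdump, procstat, and truss hooks above decode inotify descriptors and events during tracing. As a companion sketch, the fragment below shows the capability-mode pattern exercised by the inotify_capsicum test earlier in this change: once cap_enter() has been called, path-based inotify_add_watch() fails with ECAPMODE, and watches must instead be added with inotify_add_watch_at() relative to a directory descriptor opened beforehand. The "logs" directory name is an illustrative assumption.

/*
 * Sketch: adding an inotify watch from capability mode, following the
 * inotify_capsicum test.  The "logs" directory name is illustrative.
 */
#include <sys/capsicum.h>
#include <sys/inotify.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	int dfd, ifd, wd;

	ifd = inotify_init1(IN_NONBLOCK);
	if (ifd == -1)
		err(1, "inotify_init1");

	/* Open the directory before entering capability mode. */
	dfd = open(".", O_RDONLY | O_DIRECTORY);
	if (dfd == -1)
		err(1, "open");

	if (cap_enter() != 0)
		err(1, "cap_enter");

	/* Path-based watches are rejected in capability mode. */
	wd = inotify_add_watch(ifd, "logs", IN_CREATE);
	if (wd != -1 || errno != ECAPMODE)
		errx(1, "expected ECAPMODE from inotify_add_watch");

	/* Directory-relative watches are still permitted. */
	wd = inotify_add_watch_at(ifd, dfd, "logs", IN_CREATE | IN_DELETE);
	if (wd == -1)
		err(1, "inotify_add_watch_at");

	close(dfd);
	close(ifd);
	return (0);
}

If the inotify descriptor is further restricted with cap_rights_limit(2), the CAP_INOTIFY_ADD and CAP_INOTIFY_RM rights listed in the procstat change above govern whether adding and removing watches remain permitted on it.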