aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--RELNOTES3
-rw-r--r--etc/mtree/BSD.usr.dist2
-rw-r--r--lib/libc/gen/Makefile.inc1
-rw-r--r--lib/libc/gen/Symbol.map3
-rw-r--r--lib/libc/gen/inotify.c48
-rw-r--r--lib/libprocstat/libprocstat.c5
-rw-r--r--lib/libprocstat/libprocstat.h1
-rw-r--r--lib/libsys/Makefile.sys6
-rw-r--r--lib/libsys/Symbol.sys.map2
-rw-r--r--lib/libsys/_libsys.h4
-rw-r--r--lib/libsys/inotify.2379
-rw-r--r--lib/libsys/syscalls.map4
-rw-r--r--lib/libsysdecode/Makefile1
-rw-r--r--lib/libsysdecode/flags.c9
-rw-r--r--lib/libsysdecode/mktables1
-rw-r--r--lib/libsysdecode/sysdecode.h1
-rw-r--r--libexec/flua/modules/lposix.c29
-rw-r--r--sbin/pfctl/parse.y8
-rw-r--r--sbin/pfctl/pfctl.86
-rw-r--r--sbin/pfctl/pfctl.c5
-rw-r--r--sbin/pfctl/pfctl.h2
-rw-r--r--sbin/pfctl/pfctl_table.c25
-rw-r--r--share/examples/Makefile5
-rw-r--r--share/examples/inotify/Makefile6
-rw-r--r--share/examples/inotify/inotify.c172
-rw-r--r--share/man/man4/pf.46
-rw-r--r--share/man/man4/rights.410
-rw-r--r--share/man/man9/Makefile2
-rw-r--r--share/man/man9/VOP_INOTIFY.960
-rw-r--r--stand/efi/loader/main.c1
-rw-r--r--sys/amd64/linux/linux_proto.h7
-rw-r--r--sys/amd64/linux/linux_sysent.c4
-rw-r--r--sys/amd64/linux/linux_systrace_args.c40
-rw-r--r--sys/amd64/linux/syscalls.master11
-rw-r--r--sys/amd64/linux32/linux32_proto.h9
-rw-r--r--sys/amd64/linux32/linux32_sysent.c6
-rw-r--r--sys/amd64/linux32/linux32_systrace_args.c54
-rw-r--r--sys/amd64/linux32/syscalls.master15
-rw-r--r--sys/arm/allwinner/aw_gpio.c8
-rw-r--r--sys/arm/allwinner/aw_rtc.c29
-rw-r--r--sys/arm/broadcom/bcm2835/bcm2835_gpio.c6
-rw-r--r--sys/arm/mv/mvebu_gpio.c1
-rw-r--r--sys/arm/nvidia/as3722_gpio.c2
-rw-r--r--sys/arm/nvidia/tegra_gpio.c1
-rw-r--r--sys/arm64/apple/apple_pinctrl.c20
-rw-r--r--sys/arm64/linux/linux_proto.h7
-rw-r--r--sys/arm64/linux/linux_sysent.c4
-rw-r--r--sys/arm64/linux/linux_systrace_args.c40
-rw-r--r--sys/arm64/linux/syscalls.master11
-rw-r--r--sys/arm64/nvidia/tegra210/max77620_gpio.c2
-rw-r--r--sys/arm64/rockchip/rk_gpio.c12
-rw-r--r--sys/bsm/audit_kevents.h1
-rw-r--r--sys/cam/mmc/mmc_da.c21
-rw-r--r--sys/compat/freebsd32/freebsd32_syscall.h4
-rw-r--r--sys/compat/freebsd32/freebsd32_syscalls.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_sysent.c2
-rw-r--r--sys/compat/freebsd32/freebsd32_systrace_args.c60
-rw-r--r--sys/compat/linux/linux_dummy.c4
-rw-r--r--sys/compat/linux/linux_file.c121
-rw-r--r--sys/compat/linux/linux_file.h32
-rw-r--r--sys/conf/files1
-rw-r--r--sys/contrib/dev/iwlwifi/iwl-debug.h4
-rw-r--r--sys/dev/gpio/acpi_gpiobus.c1
-rw-r--r--sys/dev/gpio/gpiobus.c79
-rw-r--r--sys/dev/gpio/gpiobus_internal.h47
-rw-r--r--sys/dev/gpio/gpiobusvar.h17
-rw-r--r--sys/dev/gpio/gpiopps.c2
-rw-r--r--sys/dev/gpio/ofw_gpiobus.c1
-rw-r--r--sys/dev/gpio/pl061.c12
-rw-r--r--sys/dev/gpio/pl061.h1
-rw-r--r--sys/dev/gpio/pl061_acpi.c15
-rw-r--r--sys/dev/gpio/pl061_fdt.c15
-rw-r--r--sys/dev/gpio/qoriq_gpio.c11
-rw-r--r--sys/dev/iicbus/gpio/tca64xx.c3
-rw-r--r--sys/dev/regulator/regulator_fixed.c8
-rw-r--r--sys/fs/fuse/fuse_vnops.c199
-rw-r--r--sys/fs/msdosfs/msdosfs_lookup.c1
-rw-r--r--sys/fs/msdosfs/msdosfs_vfsops.c3
-rw-r--r--sys/fs/msdosfs/msdosfs_vnops.c11
-rw-r--r--sys/fs/msdosfs/msdosfsmount.h1
-rw-r--r--sys/fs/nullfs/null_subr.c4
-rw-r--r--sys/fs/nullfs/null_vnops.c29
-rw-r--r--sys/i386/linux/linux_proto.h9
-rw-r--r--sys/i386/linux/linux_sysent.c6
-rw-r--r--sys/i386/linux/linux_systrace_args.c54
-rw-r--r--sys/i386/linux/syscalls.master15
-rw-r--r--sys/kern/init_sysent.c2
-rw-r--r--sys/kern/kern_resource.c21
-rw-r--r--sys/kern/kern_sendfile.c4
-rw-r--r--sys/kern/subr_capability.c4
-rw-r--r--sys/kern/sys_generic.c98
-rw-r--r--sys/kern/syscalls.c2
-rw-r--r--sys/kern/syscalls.master15
-rw-r--r--sys/kern/systrace_args.c60
-rw-r--r--sys/kern/vfs_cache.c59
-rw-r--r--sys/kern/vfs_default.c17
-rw-r--r--sys/kern/vfs_inotify.c1008
-rw-r--r--sys/kern/vfs_lookup.c177
-rw-r--r--sys/kern/vfs_mount.c2
-rw-r--r--sys/kern/vfs_subr.c100
-rw-r--r--sys/kern/vfs_syscalls.c20
-rw-r--r--sys/kern/vfs_vnops.c7
-rw-r--r--sys/kern/vnode_if.src24
-rw-r--r--sys/modules/iwlwifi/Makefile8
-rw-r--r--sys/net/ethernet.h17
-rw-r--r--sys/net/if_vlan_var.h7
-rw-r--r--sys/net/pfvar.h54
-rw-r--r--sys/netpfil/pf/pf.c173
-rw-r--r--sys/netpfil/pf/pf_if.c4
-rw-r--r--sys/netpfil/pf/pf_ioctl.c38
-rw-r--r--sys/netpfil/pf/pf_lb.c67
-rw-r--r--sys/netpfil/pf/pf_nl.c4
-rw-r--r--sys/netpfil/pf/pf_ruleset.c31
-rw-r--r--sys/netpfil/pf/pf_table.c18
-rw-r--r--sys/powerpc/mpc85xx/mpc85xx_gpio.c4
-rw-r--r--sys/riscv/allwinner/files.allwinner1
-rw-r--r--sys/riscv/conf/std.allwinner1
-rw-r--r--sys/sys/caprights.h2
-rw-r--r--sys/sys/capsicum.h8
-rw-r--r--sys/sys/exterr_cat.h2
-rw-r--r--sys/sys/file.h1
-rw-r--r--sys/sys/inotify.h150
-rw-r--r--sys/sys/mount.h1
-rw-r--r--sys/sys/namei.h12
-rw-r--r--sys/sys/resourcevar.h4
-rw-r--r--sys/sys/specialfd.h5
-rw-r--r--sys/sys/syscall.h4
-rw-r--r--sys/sys/syscall.mk4
-rw-r--r--sys/sys/sysproto.h14
-rw-r--r--sys/sys/user.h5
-rw-r--r--sys/sys/vnode.h24
-rw-r--r--sys/tools/vnode_if.awk1
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c3
-rw-r--r--sys/ufs/ufs/ufs_lookup.c1
-rw-r--r--sys/ufs/ufs/ufs_vnops.c18
-rw-r--r--sys/ufs/ufs/ufsmount.h2
-rw-r--r--sys/x86/linux/linux_dummy_x86.c2
-rwxr-xr-xtests/ci/tools/freebsdci3
-rw-r--r--tests/sys/kern/Makefile2
-rw-r--r--tests/sys/kern/inotify_test.c862
-rw-r--r--usr.bin/kdump/kdump.c21
-rw-r--r--usr.bin/procstat/procstat.13
-rw-r--r--usr.bin/procstat/procstat_files.c9
-rw-r--r--usr.bin/sockstat/sockstat.c2
-rw-r--r--usr.bin/truss/syscall.h1
-rw-r--r--usr.bin/truss/syscalls.c7
146 files changed, 4431 insertions, 681 deletions
diff --git a/RELNOTES b/RELNOTES
index 4933c8392552..09696a37998b 100644
--- a/RELNOTES
+++ b/RELNOTES
@@ -10,6 +10,9 @@ newline. Entries should be separated by a newline.
Changes to this file should not be MFCed.
+f1f230439fa4:
+ FreeBSD now implements the inotify(2) family of system calls.
+
50e733f19b37, 171f66b0c2ca:
These commits helped improve utilization of NFSv4.1/4.2
delegations. The changes are only used when the NFSv4
diff --git a/etc/mtree/BSD.usr.dist b/etc/mtree/BSD.usr.dist
index 97b555e50dc1..ffdd82ae9911 100644
--- a/etc/mtree/BSD.usr.dist
+++ b/etc/mtree/BSD.usr.dist
@@ -304,6 +304,8 @@
..
indent
..
+ inotify
+ ..
ipfilter
..
ipfw
diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc
index ad13aaa65621..4d064d18d36e 100644
--- a/lib/libc/gen/Makefile.inc
+++ b/lib/libc/gen/Makefile.inc
@@ -89,6 +89,7 @@ SRCS+= \
glob.c \
glob-compat11.c \
initgroups.c \
+ inotify.c \
isatty.c \
isinf.c \
isnan.c \
diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map
index 8faecf4b3048..50dbf3425964 100644
--- a/lib/libc/gen/Symbol.map
+++ b/lib/libc/gen/Symbol.map
@@ -464,6 +464,9 @@ FBSD_1.8 {
fdscandir_b;
fts_open_b;
glob_b;
+ inotify_add_watch;
+ inotify_init;
+ inotify_init1;
psiginfo;
rtld_get_var;
rtld_set_var;
diff --git a/lib/libc/gen/inotify.c b/lib/libc/gen/inotify.c
new file mode 100644
index 000000000000..7ce53aaccd58
--- /dev/null
+++ b/lib/libc/gen/inotify.c
@@ -0,0 +1,48 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+#include "namespace.h"
+#include <sys/fcntl.h>
+#include <sys/inotify.h>
+#include <sys/specialfd.h>
+#include "un-namespace.h"
+#include "libc_private.h"
+
+/*
+ * Provide compatibility with libinotify, which uses different values for these
+ * flags.
+ */
+#define IN_NONBLOCK_OLD 0x80000
+#define IN_CLOEXEC_OLD 0x00800
+
+int
+inotify_add_watch(int fd, const char *pathname, uint32_t mask)
+{
+ return (inotify_add_watch_at(fd, AT_FDCWD, pathname, mask));
+}
+
+int
+inotify_init1(int flags)
+{
+ struct specialfd_inotify args;
+
+ if ((flags & IN_NONBLOCK_OLD) != 0) {
+ flags &= ~IN_NONBLOCK_OLD;
+ flags |= IN_NONBLOCK;
+ }
+ if ((flags & IN_CLOEXEC_OLD) != 0) {
+ flags &= ~IN_CLOEXEC_OLD;
+ flags |= IN_CLOEXEC;
+ }
+ args.flags = flags;
+ return (__sys___specialfd(SPECIALFD_INOTIFY, &args, sizeof(args)));
+}
+
+int
+inotify_init(void)
+{
+ return (inotify_init1(0));
+}
diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c
index 29f464ef6414..eb8137f6c76f 100644
--- a/lib/libprocstat/libprocstat.c
+++ b/lib/libprocstat/libprocstat.c
@@ -625,6 +625,10 @@ procstat_getfiles_kvm(struct procstat *procstat, struct kinfo_proc *kp, int mmap
type = PS_FST_TYPE_EVENTFD;
data = file.f_data;
break;
+ case DTYPE_INOTIFY:
+ type = PS_FST_TYPE_INOTIFY;
+ data = file.f_data;
+ break;
default:
continue;
}
@@ -717,6 +721,7 @@ kinfo_type2fst(int kftype)
{ KF_TYPE_SOCKET, PS_FST_TYPE_SOCKET },
{ KF_TYPE_VNODE, PS_FST_TYPE_VNODE },
{ KF_TYPE_EVENTFD, PS_FST_TYPE_EVENTFD },
+ { KF_TYPE_INOTIFY, PS_FST_TYPE_INOTIFY },
{ KF_TYPE_UNKNOWN, PS_FST_TYPE_UNKNOWN }
};
#define NKFTYPES (sizeof(kftypes2fst) / sizeof(*kftypes2fst))
diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h
index 0e9a4214414c..548747f90171 100644
--- a/lib/libprocstat/libprocstat.h
+++ b/lib/libprocstat/libprocstat.h
@@ -71,6 +71,7 @@
#define PS_FST_TYPE_PROCDESC 13
#define PS_FST_TYPE_DEV 14
#define PS_FST_TYPE_EVENTFD 15
+#define PS_FST_TYPE_INOTIFY 16
/*
* Special descriptor numbers.
diff --git a/lib/libsys/Makefile.sys b/lib/libsys/Makefile.sys
index 491c765e9416..3eb4bf85153d 100644
--- a/lib/libsys/Makefile.sys
+++ b/lib/libsys/Makefile.sys
@@ -224,6 +224,7 @@ MAN+= abort2.2 \
getsockopt.2 \
gettimeofday.2 \
getuid.2 \
+ inotify.2 \
intro.2 \
ioctl.2 \
issetugid.2 \
@@ -448,6 +449,11 @@ MLINKS+=getrlimit.2 setrlimit.2
MLINKS+=getsockopt.2 setsockopt.2
MLINKS+=gettimeofday.2 settimeofday.2
MLINKS+=getuid.2 geteuid.2
+MLINKS+=inotify.2 inotify_init.2 \
+ inotify.2 inotify_init1.2 \
+ inotify.2 inotify_add_watch.2 \
+ inotify.2 inotify_add_watch_at.2 \
+ inotify.2 inotify_rm_watch.2
MLINKS+=intro.2 errno.2
MLINKS+=jail.2 jail_attach.2 \
jail.2 jail_get.2 \
diff --git a/lib/libsys/Symbol.sys.map b/lib/libsys/Symbol.sys.map
index 7fac1ed6160d..45e0160100af 100644
--- a/lib/libsys/Symbol.sys.map
+++ b/lib/libsys/Symbol.sys.map
@@ -381,6 +381,8 @@ FBSD_1.8 {
exterrctl;
fchroot;
getrlimitusage;
+ inotify_add_watch_at;
+ inotify_rm_watch;
kcmp;
setcred;
};
diff --git a/lib/libsys/_libsys.h b/lib/libsys/_libsys.h
index e2a8f2253814..1799906eb885 100644
--- a/lib/libsys/_libsys.h
+++ b/lib/libsys/_libsys.h
@@ -466,6 +466,8 @@ typedef int (__sys_getrlimitusage_t)(u_int, int, rlim_t *);
typedef int (__sys_fchroot_t)(int);
typedef int (__sys_setcred_t)(u_int, const struct setcred *, size_t);
typedef int (__sys_exterrctl_t)(u_int, u_int, void *);
+typedef int (__sys_inotify_add_watch_at_t)(int, int, const char *, uint32_t);
+typedef int (__sys_inotify_rm_watch_t)(int, int);
void __sys_exit(int rval);
int __sys_fork(void);
@@ -868,6 +870,8 @@ int __sys_getrlimitusage(u_int which, int flags, rlim_t * res);
int __sys_fchroot(int fd);
int __sys_setcred(u_int flags, const struct setcred * wcred, size_t size);
int __sys_exterrctl(u_int op, u_int flags, void * ptr);
+int __sys_inotify_add_watch_at(int fd, int dfd, const char * path, uint32_t mask);
+int __sys_inotify_rm_watch(int fd, int wd);
__END_DECLS
#endif /* __LIBSYS_H_ */
diff --git a/lib/libsys/inotify.2 b/lib/libsys/inotify.2
new file mode 100644
index 000000000000..f94509d6f59e
--- /dev/null
+++ b/lib/libsys/inotify.2
@@ -0,0 +1,379 @@
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2025 Klara, Inc.
+.\"
+.Dd May 19, 2025
+.Dt INOTIFY 2
+.Os
+.Sh NAME
+.Nm inotify_init ,
+.Nm inotify_init1 ,
+.Nm inotify_add_watch ,
+.Nm inotify_add_watch_at ,
+.Nm inotify_rm_watch
+.Nd monitor file system events
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/inotify.h
+.Ft int
+.Fo inotify_init
+.Fc
+.Ft int
+.Fo inotify_init1
+.Fa "int flags"
+.Fc
+.Ft int
+.Fo inotify_add_watch
+.Fa "int fd"
+.Fa "const char *pathname"
+.Fa "uint32_t mask"
+.Fc
+.Ft int
+.Fo inotify_add_watch_at
+.Fa "int fd"
+.Fa "int dfd"
+.Fa "const char *pathname"
+.Fa "uint32_t mask"
+.Fc
+.Ft int
+.Fo inotify_rm_watch
+.Fa "int fd"
+.Fa "uint32_t wd"
+.Fc
+.Bd -literal
+struct inotify_event {
+ int wd; /* Watch descriptor */
+ uint32_t mask; /* Event and flags */
+ uint32_t cookie; /* Unique ID which links rename events */
+ uint32_t len; /* Name field size, including nul bytes */
+ char name[0]; /* Filename (nul-terminated) */
+};
+.Ed
+.Sh DESCRIPTION
+The inotify system calls provide an interface to monitor file system events.
+They aim to be compatible with the Linux inotify interface.
+The provided functionality is similar to the
+.Dv EVFILT_VNODE
+filter of the
+.Xr kevent 2
+system call, but further allows monitoring of a directory without needing to
+open each object in that directory.
+This avoids races and reduces the number of file descriptors needed to monitor
+a large file hierarchy.
+.Pp
+inotify allows one or more file system objects, generally files or directories,
+to be watched for events, such as file open or close.
+Watched objects are associated with a file descriptor returned
+by
+.Fn inotify_init
+or
+.Fn inotify_init1 .
+When an event occurs, a record describing the event becomes available for
+reading from the inotify file descriptor.
+Each inotify descriptor thus refers to a queue of events waiting to be read.
+inotify descriptors are inherited across
+.Xr fork 2
+calls and may be passed to other processes via
+.Xr unix 4
+sockets.
+.Pp
+The
+.Fn inotify_init1
+system call accepts two flags.
+The
+.Dv IN_NONBLOCK
+flag causes the inotify descriptor to be opened in non-blocking mode, such that
+.Xr read 2
+calls will not block if no records are available to consume, and will instead
+return
+.Er EWOULDBLOCK .
+The
+.Dv IN_CLOEXEC
+flag causes the inotify descriptor to be closed automatically when
+.Xr execve 2
+is called.
+.Pp
+To watch a file or directory, the
+.Fn inotify_add_watch
+or
+.Fn inotify_add_watch_at
+system calls must be used.
+They take a path and a mask of events to watch for, and return a
+.Dq watch descriptor ,
+a non-negative integer which uniquely identifies the watched object within the
+inotify descriptor.
+.Pp
+The
+.Fn inotify_rm_watch
+system call removes a watch from an inotify descriptor.
+.Pp
+When watching a directory, objects within the directory are monitored for events
+as well as the directory itself.
+A record describing an inotify event consists of a
+.Dq struct inotify_event
+followed by the name of the object in the directory being watched.
+If the watched object itself generates an event, no name is present.
+Extra nul bytes may follow the file name in order to provide alignment for a
+subsequent record.
+.Pp
+The following events are defined:
+.Bl -tag -width IN_CLOSE_NOWRITE
+.It Dv IN_ACCESS
+A file's contents were accessed, e.g., by
+.Xr read 2
+.Xr copy_file_range 2 ,
+.Xr sendfile 2 ,
+or
+.Xr getdirentries 2 .
+.It Dv IN_ATTRIB
+A file's metadata was changed, e.g., by
+.Xr chmod 2
+or
+.Xr unlink 2 .
+.It Dv IN_CLOSE_WRITE
+A file that was previously opened for writing was closed.
+.It Dv IN_CLOSE_NOWRITE
+A file that was previously opened read-only was closed.
+.It Dv IN_CREATE
+A file within a watched directory was created, e.g., by
+.Xr open 2 ,
+.Xr mkdir 2 ,
+.Xr symlink 2 ,
+.Xr mknod 2 ,
+or
+.Xr bind 2 .
+.It Dv IN_DELETE
+A file or directory within a watched directory was removed.
+.It Dv IN_DELETE_SELF
+The watched file or directory itself was deleted.
+This event is generated only when the link count of the file drops
+to zero.
+.It Dv IN_MODIFY
+A file's contents were modified, e.g., by
+.Xr write 2
+or
+.Xr copy_file_range 2 .
+.It Dv IN_MOVE_SELF
+The watched file or directory itself was renamed.
+.It Dv IN_MOVED_FROM
+A file or directory was moved from a watched directory.
+.It Dv IN_MOVED_TO
+A file or directory was moved into a watched directory.
+A
+.Xr rename 2
+call thus may generate two events, one for the old name and one for the new
+name.
+These are linked together by the
+.Ar cookie
+field in the inotify record, which can be compared to link the two records
+to the same event.
+.It Dv IN_OPEN
+A file was opened.
+.El
+.Pp
+Some additional flags may be set in inotify event records:
+.Bl -tag -width IN_Q_OVERFLOW
+.It Dv IN_IGNORED
+When a watch is removed from a file, for example because it was created with the
+.Dv IN_ONESHOT
+flag, the file was deleted, or the watch was explicitly removed with
+.Xr inotify_rm_watch 2 ,
+an event with this mask is generated to indicate that the watch will not
+generate any more events.
+Once this event is generated, the watch is automatically removed, and in
+particular should not be removed manually with
+.Xr inotify_rm_watch 2 .
+.It Dv IN_ISDIR
+When the subject of an event is a directory, this flag is set in the
+.Ar mask
+.It Dv IN_Q_OVERFLOW
+One or more events were dropped, for example because of a kernel memory allocation
+failure or because the event queue size hit a limit.
+.It Dv IN_UNMOUNT
+The filesystem containing the watched object was unmounted.
+.El
+.Pp
+A number of flags may also be specified in the
+.Ar mask
+given to
+.Fn inotify_add_watch
+and
+.Fn inotify_add_watch_at :
+.Bl -tag -width IN_DONT_FOLLOW
+.It Dv IN_DONT_FOLLOW
+If
+.Ar pathname
+is a symbolic link, do not follow it.
+.It Dv IN_EXCL_UNLINK
+This currently has no effect, see the
+.Sx BUGS
+section.
+.In Dv IN_MASK_ADD
+When adding a watch to an object, and that object is already watched by the
+same inotify descriptor, by default the mask of the existing watch is
+overwritten.
+When
+.Dv IN_MASK_ADD
+is specified, the mask of the existing watch is instead logically ORed with
+the new mask.
+.In Dv IN_MASK_CREATE
+When
+.Fn inotify_add watch
+is used to add a watch to an object,
+.Dv IN_MASK_CREATE
+is specified, and that object is already watched by the same inotify descriptor,
+return an error instead of updating the existing watch.
+.In Dv IN_ONESHOT
+Monitor the object for a single event, after which the watch is automatically
+removed.
+As part of removal, a
+.Dv IN_IGNORED
+event is generated.
+.In Dv IN_ONLYDIR
+When creating a watch, fail with
+.Er ENOTDIR
+if the path does not refer to a directory.
+.El
+.Sh SYSCTL VARIABLES
+The following variables are available as both
+.Xr sysctl 8
+variables and
+.Xr loader 8
+tunables:
+.Bl -tag -width 15
+.It Va vfs.inotify.max_events
+The maximum number of inotify records that can be queued for a single
+inotify descriptor.
+Records in excess of this limit are discarded, and a single event with
+mask equal to
+.Dv IN_Q_OVERFLOW
+will be present in the queue.
+.It Va vfs.inotify.max_user_instances
+The maximum number of inotify descriptors that can be created by a single
+user.
+.It Va vfs.inotify.max_user_watches
+The maximum number of inotify watches per user.
+.El
+.Sh EXAMPLES
+See the example program in
+.Pa /usr/share/examples/inotify/inotify.c .
+.Sh ERRORS
+The
+.Fn inotify_init
+and
+.Fn inotify_init1
+functions will fail if:
+.Bl -tag -width Er
+.It Bq Er ENFILE
+The system limit on the total number of open files has been reached.
+.It Bq Er EMFILE
+A per-process limit on the number of open files has been reached.
+.It Bq Er EMFILE
+The system limit on the number of inotify descriptors has been reached.
+.It Bq Er EINVAL
+An unrecognized flag was passed to
+.Fn inotify_init1 .
+.El
+.Pp
+The
+.Fn inotify_add_watch
+and
+.Fn inotify_add_watch_at
+system calls will fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Ar fd
+parameter is not a valid file descriptor.
+.It Bq Er EINVAL
+The
+.Ar fd
+parameter is not an inotify descriptor.
+.It Bq Er EINVAL
+The
+.Ar mask
+parameter does not specify an event, or
+the
+.Dv IN_MASK_CREATE
+and
+.Dv IN_MASK_ADD
+flags are both set, or an unrecognized flag was passed.
+.It Bq Er ENOTDIR
+The
+.Ar pathname
+parameter refers to a file that is not a directory, and the
+.Dv IN_ONLYDIR
+flag was specified.
+.It Bq Er ENOSPC
+The per-user limit on the total number of inotify watches has been reached.
+.It Bq Er ECAPMODE
+The process is in capability mode and
+.Fn inotify_add_watch
+was called, or
+.Fn inotify_add_watch_at
+was called with
+.Dv AT_FDCWD
+as the directory file descriptor
+.Ar dfd .
+.It Bq Er ENOTCAPABLE
+The process is in capability mode and
+.Ar pathname
+contains a
+.Dq ..
+component leading to a directory outside the directory hierarchy specified
+by
+.Ar dfd .
+.El
+.Pp
+The
+.Fn inotify_rm_watch
+system call will fail if:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Ar fd
+parameter is not a valid file descriptor.
+.It Bq Er EINVAL
+The
+.Ar fd
+parameter is not an inotify descriptor.
+.It Bq Er EINVAL
+The
+.Ar wd
+parameter is not a valid watch descriptor.
+.El
+.Sh SEE ALSO
+.Xr kevent 2 ,
+.Xr capsicum 4
+.Sh STANDARDS
+The
+.Nm
+interface originates from Linux and is non-standard.
+This implementation aims to be compatible with that of Linux and is based
+on the documentation available at
+.Pa https://man7.org/linux/man-pages/man7/inotify.7.html .
+.Sh HISTORY
+The inotify system calls first appeared in
+.Fx 15.0 .
+.Sh BUGS
+If a file in a watched directory has multiple hard links,
+an access via any hard link for that file will generate an event, even
+if the accessed link belongs to an unwatched directory.
+This is not the case for the Linux implementation, where only accesses
+via the hard link in the watched directory will generate an event.
+.Pp
+If a watched directory contains multiple hard links of a file, an event
+on one of the hard links will generate an inotify record for each link
+in the directory.
+.Pp
+When a file is unlinked, no more events will be generated for that file,
+even if it continues to be accessed.
+By default, the Linux implementation will continue to generate events in
+this case.
+Thus, the
+.Fx
+implementation behaves as though
+.Dv IN_EXCL_UNLINK
+is always set.
diff --git a/lib/libsys/syscalls.map b/lib/libsys/syscalls.map
index 51be88203c17..69fce2ea7c63 100644
--- a/lib/libsys/syscalls.map
+++ b/lib/libsys/syscalls.map
@@ -809,4 +809,8 @@ FBSDprivate_1.0 {
__sys_setcred;
_exterrctl;
__sys_exterrctl;
+ _inotify_add_watch_at;
+ __sys_inotify_add_watch_at;
+ _inotify_rm_watch;
+ __sys_inotify_rm_watch;
};
diff --git a/lib/libsysdecode/Makefile b/lib/libsysdecode/Makefile
index b01877bb8bb8..ca020552a6e9 100644
--- a/lib/libsysdecode/Makefile
+++ b/lib/libsysdecode/Makefile
@@ -84,6 +84,7 @@ MLINKS+=sysdecode_mask.3 sysdecode_accessmode.3 \
sysdecode_mask.3 sysdecode_fileflags.3 \
sysdecode_mask.3 sysdecode_filemode.3 \
sysdecode_mask.3 sysdecode_flock_operation.3 \
+ sysdecode_mask.3 sysdecode_inotifyflags.3 \
sysdecode_mask.3 sysdecode_mlockall_flags.3 \
sysdecode_mask.3 sysdecode_mmap_flags.3 \
sysdecode_mask.3 sysdecode_mmap_prot.3 \
diff --git a/lib/libsysdecode/flags.c b/lib/libsysdecode/flags.c
index 32829d35dbe0..dc09c5747968 100644
--- a/lib/libsysdecode/flags.c
+++ b/lib/libsysdecode/flags.c
@@ -23,7 +23,6 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#define L2CAP_SOCKET_CHECKED
#include <sys/types.h>
@@ -31,6 +30,7 @@
#include <sys/capsicum.h>
#include <sys/event.h>
#include <sys/extattr.h>
+#include <sys/inotify.h>
#include <sys/linker.h>
#include <sys/mman.h>
#include <sys/mount.h>
@@ -351,6 +351,13 @@ sysdecode_getrusage_who(int who)
return (lookup_value(rusage, who));
}
+bool
+sysdecode_inotifyflags(FILE *fp, int flag, int *rem)
+{
+
+ return (print_mask_int(fp, inotifyflags, flag, rem));
+}
+
static struct name_table kevent_user_ffctrl[] = {
X(NOTE_FFNOP) X(NOTE_FFAND) X(NOTE_FFOR) X(NOTE_FFCOPY)
XEND
diff --git a/lib/libsysdecode/mktables b/lib/libsysdecode/mktables
index 5d7be2aad3c8..6b4f79402660 100644
--- a/lib/libsysdecode/mktables
+++ b/lib/libsysdecode/mktables
@@ -98,6 +98,7 @@ gen_table "extattrns" "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/
gen_table "fadvisebehav" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h"
gen_table "openflags" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h" "O_RDONLY|O_RDWR|O_WRONLY"
gen_table "flockops" "LOCK_[A-Z]+[[:space:]]+0x[0-9]+" "sys/fcntl.h"
+gen_table "inotifyflags" "IN_[A-Z_]+[[:space:]]+0x[0-9]+" "sys/inotify.h" "IN_CLOEXEC|IN_NONBLOCK"
gen_table "kldsymcmd" "KLDSYM_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h"
gen_table "kldunloadfflags" "LINKER_UNLOAD_[A-Z]+[[:space:]]+[0-9]+" "sys/linker.h"
gen_table "lio_listiomodes" "LIO_(NO)?WAIT[[:space:]]+[0-9]+" "aio.h"
diff --git a/lib/libsysdecode/sysdecode.h b/lib/libsysdecode/sysdecode.h
index 8dc0bbea6f0d..c95d7f71379b 100644
--- a/lib/libsysdecode/sysdecode.h
+++ b/lib/libsysdecode/sysdecode.h
@@ -61,6 +61,7 @@ const char *sysdecode_getfsstat_mode(int _mode);
const char *sysdecode_getrusage_who(int _who);
const char *sysdecode_idtype(int _idtype);
const char *sysdecode_ioctlname(unsigned long _val);
+bool sysdecode_inotifyflags(FILE *_fp, int _flags, int *_rem);
const char *sysdecode_ipproto(int _protocol);
void sysdecode_kevent_fflags(FILE *_fp, short _filter, int _fflags,
int _base);
diff --git a/libexec/flua/modules/lposix.c b/libexec/flua/modules/lposix.c
index 816d4bc688d2..9edc7e687786 100644
--- a/libexec/flua/modules/lposix.c
+++ b/libexec/flua/modules/lposix.c
@@ -88,18 +88,23 @@ static int
lua_chown(lua_State *L)
{
const char *path;
- uid_t owner = (uid_t) -1;
- gid_t group = (gid_t) -1;
+ uid_t owner = (uid_t)-1;
+ gid_t group = (gid_t)-1;
+ int error;
enforce_max_args(L, 3);
path = luaL_checkstring(L, 1);
if (lua_isinteger(L, 2))
- owner = (uid_t) lua_tointeger(L, 2);
+ owner = (uid_t)lua_tointeger(L, 2);
else if (lua_isstring(L, 2)) {
- struct passwd *p = getpwnam(lua_tostring(L, 2));
- if (p != NULL)
- owner = p->pw_uid;
+ char buf[4096];
+ struct passwd passwd, *pwd;
+
+ error = getpwnam_r(lua_tostring(L, 2), &passwd,
+ buf, sizeof(buf), &pwd);
+ if (error == 0)
+ owner = pwd->pw_uid;
else
return (luaL_argerror(L, 2,
lua_pushfstring(L, "unknown user %s",
@@ -112,11 +117,15 @@ lua_chown(lua_State *L)
}
if (lua_isinteger(L, 3))
- group = (gid_t) lua_tointeger(L, 3);
+ group = (gid_t)lua_tointeger(L, 3);
else if (lua_isstring(L, 3)) {
- struct group *g = getgrnam(lua_tostring(L, 3));
- if (g != NULL)
- group = g->gr_gid;
+ char buf[4096];
+ struct group gr, *grp;
+
+ error = getgrnam_r(lua_tostring(L, 3), &gr, buf, sizeof(buf),
+ &grp);
+ if (error == 0)
+ group = grp->gr_gid;
else
return (luaL_argerror(L, 3,
lua_pushfstring(L, "unknown group %s",
diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
index c939b5ae7cce..c59204d3d5a4 100644
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -6854,7 +6854,8 @@ top:
} else if (c == '\\') {
if ((next = lgetc(quotec)) == EOF)
return (0);
- if (next == quotec || c == ' ' || c == '\t')
+ if (next == quotec || next == ' ' ||
+ next == '\t')
c = next;
else if (next == '\n') {
file->lineno++;
@@ -7149,11 +7150,10 @@ pfctl_cmdline_symset(char *s)
if ((val = strrchr(s, '=')) == NULL)
return (-1);
- if ((sym = malloc(strlen(s) - strlen(val) + 1)) == NULL)
+ sym = strndup(s, val - s);
+ if (sym == NULL)
err(1, "%s: malloc", __func__);
- strlcpy(sym, s, strlen(s) - strlen(val) + 1);
-
ret = symset(sym, val + 1, 1);
free(sym);
diff --git a/sbin/pfctl/pfctl.8 b/sbin/pfctl/pfctl.8
index 5238c53f709d..c7fad58262dc 100644
--- a/sbin/pfctl/pfctl.8
+++ b/sbin/pfctl/pfctl.8
@@ -474,7 +474,7 @@ Show the list of tables.
.It Fl s Cm osfp
Show the list of operating system fingerprints.
.It Fl s Cm Interfaces
-Show the list of interfaces and interface drivers available to PF.
+Show the list of interfaces and interface groups available to PF.
When used together with
.Fl v ,
it additionally lists which interfaces have skip rules activated.
@@ -544,7 +544,7 @@ Kill a table.
Flush all addresses of a table.
.It Fl T Cm add
Add one or more addresses in a table.
-Automatically create a nonexisting table.
+Automatically create a persistent table if it does not exist.
.It Fl T Cm delete
Delete one or more addresses from a table.
.It Fl T Cm expire Ar number
@@ -556,7 +556,7 @@ For entries which have never had their statistics cleared,
refers to the time they were added to the table.
.It Fl T Cm replace
Replace the addresses of the table.
-Automatically create a nonexisting table.
+Automatically create a persistent table if it does not exist.
.It Fl T Cm show
Show the content (addresses) of a table.
.It Fl T Cm test
diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c
index 8c6497b4d1ee..38d74aceba80 100644
--- a/sbin/pfctl/pfctl.c
+++ b/sbin/pfctl/pfctl.c
@@ -3175,6 +3175,9 @@ main(int argc, char *argv[])
}
}
+ if (tblcmdopt == NULL ^ tableopt == NULL)
+ usage();
+
if (tblcmdopt != NULL) {
argc -= optind;
argv += optind;
@@ -3400,7 +3403,7 @@ main(int argc, char *argv[])
pfctl_kill_src_nodes(dev, ifaceopt, opts);
if (tblcmdopt != NULL) {
- error = pfctl_command_tables(argc, argv, tableopt,
+ error = pfctl_table(argc, argv, tableopt,
tblcmdopt, rulesopt, anchorname, opts);
rulesopt = NULL;
}
diff --git a/sbin/pfctl/pfctl.h b/sbin/pfctl/pfctl.h
index 5abd5ddcdf8f..08d48695709e 100644
--- a/sbin/pfctl/pfctl.h
+++ b/sbin/pfctl/pfctl.h
@@ -83,7 +83,7 @@ int pfi_clr_istats(const char *, int *, int);
void pfctl_print_title(char *);
void pfctl_do_clear_tables(const char *, int);
void pfctl_show_tables(const char *, int);
-int pfctl_command_tables(int, char *[], char *, const char *, char *,
+int pfctl_table(int, char *[], char *, const char *, char *,
const char *, int);
int pfctl_show_altq(int, const char *, int, int);
void warn_namespace_collision(const char *);
diff --git a/sbin/pfctl/pfctl_table.c b/sbin/pfctl/pfctl_table.c
index 3fe87b53b7f9..53abea3e1ae1 100644
--- a/sbin/pfctl/pfctl_table.c
+++ b/sbin/pfctl/pfctl_table.c
@@ -55,8 +55,6 @@
#include "pfctl.h"
extern void usage(void);
-static int pfctl_table(int, char *[], char *, const char *, char *,
- const char *, int);
static void print_table(const struct pfr_table *, int, int);
static int print_tstats(const struct pfr_tstats *, int);
static int load_addr(struct pfr_buffer *, int, char *[], char *, int, int);
@@ -119,15 +117,6 @@ pfctl_show_tables(const char *anchor, int opts)
}
int
-pfctl_command_tables(int argc, char *argv[], char *tname,
- const char *command, char *file, const char *anchor, int opts)
-{
- if (tname == NULL || command == NULL)
- usage();
- return pfctl_table(argc, argv, tname, command, file, anchor, opts);
-}
-
-int
pfctl_table(int argc, char *argv[], char *tname, const char *command,
char *file, const char *anchor, int opts)
{
@@ -214,7 +203,8 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command,
xprintf(opts, "%d/%d addresses added", nadd, b.pfrb_size);
if (opts & PF_OPT_VERBOSE)
PFRB_FOREACH(a, &b)
- if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback)
+ if ((opts & PF_OPT_VERBOSE2) ||
+ a->pfra_fback != PFR_FB_NONE)
print_addrx(a, NULL,
opts & PF_OPT_USEDNS);
} else if (!strcmp(command, "delete")) {
@@ -228,7 +218,8 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command,
xprintf(opts, "%d/%d addresses deleted", ndel, b.pfrb_size);
if (opts & PF_OPT_VERBOSE)
PFRB_FOREACH(a, &b)
- if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback)
+ if ((opts & PF_OPT_VERBOSE2) ||
+ a->pfra_fback != PFR_FB_NONE)
print_addrx(a, NULL,
opts & PF_OPT_USEDNS);
} else if (!strcmp(command, "replace")) {
@@ -259,7 +250,8 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command,
xprintf(opts, "no changes");
if (opts & PF_OPT_VERBOSE)
PFRB_FOREACH(a, &b)
- if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback)
+ if ((opts & PF_OPT_VERBOSE2) ||
+ a->pfra_fback != PFR_FB_NONE)
print_addrx(a, NULL,
opts & PF_OPT_USEDNS);
} else if (!strcmp(command, "expire")) {
@@ -282,7 +274,7 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command,
break;
}
PFRB_FOREACH(p, &b) {
- ((struct pfr_astats *)p)->pfras_a.pfra_fback = 0;
+ ((struct pfr_astats *)p)->pfras_a.pfra_fback = PFR_FB_NONE;
if (time(NULL) - ((struct pfr_astats *)p)->pfras_tzero >
lifetime)
if (pfr_buf_add(&b2,
@@ -297,7 +289,8 @@ pfctl_table(int argc, char *argv[], char *tname, const char *command,
xprintf(opts, "%d/%d addresses expired", ndel, b2.pfrb_size);
if (opts & PF_OPT_VERBOSE)
PFRB_FOREACH(a, &b2)
- if ((opts & PF_OPT_VERBOSE2) || a->pfra_fback)
+ if ((opts & PF_OPT_VERBOSE2) ||
+ a->pfra_fback != PFR_FB_NONE)
print_addrx(a, NULL,
opts & PF_OPT_USEDNS);
} else if (!strcmp(command, "reset")) {
diff --git a/share/examples/Makefile b/share/examples/Makefile
index 560fdae6de5b..f0c050a36306 100644
--- a/share/examples/Makefile
+++ b/share/examples/Makefile
@@ -15,6 +15,7 @@ LDIRS= BSD_daemon \
find_interface \
flua \
indent \
+ inotify \
ipfw \
jails \
kld \
@@ -97,6 +98,10 @@ SE_FLUA= libjail.lua
SE_DIRS+= indent
SE_INDENT= indent.pro
+SE_DIRS+= inotify
+SE_INOTIFY= inotify.c \
+ Makefile
+
.if ${MK_IPFILTER} != "no"
SUBDIR+= ipfilter
.endif
diff --git a/share/examples/inotify/Makefile b/share/examples/inotify/Makefile
new file mode 100644
index 000000000000..c54c629c58d7
--- /dev/null
+++ b/share/examples/inotify/Makefile
@@ -0,0 +1,6 @@
+PROG= inotify
+MAN=
+
+LIBADD= xo
+
+.include <bsd.prog.mk>
diff --git a/share/examples/inotify/inotify.c b/share/examples/inotify/inotify.c
new file mode 100644
index 000000000000..b8e300bc992c
--- /dev/null
+++ b/share/examples/inotify/inotify.c
@@ -0,0 +1,172 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+/*
+ * A simple program to demonstrate inotify. Given one or more paths, it watches
+ * all events on those paths and prints them to standard output.
+ */
+
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/inotify.h>
+
+#include <assert.h>
+#include <err.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <libxo/xo.h>
+
+static void
+usage(void)
+{
+ xo_errx(1, "usage: inotify <path1> [<path2> ...]");
+}
+
+static const char *
+ev2str(uint32_t event)
+{
+ switch (event & IN_ALL_EVENTS) {
+ case IN_ACCESS:
+ return ("IN_ACCESS");
+ case IN_ATTRIB:
+ return ("IN_ATTRIB");
+ case IN_CLOSE_WRITE:
+ return ("IN_CLOSE_WRITE");
+ case IN_CLOSE_NOWRITE:
+ return ("IN_CLOSE_NOWRITE");
+ case IN_CREATE:
+ return ("IN_CREATE");
+ case IN_DELETE:
+ return ("IN_DELETE");
+ case IN_DELETE_SELF:
+ return ("IN_DELETE_SELF");
+ case IN_MODIFY:
+ return ("IN_MODIFY");
+ case IN_MOVE_SELF:
+ return ("IN_MOVE_SELF");
+ case IN_MOVED_FROM:
+ return ("IN_MOVED_FROM");
+ case IN_MOVED_TO:
+ return ("IN_MOVED_TO");
+ case IN_OPEN:
+ return ("IN_OPEN");
+ default:
+ switch (event) {
+ case IN_IGNORED:
+ return ("IN_IGNORED");
+ case IN_Q_OVERFLOW:
+ return ("IN_Q_OVERFLOW");
+ case IN_UNMOUNT:
+ return ("IN_UNMOUNT");
+ }
+ warnx("unknown event %#x", event);
+ assert(0);
+ }
+}
+
+static void
+set_handler(int kq, int sig)
+{
+ struct kevent kev;
+
+ (void)signal(sig, SIG_IGN);
+ EV_SET(&kev, sig, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
+ if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0)
+ xo_err(1, "kevent");
+}
+
+int
+main(int argc, char **argv)
+{
+ struct inotify_event *iev, *iev1;
+ struct kevent kev;
+ size_t ievsz;
+ int ifd, kq;
+
+ argc = xo_parse_args(argc, argv);
+ if (argc < 2)
+ usage();
+ argc--;
+ argv++;
+
+ ifd = inotify_init1(IN_NONBLOCK);
+ if (ifd < 0)
+ xo_err(1, "inotify");
+ for (int i = 0; i < argc; i++) {
+ int wd;
+
+ wd = inotify_add_watch(ifd, argv[i], IN_ALL_EVENTS);
+ if (wd < 0)
+ xo_err(1, "inotify_add_watch(%s)", argv[i]);
+ }
+
+ xo_set_version("1");
+ xo_open_list("events");
+
+ kq = kqueue();
+ if (kq < 0)
+ xo_err(1, "kqueue");
+
+ /*
+ * Handle signals in the event loop so that we can close the xo list.
+ */
+ set_handler(kq, SIGINT);
+ set_handler(kq, SIGTERM);
+ set_handler(kq, SIGHUP);
+ set_handler(kq, SIGQUIT);
+
+ /*
+ * Monitor the inotify descriptor for events.
+ */
+ EV_SET(&kev, ifd, EVFILT_READ, EV_ADD, 0, 0, NULL);
+ if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0)
+ xo_err(1, "kevent");
+
+ ievsz = sizeof(*iev) + NAME_MAX + 1;
+ iev = malloc(ievsz);
+ if (iev == NULL)
+ err(1, "malloc");
+
+ for (;;) {
+ ssize_t n;
+ const char *ev;
+
+ if (kevent(kq, NULL, 0, &kev, 1, NULL) < 0)
+ xo_err(1, "kevent");
+ if (kev.filter == EVFILT_SIGNAL)
+ break;
+
+ n = read(ifd, iev, ievsz);
+ if (n < 0)
+ xo_err(1, "read");
+ assert(n >= (ssize_t)sizeof(*iev));
+
+ for (iev1 = iev; n > 0;) {
+ assert(n >= (ssize_t)sizeof(*iev1));
+
+ ev = ev2str(iev1->mask);
+ xo_open_instance("event");
+ xo_emit("{:wd/%3d} {:event/%16s} {:name/%s}\n",
+ iev1->wd, ev, iev1->name);
+ xo_close_instance("event");
+
+ n -= sizeof(*iev1) + iev1->len;
+ iev1 = (struct inotify_event *)(void *)
+ ((char *)iev1 + sizeof(*iev1) + iev1->len);
+ }
+ (void)xo_flush();
+ }
+
+ xo_close_list("events");
+
+ if (xo_finish() < 0)
+ xo_err(1, "stdout");
+ exit(0);
+}
diff --git a/share/man/man4/pf.4 b/share/man/man4/pf.4
index 422600a6fa44..03a4ba2bbe7f 100644
--- a/share/man/man4/pf.4
+++ b/share/man/man4/pf.4
@@ -26,7 +26,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd July 1, 2025
+.Dd July 2, 2025
.Dt PF 4
.Os
.Sh NAME
@@ -1114,7 +1114,7 @@ will be set to the length of the buffer actually used.
.It Dv DIOCCLRSRCNODES
Clear the tree of source tracking nodes.
.It Dv DIOCIGETIFACES Fa "struct pfioc_iface *io"
-Get the list of interfaces and interface drivers known to
+Get the list of interfaces and interface groups known to
.Nm .
All the ioctls that manipulate interfaces
use the same structure described below:
@@ -1131,7 +1131,7 @@ struct pfioc_iface {
.Pp
If not empty,
.Va pfiio_name
-can be used to restrict the search to a specific interface or driver.
+can be used to restrict the search to a specific interface or group.
.Va pfiio_buffer[pfiio_size]
is the user-supplied buffer for returning the data.
On entry,
diff --git a/share/man/man4/rights.4 b/share/man/man4/rights.4
index 0c24f6b45f88..8f5f6ad9c2d2 100644
--- a/share/man/man4/rights.4
+++ b/share/man/man4/rights.4
@@ -30,7 +30,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd May 1, 2024
+.Dd May 22, 2025
.Dt RIGHTS 4
.Os
.Sh NAME
@@ -319,6 +319,14 @@ Permit
.It Dv CAP_GETSOCKOPT
Permit
.Xr getsockopt 2 .
+.It Dv CAP_INOTIFY_ADD
+Permit
+.Xr inotify_add_watch 2
+and
+.Xr inotify_add_watch_at 2 .
+.It Dv CAP_INOTIFY_RM
+Permit
+.Xr inotify_rm_watch 2 .
.It Dv CAP_IOCTL
Permit
.Xr ioctl 2 .
diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index badaee1479f7..f709a4818dd5 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -434,6 +434,7 @@ MAN= accept_filter.9 \
VOP_GETEXTATTR.9 \
VOP_GETPAGES.9 \
VOP_INACTIVE.9 \
+ VOP_INOTIFY.9 \
VOP_IOCTL.9 \
VOP_LINK.9 \
VOP_LISTEXTATTR.9 \
@@ -2460,6 +2461,7 @@ MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 \
MLINKS+=VOP_FSYNC.9 VOP_FDATASYNC.9
MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9
MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9
+MLINKS+=VOP_INOTIFY.9 VOP_INOTIFY_ADD_WATCH.9
MLINKS+=VOP_LOCK.9 vn_lock.9 \
VOP_LOCK.9 VOP_ISLOCKED.9 \
VOP_LOCK.9 VOP_UNLOCK.9
diff --git a/share/man/man9/VOP_INOTIFY.9 b/share/man/man9/VOP_INOTIFY.9
new file mode 100644
index 000000000000..43b644682153
--- /dev/null
+++ b/share/man/man9/VOP_INOTIFY.9
@@ -0,0 +1,60 @@
+.\"-
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.\" Copyright (c) 2025 Klara, Inc.
+.\"
+.Dd May 27, 2025
+.Dt VOP_INOTIFY 9
+.Os
+.Sh NAME
+.Nm VOP_INOTIFY
+.Nd vnode inotify interface
+.Sh SYNOPSIS
+.In sys/param.h
+.In sys/vnode.h
+.Ft int
+.Fo VOP_INOTIFY
+.Fa struct vnode *vp
+.Fa struct vnode *dvp
+.Fa struct componentname *cnp
+.Fa int event
+.Fa uint32_t cookie
+.Fc
+.Ft int
+.Fo VOP_INOTIFY_ADD_WATCH
+.Fa struct vnode *vp
+.Fa struct inotify_softc *sc
+.Fa uint32_t mask
+.Fa uint32_t *wdp
+.Fa struct thread *td
+.Fc
+.Sh DESCRIPTION
+The
+.Fn VOP_INOTIFY
+operation notifies the
+.Xr inotify 2
+subsystem of a file system event on a vnode.
+The
+.Fa dvp
+and
+.Fa cnp
+arguments are optional and are only used to obtain a file name for the event.
+If they are omitted, the inotify subsystem will use the file name cache to
+find a name for the vnode, but this is more expensive.
+.Pp
+The
+.Fn VOP_INOTIFY_ADD_WATCH
+operation is for internal use by the inotify subsystem to add a watch to a
+vnode.
+.Sh LOCKS
+The
+.Fn VOP_INOTIFY
+operation does not assume any particular vnode lock state.
+The
+.Fn VOP_INOTIFY_ADD_WATCH
+operation should be called with the vnode locked.
+.Sh RETURN VALUES
+Zero is returned on success, otherwise an error code is returned.
+.Sh SEE ALSO
+.Xr inotify 2 ,
+.Xr vnode 9
diff --git a/stand/efi/loader/main.c b/stand/efi/loader/main.c
index 70cdfb2e9328..3ef418d20df3 100644
--- a/stand/efi/loader/main.c
+++ b/stand/efi/loader/main.c
@@ -1547,6 +1547,7 @@ command_seed_entropy(int argc, char *argv[])
}
COMMAND_SET(poweroff, "poweroff", "power off the system", command_poweroff);
+COMMAND_SET(halt, "halt", "power off the system", command_poweroff);
static int
command_poweroff(int argc __unused, char *argv[] __unused)
diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h
index 15e1dfc1a444..f1d9c96a78d7 100644
--- a/sys/amd64/linux/linux_proto.h
+++ b/sys/amd64/linux/linux_proto.h
@@ -914,10 +914,13 @@ struct linux_inotify_init_args {
syscallarg_t dummy;
};
struct linux_inotify_add_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)];
+ char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)];
};
struct linux_inotify_rm_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)];
};
struct linux_migrate_pages_args {
syscallarg_t dummy;
diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c
index 8413d2723551..62b50cf68a32 100644
--- a/sys/amd64/linux/linux_sysent.c
+++ b/sys/amd64/linux/linux_sysent.c
@@ -268,8 +268,8 @@ struct sysent linux_sysent[] = {
{ .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 251 = linux_ioprio_set */
{ .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 252 = linux_ioprio_get */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 253 = linux_inotify_init */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */
+ { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 254 = linux_inotify_add_watch */
+ { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 255 = linux_inotify_rm_watch */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 256 = linux_migrate_pages */
{ .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 257 = linux_openat */
{ .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 258 = linux_mkdirat */
diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c
index 20322f7a8660..1dc4de019080 100644
--- a/sys/amd64/linux/linux_systrace_args.c
+++ b/sys/amd64/linux/linux_systrace_args.c
@@ -1918,12 +1918,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_add_watch */
case 254: {
- *n_args = 0;
+ struct linux_inotify_add_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = (intptr_t)p->pathname; /* const char * */
+ uarg[a++] = p->mask; /* uint32_t */
+ *n_args = 3;
break;
}
/* linux_inotify_rm_watch */
case 255: {
- *n_args = 0;
+ struct linux_inotify_rm_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = p->wd; /* uint32_t */
+ *n_args = 2;
break;
}
/* linux_migrate_pages */
@@ -5860,9 +5867,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_add_watch */
case 254:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "userland const char *";
+ break;
+ case 2:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_inotify_rm_watch */
case 255:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_migrate_pages */
case 256:
@@ -8353,8 +8383,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
case 253:
/* linux_inotify_add_watch */
case 254:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_inotify_rm_watch */
case 255:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_migrate_pages */
case 256:
/* linux_openat */
diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master
index fd08c9b0279d..5e1394751ef6 100644
--- a/sys/amd64/linux/syscalls.master
+++ b/sys/amd64/linux/syscalls.master
@@ -1476,10 +1476,17 @@
int linux_inotify_init(void);
}
254 AUE_NULL STD {
- int linux_inotify_add_watch(void);
+ int linux_inotify_add_watch(
+ l_int fd,
+ const char *pathname,
+ uint32_t mask
+ );
}
255 AUE_NULL STD {
- int linux_inotify_rm_watch(void);
+ int linux_inotify_rm_watch(
+ l_int fd,
+ uint32_t wd
+ );
}
256 AUE_NULL STD {
int linux_migrate_pages(void);
diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h
index ab0edd99df42..57a303271f1c 100644
--- a/sys/amd64/linux32/linux32_proto.h
+++ b/sys/amd64/linux32/linux32_proto.h
@@ -983,10 +983,13 @@ struct linux_inotify_init_args {
syscallarg_t dummy;
};
struct linux_inotify_add_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)];
+ char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)];
};
struct linux_inotify_rm_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)];
};
struct linux_migrate_pages_args {
syscallarg_t dummy;
@@ -1184,7 +1187,7 @@ struct linux_pipe2_args {
char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
};
struct linux_inotify_init1_args {
- syscallarg_t dummy;
+ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
};
struct linux_preadv_args {
char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)];
diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c
index add9844254ce..1bc8841badf3 100644
--- a/sys/amd64/linux32/linux32_sysent.c
+++ b/sys/amd64/linux32/linux32_sysent.c
@@ -307,8 +307,8 @@ struct sysent linux32_sysent[] = {
{ .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */
{ .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */
+ { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */
+ { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */
{ .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */
{ .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */
@@ -347,7 +347,7 @@ struct sysent linux32_sysent[] = {
{ .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */
{ .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */
{ .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */
+ { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */
{ .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */
{ .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */
{ .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */
diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c
index 7793124e6935..cbd1641c2a34 100644
--- a/sys/amd64/linux32/linux32_systrace_args.c
+++ b/sys/amd64/linux32/linux32_systrace_args.c
@@ -2036,12 +2036,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_add_watch */
case 292: {
- *n_args = 0;
+ struct linux_inotify_add_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = (intptr_t)p->pathname; /* const char * */
+ uarg[a++] = p->mask; /* uint32_t */
+ *n_args = 3;
break;
}
/* linux_inotify_rm_watch */
case 293: {
- *n_args = 0;
+ struct linux_inotify_rm_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = p->wd; /* uint32_t */
+ *n_args = 2;
break;
}
/* linux_migrate_pages */
@@ -2379,7 +2386,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_init1 */
case 332: {
- *n_args = 0;
+ struct linux_inotify_init1_args *p = params;
+ iarg[a++] = p->flags; /* l_int */
+ *n_args = 1;
break;
}
/* linux_preadv */
@@ -6536,9 +6545,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_add_watch */
case 292:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "userland const char *";
+ break;
+ case 2:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_inotify_rm_watch */
case 293:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_migrate_pages */
case 294:
@@ -7116,6 +7148,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_init1 */
case 332:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
break;
/* linux_preadv */
case 333:
@@ -9809,8 +9848,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
case 291:
/* linux_inotify_add_watch */
case 292:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_inotify_rm_watch */
case 293:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_migrate_pages */
case 294:
/* linux_openat */
@@ -9982,6 +10027,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_init1 */
case 332:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_preadv */
case 333:
if (ndx == 0 || ndx == 1)
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
index 92d5f09c423f..7bd522a598e8 100644
--- a/sys/amd64/linux32/syscalls.master
+++ b/sys/amd64/linux32/syscalls.master
@@ -1589,10 +1589,17 @@
int linux_inotify_init(void);
}
292 AUE_NULL STD {
- int linux_inotify_add_watch(void);
+ int linux_inotify_add_watch(
+ l_int fd,
+ const char *pathname,
+ uint32_t mask
+ );
}
293 AUE_NULL STD {
- int linux_inotify_rm_watch(void);
+ int linux_inotify_rm_watch(
+ l_int fd,
+ uint32_t wd
+ );
}
; Linux 2.6.16:
294 AUE_NULL STD {
@@ -1860,7 +1867,9 @@
);
}
332 AUE_NULL STD {
- int linux_inotify_init1(void);
+ int linux_inotify_init1(
+ l_int flags
+ );
}
; Linux 2.6.30:
333 AUE_NULL STD {
diff --git a/sys/arm/allwinner/aw_gpio.c b/sys/arm/allwinner/aw_gpio.c
index 18b47bab12d9..2061e38a155f 100644
--- a/sys/arm/allwinner/aw_gpio.c
+++ b/sys/arm/allwinner/aw_gpio.c
@@ -1154,10 +1154,6 @@ aw_gpio_attach(device_t dev)
aw_gpio_register_isrcs(sc);
intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev)));
- sc->sc_busdev = gpiobus_attach_bus(dev);
- if (sc->sc_busdev == NULL)
- goto fail;
-
/*
* Register as a pinctrl device
*/
@@ -1166,6 +1162,10 @@ aw_gpio_attach(device_t dev)
fdt_pinctrl_register(dev, "allwinner,pins");
fdt_pinctrl_configure_tree(dev);
+ sc->sc_busdev = gpiobus_attach_bus(dev);
+ if (sc->sc_busdev == NULL)
+ goto fail;
+
config_intrhook_oneshot(aw_gpio_enable_bank_supply, sc);
return (0);
diff --git a/sys/arm/allwinner/aw_rtc.c b/sys/arm/allwinner/aw_rtc.c
index 9938601f17ce..4af57ab879e8 100644
--- a/sys/arm/allwinner/aw_rtc.c
+++ b/sys/arm/allwinner/aw_rtc.c
@@ -134,6 +134,7 @@ static struct ofw_compat_data compat_data[] = {
{ "allwinner,sun7i-a20-rtc", (uintptr_t) &a20_conf },
{ "allwinner,sun6i-a31-rtc", (uintptr_t) &a31_conf },
{ "allwinner,sun8i-h3-rtc", (uintptr_t) &h3_conf },
+ { "allwinner,sun20i-d1-rtc", (uintptr_t) &h3_conf },
{ "allwinner,sun50i-h5-rtc", (uintptr_t) &h3_conf },
{ "allwinner,sun50i-h6-rtc", (uintptr_t) &h3_conf },
{ NULL, 0 }
@@ -147,11 +148,13 @@ struct aw_rtc_softc {
static struct clk_fixed_def aw_rtc_osc32k = {
.clkdef.id = 0,
+ .clkdef.name = "osc32k",
.freq = 32768,
};
static struct clk_fixed_def aw_rtc_iosc = {
.clkdef.id = 2,
+ .clkdef.name = "iosc",
};
static void aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev);
@@ -250,23 +253,33 @@ aw_rtc_install_clocks(struct aw_rtc_softc *sc, device_t dev) {
int nclocks;
node = ofw_bus_get_node(dev);
- nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames);
- /* No clocks to export */
- if (nclocks <= 0)
- return;
- if (nclocks != 3) {
- device_printf(dev, "Having only %d clocks instead of 3, aborting\n", nclocks);
+ /* Nothing to do. */
+ if (!OF_hasprop(node, "clocks"))
return;
+
+ /*
+ * If the device tree gives us specific output names for the clocks,
+ * use them.
+ */
+ nclocks = ofw_bus_string_list_to_array(node, "clock-output-names", &clknames);
+ if (nclocks > 0) {
+ if (nclocks != 3) {
+ device_printf(dev,
+ "Found %d clocks names instead of 3, aborting\n",
+ nclocks);
+ return;
+ }
+
+ aw_rtc_osc32k.clkdef.name = clknames[0];
+ aw_rtc_iosc.clkdef.name = clknames[2];
}
clkdom = clkdom_create(dev);
- aw_rtc_osc32k.clkdef.name = clknames[0];
if (clknode_fixed_register(clkdom, &aw_rtc_osc32k) != 0)
device_printf(dev, "Cannot register osc32k clock\n");
- aw_rtc_iosc.clkdef.name = clknames[2];
aw_rtc_iosc.freq = sc->conf->iosc_freq;
if (clknode_fixed_register(clkdom, &aw_rtc_iosc) != 0)
device_printf(dev, "Cannot register iosc clock\n");
diff --git a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c
index e4fc57b79ba5..48d1d2af5abc 100644
--- a/sys/arm/broadcom/bcm2835/bcm2835_gpio.c
+++ b/sys/arm/broadcom/bcm2835/bcm2835_gpio.c
@@ -837,12 +837,12 @@ bcm_gpio_attach(device_t dev)
}
sc->sc_gpio_npins = i;
bcm_gpio_sysctl_init(sc);
- sc->sc_busdev = gpiobus_attach_bus(dev);
- if (sc->sc_busdev == NULL)
- goto fail;
fdt_pinctrl_register(dev, "brcm,pins");
fdt_pinctrl_configure_tree(dev);
+ sc->sc_busdev = gpiobus_attach_bus(dev);
+ if (sc->sc_busdev == NULL)
+ goto fail;
return (0);
diff --git a/sys/arm/mv/mvebu_gpio.c b/sys/arm/mv/mvebu_gpio.c
index 681cf20f7f9f..7acdfff539dc 100644
--- a/sys/arm/mv/mvebu_gpio.c
+++ b/sys/arm/mv/mvebu_gpio.c
@@ -810,7 +810,6 @@ mvebu_gpio_attach(device_t dev)
return (ENXIO);
}
- bus_attach_children(dev);
return (0);
}
diff --git a/sys/arm/nvidia/as3722_gpio.c b/sys/arm/nvidia/as3722_gpio.c
index 073d057884c9..f7b3d4d43bab 100644
--- a/sys/arm/nvidia/as3722_gpio.c
+++ b/sys/arm/nvidia/as3722_gpio.c
@@ -544,7 +544,7 @@ as3722_gpio_attach(struct as3722_softc *sc, phandle_t node)
sc->gpio_pins = malloc(sizeof(struct as3722_gpio_pin *) *
sc->gpio_npins, M_AS3722_GPIO, M_WAITOK | M_ZERO);
- sc->gpio_busdev = gpiobus_attach_bus(sc->dev);
+ sc->gpio_busdev = gpiobus_add_bus(sc->dev);
if (sc->gpio_busdev == NULL)
return (ENXIO);
for (i = 0; i < sc->gpio_npins; i++) {
diff --git a/sys/arm/nvidia/tegra_gpio.c b/sys/arm/nvidia/tegra_gpio.c
index 16e1ef94d6a9..e37fd69a121e 100644
--- a/sys/arm/nvidia/tegra_gpio.c
+++ b/sys/arm/nvidia/tegra_gpio.c
@@ -824,7 +824,6 @@ tegra_gpio_attach(device_t dev)
return (ENXIO);
}
- bus_attach_children(dev);
return (0);
}
diff --git a/sys/arm64/apple/apple_pinctrl.c b/sys/arm64/apple/apple_pinctrl.c
index ec2dd5907024..ebaaccea1d99 100644
--- a/sys/arm64/apple/apple_pinctrl.c
+++ b/sys/arm64/apple/apple_pinctrl.c
@@ -161,22 +161,22 @@ apple_pinctrl_attach(device_t dev)
goto error;
}
+ fdt_pinctrl_register(dev, "pinmux");
+ fdt_pinctrl_configure_tree(dev);
+
+ if (OF_hasprop(node, "interrupt-controller")) {
+ sc->sc_irqs = mallocarray(sc->sc_ngpios,
+ sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK);
+ intr_pic_register(dev,
+ OF_xref_from_node(ofw_bus_get_node(dev)));
+ }
+
sc->sc_busdev = gpiobus_attach_bus(dev);
if (sc->sc_busdev == NULL) {
device_printf(dev, "failed to attach gpiobus\n");
goto error;
}
- fdt_pinctrl_register(dev, "pinmux");
- fdt_pinctrl_configure_tree(dev);
-
- if (!OF_hasprop(node, "interrupt-controller"))
- return (0);
-
- sc->sc_irqs = mallocarray(sc->sc_ngpios,
- sizeof(*sc->sc_irqs), M_DEVBUF, M_ZERO | M_WAITOK);
- intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev)));
-
return (0);
error:
mtx_destroy(&sc->sc_mtx);
diff --git a/sys/arm64/linux/linux_proto.h b/sys/arm64/linux/linux_proto.h
index ae3d8569df58..82f57f77ffae 100644
--- a/sys/arm64/linux/linux_proto.h
+++ b/sys/arm64/linux/linux_proto.h
@@ -141,10 +141,13 @@ struct linux_inotify_init1_args {
char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
};
struct linux_inotify_add_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)];
+ char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)];
};
struct linux_inotify_rm_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)];
};
struct linux_ioctl_args {
char fd_l_[PADL_(l_uint)]; l_uint fd; char fd_r_[PADR_(l_uint)];
diff --git a/sys/arm64/linux/linux_sysent.c b/sys/arm64/linux/linux_sysent.c
index 722ada465730..e54a76cfd55e 100644
--- a/sys/arm64/linux/linux_sysent.c
+++ b/sys/arm64/linux/linux_sysent.c
@@ -41,8 +41,8 @@ struct sysent linux_sysent[] = {
{ .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 24 = linux_dup3 */
{ .sy_narg = AS(linux_fcntl_args), .sy_call = (sy_call_t *)linux_fcntl, .sy_auevent = AUE_FCNTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 25 = linux_fcntl */
{ .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 26 = linux_inotify_init1 */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */
+ { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 27 = linux_inotify_add_watch */
+ { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 28 = linux_inotify_rm_watch */
{ .sy_narg = AS(linux_ioctl_args), .sy_call = (sy_call_t *)linux_ioctl, .sy_auevent = AUE_IOCTL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 29 = linux_ioctl */
{ .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 30 = linux_ioprio_set */
{ .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 31 = linux_ioprio_get */
diff --git a/sys/arm64/linux/linux_systrace_args.c b/sys/arm64/linux/linux_systrace_args.c
index 54e4dd82355d..1b946a9406a5 100644
--- a/sys/arm64/linux/linux_systrace_args.c
+++ b/sys/arm64/linux/linux_systrace_args.c
@@ -210,12 +210,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_add_watch */
case 27: {
- *n_args = 0;
+ struct linux_inotify_add_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = (intptr_t)p->pathname; /* const char * */
+ uarg[a++] = p->mask; /* uint32_t */
+ *n_args = 3;
break;
}
/* linux_inotify_rm_watch */
case 28: {
- *n_args = 0;
+ struct linux_inotify_rm_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = p->wd; /* uint32_t */
+ *n_args = 2;
break;
}
/* linux_ioctl */
@@ -2780,9 +2787,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_add_watch */
case 27:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "userland const char *";
+ break;
+ case 2:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_inotify_rm_watch */
case 28:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_ioctl */
case 29:
@@ -6455,8 +6485,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_add_watch */
case 27:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_inotify_rm_watch */
case 28:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_ioctl */
case 29:
if (ndx == 0 || ndx == 1)
diff --git a/sys/arm64/linux/syscalls.master b/sys/arm64/linux/syscalls.master
index 79c04c398e00..2babdcaf03bf 100644
--- a/sys/arm64/linux/syscalls.master
+++ b/sys/arm64/linux/syscalls.master
@@ -170,10 +170,17 @@
);
}
27 AUE_NULL STD {
- int linux_inotify_add_watch(void);
+ int linux_inotify_add_watch(
+ l_int fd,
+ const char *pathname,
+ uint32_t mask
+ );
}
28 AUE_NULL STD {
- int linux_inotify_rm_watch(void);
+ int linux_inotify_rm_watch(
+ l_int fd,
+ uint32_t wd
+ );
}
29 AUE_IOCTL STD {
int linux_ioctl(
diff --git a/sys/arm64/nvidia/tegra210/max77620_gpio.c b/sys/arm64/nvidia/tegra210/max77620_gpio.c
index 8dcf98099dac..5d91e23324c7 100644
--- a/sys/arm64/nvidia/tegra210/max77620_gpio.c
+++ b/sys/arm64/nvidia/tegra210/max77620_gpio.c
@@ -672,7 +672,7 @@ max77620_gpio_attach(struct max77620_softc *sc, phandle_t node)
sx_init(&sc->gpio_lock, "MAX77620 GPIO lock");
- sc->gpio_busdev = gpiobus_attach_bus(sc->dev);
+ sc->gpio_busdev = gpiobus_add_bus(sc->dev);
if (sc->gpio_busdev == NULL)
return (ENXIO);
diff --git a/sys/arm64/rockchip/rk_gpio.c b/sys/arm64/rockchip/rk_gpio.c
index a86392f16624..847bc7394dd0 100644
--- a/sys/arm64/rockchip/rk_gpio.c
+++ b/sys/arm64/rockchip/rk_gpio.c
@@ -362,12 +362,6 @@ rk_gpio_attach(device_t dev)
return (ENXIO);
}
- sc->sc_busdev = gpiobus_attach_bus(dev);
- if (sc->sc_busdev == NULL) {
- rk_gpio_detach(dev);
- return (ENXIO);
- }
-
/* Set the cached value to unknown */
for (i = 0; i < RK_GPIO_MAX_PINS; i++)
sc->pin_cached[i].is_gpio = 2;
@@ -377,6 +371,12 @@ rk_gpio_attach(device_t dev)
sc->swporta_ddr = rk_gpio_read_4(sc, RK_GPIO_SWPORTA_DDR);
RK_GPIO_UNLOCK(sc);
+ sc->sc_busdev = gpiobus_attach_bus(dev);
+ if (sc->sc_busdev == NULL) {
+ rk_gpio_detach(dev);
+ return (ENXIO);
+ }
+
return (0);
}
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
index 0f110d5f9ddd..9381396f247c 100644
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -663,6 +663,7 @@
#define AUE_FSPACECTL 43269 /* FreeBSD-specific. */
#define AUE_TIMERFD 43270 /* FreeBSD/Linux. */
#define AUE_SETCRED 43271 /* FreeBSD-specific. */
+#define AUE_INOTIFY 43272 /* FreeBSD/Linux. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/cam/mmc/mmc_da.c b/sys/cam/mmc/mmc_da.c
index 1c455e1951d7..9246f95a080e 100644
--- a/sys/cam/mmc/mmc_da.c
+++ b/sys/cam/mmc/mmc_da.c
@@ -1198,27 +1198,6 @@ sdda_get_host_caps(struct cam_periph *periph, union ccb *ccb)
return (cts->host_caps);
}
-static uint32_t
-sdda_get_max_data(struct cam_periph *periph, union ccb *ccb)
-{
- struct ccb_trans_settings_mmc *cts;
-
- cts = &ccb->cts.proto_specific.mmc;
- memset(cts, 0, sizeof(struct ccb_trans_settings_mmc));
-
- ccb->ccb_h.func_code = XPT_GET_TRAN_SETTINGS;
- ccb->ccb_h.flags = CAM_DIR_NONE;
- ccb->ccb_h.retry_count = 0;
- ccb->ccb_h.timeout = 100;
- ccb->ccb_h.cbfcnp = NULL;
- xpt_action(ccb);
-
- if (ccb->ccb_h.status != CAM_REQ_CMP)
- panic("Cannot get host max data");
- KASSERT(cts->host_max_data != 0, ("host_max_data == 0?!"));
- return (cts->host_max_data);
-}
-
static void
sdda_start_init(void *context, union ccb *start_ccb)
{
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
index eaa086188b5f..8d2748098c00 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -511,4 +511,6 @@
#define FREEBSD32_SYS_fchroot 590
#define FREEBSD32_SYS_freebsd32_setcred 591
#define FREEBSD32_SYS_exterrctl 592
-#define FREEBSD32_SYS_MAXSYSCALL 593
+#define FREEBSD32_SYS_inotify_add_watch_at 593
+#define FREEBSD32_SYS_inotify_rm_watch 594
+#define FREEBSD32_SYS_MAXSYSCALL 595
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
index 989f32a5c6f0..bda373268cc5 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -598,4 +598,6 @@ const char *freebsd32_syscallnames[] = {
"fchroot", /* 590 = fchroot */
"freebsd32_setcred", /* 591 = freebsd32_setcred */
"exterrctl", /* 592 = exterrctl */
+ "inotify_add_watch_at", /* 593 = inotify_add_watch_at */
+ "inotify_rm_watch", /* 594 = inotify_rm_watch */
};
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
index 476fe2ac3f80..3718a1b0c8ee 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -660,4 +660,6 @@ struct sysent freebsd32_sysent[] = {
{ .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */
{ .sy_narg = AS(freebsd32_setcred_args), .sy_call = (sy_call_t *)freebsd32_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = freebsd32_setcred */
{ .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */
+ { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */
+ { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
};
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
index cf08938cd5de..37564a737a62 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3395,6 +3395,24 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 3;
break;
}
+ /* inotify_add_watch_at */
+ case 593: {
+ struct inotify_add_watch_at_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ iarg[a++] = p->dfd; /* int */
+ uarg[a++] = (intptr_t)p->path; /* const char * */
+ uarg[a++] = p->mask; /* uint32_t */
+ *n_args = 4;
+ break;
+ }
+ /* inotify_rm_watch */
+ case 594: {
+ struct inotify_rm_watch_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ iarg[a++] = p->wd; /* int */
+ *n_args = 2;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9172,6 +9190,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* inotify_add_watch_at */
+ case 593:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "userland const char *";
+ break;
+ case 3:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* inotify_rm_watch */
+ case 594:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11070,6 +11120,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* inotify_add_watch_at */
+ case 593:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* inotify_rm_watch */
+ case 594:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/compat/linux/linux_dummy.c b/sys/compat/linux/linux_dummy.c
index 35d6debe0da9..19cd55849f65 100644
--- a/sys/compat/linux/linux_dummy.c
+++ b/sys/compat/linux/linux_dummy.c
@@ -74,9 +74,6 @@ DUMMY(kexec_load);
DUMMY(add_key);
DUMMY(request_key);
DUMMY(keyctl);
-/* Linux 2.6.13: */
-DUMMY(inotify_add_watch);
-DUMMY(inotify_rm_watch);
/* Linux 2.6.16: */
DUMMY(migrate_pages);
DUMMY(unshare);
@@ -87,7 +84,6 @@ DUMMY(vmsplice);
DUMMY(move_pages);
/* Linux 2.6.27: */
DUMMY(signalfd4);
-DUMMY(inotify_init1);
/* Linux 2.6.31: */
DUMMY(perf_event_open);
/* Linux 2.6.36: */
diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c
index 246bc26d85d4..86834a7ecea8 100644
--- a/sys/compat/linux/linux_file.c
+++ b/sys/compat/linux/linux_file.c
@@ -32,11 +32,13 @@
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
+#include <sys/inotify.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/selinfo.h>
#include <sys/pipe.h>
#include <sys/proc.h>
+#include <sys/specialfd.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
@@ -1877,3 +1879,122 @@ linux_writev(struct thread *td, struct linux_writev_args *args)
freeuio(auio);
return (linux_enobufs2eagain(td, args->fd, error));
}
+
+static int
+linux_inotify_init_flags(int l_flags)
+{
+ int bsd_flags;
+
+ if ((l_flags & ~(LINUX_IN_CLOEXEC | LINUX_IN_NONBLOCK)) != 0)
+ linux_msg(NULL, "inotify_init1 unsupported flags 0x%x",
+ l_flags);
+
+ bsd_flags = 0;
+ if ((l_flags & LINUX_IN_CLOEXEC) != 0)
+ bsd_flags |= O_CLOEXEC;
+ if ((l_flags & LINUX_IN_NONBLOCK) != 0)
+ bsd_flags |= O_NONBLOCK;
+ return (bsd_flags);
+}
+
+static int
+inotify_init_common(struct thread *td, int flags)
+{
+ struct specialfd_inotify si;
+
+ si.flags = linux_inotify_init_flags(flags);
+ return (kern_specialfd(td, SPECIALFD_INOTIFY, &si));
+}
+
+#if defined(__i386__) || defined(__amd64__)
+int
+linux_inotify_init(struct thread *td, struct linux_inotify_init_args *args)
+{
+ return (inotify_init_common(td, 0));
+}
+#endif
+
+int
+linux_inotify_init1(struct thread *td, struct linux_inotify_init1_args *args)
+{
+ return (inotify_init_common(td, args->flags));
+}
+
+/*
+ * The native implementation uses the same values for inotify events as
+ * libinotify, which gives us binary compatibility with Linux. This simplifies
+ * the shim implementation a lot, as otherwise we would have to handle read(2)
+ * calls on inotify descriptors and translate events to Linux's ABI.
+ */
+_Static_assert(LINUX_IN_ACCESS == IN_ACCESS,
+ "IN_ACCESS mismatch");
+_Static_assert(LINUX_IN_MODIFY == IN_MODIFY,
+ "IN_MODIFY mismatch");
+_Static_assert(LINUX_IN_ATTRIB == IN_ATTRIB,
+ "IN_ATTRIB mismatch");
+_Static_assert(LINUX_IN_CLOSE_WRITE == IN_CLOSE_WRITE,
+ "IN_CLOSE_WRITE mismatch");
+_Static_assert(LINUX_IN_CLOSE_NOWRITE == IN_CLOSE_NOWRITE,
+ "IN_CLOSE_NOWRITE mismatch");
+_Static_assert(LINUX_IN_OPEN == IN_OPEN,
+ "IN_OPEN mismatch");
+_Static_assert(LINUX_IN_MOVED_FROM == IN_MOVED_FROM,
+ "IN_MOVED_FROM mismatch");
+_Static_assert(LINUX_IN_MOVED_TO == IN_MOVED_TO,
+ "IN_MOVED_TO mismatch");
+_Static_assert(LINUX_IN_CREATE == IN_CREATE,
+ "IN_CREATE mismatch");
+_Static_assert(LINUX_IN_DELETE == IN_DELETE,
+ "IN_DELETE mismatch");
+_Static_assert(LINUX_IN_DELETE_SELF == IN_DELETE_SELF,
+ "IN_DELETE_SELF mismatch");
+_Static_assert(LINUX_IN_MOVE_SELF == IN_MOVE_SELF,
+ "IN_MOVE_SELF mismatch");
+
+_Static_assert(LINUX_IN_UNMOUNT == IN_UNMOUNT,
+ "IN_UNMOUNT mismatch");
+_Static_assert(LINUX_IN_Q_OVERFLOW == IN_Q_OVERFLOW,
+ "IN_Q_OVERFLOW mismatch");
+_Static_assert(LINUX_IN_IGNORED == IN_IGNORED,
+ "IN_IGNORED mismatch");
+
+_Static_assert(LINUX_IN_ISDIR == IN_ISDIR,
+ "IN_ISDIR mismatch");
+_Static_assert(LINUX_IN_ONLYDIR == IN_ONLYDIR,
+ "IN_ONLYDIR mismatch");
+_Static_assert(LINUX_IN_DONT_FOLLOW == IN_DONT_FOLLOW,
+ "IN_DONT_FOLLOW mismatch");
+_Static_assert(LINUX_IN_MASK_CREATE == IN_MASK_CREATE,
+ "IN_MASK_CREATE mismatch");
+_Static_assert(LINUX_IN_MASK_ADD == IN_MASK_ADD,
+ "IN_MASK_ADD mismatch");
+_Static_assert(LINUX_IN_ONESHOT == IN_ONESHOT,
+ "IN_ONESHOT mismatch");
+_Static_assert(LINUX_IN_EXCL_UNLINK == IN_EXCL_UNLINK,
+ "IN_EXCL_UNLINK mismatch");
+
+static int
+linux_inotify_watch_flags(int l_flags)
+{
+ if ((l_flags & ~(LINUX_IN_ALL_EVENTS | LINUX_IN_ALL_FLAGS)) != 0) {
+ linux_msg(NULL, "inotify_add_watch unsupported flags 0x%x",
+ l_flags);
+ }
+
+ return (l_flags);
+}
+
+int
+linux_inotify_add_watch(struct thread *td,
+ struct linux_inotify_add_watch_args *args)
+{
+ return (kern_inotify_add_watch(args->fd, AT_FDCWD, args->pathname,
+ linux_inotify_watch_flags(args->mask), td));
+}
+
+int
+linux_inotify_rm_watch(struct thread *td,
+ struct linux_inotify_rm_watch_args *args)
+{
+ return (kern_inotify_rm_watch(args->fd, args->wd, td));
+}
diff --git a/sys/compat/linux/linux_file.h b/sys/compat/linux/linux_file.h
index 2e56942b0f40..7448dc597230 100644
--- a/sys/compat/linux/linux_file.h
+++ b/sys/compat/linux/linux_file.h
@@ -189,6 +189,38 @@
#define LINUX_HUGETLB_FLAG_ENCODE_2GB (31 << LINUX_HUGETLB_FLAG_ENCODE_SHIFT)
#define LINUX_HUGETLB_FLAG_ENCODE_16GB (34U << LINUX_HUGETLB_FLAG_ENCODE_SHIFT)
+/* inotify flags */
+#define LINUX_IN_ACCESS 0x00000001
+#define LINUX_IN_MODIFY 0x00000002
+#define LINUX_IN_ATTRIB 0x00000004
+#define LINUX_IN_CLOSE_WRITE 0x00000008
+#define LINUX_IN_CLOSE_NOWRITE 0x00000010
+#define LINUX_IN_OPEN 0x00000020
+#define LINUX_IN_MOVED_FROM 0x00000040
+#define LINUX_IN_MOVED_TO 0x00000080
+#define LINUX_IN_CREATE 0x00000100
+#define LINUX_IN_DELETE 0x00000200
+#define LINUX_IN_DELETE_SELF 0x00000400
+#define LINUX_IN_MOVE_SELF 0x00000800
+
+#define LINUX_IN_UNMOUNT 0x00002000
+#define LINUX_IN_Q_OVERFLOW 0x00004000
+#define LINUX_IN_IGNORED 0x00008000
+
+#define LINUX_IN_ONLYDIR 0x01000000
+#define LINUX_IN_DONT_FOLLOW 0x02000000
+#define LINUX_IN_EXCL_UNLINK 0x04000000
+#define LINUX_IN_MASK_CREATE 0x10000000
+#define LINUX_IN_MASK_ADD 0x20000000
+#define LINUX_IN_ISDIR 0x40000000
+#define LINUX_IN_ONESHOT 0x80000000
+
+#define LINUX_IN_ALL_EVENTS 0x00000fff
+#define LINUX_IN_ALL_FLAGS 0xf700e000
+
+#define LINUX_IN_NONBLOCK 0x00000800
+#define LINUX_IN_CLOEXEC 0x00080000
+
#if defined(_KERNEL)
struct l_file_handle {
l_uint handle_bytes;
diff --git a/sys/conf/files b/sys/conf/files
index f6d473b1431b..dd6f9a3021d4 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3992,6 +3992,7 @@ kern/vfs_export.c standard
kern/vfs_extattr.c standard
kern/vfs_hash.c standard
kern/vfs_init.c standard
+kern/vfs_inotify.c standard
kern/vfs_lookup.c standard
kern/vfs_mount.c standard
kern/vfs_mountroot.c standard
diff --git a/sys/contrib/dev/iwlwifi/iwl-debug.h b/sys/contrib/dev/iwlwifi/iwl-debug.h
index 43288a5a8d74..7b3b402766b4 100644
--- a/sys/contrib/dev/iwlwifi/iwl-debug.h
+++ b/sys/contrib/dev/iwlwifi/iwl-debug.h
@@ -47,7 +47,7 @@ enum iwl_dl {
IWL_DL_DROP = 0x00000010,
IWL_DL_EEPROM = 0x00000020,
IWL_DL_FW = 0x00000040,
- /* = 0x00000080, */
+ IWL_DL_DEV_RADIO = 0x00000080,
IWL_DL_HC = 0x00000100,
IWL_DL_HT = 0x00000200,
IWL_DL_INFO = 0x00000400,
@@ -195,6 +195,8 @@ void __iwl_dbg(struct device *, u32, bool, const char *, const char *fmt, ...);
IWL_DPRINTF(_subsys, IWL_DL_WEP, _fmt, ##__VA_ARGS__)
#define IWL_DEBUG_WOWLAN(_subsys, _fmt, ...) \
IWL_DPRINTF(_subsys, IWL_DL_WOWLAN, _fmt, ##__VA_ARGS__)
+#define IWL_DEBUG_DEV_RADIO(_dev, _fmt, ...) \
+ IWL_DPRINTF_DEV((_dev), IWL_DL_DEV_RADIO, _fmt, ##__VA_ARGS__)
#define IWL_DEBUG_PCI_RW(_subsys, _fmt, ...) \
IWL_DPRINTF(_subsys, IWL_DL_PCI_RW, _fmt, ##__VA_ARGS__)
diff --git a/sys/dev/gpio/acpi_gpiobus.c b/sys/dev/gpio/acpi_gpiobus.c
index 2987af634866..f9468e0deda0 100644
--- a/sys/dev/gpio/acpi_gpiobus.c
+++ b/sys/dev/gpio/acpi_gpiobus.c
@@ -36,6 +36,7 @@
#include <dev/gpio/gpiobusvar.h>
#include <dev/gpio/acpi_gpiobusvar.h>
+#include <dev/gpio/gpiobus_internal.h>
#include "gpiobus_if.h"
diff --git a/sys/dev/gpio/gpiobus.c b/sys/dev/gpio/gpiobus.c
index 2e2618805e7b..ab7f13177969 100644
--- a/sys/dev/gpio/gpiobus.c
+++ b/sys/dev/gpio/gpiobus.c
@@ -39,6 +39,7 @@
#include <sys/sbuf.h>
#include <dev/gpio/gpiobusvar.h>
+#include <dev/gpio/gpiobus_internal.h>
#include "gpiobus_if.h"
@@ -213,20 +214,40 @@ gpio_pin_is_active(gpio_pin_t pin, bool *active)
return (0);
}
+/*
+ * Note that this function should only
+ * be used in cases where a pre-existing
+ * gpiobus_pin structure exists. In most
+ * cases, the gpio_pin_get_by_* functions
+ * suffice.
+ */
+int
+gpio_pin_acquire(gpio_pin_t gpio)
+{
+ device_t busdev;
+
+ KASSERT(gpio != NULL, ("GPIO pin is NULL."));
+ KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL."));
+
+ busdev = GPIO_GET_BUS(gpio->dev);
+ if (busdev == NULL)
+ return (ENXIO);
+
+ return (gpiobus_acquire_pin(busdev, gpio->pin));
+}
+
void
gpio_pin_release(gpio_pin_t gpio)
{
device_t busdev;
- if (gpio == NULL)
- return;
-
+ KASSERT(gpio != NULL, ("GPIO pin is NULL."));
KASSERT(gpio->dev != NULL, ("GPIO pin device is NULL."));
busdev = GPIO_GET_BUS(gpio->dev);
- if (busdev != NULL)
- gpiobus_release_pin(busdev, gpio->pin);
+ KASSERT(busdev != NULL, ("gpiobus dev is NULL."));
+ gpiobus_release_pin(busdev, gpio->pin);
free(gpio, M_DEVBUF);
}
@@ -293,7 +314,7 @@ gpiobus_print_pins(struct gpiobus_ivar *devi, struct sbuf *sb)
}
device_t
-gpiobus_attach_bus(device_t dev)
+gpiobus_add_bus(device_t dev)
{
device_t busdev;
@@ -307,8 +328,24 @@ gpiobus_attach_bus(device_t dev)
#ifdef FDT
ofw_gpiobus_register_provider(dev);
#endif
- bus_attach_children(dev);
+ return (busdev);
+}
+
+/*
+ * Attach a gpiobus child.
+ * Note that the controller is expected
+ * to be fully initialized at this point.
+ */
+device_t
+gpiobus_attach_bus(device_t dev)
+{
+ device_t busdev;
+ busdev = gpiobus_add_bus(dev);
+ if (busdev == NULL)
+ return (NULL);
+
+ bus_attach_children(dev);
return (busdev);
}
@@ -385,14 +422,13 @@ gpiobus_acquire_pin(device_t bus, uint32_t pin)
sc = device_get_softc(bus);
/* Consistency check. */
if (pin >= sc->sc_npins) {
- device_printf(bus,
- "invalid pin %d, max: %d\n", pin, sc->sc_npins - 1);
- return (-1);
+ panic("%s: invalid pin %d, max: %d",
+ device_get_nameunit(bus), pin, sc->sc_npins - 1);
}
/* Mark pin as mapped and give warning if it's already mapped. */
if (sc->sc_pins[pin].mapped) {
device_printf(bus, "warning: pin %d is already mapped\n", pin);
- return (-1);
+ return (EBUSY);
}
sc->sc_pins[pin].mapped = 1;
@@ -400,7 +436,7 @@ gpiobus_acquire_pin(device_t bus, uint32_t pin)
}
/* Release mapped pin */
-int
+void
gpiobus_release_pin(device_t bus, uint32_t pin)
{
struct gpiobus_softc *sc;
@@ -408,19 +444,15 @@ gpiobus_release_pin(device_t bus, uint32_t pin)
sc = device_get_softc(bus);
/* Consistency check. */
if (pin >= sc->sc_npins) {
- device_printf(bus,
- "invalid pin %d, max=%d\n",
- pin, sc->sc_npins - 1);
- return (-1);
+ panic("%s: invalid pin %d, max: %d",
+ device_get_nameunit(bus), pin, sc->sc_npins - 1);
}
- if (!sc->sc_pins[pin].mapped) {
- device_printf(bus, "pin %d is not mapped\n", pin);
- return (-1);
- }
- sc->sc_pins[pin].mapped = 0;
+ if (!sc->sc_pins[pin].mapped)
+ panic("%s: pin %d is not mapped", device_get_nameunit(bus),
+ pin);
- return (0);
+ sc->sc_pins[pin].mapped = 0;
}
static int
@@ -435,8 +467,7 @@ gpiobus_acquire_child_pins(device_t dev, device_t child)
device_printf(child, "cannot acquire pin %d\n",
devi->pins[i]);
while (--i >= 0) {
- (void)gpiobus_release_pin(dev,
- devi->pins[i]);
+ gpiobus_release_pin(dev, devi->pins[i]);
}
gpiobus_free_ivars(devi);
return (EBUSY);
diff --git a/sys/dev/gpio/gpiobus_internal.h b/sys/dev/gpio/gpiobus_internal.h
new file mode 100644
index 000000000000..de3f57663132
--- /dev/null
+++ b/sys/dev/gpio/gpiobus_internal.h
@@ -0,0 +1,47 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2009 Oleksandr Tymoshenko <gonzo@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef __GPIOBUS_INTERNAL_H__
+#define __GPIOBUS_INTERNAL_H__
+
+/*
+ * Functions shared between gpiobus and other bus classes that derive from it;
+ * these should not be called directly by other drivers.
+ */
+int gpiobus_attach(device_t);
+int gpiobus_detach(device_t);
+int gpiobus_init_softc(device_t);
+int gpiobus_alloc_ivars(struct gpiobus_ivar *);
+void gpiobus_free_ivars(struct gpiobus_ivar *);
+int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *);
+int gpiobus_acquire_pin(device_t, uint32_t);
+void gpiobus_release_pin(device_t, uint32_t);
+
+extern driver_t gpiobus_driver;
+#endif
diff --git a/sys/dev/gpio/gpiobusvar.h b/sys/dev/gpio/gpiobusvar.h
index 74783e112f89..7f504236a774 100644
--- a/sys/dev/gpio/gpiobusvar.h
+++ b/sys/dev/gpio/gpiobusvar.h
@@ -156,6 +156,8 @@ int gpio_pin_get_by_bus_pinnum(device_t _bus, uint32_t _pinnum, gpio_pin_t *_gp)
/* Acquire a pin by child and index (used by direct children of gpiobus). */
int gpio_pin_get_by_child_index(device_t _child, uint32_t _idx, gpio_pin_t *_gp);
+/* Acquire a pin from an existing gpio_pin_t. */
+int gpio_pin_acquire(gpio_pin_t gpio);
/* Release a pin acquired via any gpio_pin_get_xxx() function. */
void gpio_pin_release(gpio_pin_t gpio);
@@ -167,22 +169,9 @@ int gpio_pin_setflags(gpio_pin_t pin, uint32_t flags);
struct resource *gpio_alloc_intr_resource(device_t consumer_dev, int *rid,
u_int alloc_flags, gpio_pin_t pin, uint32_t intr_mode);
-/*
- * Functions shared between gpiobus and other bus classes that derive from it;
- * these should not be called directly by other drivers.
- */
int gpio_check_flags(uint32_t, uint32_t);
+device_t gpiobus_add_bus(device_t);
device_t gpiobus_attach_bus(device_t);
int gpiobus_detach_bus(device_t);
-int gpiobus_attach(device_t);
-int gpiobus_detach(device_t);
-int gpiobus_init_softc(device_t);
-int gpiobus_alloc_ivars(struct gpiobus_ivar *);
-void gpiobus_free_ivars(struct gpiobus_ivar *);
-int gpiobus_read_ivar(device_t, device_t, int, uintptr_t *);
-int gpiobus_acquire_pin(device_t, uint32_t);
-int gpiobus_release_pin(device_t, uint32_t);
-
-extern driver_t gpiobus_driver;
#endif /* __GPIOBUS_H__ */
diff --git a/sys/dev/gpio/gpiopps.c b/sys/dev/gpio/gpiopps.c
index bb8afa5e062c..82620a50a798 100644
--- a/sys/dev/gpio/gpiopps.c
+++ b/sys/dev/gpio/gpiopps.c
@@ -160,7 +160,7 @@ gpiopps_detach(device_t dev)
if (sc->ires != NULL)
bus_release_resource(dev, SYS_RES_IRQ, sc->irid, sc->ires);
if (sc->gpin != NULL)
- gpiobus_release_pin(GPIO_GET_BUS(sc->gpin->dev), sc->gpin->pin);
+ gpio_pin_release(sc->gpin);
return (0);
}
diff --git a/sys/dev/gpio/ofw_gpiobus.c b/sys/dev/gpio/ofw_gpiobus.c
index 32dc5b55e698..fc5fb03d6824 100644
--- a/sys/dev/gpio/ofw_gpiobus.c
+++ b/sys/dev/gpio/ofw_gpiobus.c
@@ -36,6 +36,7 @@
#include <sys/module.h>
#include <dev/gpio/gpiobusvar.h>
+#include <dev/gpio/gpiobus_internal.h>
#include <dev/ofw/ofw_bus.h>
#include "gpiobus_if.h"
diff --git a/sys/dev/gpio/pl061.c b/sys/dev/gpio/pl061.c
index cc39790322b6..87d4310a6396 100644
--- a/sys/dev/gpio/pl061.c
+++ b/sys/dev/gpio/pl061.c
@@ -487,14 +487,21 @@ pl061_attach(device_t dev)
}
}
+ mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN);
+
+ if (sc->sc_xref != 0 && !intr_pic_register(dev, sc->sc_xref)) {
+ device_printf(dev, "couldn't register PIC\n");
+ PL061_LOCK_DESTROY(sc);
+ goto free_isrc;
+ }
+
sc->sc_busdev = gpiobus_attach_bus(dev);
if (sc->sc_busdev == NULL) {
device_printf(dev, "couldn't attach gpio bus\n");
+ PL061_LOCK_DESTROY(sc);
goto free_isrc;
}
- mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "pl061", MTX_SPIN);
-
return (0);
free_isrc:
@@ -503,6 +510,7 @@ free_isrc:
* for (irq = 0; irq < PL061_NUM_GPIO; irq++)
* intr_isrc_deregister(PIC_INTR_ISRC(sc, irq));
*/
+ bus_teardown_intr(dev, sc->sc_irq_res, sc->sc_irq_hdlr);
bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irq_rid,
sc->sc_irq_res);
free_pic:
diff --git a/sys/dev/gpio/pl061.h b/sys/dev/gpio/pl061.h
index 809a1168493d..d9fe23e502b1 100644
--- a/sys/dev/gpio/pl061.h
+++ b/sys/dev/gpio/pl061.h
@@ -46,6 +46,7 @@ struct pl061_softc {
struct resource *sc_mem_res;
struct resource *sc_irq_res;
void *sc_irq_hdlr;
+ intptr_t sc_xref;
int sc_mem_rid;
int sc_irq_rid;
struct pl061_pin_irqsrc sc_isrcs[PL061_NUM_GPIO];
diff --git a/sys/dev/gpio/pl061_acpi.c b/sys/dev/gpio/pl061_acpi.c
index f5885025083e..8e9921261e4e 100644
--- a/sys/dev/gpio/pl061_acpi.c
+++ b/sys/dev/gpio/pl061_acpi.c
@@ -67,19 +67,12 @@ pl061_acpi_probe(device_t dev)
static int
pl061_acpi_attach(device_t dev)
{
- int error;
+ struct pl061_softc *sc;
- error = pl061_attach(dev);
- if (error != 0)
- return (error);
+ sc = device_get_softc(dev);
+ sc->sc_xref = ACPI_GPIO_XREF;
- if (!intr_pic_register(dev, ACPI_GPIO_XREF)) {
- device_printf(dev, "couldn't register PIC\n");
- pl061_detach(dev);
- error = ENXIO;
- }
-
- return (error);
+ return (pl061_attach(dev));
}
static device_method_t pl061_acpi_methods[] = {
diff --git a/sys/dev/gpio/pl061_fdt.c b/sys/dev/gpio/pl061_fdt.c
index aa22298b43c6..681b3ccdfdeb 100644
--- a/sys/dev/gpio/pl061_fdt.c
+++ b/sys/dev/gpio/pl061_fdt.c
@@ -61,19 +61,12 @@ pl061_fdt_probe(device_t dev)
static int
pl061_fdt_attach(device_t dev)
{
- int error;
+ struct pl061_softc *sc;
- error = pl061_attach(dev);
- if (error != 0)
- return (error);
+ sc = device_get_softc(dev);
+ sc->sc_xref = OF_xref_from_node(ofw_bus_get_node(dev));
- if (!intr_pic_register(dev, OF_xref_from_node(ofw_bus_get_node(dev)))) {
- device_printf(dev, "couldn't register PIC\n");
- pl061_detach(dev);
- error = ENXIO;
- }
-
- return (error);
+ return (pl061_attach(dev));
}
static device_method_t pl061_fdt_methods[] = {
diff --git a/sys/dev/gpio/qoriq_gpio.c b/sys/dev/gpio/qoriq_gpio.c
index 25dfccede29f..8b44cd256c79 100644
--- a/sys/dev/gpio/qoriq_gpio.c
+++ b/sys/dev/gpio/qoriq_gpio.c
@@ -369,11 +369,6 @@ qoriq_gpio_attach(device_t dev)
for (i = 0; i <= MAXPIN; i++)
sc->sc_pins[i].gp_caps = DEFAULT_CAPS;
- sc->busdev = gpiobus_attach_bus(dev);
- if (sc->busdev == NULL) {
- qoriq_gpio_detach(dev);
- return (ENOMEM);
- }
/*
* Enable the GPIO Input Buffer for all GPIOs.
* This is safe on devices without a GPIBE register, because those
@@ -384,6 +379,12 @@ qoriq_gpio_attach(device_t dev)
OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev);
+ sc->busdev = gpiobus_attach_bus(dev);
+ if (sc->busdev == NULL) {
+ qoriq_gpio_detach(dev);
+ return (ENOMEM);
+ }
+
return (0);
}
diff --git a/sys/dev/iicbus/gpio/tca64xx.c b/sys/dev/iicbus/gpio/tca64xx.c
index 3b3bca9936f1..cd011ae9be75 100644
--- a/sys/dev/iicbus/gpio/tca64xx.c
+++ b/sys/dev/iicbus/gpio/tca64xx.c
@@ -261,14 +261,13 @@ tca64xx_attach(device_t dev)
sc->addr = iicbus_get_addr(dev);
mtx_init(&sc->mtx, "tca64xx gpio", "gpio", MTX_DEF);
+ OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev);
sc->busdev = gpiobus_attach_bus(dev);
if (sc->busdev == NULL) {
device_printf(dev, "Could not create busdev child\n");
return (ENXIO);
}
- OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev);
-
#ifdef DEBUG
switch (sc->chip) {
case TCA6416_TYPE:
diff --git a/sys/dev/regulator/regulator_fixed.c b/sys/dev/regulator/regulator_fixed.c
index 0a76da7140a0..55cdb5e4aeae 100644
--- a/sys/dev/regulator/regulator_fixed.c
+++ b/sys/dev/regulator/regulator_fixed.c
@@ -100,12 +100,8 @@ static struct gpio_entry *
regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin)
{
struct gpio_entry *entry, *tmp;
- device_t busdev;
int rv;
- busdev = GPIO_GET_BUS(gpio_pin->dev);
- if (busdev == NULL)
- return (NULL);
entry = malloc(sizeof(struct gpio_entry), M_FIXEDREGULATOR,
M_WAITOK | M_ZERO);
@@ -122,8 +118,8 @@ regnode_get_gpio_entry(struct gpiobus_pin *gpio_pin)
}
/* Reserve pin. */
- /* XXX Can we call gpiobus_acquire_pin() with gpio_list_mtx held? */
- rv = gpiobus_acquire_pin(busdev, gpio_pin->pin);
+ /* XXX Can we call gpio_pin_acquire() with gpio_list_mtx held? */
+ rv = gpio_pin_acquire(gpio_pin);
if (rv != 0) {
mtx_unlock(&gpio_list_mtx);
free(entry, M_FIXEDREGULATOR);
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index c0ec5a94b8d3..ae28617537fd 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -89,6 +89,8 @@
#include <sys/buf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
+#define EXTERR_CATEGORY EXTERR_CAT_FUSE
+#include <sys/exterrvar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@@ -439,7 +441,8 @@ fuse_vnop_access(struct vop_access_args *ap)
if (vnode_isvroot(vp)) {
return 0;
}
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (!(data->dataflags & FSESS_INITED)) {
if (vnode_isvroot(vp)) {
@@ -448,7 +451,8 @@ fuse_vnop_access(struct vop_access_args *ap)
return 0;
}
}
- return EBADF;
+ return (EXTERROR(EBADF, "Access denied until FUSE session "
+ "is initialized"));
}
if (vnode_islnk(vp)) {
return 0;
@@ -489,7 +493,8 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
switch(ap->a_op) {
@@ -506,7 +511,7 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
op = FUSE_SETLK;
break;
default:
- return EINVAL;
+ return (EXTERROR(EINVAL, "Unsupported lock flags"));
}
if (!(dataflags & FSESS_POSIX_LOCKS))
@@ -534,14 +539,14 @@ fuse_vnop_advlock(struct vop_advlock_args *ap)
size = vattr.va_size;
if (size > OFF_MAX ||
(fl->l_start > 0 && size > OFF_MAX - fl->l_start)) {
- err = EOVERFLOW;
+ err = EXTERROR(EOVERFLOW, "Offset is too large");
goto out;
}
start = size + fl->l_start;
break;
default:
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Unsupported offset type"));
}
err = fuse_filehandle_get_anyflags(vp, &fufh, cred, pid);
@@ -603,15 +608,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
switch (vp->v_type) {
case VFIFO:
return (ESPIPE);
case VLNK:
case VREG:
- if (vfs_isrdonly(mp))
- return (EROFS);
break;
default:
return (ENODEV);
@@ -621,7 +625,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
return (EROFS);
if (fsess_not_impl(mp, FUSE_FALLOCATE))
- return (EINVAL);
+ return (EXTERROR(EINVAL, "This server does not implement "
+ "FUSE_FALLOCATE"));
io.uio_offset = *offset;
io.uio_resid = *len;
@@ -651,13 +656,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_FALLOCATE);
- err = EINVAL;
+ err = EXTERROR(EINVAL, "This server does not implement "
+ "FUSE_ALLOCATE");
} else if (err == EOPNOTSUPP) {
/*
* The file system server does not support FUSE_FALLOCATE with
* the supplied mode for this particular file.
*/
- err = EINVAL;
+ err = EXTERROR(EINVAL, "This file can't be pre-allocated");
} else if (!err) {
*offset += *len;
*len = 0;
@@ -703,7 +709,8 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
int maxrun;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
mp = vnode_mount(vp);
@@ -870,19 +877,21 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
pid_t pid;
int err;
- err = ENOSYS;
if (mp == NULL || mp != vnode_mount(outvp))
- goto fallback;
+ return (EXTERROR(ENOSYS, "Mount points do not match"));
if (incred->cr_uid != outcred->cr_uid)
- goto fallback;
+ return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not "
+ "support different credentials for infd and outfd"));
if (incred->cr_groups[0] != outcred->cr_groups[0])
- goto fallback;
+ return (EXTERROR(ENOSYS, "FUSE_COPY_FILE_RANGE does not "
+ "support different credentials for infd and outfd"));
/* Caller busied mp, mnt_data can be safely accessed. */
if (fsess_not_impl(mp, FUSE_COPY_FILE_RANGE))
- goto fallback;
+ return (EXTERROR(ENOSYS, "This daemon does not "
+ "implement COPY_FILE_RANGE"));
if (ap->a_fsizetd == NULL)
td = curthread;
@@ -892,7 +901,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
vn_lock_pair(invp, false, LK_SHARED, outvp, false, LK_EXCLUSIVE);
if (invp->v_data == NULL || outvp->v_data == NULL) {
- err = EBADF;
+ err = EXTERROR(EBADF, "vnode got reclaimed");
goto unlock;
}
@@ -956,7 +965,6 @@ unlock:
if (err == ENOSYS)
fsess_set_notimpl(mp, FUSE_COPY_FILE_RANGE);
-fallback:
/*
* No need to call vn_rlimit_fsizex_res before return, since the uio is
@@ -1024,7 +1032,8 @@ fuse_vnop_create(struct vop_create_args *ap)
int flags;
if (fuse_isdeadfs(dvp))
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
/* FUSE expects sockets to be created with FUSE_MKNOD */
if (vap->va_type == VSOCK)
@@ -1040,7 +1049,7 @@ fuse_vnop_create(struct vop_create_args *ap)
bzero(&fdi, sizeof(fdi));
if (vap->va_type != VREG)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "Only regular files can be created"));
if (fsess_not_impl(mp, FUSE_CREATE) || vap->va_type == VSOCK) {
/* Fallback to FUSE_MKNOD/FUSE_OPEN */
@@ -1221,8 +1230,8 @@ fuse_vnop_getattr(struct vop_getattr_args *ap)
if (!(dataflags & FSESS_INITED)) {
if (!vnode_isvroot(vp)) {
fdata_set_dead(fuse_get_mpdata(vnode_mount(vp)));
- err = ENOTCONN;
- return err;
+ return (EXTERROR(ENOTCONN, "FUSE daemon is not "
+ "initialized"));
} else {
goto fake;
}
@@ -1351,10 +1360,11 @@ fuse_vnop_link(struct vop_link_args *ap)
int err;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (vnode_mount(tdvp) != vnode_mount(vp)) {
- return EXDEV;
+ return (EXDEV);
}
/*
@@ -1364,7 +1374,7 @@ fuse_vnop_link(struct vop_link_args *ap)
* validating that nlink does not overflow.
*/
if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
- return EMLINK;
+ return (EMLINK);
fli.oldnodeid = VTOI(vp);
fdisp_init(&fdi, 0);
@@ -1376,12 +1386,13 @@ fuse_vnop_link(struct vop_link_args *ap)
feo = fdi.answ;
if (fli.oldnodeid != feo->nodeid) {
+ static const char exterr[] = "Server assigned wrong inode "
+ "for a hard link.";
struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
- fuse_warn(data, FSESS_WARN_ILLEGAL_INODE,
- "Assigned wrong inode for a hard link.");
+ fuse_warn(data, FSESS_WARN_ILLEGAL_INODE, exterr);
fuse_vnode_clear_attr_cache(vp);
fuse_vnode_clear_attr_cache(tdvp);
- err = EIO;
+ err = EXTERROR(EIO, exterr);
goto out;
}
@@ -1458,7 +1469,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
if (fuse_isdeadfs(dvp)) {
*vpp = NULL;
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (!vnode_isdir(dvp))
return ENOTDIR;
@@ -1478,7 +1490,8 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
* Since the file system doesn't support ".." lookups,
* we have no way to find this entry.
*/
- return ESTALE;
+ return (EXTERROR(ESTALE, "This server does not support "
+ "'..' lookups"));
}
nid = VTOFUD(dvp)->parent_nid;
if (nid == 0)
@@ -1601,11 +1614,11 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
vref(dvp);
*vpp = dvp;
} else {
+ static const char exterr[] = "Server assigned "
+ "same inode to both parent and child.";
fuse_warn(fuse_get_mpdata(mp),
- FSESS_WARN_ILLEGAL_INODE,
- "Assigned same inode to both parent and "
- "child.");
- err = EIO;
+ FSESS_WARN_ILLEGAL_INODE, exterr);
+ err = EXTERROR(EIO, exterr);
}
} else {
@@ -1693,7 +1706,8 @@ fuse_vnop_mkdir(struct vop_mkdir_args *ap)
struct fuse_mkdir_in fmdi;
if (fuse_isdeadfs(dvp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode);
fmdi.umask = curthread->td_proc->p_pd->pd_cmask;
@@ -1720,7 +1734,8 @@ fuse_vnop_mknod(struct vop_mknod_args *ap)
struct vattr *vap = ap->a_vap;
if (fuse_isdeadfs(dvp))
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
return fuse_internal_mknod(dvp, vpp, cnp, vap);
}
@@ -1744,11 +1759,13 @@ fuse_vnop_open(struct vop_open_args *ap)
pid_t pid = td->td_proc->p_pid;
if (fuse_isdeadfs(vp))
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (vp->v_type == VCHR || vp->v_type == VBLK || vp->v_type == VFIFO)
- return (EOPNOTSUPP);
+ return (EXTERROR(EOPNOTSUPP, "Unsupported vnode type",
+ vp->v_type));
if ((a_mode & (FREAD | FWRITE | FEXEC)) == 0)
- return EINVAL;
+ return (EXTERROR(EINVAL, "Illegal mode", a_mode));
if (fuse_filehandle_validrw(vp, a_mode, cred, pid)) {
fuse_vnode_open(vp, 0, td);
@@ -1830,7 +1847,8 @@ fuse_vnop_pathconf(struct vop_pathconf_args *ap)
return (0);
} else if (fsess_not_impl(mp, FUSE_LSEEK)) {
/* FUSE_LSEEK is not implemented */
- return (EINVAL);
+ return (EXTERROR(EINVAL, "This server does not "
+ "implement FUSE_LSEEK"));
} else {
return (err);
}
@@ -1864,7 +1882,8 @@ fuse_vnop_read(struct vop_read_args *ap)
MPASS(vp->v_type == VREG || vp->v_type == VDIR);
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (VTOFUD(vp)->flag & FN_DIRECTIO) {
@@ -1941,10 +1960,11 @@ fuse_vnop_readdir(struct vop_readdir_args *ap)
if (ap->a_eofflag)
*ap->a_eofflag = 0;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (uio_resid(uio) < sizeof(struct dirent))
- return EINVAL;
+ return (EXTERROR(EINVAL, "Buffer is too small"));
tresid = uio->uio_resid;
err = fuse_filehandle_get_dir(vp, &fufh, cred, pid);
@@ -2014,7 +2034,8 @@ fuse_vnop_readlink(struct vop_readlink_args *ap)
int err;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (!vnode_islnk(vp)) {
return EINVAL;
@@ -2025,10 +2046,11 @@ fuse_vnop_readlink(struct vop_readlink_args *ap)
goto out;
}
if (strnlen(fdi.answ, fdi.iosize) + 1 < fdi.iosize) {
+ static const char exterr[] = "Server returned an embedded NUL "
+ "from FUSE_READLINK.";
struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp));
- fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL,
- "Returned an embedded NUL from FUSE_READLINK.");
- err = EIO;
+ fuse_warn(data, FSESS_WARN_READLINK_EMBEDDED_NUL, exterr);
+ err = EXTERROR(EIO, exterr);
goto out;
}
if (((char *)fdi.answ)[0] == '/' &&
@@ -2112,10 +2134,11 @@ fuse_vnop_remove(struct vop_remove_args *ap)
int err;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (vnode_isdir(vp)) {
- return EPERM;
+ return (EXTERROR(EPERM, "vnode is a directory"));
}
err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK);
@@ -2148,12 +2171,13 @@ fuse_vnop_rename(struct vop_rename_args *ap)
int err = 0;
if (fuse_isdeadfs(fdvp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (fvp->v_mount != tdvp->v_mount ||
(tvp && fvp->v_mount != tvp->v_mount)) {
SDT_PROBE2(fusefs, , vnops, trace, 1, "cross-device rename");
- err = EXDEV;
+ err = EXTERROR(EXDEV, "Cross-device rename");
goto out;
}
cache_purge(fvp);
@@ -2224,10 +2248,12 @@ fuse_vnop_rmdir(struct vop_rmdir_args *ap)
int err;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (VTOFUD(vp) == VTOFUD(dvp)) {
- return EINVAL;
+ return (EXTERROR(EINVAL, "Directory to be removed "
+ "contains itself"));
}
err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR);
@@ -2264,7 +2290,8 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
checkperm = dataflags & FSESS_DEFAULT_PERMISSIONS;
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (vap->va_uid != (uid_t)VNOVAL) {
@@ -2429,7 +2456,8 @@ fuse_vnop_symlink(struct vop_symlink_args *ap)
size_t len;
if (fuse_isdeadfs(dvp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
/*
* Unlike the other creator type calls, here we have to create a message
@@ -2475,7 +2503,8 @@ fuse_vnop_write(struct vop_write_args *ap)
MPASS(vp->v_type == VREG || vp->v_type == VDIR);
if (fuse_isdeadfs(vp)) {
- return ENXIO;
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
}
if (VTOFUD(vp)->flag & FN_DIRECTIO) {
@@ -2628,10 +2657,12 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (fsess_not_impl(mp, FUSE_GETXATTR))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "extended attributes"));
err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
if (err)
@@ -2669,7 +2700,8 @@ fuse_vnop_getextattr(struct vop_getextattr_args *ap)
if (err != 0) {
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_GETXATTR);
- err = EOPNOTSUPP;
+ err = (EXTERROR(EOPNOTSUPP, "This server does not "
+ "implement extended attributes"));
}
goto out;
}
@@ -2715,10 +2747,12 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (fsess_not_impl(mp, FUSE_SETXATTR))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "setting extended attributes"));
if (vfs_isrdonly(mp))
return EROFS;
@@ -2730,9 +2764,11 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
* return EOPNOTSUPP.
*/
if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
- return (EOPNOTSUPP);
+ return (EXTERROR(EOPNOTSUPP, "This server does not "
+ "implement removing extended attributess"));
else
- return (EINVAL);
+ return (EXTERROR(EINVAL, "DELETEEXTATTR should be used "
+ "to remove extattrs"));
}
err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td,
@@ -2778,7 +2814,8 @@ fuse_vnop_setextattr(struct vop_setextattr_args *ap)
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_SETXATTR);
- err = EOPNOTSUPP;
+ err = EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "setting extended attributes");
}
if (err == ERESTART) {
/* Can't restart after calling uiomove */
@@ -2889,10 +2926,12 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (fsess_not_impl(mp, FUSE_LISTXATTR))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "extended attributes"));
err = fuse_extattr_check_cred(vp, ap->a_attrnamespace, cred, td, VREAD);
if (err)
@@ -2920,7 +2959,8 @@ fuse_vnop_listextattr(struct vop_listextattr_args *ap)
if (err != 0) {
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_LISTXATTR);
- err = EOPNOTSUPP;
+ err = EXTERROR(EOPNOTSUPP, "This server does not "
+ "implement extended attributes");
}
goto out;
}
@@ -3020,7 +3060,8 @@ fuse_vnop_deallocate(struct vop_deallocate_args *ap)
bool closefufh = false;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (vfs_isrdonly(mp))
return (EROFS);
@@ -3126,10 +3167,12 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
int err;
if (fuse_isdeadfs(vp))
- return (ENXIO);
+ return (EXTERROR(ENXIO, "This FUSE session is about "
+ "to be closed"));
if (fsess_not_impl(mp, FUSE_REMOVEXATTR))
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "removing extended attributes"));
if (vfs_isrdonly(mp))
return EROFS;
@@ -3158,7 +3201,8 @@ fuse_vnop_deleteextattr(struct vop_deleteextattr_args *ap)
err = fdisp_wait_answ(&fdi);
if (err == ENOSYS) {
fsess_set_notimpl(mp, FUSE_REMOVEXATTR);
- err = EOPNOTSUPP;
+ err = EXTERROR(EOPNOTSUPP, "This server does not implement "
+ "removing extended attributes");
}
fdisp_destroy(&fdi);
@@ -3212,7 +3256,8 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap)
/* NFS requires lookups for "." and ".." */
SDT_PROBE2(fusefs, , vnops, trace, 1,
"VOP_VPTOFH without FUSE_EXPORT_SUPPORT");
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server is "
+ "missing FUSE_EXPORT_SUPPORT"));
}
if ((mp->mnt_flag & MNT_EXPORTED) &&
fsess_is_impl(mp, FUSE_OPENDIR))
@@ -3230,7 +3275,8 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap)
*/
SDT_PROBE2(fusefs, , vnops, trace, 1,
"VOP_VPTOFH with FUSE_OPENDIR");
- return EOPNOTSUPP;
+ return (EXTERROR(EOPNOTSUPP, "This server implements "
+ "FUSE_OPENDIR so is not compatible with getfh"));
}
err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread);
@@ -3244,6 +3290,7 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap)
if (fvdat->generation <= UINT32_MAX)
fhp->gen = fvdat->generation;
else
- return EOVERFLOW;
+ return (EXTERROR(EOVERFLOW, "inode generation "
+ "number overflow"));
return (0);
}
diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c
index e799a5ce05f6..8ab6d35a2685 100644
--- a/sys/fs/msdosfs/msdosfs_lookup.c
+++ b/sys/fs/msdosfs/msdosfs_lookup.c
@@ -845,7 +845,6 @@ doscheckpath(struct denode *source, struct denode *target, daddr_t *wait_scn)
*wait_scn = 0;
pmp = target->de_pmp;
- lockmgr_assert(&pmp->pm_checkpath_lock, KA_XLOCKED);
KASSERT(pmp == source->de_pmp,
("doscheckpath: source and target on different filesystems"));
diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c
index adcffe45df82..4431d36c8a8e 100644
--- a/sys/fs/msdosfs/msdosfs_vfsops.c
+++ b/sys/fs/msdosfs/msdosfs_vfsops.c
@@ -575,7 +575,6 @@ mountmsdosfs(struct vnode *odevvp, struct mount *mp)
pmp->pm_bo = bo;
lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0);
- lockinit(&pmp->pm_checkpath_lock, 0, "msdoscp", 0, 0);
TASK_INIT(&pmp->pm_rw2ro_task, 0, msdosfs_remount_ro, pmp);
@@ -871,7 +870,6 @@ error_exit:
}
if (pmp != NULL) {
lockdestroy(&pmp->pm_fatlock);
- lockdestroy(&pmp->pm_checkpath_lock);
free(pmp->pm_inusemap, M_MSDOSFSFAT);
free(pmp, M_MSDOSFSMNT);
mp->mnt_data = NULL;
@@ -971,7 +969,6 @@ msdosfs_unmount(struct mount *mp, int mntflags)
dev_rel(pmp->pm_dev);
free(pmp->pm_inusemap, M_MSDOSFSFAT);
lockdestroy(&pmp->pm_fatlock);
- lockdestroy(&pmp->pm_checkpath_lock);
free(pmp, M_MSDOSFSMNT);
mp->mnt_data = NULL;
return (error);
diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c
index 6417b7dac16b..120b97ba72d5 100644
--- a/sys/fs/msdosfs/msdosfs_vnops.c
+++ b/sys/fs/msdosfs/msdosfs_vnops.c
@@ -945,7 +945,7 @@ msdosfs_rename(struct vop_rename_args *ap)
struct denode *fdip, *fip, *tdip, *tip, *nip;
u_char toname[12], oldname[11];
u_long to_diroffset;
- bool checkpath_locked, doingdirectory, newparent;
+ bool doingdirectory, newparent;
int error;
u_long cn, pcl, blkoff;
daddr_t bn, wait_scn, scn;
@@ -986,8 +986,6 @@ msdosfs_rename(struct vop_rename_args *ap)
if (tvp != NULL && tvp != tdvp)
VOP_UNLOCK(tvp);
- checkpath_locked = false;
-
relock:
doingdirectory = newparent = false;
@@ -1108,12 +1106,8 @@ relock:
if (doingdirectory && newparent) {
if (error != 0) /* write access check above */
goto unlock;
- lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL);
- checkpath_locked = true;
error = doscheckpath(fip, tdip, &wait_scn);
if (wait_scn != 0) {
- lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
- checkpath_locked = false;
VOP_UNLOCK(fdvp);
VOP_UNLOCK(tdvp);
VOP_UNLOCK(fvp);
@@ -1276,8 +1270,6 @@ relock:
cache_purge(fvp);
unlock:
- if (checkpath_locked)
- lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
vput(fdvp);
vput(fvp);
if (tvp != NULL) {
@@ -1289,7 +1281,6 @@ unlock:
vput(tdvp);
return (error);
releout:
- MPASS(!checkpath_locked);
vrele(tdvp);
if (tvp != NULL)
vrele(tvp);
diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h
index fcaac544a74d..04e6b75bea2a 100644
--- a/sys/fs/msdosfs/msdosfsmount.h
+++ b/sys/fs/msdosfs/msdosfsmount.h
@@ -118,7 +118,6 @@ struct msdosfsmount {
void *pm_u2d; /* Unicode->DOS iconv handle */
void *pm_d2u; /* DOS->Local iconv handle */
struct lock pm_fatlock; /* lockmgr protecting allocations */
- struct lock pm_checkpath_lock; /* protects doscheckpath result */
struct task pm_rw2ro_task; /* context for emergency remount ro */
};
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 0356877eaf05..7dcc83880bb9 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -245,6 +245,10 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
vp->v_object = lowervp->v_object;
vn_irflag_set(vp, VIRF_PGREAD);
}
+ if ((vn_irflag_read(lowervp) & VIRF_INOTIFY) != 0)
+ vn_irflag_set(vp, VIRF_INOTIFY);
+ if ((vn_irflag_read(lowervp) & VIRF_INOTIFY_PARENT) != 0)
+ vn_irflag_set(vp, VIRF_INOTIFY_PARENT);
if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
vp->v_vflag |= VV_ROOT;
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 8608216e10e5..74c1a8f3acb6 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -190,6 +190,26 @@ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
/*
+ * Synchronize inotify flags with the lower vnode:
+ * - If the upper vnode has the flag set and the lower does not, then the lower
+ * vnode is unwatched and the upper vnode does not need to go through
+ * VOP_INOTIFY.
+ * - If the lower vnode is watched, then the upper vnode should go through
+ * VOP_INOTIFY, so copy the flag up.
+ */
+static void
+null_copy_inotify(struct vnode *vp, struct vnode *lvp, short flag)
+{
+ if ((vn_irflag_read(vp) & flag) != 0) {
+ if (__predict_false((vn_irflag_read(lvp) & flag) == 0))
+ vn_irflag_unset(vp, flag);
+ } else if ((vn_irflag_read(lvp) & flag) != 0) {
+ if (__predict_false((vn_irflag_read(vp) & flag) == 0))
+ vn_irflag_set(vp, flag);
+ }
+}
+
+/*
* This is the 10-Apr-92 bypass routine.
* This version has been optimized for speed, throwing away some
* safety checks. It should still always work, but it's not as
@@ -305,7 +325,10 @@ null_bypass(struct vop_generic_args *ap)
lvp = *(vps_p[i]);
/*
- * Get rid of the transient hold on lvp.
+ * Get rid of the transient hold on lvp. Copy inotify
+ * flags up in case something is watching the lower
+ * layer.
+ *
* If lowervp was unlocked during VOP
* operation, nullfs upper vnode could have
* been reclaimed, which changes its v_vnlock
@@ -314,6 +337,10 @@ null_bypass(struct vop_generic_args *ap)
* upper (reclaimed) vnode.
*/
if (lvp != NULLVP) {
+ null_copy_inotify(old_vps[i], lvp,
+ VIRF_INOTIFY);
+ null_copy_inotify(old_vps[i], lvp,
+ VIRF_INOTIFY_PARENT);
if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE &&
old_vps[i]->v_vnlock != lvp->v_vnlock) {
VOP_UNLOCK(lvp);
diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h
index aa2dfbb68745..49f002a633d2 100644
--- a/sys/i386/linux/linux_proto.h
+++ b/sys/i386/linux/linux_proto.h
@@ -981,10 +981,13 @@ struct linux_inotify_init_args {
syscallarg_t dummy;
};
struct linux_inotify_add_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char pathname_l_[PADL_(const char *)]; const char * pathname; char pathname_r_[PADR_(const char *)];
+ char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)];
};
struct linux_inotify_rm_watch_args {
- syscallarg_t dummy;
+ char fd_l_[PADL_(l_int)]; l_int fd; char fd_r_[PADR_(l_int)];
+ char wd_l_[PADL_(uint32_t)]; uint32_t wd; char wd_r_[PADR_(uint32_t)];
};
struct linux_migrate_pages_args {
syscallarg_t dummy;
@@ -1178,7 +1181,7 @@ struct linux_pipe2_args {
char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
};
struct linux_inotify_init1_args {
- syscallarg_t dummy;
+ char flags_l_[PADL_(l_int)]; l_int flags; char flags_r_[PADR_(l_int)];
};
struct linux_preadv_args {
char fd_l_[PADL_(l_ulong)]; l_ulong fd; char fd_r_[PADR_(l_ulong)];
diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c
index 7be646f34144..b8893008944b 100644
--- a/sys/i386/linux/linux_sysent.c
+++ b/sys/i386/linux/linux_sysent.c
@@ -306,8 +306,8 @@ struct sysent linux_sysent[] = {
{ .sy_narg = AS(linux_ioprio_set_args), .sy_call = (sy_call_t *)linux_ioprio_set, .sy_auevent = AUE_SETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 289 = linux_ioprio_set */
{ .sy_narg = AS(linux_ioprio_get_args), .sy_call = (sy_call_t *)linux_ioprio_get, .sy_auevent = AUE_GETPRIORITY, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 290 = linux_ioprio_get */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 291 = linux_inotify_init */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */
+ { .sy_narg = AS(linux_inotify_add_watch_args), .sy_call = (sy_call_t *)linux_inotify_add_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 292 = linux_inotify_add_watch */
+ { .sy_narg = AS(linux_inotify_rm_watch_args), .sy_call = (sy_call_t *)linux_inotify_rm_watch, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 293 = linux_inotify_rm_watch */
{ .sy_narg = 0, .sy_call = (sy_call_t *)linux_migrate_pages, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 294 = linux_migrate_pages */
{ .sy_narg = AS(linux_openat_args), .sy_call = (sy_call_t *)linux_openat, .sy_auevent = AUE_OPEN_RWTC, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 295 = linux_openat */
{ .sy_narg = AS(linux_mkdirat_args), .sy_call = (sy_call_t *)linux_mkdirat, .sy_auevent = AUE_MKDIRAT, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 296 = linux_mkdirat */
@@ -346,7 +346,7 @@ struct sysent linux_sysent[] = {
{ .sy_narg = AS(linux_epoll_create1_args), .sy_call = (sy_call_t *)linux_epoll_create1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 329 = linux_epoll_create1 */
{ .sy_narg = AS(linux_dup3_args), .sy_call = (sy_call_t *)linux_dup3, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 330 = linux_dup3 */
{ .sy_narg = AS(linux_pipe2_args), .sy_call = (sy_call_t *)linux_pipe2, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 331 = linux_pipe2 */
- { .sy_narg = 0, .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */
+ { .sy_narg = AS(linux_inotify_init1_args), .sy_call = (sy_call_t *)linux_inotify_init1, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 332 = linux_inotify_init1 */
{ .sy_narg = AS(linux_preadv_args), .sy_call = (sy_call_t *)linux_preadv, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 333 = linux_preadv */
{ .sy_narg = AS(linux_pwritev_args), .sy_call = (sy_call_t *)linux_pwritev, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 334 = linux_pwritev */
{ .sy_narg = AS(linux_rt_tgsigqueueinfo_args), .sy_call = (sy_call_t *)linux_rt_tgsigqueueinfo, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 335 = linux_rt_tgsigqueueinfo */
diff --git a/sys/i386/linux/linux_systrace_args.c b/sys/i386/linux/linux_systrace_args.c
index f3e3c32a2bbf..563d1a795ae1 100644
--- a/sys/i386/linux/linux_systrace_args.c
+++ b/sys/i386/linux/linux_systrace_args.c
@@ -2071,12 +2071,19 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_add_watch */
case 292: {
- *n_args = 0;
+ struct linux_inotify_add_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = (intptr_t)p->pathname; /* const char * */
+ uarg[a++] = p->mask; /* uint32_t */
+ *n_args = 3;
break;
}
/* linux_inotify_rm_watch */
case 293: {
- *n_args = 0;
+ struct linux_inotify_rm_watch_args *p = params;
+ iarg[a++] = p->fd; /* l_int */
+ uarg[a++] = p->wd; /* uint32_t */
+ *n_args = 2;
break;
}
/* linux_migrate_pages */
@@ -2410,7 +2417,9 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
}
/* linux_inotify_init1 */
case 332: {
- *n_args = 0;
+ struct linux_inotify_init1_args *p = params;
+ iarg[a++] = p->flags; /* l_int */
+ *n_args = 1;
break;
}
/* linux_preadv */
@@ -6604,9 +6613,32 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_add_watch */
case 292:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "userland const char *";
+ break;
+ case 2:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_inotify_rm_watch */
case 293:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ case 1:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
break;
/* linux_migrate_pages */
case 294:
@@ -7172,6 +7204,13 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_init1 */
case 332:
+ switch (ndx) {
+ case 0:
+ p = "l_int";
+ break;
+ default:
+ break;
+ };
break;
/* linux_preadv */
case 333:
@@ -9889,8 +9928,14 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
case 291:
/* linux_inotify_add_watch */
case 292:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_inotify_rm_watch */
case 293:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_migrate_pages */
case 294:
/* linux_openat */
@@ -10062,6 +10107,9 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
/* linux_inotify_init1 */
case 332:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
/* linux_preadv */
case 333:
if (ndx == 0 || ndx == 1)
diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master
index 958336be0f08..2113ea51ac5d 100644
--- a/sys/i386/linux/syscalls.master
+++ b/sys/i386/linux/syscalls.master
@@ -1605,10 +1605,17 @@
int linux_inotify_init(void);
}
292 AUE_NULL STD {
- int linux_inotify_add_watch(void);
+ int linux_inotify_add_watch(
+ l_int fd,
+ const char *pathname,
+ uint32_t mask
+ );
}
293 AUE_NULL STD {
- int linux_inotify_rm_watch(void);
+ int linux_inotify_rm_watch(
+ l_int fd,
+ uint32_t wd
+ );
}
; Linux 2.6.16:
294 AUE_NULL STD {
@@ -1872,7 +1879,9 @@
);
}
332 AUE_NULL STD {
- int linux_inotify_init1(void);
+ int linux_inotify_init1(
+ l_int flags
+ );
}
; Linux 2.6.30:
333 AUE_NULL STD {
diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c
index a48a513aa3b5..34e71a0665ed 100644
--- a/sys/kern/init_sysent.c
+++ b/sys/kern/init_sysent.c
@@ -659,4 +659,6 @@ struct sysent sysent[] = {
{ .sy_narg = AS(fchroot_args), .sy_call = (sy_call_t *)sys_fchroot, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 590 = fchroot */
{ .sy_narg = AS(setcred_args), .sy_call = (sy_call_t *)sys_setcred, .sy_auevent = AUE_SETCRED, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 591 = setcred */
{ .sy_narg = AS(exterrctl_args), .sy_call = (sy_call_t *)sys_exterrctl, .sy_auevent = AUE_NULL, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 592 = exterrctl */
+ { .sy_narg = AS(inotify_add_watch_at_args), .sy_call = (sy_call_t *)sys_inotify_add_watch_at, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 593 = inotify_add_watch_at */
+ { .sy_narg = AS(inotify_rm_watch_args), .sy_call = (sy_call_t *)sys_inotify_rm_watch, .sy_auevent = AUE_INOTIFY, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 594 = inotify_rm_watch */
};
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index c8b01afeab4f..dcd38c6e6fbe 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -1637,6 +1637,12 @@ uifree(struct uidinfo *uip)
if (uip->ui_pipecnt != 0)
printf("freeing uidinfo: uid = %d, pipecnt = %ld\n",
uip->ui_uid, uip->ui_pipecnt);
+ if (uip->ui_inotifycnt != 0)
+ printf("freeing uidinfo: uid = %d, inotifycnt = %ld\n",
+ uip->ui_uid, uip->ui_inotifycnt);
+ if (uip->ui_inotifywatchcnt != 0)
+ printf("freeing uidinfo: uid = %d, inotifywatchcnt = %ld\n",
+ uip->ui_uid, uip->ui_inotifywatchcnt);
free(uip, M_UIDINFO);
}
@@ -1742,6 +1748,21 @@ chgpipecnt(struct uidinfo *uip, int diff, rlim_t max)
return (chglimit(uip, &uip->ui_pipecnt, diff, max, "pipecnt"));
}
+int
+chginotifycnt(struct uidinfo *uip, int diff, rlim_t max)
+{
+
+ return (chglimit(uip, &uip->ui_inotifycnt, diff, max, "inotifycnt"));
+}
+
+int
+chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t max)
+{
+
+ return (chglimit(uip, &uip->ui_inotifywatchcnt, diff, max,
+ "inotifywatchcnt"));
+}
+
static int
sysctl_kern_proc_rlimit_usage(SYSCTL_HANDLER_ARGS)
{
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
index 17b53208157a..35b258e68701 100644
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -27,12 +27,12 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include "opt_kern_tls.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capsicum.h>
+#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/ktls.h>
@@ -1246,6 +1246,8 @@ out:
*/
if (error == 0) {
td->td_retval[0] = 0;
+ if (sbytes > 0 && vp != NULL)
+ INOTIFY(vp, IN_ACCESS);
}
if (sent != NULL) {
(*sent) = sbytes;
diff --git a/sys/kern/subr_capability.c b/sys/kern/subr_capability.c
index 7cc6fb593697..5ad5b0af1681 100644
--- a/sys/kern/subr_capability.c
+++ b/sys/kern/subr_capability.c
@@ -74,6 +74,10 @@ const cap_rights_t cap_getsockopt_rights =
CAP_RIGHTS_INITIALIZER(CAP_GETSOCKOPT);
const cap_rights_t cap_getsockname_rights =
CAP_RIGHTS_INITIALIZER(CAP_GETSOCKNAME);
+const cap_rights_t cap_inotify_add_rights =
+ CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_ADD);
+const cap_rights_t cap_inotify_rm_rights =
+ CAP_RIGHTS_INITIALIZER(CAP_INOTIFY_RM);
const cap_rights_t cap_ioctl_rights = CAP_RIGHTS_INITIALIZER(CAP_IOCTL);
const cap_rights_t cap_listen_rights = CAP_RIGHTS_INITIALIZER(CAP_LISTEN);
const cap_rights_t cap_linkat_source_rights =
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index d31ff3b939cc..94e44d888181 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -37,16 +37,17 @@
#include "opt_capsicum.h"
#include "opt_ktrace.h"
-#define EXTERR_CATEGORY EXTERR_CAT_FILEDESC
+#define EXTERR_CATEGORY EXTERR_CAT_GENIO
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/capsicum.h>
+#include <sys/exterrvar.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
-#include <sys/exterrvar.h>
+#include <sys/inotify.h>
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
@@ -195,7 +196,7 @@ sys_read(struct thread *td, struct read_args *uap)
int error;
if (uap->nbyte > IOSIZE_MAX)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "length > iosize_max"));
aiov.iov_base = uap->buf;
aiov.iov_len = uap->nbyte;
auio.uio_iov = &aiov;
@@ -233,7 +234,7 @@ kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset)
int error;
if (nbyte > IOSIZE_MAX)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "length > iosize_max"));
aiov.iov_base = buf;
aiov.iov_len = nbyte;
auio.uio_iov = &aiov;
@@ -329,7 +330,7 @@ kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset)
error = ESPIPE;
else if (offset < 0 &&
(fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR))
- error = EINVAL;
+ error = EXTERROR(EINVAL, "neg offset");
else
error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET);
fdrop(fp, td);
@@ -396,7 +397,7 @@ sys_write(struct thread *td, struct write_args *uap)
int error;
if (uap->nbyte > IOSIZE_MAX)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "length > iosize_max"));
aiov.iov_base = (void *)(uintptr_t)uap->buf;
aiov.iov_len = uap->nbyte;
auio.uio_iov = &aiov;
@@ -435,7 +436,7 @@ kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte,
int error;
if (nbyte > IOSIZE_MAX)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "length > iosize_max"));
aiov.iov_base = (void *)(uintptr_t)buf;
aiov.iov_len = nbyte;
auio.uio_iov = &aiov;
@@ -531,7 +532,7 @@ kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset)
error = ESPIPE;
else if (offset < 0 &&
(fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR))
- error = EINVAL;
+ error = EXTERROR(EINVAL, "neg offset");
else
error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET);
fdrop(fp, td);
@@ -602,14 +603,14 @@ kern_ftruncate(struct thread *td, int fd, off_t length)
AUDIT_ARG_FD(fd);
if (length < 0)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "negative length"));
error = fget(td, fd, &cap_ftruncate_rights, &fp);
if (error)
return (error);
AUDIT_ARG_FILE(td->td_proc, fp);
if (!(fp->f_flag & FWRITE)) {
fdrop(fp, td);
- return (EINVAL);
+ return (EXTERROR(EINVAL, "non-writable"));
}
error = fo_truncate(fp, length, td->td_ucred, td);
fdrop(fp, td);
@@ -840,8 +841,10 @@ kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
int error;
AUDIT_ARG_FD(fd);
- if (offset < 0 || len <= 0)
- return (EINVAL);
+ if (offset < 0)
+ return (EXTERROR(EINVAL, "negative offset"));
+ if (len <= 0)
+ return (EXTERROR(EINVAL, "negative length"));
/* Check for wrap. */
if (offset > OFF_MAX - len)
return (EFBIG);
@@ -898,16 +901,21 @@ kern_fspacectl(struct thread *td, int fd, int cmd,
AUDIT_ARG_FFLAGS(flags);
if (rqsr == NULL)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "no range"));
rmsr = *rqsr;
if (rmsrp != NULL)
*rmsrp = rmsr;
- if (cmd != SPACECTL_DEALLOC ||
- rqsr->r_offset < 0 || rqsr->r_len <= 0 ||
- rqsr->r_offset > OFF_MAX - rqsr->r_len ||
- (flags & ~SPACECTL_F_SUPPORTED) != 0)
- return (EINVAL);
+ if (cmd != SPACECTL_DEALLOC)
+ return (EXTERROR(EINVAL, "cmd", cmd));
+ if (rqsr->r_offset < 0)
+ return (EXTERROR(EINVAL, "neg offset"));
+ if (rqsr->r_len <= 0)
+ return (EXTERROR(EINVAL, "neg len"));
+ if (rqsr->r_offset > OFF_MAX - rqsr->r_len)
+ return (EXTERROR(EINVAL, "offset too large"));
+ if ((flags & ~SPACECTL_F_SUPPORTED) != 0)
+ return (EXTERROR(EINVAL, "reserved flags", flags));
error = fget_write(td, fd, &cap_pwrite_rights, &fp);
if (error != 0)
@@ -939,7 +947,6 @@ int
kern_specialfd(struct thread *td, int type, void *arg)
{
struct file *fp;
- struct specialfd_eventfd *ae;
int error, fd, fflags;
fflags = 0;
@@ -948,14 +955,24 @@ kern_specialfd(struct thread *td, int type, void *arg)
return (error);
switch (type) {
- case SPECIALFD_EVENTFD:
+ case SPECIALFD_EVENTFD: {
+ struct specialfd_eventfd *ae;
+
ae = arg;
if ((ae->flags & EFD_CLOEXEC) != 0)
fflags |= O_CLOEXEC;
error = eventfd_create_file(td, fp, ae->initval, ae->flags);
break;
+ }
+ case SPECIALFD_INOTIFY: {
+ struct specialfd_inotify *si;
+
+ si = arg;
+ error = inotify_create_file(td, fp, si->flags, &fflags);
+ break;
+ }
default:
- error = EINVAL;
+ error = EXTERROR(EINVAL, "invalid type", type);
break;
}
@@ -970,13 +987,14 @@ kern_specialfd(struct thread *td, int type, void *arg)
int
sys___specialfd(struct thread *td, struct __specialfd_args *args)
{
- struct specialfd_eventfd ae;
int error;
switch (args->type) {
- case SPECIALFD_EVENTFD:
+ case SPECIALFD_EVENTFD: {
+ struct specialfd_eventfd ae;
+
if (args->len != sizeof(struct specialfd_eventfd)) {
- error = EINVAL;
+ error = EXTERROR(EINVAL, "eventfd params ABI");
break;
}
error = copyin(args->req, &ae, sizeof(ae));
@@ -984,13 +1002,27 @@ sys___specialfd(struct thread *td, struct __specialfd_args *args)
break;
if ((ae.flags & ~(EFD_CLOEXEC | EFD_NONBLOCK |
EFD_SEMAPHORE)) != 0) {
- error = EINVAL;
+ error = EXTERROR(EINVAL, "reserved flag");
break;
}
error = kern_specialfd(td, args->type, &ae);
break;
+ }
+ case SPECIALFD_INOTIFY: {
+ struct specialfd_inotify si;
+
+ if (args->len != sizeof(si)) {
+ error = EINVAL;
+ break;
+ }
+ error = copyin(args->req, &si, sizeof(si));
+ if (error != 0)
+ break;
+ error = kern_specialfd(td, args->type, &si);
+ break;
+ }
default:
- error = EINVAL;
+ error = EXTERROR(EINVAL, "unknown type", args->type);
break;
}
return (error);
@@ -1166,7 +1198,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
int error, lf, ndu;
if (nd < 0)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "negative ndescs"));
fdp = td->td_proc->p_fd;
ndu = nd;
lf = fdp->fd_nfiles;
@@ -1259,7 +1291,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
rtv = *tvp;
if (rtv.tv_sec < 0 || rtv.tv_usec < 0 ||
rtv.tv_usec >= 1000000) {
- error = EINVAL;
+ error = EXTERROR(EINVAL, "invalid timeval");
goto done;
}
if (!timevalisset(&rtv))
@@ -1491,7 +1523,7 @@ sys_poll(struct thread *td, struct poll_args *uap)
if (uap->timeout != INFTIM) {
if (uap->timeout < 0)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "invalid timeout"));
ts.tv_sec = uap->timeout / 1000;
ts.tv_nsec = (uap->timeout % 1000) * 1000000;
tsp = &ts;
@@ -1516,7 +1548,7 @@ kern_poll_kfds(struct thread *td, struct pollfd *kfds, u_int nfds,
precision = 0;
if (tsp != NULL) {
if (!timespecvalid_interval(tsp))
- return (EINVAL);
+ return (EXTERROR(EINVAL, "invalid timespec"));
if (tsp->tv_sec == 0 && tsp->tv_nsec == 0)
sbt = 0;
else {
@@ -1619,7 +1651,7 @@ kern_poll(struct thread *td, struct pollfd *ufds, u_int nfds,
int error;
if (kern_poll_maxfds(nfds))
- return (EINVAL);
+ return (EXTERROR(EINVAL, "too large nfds"));
if (nfds > nitems(stackfds))
kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
else
@@ -1796,7 +1828,7 @@ selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
rtv = *tvp;
if (rtv.tv_sec < 0 || rtv.tv_usec < 0 ||
rtv.tv_usec >= 1000000)
- return (EINVAL);
+ return (EXTERROR(EINVAL, "invalid timeval"));
if (!timevalisset(&rtv))
asbt = 0;
else if (rtv.tv_sec <= INT32_MAX) {
@@ -2173,7 +2205,7 @@ kern_kcmp(struct thread *td, pid_t pid1, pid_t pid2, int type,
(uintptr_t)p2->p_vmspace);
break;
default:
- error = EINVAL;
+ error = EXTERROR(EINVAL, "unknown op");
break;
}
diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c
index fa36cc824078..90a4f3a7dad8 100644
--- a/sys/kern/syscalls.c
+++ b/sys/kern/syscalls.c
@@ -598,4 +598,6 @@ const char *syscallnames[] = {
"fchroot", /* 590 = fchroot */
"setcred", /* 591 = setcred */
"exterrctl", /* 592 = exterrctl */
+ "inotify_add_watch_at", /* 593 = inotify_add_watch_at */
+ "inotify_rm_watch", /* 594 = inotify_rm_watch */
};
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 08b557a7a540..2ab17e036d5c 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -3356,4 +3356,19 @@
_In_reads_bytes_(4) void *ptr
);
}
+593 AUE_INOTIFY STD|CAPENABLED {
+ int inotify_add_watch_at(
+ int fd,
+ int dfd,
+ _In_z_ const char *path,
+ uint32_t mask
+ );
+ }
+594 AUE_INOTIFY STD|CAPENABLED {
+ int inotify_rm_watch(
+ int fd,
+ int wd
+ );
+ }
+
; vim: syntax=off
diff --git a/sys/kern/systrace_args.c b/sys/kern/systrace_args.c
index 15789d3eb5fa..90b21616a558 100644
--- a/sys/kern/systrace_args.c
+++ b/sys/kern/systrace_args.c
@@ -3482,6 +3482,24 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 3;
break;
}
+ /* inotify_add_watch_at */
+ case 593: {
+ struct inotify_add_watch_at_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ iarg[a++] = p->dfd; /* int */
+ uarg[a++] = (intptr_t)p->path; /* const char * */
+ uarg[a++] = p->mask; /* uint32_t */
+ *n_args = 4;
+ break;
+ }
+ /* inotify_rm_watch */
+ case 594: {
+ struct inotify_rm_watch_args *p = params;
+ iarg[a++] = p->fd; /* int */
+ iarg[a++] = p->wd; /* int */
+ *n_args = 2;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -9317,6 +9335,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* inotify_add_watch_at */
+ case 593:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ case 2:
+ p = "userland const char *";
+ break;
+ case 3:
+ p = "uint32_t";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* inotify_rm_watch */
+ case 594:
+ switch (ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -11305,6 +11355,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* inotify_add_watch_at */
+ case 593:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* inotify_rm_watch */
+ case 594:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 883beaf6d1da..3d455b3874cc 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -41,6 +41,7 @@
#include <sys/counter.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
+#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
@@ -2629,6 +2630,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
atomic_store_ptr(&dvp->v_cache_dd, ncp);
} else if (vp != NULL) {
/*
+ * Take the slow path in INOTIFY(). This flag will be lazily
+ * cleared by cache_vop_inotify() once all directories referring
+ * to vp are unwatched.
+ */
+ if (__predict_false((vn_irflag_read(dvp) & VIRF_INOTIFY) != 0))
+ vn_irflag_set_cond(vp, VIRF_INOTIFY_PARENT);
+
+ /*
* For this case, the cache entry maps both the
* directory name in it and the name ".." for the
* directory's parent.
@@ -4008,6 +4017,56 @@ out:
return (error);
}
+void
+cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie)
+{
+ struct mtx *vlp;
+ struct namecache *ncp;
+ int isdir;
+ bool logged, self;
+
+ isdir = vp->v_type == VDIR ? IN_ISDIR : 0;
+ self = (vn_irflag_read(vp) & VIRF_INOTIFY) != 0 &&
+ (vp->v_type != VDIR || (event & ~_IN_DIR_EVENTS) != 0);
+
+ if (self) {
+ int selfevent;
+
+ if (event == _IN_ATTRIB_LINKCOUNT)
+ selfevent = IN_ATTRIB;
+ else
+ selfevent = event;
+ inotify_log(vp, NULL, 0, selfevent | isdir, cookie);
+ }
+ if ((event & IN_ALL_EVENTS) == 0)
+ return;
+
+ logged = false;
+ vlp = VP2VNODELOCK(vp);
+ mtx_lock(vlp);
+ TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst) {
+ if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
+ continue;
+ if ((vn_irflag_read(ncp->nc_dvp) & VIRF_INOTIFY) != 0) {
+ /*
+ * XXX-MJ if the vnode has two links in the same
+ * dir, we'll log the same event twice.
+ */
+ inotify_log(ncp->nc_dvp, ncp->nc_name, ncp->nc_nlen,
+ event | isdir, cookie);
+ logged = true;
+ }
+ }
+ if (!logged && (vn_irflag_read(vp) & VIRF_INOTIFY_PARENT) != 0) {
+ /*
+ * We didn't find a watched directory that contains this vnode,
+ * so stop calling VOP_INOTIFY for operations on the vnode.
+ */
+ vn_irflag_unset(vp, VIRF_INOTIFY_PARENT);
+ }
+ mtx_unlock(vlp);
+}
+
#ifdef DDB
static void
db_print_vpath(struct vnode *vp)
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index be49c0887609..2a01ec1e307e 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -39,6 +39,7 @@
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/filio.h>
+#include <sys/inotify.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
@@ -119,6 +120,8 @@ struct vop_vector default_vnodeops = {
.vop_getwritemount = vop_stdgetwritemount,
.vop_inactive = VOP_NULL,
.vop_need_inactive = vop_stdneed_inactive,
+ .vop_inotify = vop_stdinotify,
+ .vop_inotify_add_watch = vop_stdinotify_add_watch,
.vop_ioctl = vop_stdioctl,
.vop_kqfilter = vop_stdkqfilter,
.vop_islocked = vop_stdislocked,
@@ -1306,6 +1309,20 @@ vop_stdneed_inactive(struct vop_need_inactive_args *ap)
}
int
+vop_stdinotify(struct vop_inotify_args *ap)
+{
+ vn_inotify(ap->a_vp, ap->a_dvp, ap->a_cnp, ap->a_event, ap->a_cookie);
+ return (0);
+}
+
+int
+vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *ap)
+{
+ return (vn_inotify_add_watch(ap->a_vp, ap->a_sc, ap->a_mask,
+ ap->a_wdp, ap->a_td));
+}
+
+int
vop_stdioctl(struct vop_ioctl_args *ap)
{
struct vnode *vp;
diff --git a/sys/kern/vfs_inotify.c b/sys/kern/vfs_inotify.c
new file mode 100644
index 000000000000..9562350c897f
--- /dev/null
+++ b/sys/kern/vfs_inotify.c
@@ -0,0 +1,1008 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+#include "opt_ktrace.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/caprights.h>
+#include <sys/counter.h>
+#include <sys/dirent.h>
+#define EXTERR_CATEGORY EXTERR_CAT_INOTIFY
+#include <sys/exterrvar.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/inotify.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/ktrace.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/namei.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/resourcevar.h>
+#include <sys/selinfo.h>
+#include <sys/stat.h>
+#include <sys/syscallsubr.h>
+#include <sys/sysctl.h>
+#include <sys/sysent.h>
+#include <sys/syslimits.h>
+#include <sys/sysproto.h>
+#include <sys/tree.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+
+uint32_t inotify_rename_cookie;
+
+static SYSCTL_NODE(_vfs, OID_AUTO, inotify, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "inotify configuration");
+
+static int inotify_max_queued_events = 16384;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, max_queued_events, CTLFLAG_RWTUN,
+ &inotify_max_queued_events, 0,
+ "Maximum number of events to queue on an inotify descriptor");
+
+static int inotify_max_user_instances = 256;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_instances, CTLFLAG_RWTUN,
+ &inotify_max_user_instances, 0,
+ "Maximum number of inotify descriptors per user");
+
+static int inotify_max_user_watches;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, max_user_watches, CTLFLAG_RWTUN,
+ &inotify_max_user_watches, 0,
+ "Maximum number of inotify watches per user");
+
+static int inotify_max_watches;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, max_watches, CTLFLAG_RWTUN,
+ &inotify_max_watches, 0,
+ "Maximum number of inotify watches system-wide");
+
+static int inotify_watches;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, watches, CTLFLAG_RD,
+ &inotify_watches, 0,
+ "Total number of inotify watches currently in use");
+
+static int inotify_coalesce = 1;
+SYSCTL_INT(_vfs_inotify, OID_AUTO, coalesce, CTLFLAG_RWTUN,
+ &inotify_coalesce, 0,
+ "Coalesce inotify events when possible");
+
+static COUNTER_U64_DEFINE_EARLY(inotify_event_drops);
+SYSCTL_COUNTER_U64(_vfs_inotify, OID_AUTO, event_drops, CTLFLAG_RD,
+ &inotify_event_drops,
+ "Number of inotify events dropped due to limits or allocation failures");
+
+static fo_rdwr_t inotify_read;
+static fo_ioctl_t inotify_ioctl;
+static fo_poll_t inotify_poll;
+static fo_kqfilter_t inotify_kqfilter;
+static fo_stat_t inotify_stat;
+static fo_close_t inotify_close;
+static fo_fill_kinfo_t inotify_fill_kinfo;
+
+static const struct fileops inotifyfdops = {
+ .fo_read = inotify_read,
+ .fo_write = invfo_rdwr,
+ .fo_truncate = invfo_truncate,
+ .fo_ioctl = inotify_ioctl,
+ .fo_poll = inotify_poll,
+ .fo_kqfilter = inotify_kqfilter,
+ .fo_stat = inotify_stat,
+ .fo_close = inotify_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_fill_kinfo = inotify_fill_kinfo,
+ .fo_cmp = file_kcmp_generic,
+ .fo_flags = DFLAG_PASSABLE,
+};
+
+static void filt_inotifydetach(struct knote *kn);
+static int filt_inotifyevent(struct knote *kn, long hint);
+
+static const struct filterops inotify_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_inotifydetach,
+ .f_event = filt_inotifyevent,
+};
+
+static MALLOC_DEFINE(M_INOTIFY, "inotify", "inotify data structures");
+
+struct inotify_record {
+ STAILQ_ENTRY(inotify_record) link;
+ struct inotify_event ev;
+};
+
+static uint64_t inotify_ino = 1;
+
+/*
+ * On LP64 systems this occupies 64 bytes, so we don't get internal
+ * fragmentation by allocating watches with malloc(9). If the size changes,
+ * consider using a UMA zone to improve memory efficiency.
+ */
+struct inotify_watch {
+ struct inotify_softc *sc; /* back-pointer */
+ int wd; /* unique ID */
+ uint32_t mask; /* event mask */
+ struct vnode *vp; /* vnode being watched, refed */
+ RB_ENTRY(inotify_watch) ilink; /* inotify linkage */
+ TAILQ_ENTRY(inotify_watch) vlink; /* vnode linkage */
+};
+
+static void
+inotify_init(void *arg __unused)
+{
+ /* Don't let a user hold too many vnodes. */
+ inotify_max_user_watches = desiredvnodes / 3;
+ /* Don't let the system hold too many vnodes. */
+ inotify_max_watches = desiredvnodes / 2;
+}
+SYSINIT(inotify, SI_SUB_VFS, SI_ORDER_ANY, inotify_init, NULL);
+
+static int
+inotify_watch_cmp(const struct inotify_watch *a,
+ const struct inotify_watch *b)
+{
+ if (a->wd < b->wd)
+ return (-1);
+ else if (a->wd > b->wd)
+ return (1);
+ else
+ return (0);
+}
+RB_HEAD(inotify_watch_tree, inotify_watch);
+RB_GENERATE_STATIC(inotify_watch_tree, inotify_watch, ilink, inotify_watch_cmp);
+
+struct inotify_softc {
+ struct mtx lock; /* serialize all softc writes */
+ STAILQ_HEAD(, inotify_record) pending; /* events waiting to be read */
+ struct inotify_record overflow; /* preallocated record */
+ int nextwatch; /* next watch ID to try */
+ int npending; /* number of pending events */
+ size_t nbpending; /* bytes available to read */
+ uint64_t ino; /* unique identifier */
+ struct inotify_watch_tree watches; /* active watches */
+ struct selinfo sel; /* select/poll/kevent info */
+ struct ucred *cred; /* credential ref */
+};
+
+static struct inotify_record *
+inotify_dequeue(struct inotify_softc *sc)
+{
+ struct inotify_record *rec;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+ KASSERT(!STAILQ_EMPTY(&sc->pending),
+ ("%s: queue for %p is empty", __func__, sc));
+
+ rec = STAILQ_FIRST(&sc->pending);
+ STAILQ_REMOVE_HEAD(&sc->pending, link);
+ sc->npending--;
+ sc->nbpending -= sizeof(rec->ev) + rec->ev.len;
+ return (rec);
+}
+
+static void
+inotify_enqueue(struct inotify_softc *sc, struct inotify_record *rec, bool head)
+{
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ if (head)
+ STAILQ_INSERT_HEAD(&sc->pending, rec, link);
+ else
+ STAILQ_INSERT_TAIL(&sc->pending, rec, link);
+ sc->npending++;
+ sc->nbpending += sizeof(rec->ev) + rec->ev.len;
+}
+
+static int
+inotify_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags,
+ struct thread *td)
+{
+ struct inotify_softc *sc;
+ struct inotify_record *rec;
+ int error;
+ bool first;
+
+ sc = fp->f_data;
+ error = 0;
+
+ mtx_lock(&sc->lock);
+ while (STAILQ_EMPTY(&sc->pending)) {
+ if ((flags & IO_NDELAY) != 0 || (fp->f_flag & FNONBLOCK) != 0) {
+ mtx_unlock(&sc->lock);
+ return (EWOULDBLOCK);
+ }
+ error = msleep(&sc->pending, &sc->lock, PCATCH, "inotify", 0);
+ if (error != 0) {
+ mtx_unlock(&sc->lock);
+ return (error);
+ }
+ }
+ for (first = true; !STAILQ_EMPTY(&sc->pending); first = false) {
+ size_t len;
+
+ rec = inotify_dequeue(sc);
+ len = sizeof(rec->ev) + rec->ev.len;
+ if (uio->uio_resid < (ssize_t)len) {
+ inotify_enqueue(sc, rec, true);
+ if (first) {
+ error = EXTERROR(EINVAL,
+ "read buffer is too small");
+ }
+ break;
+ }
+ mtx_unlock(&sc->lock);
+ error = uiomove(&rec->ev, len, uio);
+#ifdef KTRACE
+ if (error == 0 && KTRPOINT(td, KTR_STRUCT))
+ ktrstruct("inotify", &rec->ev, len);
+#endif
+ mtx_lock(&sc->lock);
+ if (error != 0) {
+ inotify_enqueue(sc, rec, true);
+ mtx_unlock(&sc->lock);
+ return (error);
+ }
+ if (rec == &sc->overflow) {
+ /*
+ * Signal to inotify_queue_record() that the overflow
+ * record can be reused.
+ */
+ memset(rec, 0, sizeof(*rec));
+ } else {
+ free(rec, M_INOTIFY);
+ }
+ }
+ mtx_unlock(&sc->lock);
+ return (error);
+}
+
+static int
+inotify_ioctl(struct file *fp, u_long com, void *data, struct ucred *cred,
+ struct thread *td)
+{
+ struct inotify_softc *sc;
+
+ sc = fp->f_data;
+
+ switch (com) {
+ case FIONREAD:
+ *(int *)data = (int)sc->nbpending;
+ return (0);
+ case FIONBIO:
+ case FIOASYNC:
+ return (0);
+ default:
+ return (ENOTTY);
+ }
+
+ return (0);
+}
+
+static int
+inotify_poll(struct file *fp, int events, struct ucred *cred, struct thread *td)
+{
+ struct inotify_softc *sc;
+ int revents;
+
+ sc = fp->f_data;
+ revents = 0;
+
+ mtx_lock(&sc->lock);
+ if ((events & (POLLIN | POLLRDNORM)) != 0 && sc->npending > 0)
+ revents |= events & (POLLIN | POLLRDNORM);
+ else
+ selrecord(td, &sc->sel);
+ mtx_unlock(&sc->lock);
+ return (revents);
+}
+
+static void
+filt_inotifydetach(struct knote *kn)
+{
+ struct inotify_softc *sc;
+
+ sc = kn->kn_hook;
+ knlist_remove(&sc->sel.si_note, kn, 0);
+}
+
+static int
+filt_inotifyevent(struct knote *kn, long hint)
+{
+ struct inotify_softc *sc;
+
+ sc = kn->kn_hook;
+ mtx_assert(&sc->lock, MA_OWNED);
+ kn->kn_data = sc->nbpending;
+ return (kn->kn_data > 0);
+}
+
+static int
+inotify_kqfilter(struct file *fp, struct knote *kn)
+{
+ struct inotify_softc *sc;
+
+ if (kn->kn_filter != EVFILT_READ)
+ return (EINVAL);
+ sc = fp->f_data;
+ kn->kn_fop = &inotify_rfiltops;
+ kn->kn_hook = sc;
+ knlist_add(&sc->sel.si_note, kn, 0);
+ return (0);
+}
+
+static int
+inotify_stat(struct file *fp, struct stat *sb, struct ucred *cred)
+{
+ struct inotify_softc *sc;
+
+ sc = fp->f_data;
+
+ memset(sb, 0, sizeof(*sb));
+ sb->st_mode = S_IFREG | S_IRUSR;
+ sb->st_blksize = sizeof(struct inotify_event) + _IN_NAMESIZE(NAME_MAX);
+ mtx_lock(&sc->lock);
+ sb->st_size = sc->nbpending;
+ sb->st_blocks = sc->npending;
+ sb->st_uid = sc->cred->cr_ruid;
+ sb->st_gid = sc->cred->cr_rgid;
+ sb->st_ino = sc->ino;
+ mtx_unlock(&sc->lock);
+ return (0);
+}
+
+static void
+inotify_unlink_watch_locked(struct inotify_softc *sc, struct inotify_watch *watch)
+{
+ struct vnode *vp;
+
+ vp = watch->vp;
+ mtx_assert(&vp->v_pollinfo->vpi_lock, MA_OWNED);
+
+ atomic_subtract_int(&inotify_watches, 1);
+ (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0);
+
+ TAILQ_REMOVE(&vp->v_pollinfo->vpi_inotify, watch, vlink);
+ if (TAILQ_EMPTY(&vp->v_pollinfo->vpi_inotify))
+ vn_irflag_unset_locked(vp, VIRF_INOTIFY);
+}
+
+/*
+ * Assumes that the watch has already been removed from its softc.
+ */
+static void
+inotify_remove_watch(struct inotify_watch *watch)
+{
+ struct inotify_softc *sc;
+ struct vnode *vp;
+
+ sc = watch->sc;
+
+ vp = watch->vp;
+ mtx_lock(&vp->v_pollinfo->vpi_lock);
+ inotify_unlink_watch_locked(sc, watch);
+ mtx_unlock(&vp->v_pollinfo->vpi_lock);
+
+ vrele(vp);
+ free(watch, M_INOTIFY);
+}
+
+static int
+inotify_close(struct file *fp, struct thread *td)
+{
+ struct inotify_softc *sc;
+ struct inotify_record *rec;
+ struct inotify_watch *watch;
+
+ sc = fp->f_data;
+
+ mtx_lock(&sc->lock);
+ (void)chginotifycnt(sc->cred->cr_ruidinfo, -1, 0);
+ while ((watch = RB_MIN(inotify_watch_tree, &sc->watches)) != NULL) {
+ RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
+ mtx_unlock(&sc->lock);
+ inotify_remove_watch(watch);
+ mtx_lock(&sc->lock);
+ }
+ while (!STAILQ_EMPTY(&sc->pending)) {
+ rec = inotify_dequeue(sc);
+ if (rec != &sc->overflow)
+ free(rec, M_INOTIFY);
+ }
+ mtx_unlock(&sc->lock);
+ seldrain(&sc->sel);
+ knlist_destroy(&sc->sel.si_note);
+ mtx_destroy(&sc->lock);
+ crfree(sc->cred);
+ free(sc, M_INOTIFY);
+ return (0);
+}
+
+static int
+inotify_fill_kinfo(struct file *fp, struct kinfo_file *kif,
+ struct filedesc *fdp)
+{
+ struct inotify_softc *sc;
+
+ sc = fp->f_data;
+
+ kif->kf_type = KF_TYPE_INOTIFY;
+ kif->kf_un.kf_inotify.kf_inotify_npending = sc->npending;
+ kif->kf_un.kf_inotify.kf_inotify_nbpending = sc->nbpending;
+ return (0);
+}
+
+int
+inotify_create_file(struct thread *td, struct file *fp, int flags, int *fflagsp)
+{
+ struct inotify_softc *sc;
+ int fflags;
+
+ if ((flags & ~(IN_NONBLOCK | IN_CLOEXEC)) != 0)
+ return (EINVAL);
+
+ if (!chginotifycnt(td->td_ucred->cr_ruidinfo, 1,
+ inotify_max_user_instances))
+ return (EMFILE);
+
+ sc = malloc(sizeof(*sc), M_INOTIFY, M_WAITOK | M_ZERO);
+ sc->nextwatch = 1; /* Required for compatibility. */
+ STAILQ_INIT(&sc->pending);
+ RB_INIT(&sc->watches);
+ mtx_init(&sc->lock, "inotify", NULL, MTX_DEF);
+ knlist_init_mtx(&sc->sel.si_note, &sc->lock);
+ sc->cred = crhold(td->td_ucred);
+ sc->ino = atomic_fetchadd_64(&inotify_ino, 1);
+
+ fflags = FREAD;
+ if ((flags & IN_NONBLOCK) != 0)
+ fflags |= FNONBLOCK;
+ if ((flags & IN_CLOEXEC) != 0)
+ *fflagsp |= O_CLOEXEC;
+ finit(fp, fflags, DTYPE_INOTIFY, sc, &inotifyfdops);
+
+ return (0);
+}
+
+static struct inotify_record *
+inotify_alloc_record(uint32_t wd, const char *name, size_t namelen, int event,
+ uint32_t cookie, int waitok)
+{
+ struct inotify_event *evp;
+ struct inotify_record *rec;
+
+ rec = malloc(sizeof(*rec) + _IN_NAMESIZE(namelen), M_INOTIFY,
+ waitok | M_ZERO);
+ if (rec == NULL)
+ return (NULL);
+ evp = &rec->ev;
+ evp->wd = wd;
+ evp->mask = event;
+ evp->cookie = cookie;
+ evp->len = _IN_NAMESIZE(namelen);
+ if (name != NULL)
+ memcpy(evp->name, name, namelen);
+ return (rec);
+}
+
+static bool
+inotify_can_coalesce(struct inotify_softc *sc, struct inotify_event *evp)
+{
+ struct inotify_record *prev;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ prev = STAILQ_LAST(&sc->pending, inotify_record, link);
+ return (prev != NULL && prev->ev.mask == evp->mask &&
+ prev->ev.wd == evp->wd && prev->ev.cookie == evp->cookie &&
+ prev->ev.len == evp->len &&
+ (evp->len == 0 || strcmp(prev->ev.name, evp->name) == 0));
+}
+
+static void
+inotify_overflow_event(struct inotify_event *evp)
+{
+ evp->mask = IN_Q_OVERFLOW;
+ evp->wd = -1;
+ evp->cookie = 0;
+ evp->len = 0;
+}
+
+/*
+ * Put an event record on the queue for an inotify desscriptor. Return false if
+ * the record was not enqueued for some reason, true otherwise.
+ */
+static bool
+inotify_queue_record(struct inotify_softc *sc, struct inotify_record *rec)
+{
+ struct inotify_event *evp;
+
+ mtx_assert(&sc->lock, MA_OWNED);
+
+ evp = &rec->ev;
+ if (__predict_false(rec == &sc->overflow)) {
+ /*
+ * Is the overflow record already in the queue? If so, there's
+ * not much else we can do: we're here because a kernel memory
+ * shortage prevented new record allocations.
+ */
+ counter_u64_add(inotify_event_drops, 1);
+ if (evp->mask == IN_Q_OVERFLOW)
+ return (false);
+ inotify_overflow_event(evp);
+ } else {
+ /* Try to coalesce duplicate events. */
+ if (inotify_coalesce && inotify_can_coalesce(sc, evp))
+ return (false);
+
+ /*
+ * Would this one overflow the queue? If so, convert it to an
+ * overflow event and try again to coalesce.
+ */
+ if (sc->npending >= inotify_max_queued_events) {
+ counter_u64_add(inotify_event_drops, 1);
+ inotify_overflow_event(evp);
+ if (inotify_can_coalesce(sc, evp))
+ return (false);
+ }
+ }
+ inotify_enqueue(sc, rec, false);
+ selwakeup(&sc->sel);
+ KNOTE_LOCKED(&sc->sel.si_note, 0);
+ wakeup(&sc->pending);
+ return (true);
+}
+
+static int
+inotify_log_one(struct inotify_watch *watch, const char *name, size_t namelen,
+ int event, uint32_t cookie)
+{
+ struct inotify_watch key;
+ struct inotify_softc *sc;
+ struct inotify_record *rec;
+ int relecount;
+ bool allocfail;
+
+ relecount = 0;
+
+ sc = watch->sc;
+ rec = inotify_alloc_record(watch->wd, name, namelen, event, cookie,
+ M_NOWAIT);
+ if (rec == NULL) {
+ rec = &sc->overflow;
+ allocfail = true;
+ } else {
+ allocfail = false;
+ }
+
+ mtx_lock(&sc->lock);
+ if (!inotify_queue_record(sc, rec) && rec != &sc->overflow)
+ free(rec, M_INOTIFY);
+ if ((watch->mask & IN_ONESHOT) != 0 ||
+ (event & (IN_DELETE_SELF | IN_UNMOUNT)) != 0) {
+ if (!allocfail) {
+ rec = inotify_alloc_record(watch->wd, NULL, 0,
+ IN_IGNORED, 0, M_NOWAIT);
+ if (rec == NULL)
+ rec = &sc->overflow;
+ if (!inotify_queue_record(sc, rec) &&
+ rec != &sc->overflow)
+ free(rec, M_INOTIFY);
+ }
+
+ /*
+ * Remove the watch, taking care to handle races with
+ * inotify_close().
+ */
+ key.wd = watch->wd;
+ if (RB_FIND(inotify_watch_tree, &sc->watches, &key) != NULL) {
+ RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
+ inotify_unlink_watch_locked(sc, watch);
+ free(watch, M_INOTIFY);
+
+ /* Defer vrele() to until locks are dropped. */
+ relecount++;
+ }
+ }
+ mtx_unlock(&sc->lock);
+ return (relecount);
+}
+
+void
+inotify_log(struct vnode *vp, const char *name, size_t namelen, int event,
+ uint32_t cookie)
+{
+ struct inotify_watch *watch, *tmp;
+ int relecount;
+
+ KASSERT((event & ~(IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT)) == 0,
+ ("inotify_log: invalid event %#x", event));
+
+ relecount = 0;
+ mtx_lock(&vp->v_pollinfo->vpi_lock);
+ TAILQ_FOREACH_SAFE(watch, &vp->v_pollinfo->vpi_inotify, vlink, tmp) {
+ KASSERT(watch->vp == vp,
+ ("inotify_log: watch %p vp != vp", watch));
+ if ((watch->mask & event) != 0 || event == IN_UNMOUNT) {
+ relecount += inotify_log_one(watch, name, namelen, event,
+ cookie);
+ }
+ }
+ mtx_unlock(&vp->v_pollinfo->vpi_lock);
+
+ for (int i = 0; i < relecount; i++)
+ vrele(vp);
+}
+
+/*
+ * An inotify event occurred on a watched vnode.
+ */
+void
+vn_inotify(struct vnode *vp, struct vnode *dvp, struct componentname *cnp,
+ int event, uint32_t cookie)
+{
+ int isdir;
+
+ VNPASS(vp->v_holdcnt > 0, vp);
+
+ isdir = vp->v_type == VDIR ? IN_ISDIR : 0;
+
+ if (dvp != NULL) {
+ VNPASS(dvp->v_holdcnt > 0, dvp);
+
+ /*
+ * Should we log an event for the vnode itself?
+ */
+ if ((vn_irflag_read(vp) & VIRF_INOTIFY) != 0) {
+ int selfevent;
+
+ switch (event) {
+ case _IN_MOVE_DELETE:
+ case IN_DELETE:
+ /*
+ * IN_DELETE_SELF is only generated when the
+ * last hard link of a file is removed.
+ */
+ selfevent = IN_DELETE_SELF;
+ if (vp->v_type != VDIR) {
+ struct vattr va;
+ int error;
+
+ error = VOP_GETATTR(vp, &va, cnp->cn_cred);
+ if (error == 0 && va.va_nlink != 0)
+ selfevent = 0;
+ }
+ break;
+ case IN_MOVED_FROM:
+ cookie = 0;
+ selfevent = IN_MOVE_SELF;
+ break;
+ case _IN_ATTRIB_LINKCOUNT:
+ selfevent = IN_ATTRIB;
+ break;
+ default:
+ selfevent = event;
+ break;
+ }
+
+ if ((selfevent & ~_IN_DIR_EVENTS) != 0) {
+ inotify_log(vp, NULL, 0, selfevent | isdir,
+ cookie);
+ }
+ }
+
+ /*
+ * Something is watching the directory through which this vnode
+ * was referenced, so we may need to log the event.
+ */
+ if ((event & IN_ALL_EVENTS) != 0 &&
+ (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0) {
+ inotify_log(dvp, cnp->cn_nameptr,
+ cnp->cn_namelen, event | isdir, cookie);
+ }
+ } else {
+ /*
+ * We don't know which watched directory might contain the
+ * vnode, so we have to fall back to searching the name cache.
+ */
+ cache_vop_inotify(vp, event, cookie);
+ }
+}
+
+int
+vn_inotify_add_watch(struct vnode *vp, struct inotify_softc *sc, uint32_t mask,
+ uint32_t *wdp, struct thread *td)
+{
+ struct inotify_watch *watch, *watch1;
+ uint32_t wd;
+
+ /*
+ * If this is a directory, make sure all of its entries are present in
+ * the name cache so that we're able to look them up if an event occurs.
+ * The persistent reference on the directory prevents the outgoing name
+ * cache entries from being reclaimed.
+ */
+ if (vp->v_type == VDIR) {
+ struct dirent *dp;
+ char *buf;
+ off_t off;
+ size_t buflen, len;
+ int eof, error;
+
+ buflen = 128 * sizeof(struct dirent);
+ buf = malloc(buflen, M_TEMP, M_WAITOK);
+
+ error = 0;
+ len = off = eof = 0;
+ for (;;) {
+ struct nameidata nd;
+
+ error = vn_dir_next_dirent(vp, td, buf, buflen, &dp,
+ &len, &off, &eof);
+ if (error != 0)
+ break;
+ if (len == 0)
+ /* Finished reading. */
+ break;
+ if (strcmp(dp->d_name, ".") == 0 ||
+ strcmp(dp->d_name, "..") == 0)
+ continue;
+
+ /*
+ * namei() consumes a reference on the starting
+ * directory if it's specified as a vnode.
+ */
+ vrefact(vp);
+ NDINIT_ATVP(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE,
+ dp->d_name, vp);
+ error = namei(&nd);
+ if (error != 0)
+ break;
+ vn_irflag_set_cond(nd.ni_vp, VIRF_INOTIFY_PARENT);
+ vrele(nd.ni_vp);
+ }
+ free(buf, M_TEMP);
+ if (error != 0)
+ return (error);
+ }
+
+ /*
+ * The vnode referenced in kern_inotify_add_watch() might be different
+ * than this one if nullfs is in the picture.
+ */
+ vrefact(vp);
+ watch = malloc(sizeof(*watch), M_INOTIFY, M_WAITOK | M_ZERO);
+ watch->sc = sc;
+ watch->vp = vp;
+ watch->mask = mask;
+
+ /*
+ * Are we updating an existing watch? Search the vnode's list rather
+ * than that of the softc, as the former is likely to be shorter.
+ */
+ v_addpollinfo(vp);
+ mtx_lock(&vp->v_pollinfo->vpi_lock);
+ TAILQ_FOREACH(watch1, &vp->v_pollinfo->vpi_inotify, vlink) {
+ if (watch1->sc == sc)
+ break;
+ }
+ mtx_lock(&sc->lock);
+ if (watch1 != NULL) {
+ mtx_unlock(&vp->v_pollinfo->vpi_lock);
+
+ /*
+ * We found an existing watch, update it based on our flags.
+ */
+ if ((mask & IN_MASK_CREATE) != 0) {
+ mtx_unlock(&sc->lock);
+ vrele(vp);
+ free(watch, M_INOTIFY);
+ return (EEXIST);
+ }
+ if ((mask & IN_MASK_ADD) != 0)
+ watch1->mask |= mask;
+ else
+ watch1->mask = mask;
+ *wdp = watch1->wd;
+ mtx_unlock(&sc->lock);
+ vrele(vp);
+ free(watch, M_INOTIFY);
+ return (EJUSTRETURN);
+ }
+
+ /*
+ * We're creating a new watch. Add it to the softc and vnode watch
+ * lists.
+ */
+ do {
+ struct inotify_watch key;
+
+ /*
+ * Search for the next available watch descriptor. This is
+ * implemented so as to avoid reusing watch descriptors for as
+ * long as possible.
+ */
+ key.wd = wd = sc->nextwatch++;
+ watch1 = RB_FIND(inotify_watch_tree, &sc->watches, &key);
+ } while (watch1 != NULL || wd == 0);
+ watch->wd = wd;
+ RB_INSERT(inotify_watch_tree, &sc->watches, watch);
+ TAILQ_INSERT_TAIL(&vp->v_pollinfo->vpi_inotify, watch, vlink);
+ mtx_unlock(&sc->lock);
+ mtx_unlock(&vp->v_pollinfo->vpi_lock);
+ vn_irflag_set_cond(vp, VIRF_INOTIFY);
+
+ *wdp = wd;
+
+ return (0);
+}
+
+void
+vn_inotify_revoke(struct vnode *vp)
+{
+ if (vp->v_pollinfo == NULL) {
+ /* This is a nullfs vnode which shadows a watched vnode. */
+ return;
+ }
+ inotify_log(vp, NULL, 0, IN_UNMOUNT, 0);
+}
+
+static int
+fget_inotify(struct thread *td, int fd, const cap_rights_t *needrightsp,
+ struct file **fpp)
+{
+ struct file *fp;
+ int error;
+
+ error = fget(td, fd, needrightsp, &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_INOTIFY) {
+ fdrop(fp, td);
+ return (EINVAL);
+ }
+ *fpp = fp;
+ return (0);
+}
+
+int
+kern_inotify_add_watch(int fd, int dfd, const char *path, uint32_t mask,
+ struct thread *td)
+{
+ struct nameidata nd;
+ struct file *fp;
+ struct inotify_softc *sc;
+ struct vnode *vp;
+ uint32_t wd;
+ int count, error;
+
+ fp = NULL;
+ vp = NULL;
+
+ if ((mask & IN_ALL_EVENTS) == 0)
+ return (EXTERROR(EINVAL, "no events specified"));
+ if ((mask & (IN_MASK_ADD | IN_MASK_CREATE)) ==
+ (IN_MASK_ADD | IN_MASK_CREATE))
+ return (EXTERROR(EINVAL,
+ "IN_MASK_ADD and IN_MASK_CREATE are mutually exclusive"));
+ if ((mask & ~(IN_ALL_EVENTS | _IN_ALL_FLAGS | IN_UNMOUNT)) != 0)
+ return (EXTERROR(EINVAL, "unrecognized flag"));
+
+ error = fget_inotify(td, fd, &cap_inotify_add_rights, &fp);
+ if (error != 0)
+ return (error);
+ sc = fp->f_data;
+
+ NDINIT_AT(&nd, LOOKUP,
+ ((mask & IN_DONT_FOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF |
+ LOCKSHARED | AUDITVNODE1, UIO_USERSPACE, path, dfd);
+ error = namei(&nd);
+ if (error != 0)
+ goto out;
+ NDFREE_PNBUF(&nd);
+ vp = nd.ni_vp;
+
+ error = VOP_ACCESS(vp, VREAD, td->td_ucred, td);
+ if (error != 0)
+ goto out;
+
+ if ((mask & IN_ONLYDIR) != 0 && vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+
+ count = atomic_fetchadd_int(&inotify_watches, 1);
+ if (count > inotify_max_watches) {
+ atomic_subtract_int(&inotify_watches, 1);
+ error = ENOSPC;
+ goto out;
+ }
+ if (!chginotifywatchcnt(sc->cred->cr_ruidinfo, 1,
+ inotify_max_user_watches)) {
+ atomic_subtract_int(&inotify_watches, 1);
+ error = ENOSPC;
+ goto out;
+ }
+ error = VOP_INOTIFY_ADD_WATCH(vp, sc, mask, &wd, td);
+ if (error != 0) {
+ atomic_subtract_int(&inotify_watches, 1);
+ (void)chginotifywatchcnt(sc->cred->cr_ruidinfo, -1, 0);
+ if (error == EJUSTRETURN) {
+ /* We updated an existing watch, everything is ok. */
+ error = 0;
+ } else {
+ goto out;
+ }
+ }
+ td->td_retval[0] = wd;
+
+out:
+ if (vp != NULL)
+ vput(vp);
+ fdrop(fp, td);
+ return (error);
+}
+
+int
+sys_inotify_add_watch_at(struct thread *td,
+ struct inotify_add_watch_at_args *uap)
+{
+ return (kern_inotify_add_watch(uap->fd, uap->dfd, uap->path,
+ uap->mask, td));
+}
+
+int
+kern_inotify_rm_watch(int fd, uint32_t wd, struct thread *td)
+{
+ struct file *fp;
+ struct inotify_softc *sc;
+ struct inotify_record *rec;
+ struct inotify_watch key, *watch;
+ int error;
+
+ error = fget_inotify(td, fd, &cap_inotify_rm_rights, &fp);
+ if (error != 0)
+ return (error);
+ sc = fp->f_data;
+
+ rec = inotify_alloc_record(wd, NULL, 0, IN_IGNORED, 0, M_WAITOK);
+
+ /*
+ * For compatibility with Linux, we do not remove pending events
+ * associated with the watch. Watch descriptors are implemented so as
+ * to avoid being reused for as long as possible, so one hopes that any
+ * pending events from the removed watch descriptor will be removed
+ * before the watch descriptor is recycled.
+ */
+ key.wd = wd;
+ mtx_lock(&sc->lock);
+ watch = RB_FIND(inotify_watch_tree, &sc->watches, &key);
+ if (watch == NULL) {
+ free(rec, M_INOTIFY);
+ error = EINVAL;
+ } else {
+ RB_REMOVE(inotify_watch_tree, &sc->watches, watch);
+ if (!inotify_queue_record(sc, rec)) {
+ free(rec, M_INOTIFY);
+ error = 0;
+ }
+ }
+ mtx_unlock(&sc->lock);
+ if (watch != NULL)
+ inotify_remove_watch(watch);
+ fdrop(fp, td);
+ return (error);
+}
+
+int
+sys_inotify_rm_watch(struct thread *td, struct inotify_rm_watch_args *uap)
+{
+ return (kern_inotify_rm_watch(uap->fd, uap->wd, td));
+}
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index 86c7bdaa02c0..fb3e6a7a2534 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -75,14 +75,20 @@ static void NDVALIDATE_impl(struct nameidata *, int);
#endif
/*
+ * Reset ndp to its original state.
+ */
+#define NDRESET(ndp) do { \
+ NDREINIT_DBG(ndp); \
+ ndp->ni_resflags = 0; \
+ ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \
+} while (0)
+/*
* Prepare namei() to restart. Reset components to its original state and set
* ISRESTARTED flag which signals the underlying lookup code to change the root
* from ABI root to actual root and prevents a further restarts.
*/
#define NDRESTART(ndp) do { \
- NDREINIT_DBG(ndp); \
- ndp->ni_resflags = 0; \
- ndp->ni_cnd.cn_flags &= ~NAMEI_INTERNAL_FLAGS; \
+ NDRESET(ndp); \
ndp->ni_cnd.cn_flags |= ISRESTARTED; \
} while (0)
@@ -162,8 +168,8 @@ static struct vop_vector crossmp_vnodeops = {
*/
struct nameicap_tracker {
- struct vnode *dp;
TAILQ_ENTRY(nameicap_tracker) nm_link;
+ struct mount *mp;
};
/* Zone for cap mode tracker elements used for dotdot capability checks. */
@@ -192,49 +198,75 @@ SYSCTL_INT(_vfs, OID_AUTO, lookup_cap_dotdot_nonlocal, CTLFLAG_RWTUN,
"enables \"..\" components in path lookup in capability mode "
"on non-local mount");
-static void
+static int
nameicap_tracker_add(struct nameidata *ndp, struct vnode *dp)
{
struct nameicap_tracker *nt;
+ struct mount *mp;
+ int error;
if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0 || dp->v_type != VDIR)
- return;
+ return (0);
+ mp = NULL;
+ error = VOP_GETWRITEMOUNT(dp, &mp);
+ if (error != 0)
+ return (error);
nt = TAILQ_LAST(&ndp->ni_cap_tracker, nameicap_tracker_head);
- if (nt != NULL && nt->dp == dp)
- return;
+ if (nt != NULL && nt->mp == mp) {
+ vfs_rel(mp);
+ return (0);
+ }
nt = malloc(sizeof(*nt), M_NAMEITRACKER, M_WAITOK);
- vhold(dp);
- nt->dp = dp;
- TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link);
+ nt->mp = mp;
+ error = lockmgr(&mp->mnt_renamelock, LK_SHARED | LK_NOWAIT, 0);
+ if (error != 0) {
+ MPASS(ndp->ni_nctrack_mnt == NULL);
+ ndp->ni_nctrack_mnt = mp;
+ free(nt, M_NAMEITRACKER);
+ error = ERESTART;
+ } else {
+ TAILQ_INSERT_TAIL(&ndp->ni_cap_tracker, nt, nm_link);
+ }
+ return (error);
}
static void
-nameicap_cleanup_from(struct nameidata *ndp, struct nameicap_tracker *first)
+nameicap_cleanup(struct nameidata *ndp, int error)
{
struct nameicap_tracker *nt, *nt1;
+ struct mount *mp;
+
+ KASSERT((ndp->ni_nctrack_mnt == NULL &&
+ TAILQ_EMPTY(&ndp->ni_cap_tracker)) ||
+ (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0,
+ ("tracker active and not strictrelative"));
- nt = first;
- TAILQ_FOREACH_FROM_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) {
+ TAILQ_FOREACH_SAFE(nt, &ndp->ni_cap_tracker, nm_link, nt1) {
+ mp = nt->mp;
+ lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
+ vfs_rel(mp);
TAILQ_REMOVE(&ndp->ni_cap_tracker, nt, nm_link);
- vdrop(nt->dp);
free(nt, M_NAMEITRACKER);
}
-}
-static void
-nameicap_cleanup(struct nameidata *ndp)
-{
- KASSERT(TAILQ_EMPTY(&ndp->ni_cap_tracker) ||
- (ndp->ni_lcf & NI_LCF_CAP_DOTDOT) != 0, ("not strictrelative"));
- nameicap_cleanup_from(ndp, NULL);
+ mp = ndp->ni_nctrack_mnt;
+ if (mp != NULL) {
+ if (error == ERESTART) {
+ lockmgr(&mp->mnt_renamelock, LK_EXCLUSIVE, 0);
+ lockmgr(&mp->mnt_renamelock, LK_RELEASE, 0);
+ }
+ vfs_rel(mp);
+ ndp->ni_nctrack_mnt = NULL;
+ }
}
/*
- * For dotdot lookups in capability mode, only allow the component
- * lookup to succeed if the resulting directory was already traversed
- * during the operation. This catches situations where already
- * traversed directory is moved to different parent, and then we walk
- * over it with dotdots.
+ * For dotdot lookups in capability mode, disallow walking over the
+ * directory no_rbeneath_dpp that was used as the starting point of
+ * the lookup. Since we take the mnt_renamelocks of all mounts we
+ * ever walked over during lookup, parallel renames are disabled.
+ * This prevents the situation where we circumvent walk over
+ * ni_rbeneath_dpp following dotdots.
*
* Also allow to force failure of dotdot lookups for non-local
* filesystems, where external agents might assist local lookups to
@@ -243,7 +275,6 @@ nameicap_cleanup(struct nameidata *ndp)
static int
nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
{
- struct nameicap_tracker *nt;
struct mount *mp;
if (dp == NULL || dp->v_type != VDIR || (ndp->ni_lcf &
@@ -253,22 +284,16 @@ nameicap_check_dotdot(struct nameidata *ndp, struct vnode *dp)
NI_LCF_CAP_DOTDOT_KTR)) == NI_LCF_STRICTREL_KTR))
NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf);
if ((ndp->ni_lcf & NI_LCF_CAP_DOTDOT) == 0)
- return (ENOTCAPABLE);
+ goto violation;
+ if (dp == ndp->ni_rbeneath_dpp)
+ goto violation;
mp = dp->v_mount;
if (lookup_cap_dotdot_nonlocal == 0 && mp != NULL &&
(mp->mnt_flag & MNT_LOCAL) == 0)
- goto capfail;
- TAILQ_FOREACH_REVERSE(nt, &ndp->ni_cap_tracker, nameicap_tracker_head,
- nm_link) {
- if (dp == nt->dp) {
- nt = TAILQ_NEXT(nt, nm_link);
- if (nt != NULL)
- nameicap_cleanup_from(ndp, nt);
- return (0);
- }
- }
+ goto violation;
+ return (0);
-capfail:
+violation:
if (__predict_false((ndp->ni_lcf & NI_LCF_STRICTREL_KTR) != 0))
NI_CAP_VIOLATION(ndp, ndp->ni_cnd.cn_pnbuf);
return (ENOTCAPABLE);
@@ -394,6 +419,8 @@ namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
NI_LCF_CAP_DOTDOT;
}
}
+ if (error == 0 && (ndp->ni_lcf & NI_LCF_STRICTREL) != 0)
+ ndp->ni_rbeneath_dpp = *dpp;
/*
* If we are auditing the kernel pathname, save the user pathname.
@@ -631,6 +658,7 @@ restart:
error = namei_getpath(ndp);
if (__predict_false(error != 0)) {
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, error);
SDT_PROBE4(vfs, namei, lookup, return, error, NULL,
false, ndp);
return (error);
@@ -661,12 +689,12 @@ restart:
else if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir &&
(cnp->cn_flags & ISRESTARTED) == 0)) {
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, ERESTART);
NDRESTART(ndp);
goto restart;
}
return (error);
case CACHE_FPL_STATUS_PARTIAL:
- TAILQ_INIT(&ndp->ni_cap_tracker);
dp = ndp->ni_startdir;
break;
case CACHE_FPL_STATUS_DESTROYED:
@@ -674,18 +702,21 @@ restart:
error = namei_getpath(ndp);
if (__predict_false(error != 0)) {
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, error);
return (error);
}
cnp->cn_nameptr = cnp->cn_pnbuf;
/* FALLTHROUGH */
case CACHE_FPL_STATUS_ABORTED:
- TAILQ_INIT(&ndp->ni_cap_tracker);
MPASS(ndp->ni_lcf == 0);
if (*cnp->cn_pnbuf == '\0') {
if ((cnp->cn_flags & EMPTYPATH) != 0) {
- return (namei_emptypath(ndp));
+ error = namei_emptypath(ndp);
+ nameicap_cleanup(ndp, error);
+ return (error);
}
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, ENOENT);
SDT_PROBE4(vfs, namei, lookup, return, ENOENT, NULL,
false, ndp);
return (ENOENT);
@@ -693,6 +724,7 @@ restart:
error = namei_setup(ndp, &dp, &pwd);
if (error != 0) {
namei_cleanup_cnp(cnp);
+ nameicap_cleanup(ndp, error);
return (error);
}
break;
@@ -705,16 +737,23 @@ restart:
ndp->ni_startdir = dp;
error = vfs_lookup(ndp);
if (error != 0) {
- if (__predict_false(pwd->pwd_adir != pwd->pwd_rdir &&
- error == ENOENT &&
- (cnp->cn_flags & ISRESTARTED) == 0)) {
- nameicap_cleanup(ndp);
- pwd_drop(pwd);
- namei_cleanup_cnp(cnp);
- NDRESTART(ndp);
- goto restart;
- } else
+ uint64_t was_restarted;
+ bool abi_restart;
+
+ was_restarted = ndp->ni_cnd.cn_flags &
+ ISRESTARTED;
+ abi_restart = pwd->pwd_adir != pwd->pwd_rdir &&
+ error == ENOENT && was_restarted == 0;
+ if (error != ERESTART && !abi_restart)
goto out;
+ nameicap_cleanup(ndp, error);
+ pwd_drop(pwd);
+ namei_cleanup_cnp(cnp);
+ NDRESET(ndp);
+ if (abi_restart)
+ was_restarted = ISRESTARTED;
+ ndp->ni_cnd.cn_flags |= was_restarted;
+ goto restart;
}
/*
@@ -723,7 +762,7 @@ restart:
if ((cnp->cn_flags & ISSYMLINK) == 0) {
SDT_PROBE4(vfs, namei, lookup, return, error,
ndp->ni_vp, false, ndp);
- nameicap_cleanup(ndp);
+ nameicap_cleanup(ndp, 0);
pwd_drop(pwd);
NDVALIDATE(ndp);
return (0);
@@ -756,10 +795,10 @@ restart:
ndp->ni_vp = NULL;
vrele(ndp->ni_dvp);
out:
- MPASS(error != 0);
+ MPASS(error != 0 && error != ERESTART);
SDT_PROBE4(vfs, namei, lookup, return, error, NULL, false, ndp);
namei_cleanup_cnp(cnp);
- nameicap_cleanup(ndp);
+ nameicap_cleanup(ndp, error);
pwd_drop(pwd);
return (error);
}
@@ -1185,7 +1224,9 @@ dirloop:
}
}
- nameicap_tracker_add(ndp, dp);
+ error = nameicap_tracker_add(ndp, dp);
+ if (error != 0)
+ goto bad;
/*
* Make sure degenerate names don't get here, their handling was
@@ -1210,9 +1251,7 @@ dirloop:
* the jail or chroot, don't let them out.
* 5. If doing a capability lookup and lookup_cap_dotdot is
* enabled, return ENOTCAPABLE if the lookup would escape
- * from the initial file descriptor directory. Checks are
- * done by ensuring that namei() already traversed the
- * result of dotdot lookup.
+ * from the initial file descriptor directory.
*/
if (cnp->cn_flags & ISDOTDOT) {
if (__predict_false((ndp->ni_lcf & (NI_LCF_STRICTREL_KTR |
@@ -1238,7 +1277,7 @@ dirloop:
NI_CAP_VIOLATION(ndp, cnp->cn_pnbuf);
if ((ndp->ni_lcf & NI_LCF_STRICTREL) != 0) {
error = ENOTCAPABLE;
- goto capdotdot;
+ goto bad;
}
}
if (isroot || ((dp->v_vflag & VV_ROOT) != 0 &&
@@ -1261,11 +1300,6 @@ dirloop:
vn_lock(dp,
enforce_lkflags(dp->v_mount, cnp->cn_lkflags |
LK_RETRY));
- error = nameicap_check_dotdot(ndp, dp);
- if (error != 0) {
-capdotdot:
- goto bad;
- }
}
}
@@ -1314,7 +1348,9 @@ unionlookup:
vn_lock(dp,
enforce_lkflags(dp->v_mount, cnp->cn_lkflags |
LK_RETRY));
- nameicap_tracker_add(ndp, dp);
+ error = nameicap_tracker_add(ndp, dp);
+ if (error != 0)
+ goto bad;
goto unionlookup;
}
@@ -1415,7 +1451,7 @@ nextname:
goto dirloop;
}
if (cnp->cn_flags & ISDOTDOT) {
- error = nameicap_check_dotdot(ndp, ndp->ni_vp);
+ error = nameicap_check_dotdot(ndp, ndp->ni_dvp);
if (error != 0)
goto bad2;
}
@@ -1485,8 +1521,11 @@ success:
}
success_right_lock:
if (ndp->ni_vp != NULL) {
- if ((cnp->cn_flags & ISDOTDOT) == 0)
- nameicap_tracker_add(ndp, ndp->ni_vp);
+ if ((cnp->cn_flags & ISDOTDOT) == 0) {
+ error = nameicap_tracker_add(ndp, ndp->ni_vp);
+ if (error != 0)
+ goto bad2;
+ }
if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS)
return (vfs_lookup_failifexists(ndp));
}
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index cb18468d28bc..8e64a7fe966b 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -156,6 +156,7 @@ mount_init(void *mem, int size, int flags)
mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
+ lockinit(&mp->mnt_renamelock, PVFS, "rename", 0, 0);
mp->mnt_pcpu = uma_zalloc_pcpu(pcpu_zone_16, M_WAITOK | M_ZERO);
mp->mnt_ref = 0;
mp->mnt_vfs_ops = 1;
@@ -170,6 +171,7 @@ mount_fini(void *mem, int size)
mp = (struct mount *)mem;
uma_zfree_pcpu(pcpu_zone_16, mp->mnt_pcpu);
+ lockdestroy(&mp->mnt_renamelock);
lockdestroy(&mp->mnt_explock);
mtx_destroy(&mp->mnt_listmtx);
mtx_destroy(&mp->mnt_mtx);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index dc2fb59fb81c..918b256e6c59 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -38,7 +38,6 @@
* External virtual filesystem routines
*/
-#include <sys/cdefs.h>
#include "opt_ddb.h"
#include "opt_watchdog.h"
@@ -57,6 +56,7 @@
#include <sys/extattr.h>
#include <sys/file.h>
#include <sys/fcntl.h>
+#include <sys/inotify.h>
#include <sys/jail.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
@@ -5246,7 +5246,8 @@ destroy_vpollinfo_free(struct vpollinfo *vi)
static void
destroy_vpollinfo(struct vpollinfo *vi)
{
-
+ KASSERT(TAILQ_EMPTY(&vi->vpi_inotify),
+ ("%s: pollinfo %p has lingering watches", __func__, vi));
knlist_clear(&vi->vpi_selinfo.si_note, 1);
seldrain(&vi->vpi_selinfo);
destroy_vpollinfo_free(vi);
@@ -5260,12 +5261,13 @@ v_addpollinfo(struct vnode *vp)
{
struct vpollinfo *vi;
- if (vp->v_pollinfo != NULL)
+ if (atomic_load_ptr(&vp->v_pollinfo) != NULL)
return;
vi = malloc(sizeof(*vi), M_VNODEPOLL, M_WAITOK | M_ZERO);
mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF);
knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock,
vfs_knlunlock, vfs_knl_assert_lock);
+ TAILQ_INIT(&vi->vpi_inotify);
VI_LOCK(vp);
if (vp->v_pollinfo != NULL) {
VI_UNLOCK(vp);
@@ -5851,6 +5853,8 @@ vop_rename_pre(void *ap)
struct vop_rename_args *a = ap;
#ifdef DEBUG_VFS_LOCKS
+ struct mount *tmp;
+
if (a->a_tvp)
ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME");
ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME");
@@ -5868,6 +5872,11 @@ vop_rename_pre(void *ap)
if (a->a_tvp)
ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked");
ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked");
+
+ tmp = NULL;
+ VOP_GETWRITEMOUNT(a->a_tdvp, &tmp);
+ lockmgr_assert(&tmp->mnt_renamelock, KA_XLOCKED);
+ vfs_rel(tmp);
#endif
/*
* It may be tempting to add vn_seqc_write_begin/end calls here and
@@ -6057,6 +6066,28 @@ vop_need_inactive_debugpost(void *ap, int rc)
#endif
void
+vop_allocate_post(void *ap, int rc)
+{
+ struct vop_allocate_args *a;
+
+ a = ap;
+ if (rc == 0)
+ INOTIFY(a->a_vp, IN_MODIFY);
+}
+
+void
+vop_copy_file_range_post(void *ap, int rc)
+{
+ struct vop_copy_file_range_args *a;
+
+ a = ap;
+ if (rc == 0) {
+ INOTIFY(a->a_invp, IN_ACCESS);
+ INOTIFY(a->a_outvp, IN_MODIFY);
+ }
+}
+
+void
vop_create_pre(void *ap)
{
struct vop_create_args *a;
@@ -6076,8 +6107,20 @@ vop_create_post(void *ap, int rc)
a = ap;
dvp = a->a_dvp;
vn_seqc_write_end(dvp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
+ INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE);
+ }
+}
+
+void
+vop_deallocate_post(void *ap, int rc)
+{
+ struct vop_deallocate_args *a;
+
+ a = ap;
+ if (rc == 0)
+ INOTIFY(a->a_vp, IN_MODIFY);
}
void
@@ -6122,8 +6165,10 @@ vop_deleteextattr_post(void *ap, int rc)
a = ap;
vp = a->a_vp;
vn_seqc_write_end(vp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB);
+ INOTIFY(vp, IN_ATTRIB);
+ }
}
void
@@ -6153,6 +6198,8 @@ vop_link_post(void *ap, int rc)
if (!rc) {
VFS_KNOTE_LOCKED(vp, NOTE_LINK);
VFS_KNOTE_LOCKED(tdvp, NOTE_WRITE);
+ INOTIFY_NAME(vp, tdvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT);
+ INOTIFY_NAME(vp, tdvp, a->a_cnp, IN_CREATE);
}
}
@@ -6176,8 +6223,10 @@ vop_mkdir_post(void *ap, int rc)
a = ap;
dvp = a->a_dvp;
vn_seqc_write_end(dvp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
+ INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE);
+ }
}
#ifdef DEBUG_VFS_LOCKS
@@ -6212,8 +6261,10 @@ vop_mknod_post(void *ap, int rc)
a = ap;
dvp = a->a_dvp;
vn_seqc_write_end(dvp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
+ INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE);
+ }
}
void
@@ -6225,8 +6276,10 @@ vop_reclaim_post(void *ap, int rc)
a = ap;
vp = a->a_vp;
ASSERT_VOP_IN_SEQC(vp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(vp, NOTE_REVOKE);
+ INOTIFY_REVOKE(vp);
+ }
}
void
@@ -6257,6 +6310,8 @@ vop_remove_post(void *ap, int rc)
if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
VFS_KNOTE_LOCKED(vp, NOTE_DELETE);
+ INOTIFY_NAME(vp, dvp, a->a_cnp, _IN_ATTRIB_LINKCOUNT);
+ INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE);
}
}
@@ -6288,6 +6343,8 @@ vop_rename_post(void *ap, int rc)
VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME);
if (a->a_tvp)
VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE);
+ INOTIFY_MOVE(a->a_fvp, a->a_fdvp, a->a_fcnp, a->a_tvp,
+ a->a_tdvp, a->a_tcnp);
}
if (a->a_tdvp != a->a_fdvp)
vdrop(a->a_fdvp);
@@ -6327,6 +6384,7 @@ vop_rmdir_post(void *ap, int rc)
vp->v_vflag |= VV_UNLINKED;
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
VFS_KNOTE_LOCKED(vp, NOTE_DELETE);
+ INOTIFY_NAME(vp, dvp, a->a_cnp, IN_DELETE);
}
}
@@ -6350,8 +6408,10 @@ vop_setattr_post(void *ap, int rc)
a = ap;
vp = a->a_vp;
vn_seqc_write_end(vp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB);
+ INOTIFY(vp, IN_ATTRIB);
+ }
}
void
@@ -6396,8 +6456,10 @@ vop_setextattr_post(void *ap, int rc)
a = ap;
vp = a->a_vp;
vn_seqc_write_end(vp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB);
+ INOTIFY(vp, IN_ATTRIB);
+ }
}
void
@@ -6420,8 +6482,10 @@ vop_symlink_post(void *ap, int rc)
a = ap;
dvp = a->a_dvp;
vn_seqc_write_end(dvp);
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE);
+ INOTIFY_NAME(*a->a_vpp, dvp, a->a_cnp, IN_CREATE);
+ }
}
void
@@ -6429,8 +6493,10 @@ vop_open_post(void *ap, int rc)
{
struct vop_open_args *a = ap;
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(a->a_vp, NOTE_OPEN);
+ INOTIFY(a->a_vp, IN_OPEN);
+ }
}
void
@@ -6442,6 +6508,8 @@ vop_close_post(void *ap, int rc)
!VN_IS_DOOMED(a->a_vp))) {
VFS_KNOTE_LOCKED(a->a_vp, (a->a_fflag & FWRITE) != 0 ?
NOTE_CLOSE_WRITE : NOTE_CLOSE);
+ INOTIFY(a->a_vp, (a->a_fflag & FWRITE) != 0 ?
+ IN_CLOSE_WRITE : IN_CLOSE_NOWRITE);
}
}
@@ -6450,8 +6518,10 @@ vop_read_post(void *ap, int rc)
{
struct vop_read_args *a = ap;
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ);
+ INOTIFY(a->a_vp, IN_ACCESS);
+ }
}
void
@@ -6468,8 +6538,10 @@ vop_readdir_post(void *ap, int rc)
{
struct vop_readdir_args *a = ap;
- if (!rc)
+ if (!rc) {
VFS_KNOTE_LOCKED(a->a_vp, NOTE_READ);
+ INOTIFY(a->a_vp, IN_ACCESS);
+ }
}
static struct knlist fs_knlist;
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index c236f241bf20..d880733cbfe7 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -3766,7 +3766,7 @@ int
kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
const char *new, enum uio_seg pathseg)
{
- struct mount *mp = NULL;
+ struct mount *mp, *tmp;
struct vnode *tvp, *fvp, *tdvp;
struct nameidata fromnd, tond;
uint64_t tondflags;
@@ -3774,6 +3774,7 @@ kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
short irflag;
again:
+ tmp = mp = NULL;
bwillwrite();
#ifdef MAC
if (mac_vnode_check_rename_from_enabled()) {
@@ -3809,6 +3810,7 @@ again:
tvp = tond.ni_vp;
error = vn_start_write(fvp, &mp, V_NOWAIT);
if (error != 0) {
+again1:
NDFREE_PNBUF(&fromnd);
NDFREE_PNBUF(&tond);
if (tvp != NULL)
@@ -3819,11 +3821,25 @@ again:
vput(tdvp);
vrele(fromnd.ni_dvp);
vrele(fvp);
+ if (tmp != NULL) {
+ lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL);
+ lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL);
+ vfs_rel(tmp);
+ tmp = NULL;
+ }
error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH);
if (error != 0)
return (error);
goto again;
}
+ error = VOP_GETWRITEMOUNT(tdvp, &tmp);
+ if (error != 0 || tmp == NULL)
+ goto again1;
+ error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL);
+ if (error != 0) {
+ vn_finished_write(mp);
+ goto again1;
+ }
irflag = vn_irflag_read(fvp);
if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) ||
(irflag & VIRF_NAMEDDIR) != 0) {
@@ -3884,6 +3900,8 @@ out:
vrele(fromnd.ni_dvp);
vrele(fvp);
}
+ lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0);
+ vfs_rel(tmp);
vn_finished_write(mp);
out1:
if (error == ERESTART)
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 7487f93e4880..6451c9e07a60 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -52,6 +52,7 @@
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filio.h>
+#include <sys/inotify.h>
#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/limits.h>
@@ -308,7 +309,8 @@ restart:
NDREINIT(ndp);
goto restart;
}
- if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0)
+ if ((vn_open_flags & VN_OPEN_NAMECACHE) != 0 ||
+ (vn_irflag_read(ndp->ni_dvp) & VIRF_INOTIFY) != 0)
ndp->ni_cnd.cn_flags |= MAKEENTRY;
#ifdef MAC
error = mac_vnode_check_create(cred, ndp->ni_dvp,
@@ -484,6 +486,7 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
if (vp->v_type != VFIFO && vp->v_type != VSOCK &&
VOP_ACCESS(vp, VREAD, cred, td) == 0)
fp->f_flag |= FKQALLOWED;
+ INOTIFY(vp, IN_OPEN);
return (0);
}
@@ -1746,6 +1749,8 @@ vn_truncate_locked(struct vnode *vp, off_t length, bool sync,
vattr.va_vaflags |= VA_SYNC;
error = VOP_SETATTR(vp, &vattr, cred);
VOP_ADD_WRITECOUNT_CHECKED(vp, -1);
+ if (error == 0)
+ INOTIFY(vp, IN_MODIFY);
}
return (error);
}
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index a2b6a7c8ff9f..38138a4af921 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -702,6 +702,7 @@ vop_vptocnp {
%% allocate vp E E E
+%! allocate post vop_allocate_post
vop_allocate {
IN struct vnode *vp;
@@ -786,6 +787,7 @@ vop_fdatasync {
%% copy_file_range invp U U U
%% copy_file_range outvp U U U
+%! copy_file_range post vop_copy_file_range_post
vop_copy_file_range {
IN struct vnode *invp;
@@ -810,6 +812,7 @@ vop_vput_pair {
%% deallocate vp L L L
+%! deallocate post vop_deallocate_post
vop_deallocate {
IN struct vnode *vp;
@@ -821,6 +824,27 @@ vop_deallocate {
};
+%% inotify vp - - -
+
+vop_inotify {
+ IN struct vnode *vp;
+ IN struct vnode *dvp;
+ IN struct componentname *cnp;
+ IN int event;
+ IN uint32_t cookie;
+};
+
+
+%% inotify_add_watch vp L L L
+
+vop_inotify_add_watch {
+ IN struct vnode *vp;
+ IN struct inotify_softc *sc;
+ IN uint32_t mask;
+ OUT uint32_t *wdp;
+ IN struct thread *td;
+};
+
# The VOPs below are spares at the end of the table to allow new VOPs to be
# added in stable branches without breaking the KBI. New VOPs in HEAD should
# be added above these spares. When merging a new VOP to a stable branch,
diff --git a/sys/modules/iwlwifi/Makefile b/sys/modules/iwlwifi/Makefile
index 6e0fea6efc3a..9774c3da61ee 100644
--- a/sys/modules/iwlwifi/Makefile
+++ b/sys/modules/iwlwifi/Makefile
@@ -4,6 +4,7 @@ DEVIWLWIFIDIR= ${SRCTOP}/sys/contrib/dev/iwlwifi
WITH_CONFIG_PM= 0
WITH_DEBUGFS= 1
+WITH_CONFIG_ACPI= 1
KMOD= if_iwlwifi
@@ -40,6 +41,12 @@ CFLAGS+= -DCONFIG_PM
CFLAGS+= -DCONFIG_PM_SLEEP
.endif
+.if defined(WITH_CONFIG_ACPI) && ${WITH_CONFIG_ACPI} > 0
+SRCS+= fw/acpi.c
+CFLAGS+= -DCONFIG_ACPI
+CFLAGS+= -DLINUXKPI_WANT_LINUX_ACPI
+.endif
+
SRCS+= iwl-devtrace.c
# Other
@@ -56,7 +63,6 @@ CFLAGS+= -DCONFIG_IWLMVM=1
# Helpful after fresh imports.
#CFLAGS+= -ferror-limit=0
-#CFLAGS+= -DCONFIG_ACPI=1
#CFLAGS+= -DCONFIG_INET=1 # Need LKPI TSO implementation.
#CFLAGS+= -DCONFIG_IPV6=1
CFLAGS+= -DCONFIG_IWLWIFI_DEBUG=1
diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h
index 6eefedba8775..cf4f75bd0b6c 100644
--- a/sys/net/ethernet.h
+++ b/sys/net/ethernet.h
@@ -81,6 +81,23 @@ struct ether_addr {
(addr)[3] | (addr)[4] | (addr)[5]) == 0x00)
/*
+ * 802.1q VID constants from IEEE 802.1Q-2014, table 9-2.
+ */
+
+/* Null VID: The tag contains only PCP (priority) and DEI information. */
+#define DOT1Q_VID_NULL 0x0
+/* The default PVID for a bridge port. NB: bridge(4) does not honor this. */
+#define DOT1Q_VID_DEF_PVID 0x1
+/* The default SR_PVID for SRP Stream related traffic. */
+#define DOT1Q_VID_DEF_SR_PVID 0x2
+/* A VID reserved for implementation use, not permitted on the wire. */
+#define DOT1Q_VID_RSVD_IMPL 0xfff
+/* The lowest valid VID. */
+#define DOT1Q_VID_MIN 0x1
+/* The highest valid VID. */
+#define DOT1Q_VID_MAX 0xffe
+
+/*
* This is the type of the VLAN ID inside the tag, not the tag itself.
*/
typedef uint16_t ether_vlanid_t;
diff --git a/sys/net/if_vlan_var.h b/sys/net/if_vlan_var.h
index f0b09445d04b..695bb81f77b3 100644
--- a/sys/net/if_vlan_var.h
+++ b/sys/net/if_vlan_var.h
@@ -126,13 +126,6 @@ struct vlanreq {
#define VLAN_PCP_MAX 7
-#define DOT1Q_VID_NULL 0x0
-#define DOT1Q_VID_DEF_PVID 0x1
-#define DOT1Q_VID_DEF_SR_PVID 0x2
-#define DOT1Q_VID_RSVD_IMPL 0xfff
-#define DOT1Q_VID_MIN 1 /* minimum valid vlan id */
-#define DOT1Q_VID_MAX 4094 /* maximum valid vlan id */
-
/*
* 802.1q full tag. Proto and vid are stored in host byte order.
*/
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 71cb1862aabf..1416f0c2cdbe 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -508,18 +508,6 @@ extern struct sx pf_end_lock;
(c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \
!(a)->addr32[2] && !(a)->addr32[3] )) \
-#define PF_MATCHA(n, a, m, b, f) \
- pf_match_addr(n, a, m, b, f)
-
-#define PF_ACPY(a, b, f) \
- pf_addrcpy(a, b, f)
-
-#define PF_AINC(a, f) \
- pf_addr_inc(a, f)
-
-#define PF_POOLMASK(a, b, c, d, f) \
- pf_poolmask(a, b, c, d, f)
-
#else
/* Just IPv6 */
@@ -544,18 +532,6 @@ extern struct sx pf_end_lock;
!(a)->addr32[2] && \
!(a)->addr32[3] ) \
-#define PF_MATCHA(n, a, m, b, f) \
- pf_match_addr(n, a, m, b, f)
-
-#define PF_ACPY(a, b, f) \
- pf_addrcpy(a, b, f)
-
-#define PF_AINC(a, f) \
- pf_addr_inc(a, f)
-
-#define PF_POOLMASK(a, b, c, d, f) \
- pf_poolmask(a, b, c, d, f)
-
#else
/* Just IPv4 */
@@ -570,29 +546,11 @@ extern struct sx pf_end_lock;
#define PF_AZERO(a, c) \
(!(a)->addr32[0])
-#define PF_MATCHA(n, a, m, b, f) \
- pf_match_addr(n, a, m, b, f)
-
-#define PF_ACPY(a, b, f) \
- (a)->v4.s_addr = (b)->v4.s_addr
-
-#define PF_AINC(a, f) \
- do { \
- (a)->addr32[0] = htonl(ntohl((a)->addr32[0]) + 1); \
- } while (0)
-
-#define PF_POOLMASK(a, b, c, d, f) \
- do { \
- (a)->addr32[0] = ((b)->addr32[0] & (c)->addr32[0]) | \
- (((c)->addr32[0] ^ 0xffffffff ) & (d)->addr32[0]); \
- } while (0)
-
#endif /* PF_INET_ONLY */
#endif /* PF_INET6_ONLY */
#endif /* PF_INET_INET6 */
#ifdef _KERNEL
-#ifdef INET6
static void inline
pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af)
{
@@ -602,12 +560,13 @@ pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af)
memcpy(&dst->v4, &src->v4, sizeof(dst->v4));
break;
#endif /* INET */
+#ifdef INET6
case AF_INET6:
memcpy(&dst->v6, &src->v6, sizeof(dst->v6));
break;
+#endif /* INET6 */
}
}
-#endif /* INET6 */
#endif
/*
@@ -629,7 +588,7 @@ pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af)
&(aw)->v.a.mask, (x), (af))) || \
((aw)->type == PF_ADDR_ADDRMASK && \
!PF_AZERO(&(aw)->v.a.mask, (af)) && \
- !PF_MATCHA(0, &(aw)->v.a.addr, \
+ !pf_match_addr(0, &(aw)->v.a.addr, \
&(aw)->v.a.mask, (x), (af))))) != \
(neg) \
)
@@ -2477,11 +2436,11 @@ int pf_test(sa_family_t, int, int, struct ifnet *, struct mbuf **, struct inpcb
int pf_normalize_ip(u_short *, struct pf_pdesc *);
#endif /* INET */
-#ifdef INET6
-int pf_normalize_ip6(int, u_short *, struct pf_pdesc *);
void pf_poolmask(struct pf_addr *, struct pf_addr*,
struct pf_addr *, struct pf_addr *, sa_family_t);
void pf_addr_inc(struct pf_addr *, sa_family_t);
+#ifdef INET6
+int pf_normalize_ip6(int, u_short *, struct pf_pdesc *);
int pf_max_frag_size(struct mbuf *);
int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *,
struct ifnet *, bool);
@@ -2674,11 +2633,10 @@ int pf_kanchor_copyout(const struct pf_kruleset *,
const struct pf_krule *, char *, size_t);
int pf_kanchor_nvcopyout(const struct pf_kruleset *,
const struct pf_krule *, nvlist_t *);
-void pf_kanchor_remove(struct pf_krule *);
+void pf_remove_kanchor(struct pf_krule *);
void pf_remove_if_empty_kruleset(struct pf_kruleset *);
struct pf_kruleset *pf_find_kruleset(const char *);
struct pf_kruleset *pf_get_leaf_kruleset(char *, char **);
-struct pf_kanchor *pf_create_kanchor(struct pf_kanchor *, const char *);
struct pf_kruleset *pf_find_or_create_kruleset(const char *);
void pf_rs_initialize(void);
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index accc811a12ba..127b29320acb 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -682,7 +682,8 @@ pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk)
0);
break;
case AF_INET6:
- PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
+ pf_addrcpy(pd->src, &nk->addr[pd->sidx],
+ pd->af);
break;
default:
unhandled_af(pd->af);
@@ -696,7 +697,8 @@ pf_packet_rework_nat(struct pf_pdesc *pd, int off, struct pf_state_key *nk)
0);
break;
case AF_INET6:
- PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
+ pf_addrcpy(pd->dst, &nk->addr[pd->didx],
+ pd->af);
break;
default:
unhandled_af(pd->af);
@@ -1084,9 +1086,9 @@ pf_insert_src_node(struct pf_ksrc_node *sns[PF_SN_MAX],
(*sn)->af = af;
(*sn)->rule = r_track;
- PF_ACPY(&(*sn)->addr, src, af);
+ pf_addrcpy(&(*sn)->addr, src, af);
if (raddr != NULL)
- PF_ACPY(&(*sn)->raddr, raddr, af);
+ pf_addrcpy(&(*sn)->raddr, raddr, af);
(*sn)->rkif = rkif;
LIST_INSERT_HEAD(&(*sh)->nodes, *sn, entry);
(*sn)->creation = time_uptime;
@@ -1687,9 +1689,9 @@ pf_state_key_addr_setup(struct pf_pdesc *pd,
copy:
#endif /* INET6 */
if (saddr)
- PF_ACPY(&key->addr[pd->sidx], saddr, pd->af);
+ pf_addrcpy(&key->addr[pd->sidx], saddr, pd->af);
if (daddr)
- PF_ACPY(&key->addr[pd->didx], daddr, pd->af);
+ pf_addrcpy(&key->addr[pd->didx], daddr, pd->af);
return (0);
}
@@ -1734,13 +1736,17 @@ pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport,
bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0]));
bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1]));
if (pd->dir == PF_IN) {
- PF_ACPY(&(*nk)->addr[pd->didx], &pd->nsaddr, pd->naf);
- PF_ACPY(&(*nk)->addr[pd->sidx], &pd->ndaddr, pd->naf);
+ pf_addrcpy(&(*nk)->addr[pd->didx], &pd->nsaddr,
+ pd->naf);
+ pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->ndaddr,
+ pd->naf);
(*nk)->port[pd->didx] = pd->nsport;
(*nk)->port[pd->sidx] = pd->ndport;
} else {
- PF_ACPY(&(*nk)->addr[pd->sidx], &pd->nsaddr, pd->naf);
- PF_ACPY(&(*nk)->addr[pd->didx], &pd->ndaddr, pd->naf);
+ pf_addrcpy(&(*nk)->addr[pd->sidx], &pd->nsaddr,
+ pd->naf);
+ pf_addrcpy(&(*nk)->addr[pd->didx], &pd->ndaddr,
+ pd->naf);
(*nk)->port[pd->sidx] = pd->nsport;
(*nk)->port[pd->didx] = pd->ndport;
}
@@ -2053,11 +2059,11 @@ pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_por
mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
if (mapping == NULL)
return (NULL);
- PF_ACPY(&mapping->endpoints[0].addr, src_addr, af);
+ pf_addrcpy(&mapping->endpoints[0].addr, src_addr, af);
mapping->endpoints[0].port = src_port;
mapping->endpoints[0].af = af;
mapping->endpoints[0].mapping = mapping;
- PF_ACPY(&mapping->endpoints[1].addr, nat_addr, af);
+ pf_addrcpy(&mapping->endpoints[1].addr, nat_addr, af);
mapping->endpoints[1].port = nat_port;
mapping->endpoints[1].af = af;
mapping->endpoints[1].mapping = mapping;
@@ -3295,9 +3301,9 @@ pf_change_ap(struct pf_pdesc *pd, struct pf_addr *a, u_int16_t *p,
MPASS(pd->ip_sum);
}
- PF_ACPY(&ao, a, pd->af);
+ pf_addrcpy(&ao, a, pd->af);
if (pd->af == pd->naf)
- PF_ACPY(a, an, pd->af);
+ pf_addrcpy(a, an, pd->af);
if (pd->m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
*pd->pcksum = ~*pd->pcksum;
@@ -3426,8 +3432,8 @@ pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
{
struct pf_addr ao;
- PF_ACPY(&ao, a, AF_INET6);
- PF_ACPY(a, an, AF_INET6);
+ pf_addrcpy(&ao, a, AF_INET6);
+ pf_addrcpy(a, an, AF_INET6);
*c = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
@@ -3450,9 +3456,9 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
{
struct pf_addr oia, ooa;
- PF_ACPY(&oia, ia, af);
+ pf_addrcpy(&oia, ia, af);
if (oa)
- PF_ACPY(&ooa, oa, af);
+ pf_addrcpy(&ooa, oa, af);
/* Change inner protocol port, fix inner protocol checksum. */
if (ip != NULL) {
@@ -3469,7 +3475,7 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
*ic = pf_cksum_fixup(*ic, opc, *pc, 0);
}
/* Change inner ip address, fix inner ip and icmp checksums. */
- PF_ACPY(ia, na, af);
+ pf_addrcpy(ia, na, af);
switch (af) {
#ifdef INET
case AF_INET: {
@@ -3503,7 +3509,7 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
}
/* Outer ip address, fix outer ip or icmpv6 checksum, if necessary. */
if (oa) {
- PF_ACPY(oa, na, af);
+ pf_addrcpy(oa, na, af);
switch (af) {
#ifdef INET
case AF_INET:
@@ -4299,8 +4305,8 @@ pf_undo_nat(struct pf_krule *nr, struct pf_pdesc *pd, uint16_t bip_sum)
{
/* undo NAT changes, if they have taken place */
if (nr != NULL) {
- PF_ACPY(pd->src, &pd->osrc, pd->af);
- PF_ACPY(pd->dst, &pd->odst, pd->af);
+ pf_addrcpy(pd->src, &pd->osrc, pd->af);
+ pf_addrcpy(pd->dst, &pd->odst, pd->af);
if (pd->sport)
*pd->sport = pd->osport;
if (pd->dport)
@@ -4676,10 +4682,11 @@ pf_step_into_anchor(struct pf_test_ctx *ctx, struct pf_krule *r)
} else {
rv = pf_match_rule(ctx, &r->anchor->ruleset);
/*
- * Unless there was an error inside the anchor,
- * retain its quick state.
+ * Unless errors occured, stop iff any rule matched
+ * within quick anchors.
*/
- if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK)
+ if (rv != PF_TEST_FAIL && r->quick == PF_TEST_QUICK &&
+ *ctx->am == r)
rv = PF_TEST_QUICK;
}
@@ -4790,7 +4797,6 @@ pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *stack, int *depth,
return (quick);
}
-#ifdef INET6
void
pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
@@ -4802,6 +4808,7 @@ pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
break;
#endif /* INET */
+#ifdef INET6
case AF_INET6:
naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]);
@@ -4812,6 +4819,7 @@ pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
((rmask->addr32[3] ^ 0xffffffff ) & saddr->addr32[3]);
break;
+#endif /* INET6 */
}
}
@@ -4824,6 +4832,7 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af)
addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
break;
#endif /* INET */
+#ifdef INET6
case AF_INET6:
if (addr->addr32[3] == 0xffffffff) {
addr->addr32[3] = 0;
@@ -4843,9 +4852,9 @@ pf_addr_inc(struct pf_addr *addr, sa_family_t af)
addr->addr32[3] =
htonl(ntohl(addr->addr32[3]) + 1);
break;
+#endif /* INET6 */
}
}
-#endif /* INET6 */
void
pf_rule_to_actions(struct pf_krule *r, struct pf_rule_actions *a)
@@ -5744,8 +5753,8 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm,
ctx.reason = *reason;
SLIST_INIT(&ctx.rules);
- PF_ACPY(&pd->nsaddr, pd->src, pd->af);
- PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+ pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
+ pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
if (inp != NULL) {
INP_LOCK_ASSERT(inp);
@@ -6363,7 +6372,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
&nk->addr[pd->sidx], nk->port[pd->sidx]);
pd->sport = &th->th_sport;
pd->nsport = th->th_sport;
- PF_ACPY(&pd->nsaddr, pd->src, pd->af);
+ pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
@@ -6372,7 +6381,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
&nk->addr[pd->didx], nk->port[pd->didx]);
pd->dport = &th->th_dport;
pd->ndport = th->th_dport;
- PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+ pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
rewrite++;
break;
@@ -6385,7 +6394,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
nk->port[pd->sidx]);
pd->sport = &pd->hdr.udp.uh_sport;
pd->nsport = pd->hdr.udp.uh_sport;
- PF_ACPY(&pd->nsaddr, pd->src, pd->af);
+ pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
@@ -6396,7 +6405,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
nk->port[pd->didx]);
pd->dport = &pd->hdr.udp.uh_dport;
pd->ndport = pd->hdr.udp.uh_dport;
- PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+ pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
rewrite++;
break;
@@ -6409,7 +6418,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
nk->port[pd->sidx]);
pd->sport = &pd->hdr.sctp.src_port;
pd->nsport = pd->hdr.sctp.src_port;
- PF_ACPY(&pd->nsaddr, pd->src, pd->af);
+ pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], pd->af) ||
nk->port[pd->didx] != pd->ndport) {
@@ -6419,7 +6428,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
nk->port[pd->didx]);
pd->dport = &pd->hdr.sctp.dest_port;
pd->ndport = pd->hdr.sctp.dest_port;
- PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+ pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
break;
}
@@ -6428,13 +6437,13 @@ pf_translate_compat(struct pf_test_ctx *ctx)
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET)) {
pf_change_a(&pd->src->v4.s_addr, pd->ip_sum,
nk->addr[pd->sidx].v4.s_addr, 0);
- PF_ACPY(&pd->nsaddr, pd->src, pd->af);
+ pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET)) {
pf_change_a(&pd->dst->v4.s_addr, pd->ip_sum,
nk->addr[pd->didx].v4.s_addr, 0);
- PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+ pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
if (ctx->virtual_type == htons(ICMP_ECHO) &&
@@ -6453,13 +6462,13 @@ pf_translate_compat(struct pf_test_ctx *ctx)
if (PF_ANEQ(&pd->nsaddr, &nk->addr[pd->sidx], AF_INET6)) {
pf_change_a6(pd->src, &pd->hdr.icmp6.icmp6_cksum,
&nk->addr[pd->sidx], 0);
- PF_ACPY(&pd->nsaddr, pd->src, pd->af);
+ pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr, &nk->addr[pd->didx], AF_INET6)) {
pf_change_a6(pd->dst, &pd->hdr.icmp6.icmp6_cksum,
&nk->addr[pd->didx], 0);
- PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+ pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
rewrite++;
break;
@@ -6473,7 +6482,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
pf_change_a(&pd->src->v4.s_addr,
pd->ip_sum,
nk->addr[pd->sidx].v4.s_addr, 0);
- PF_ACPY(&pd->nsaddr, pd->src, pd->af);
+ pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
}
if (PF_ANEQ(&pd->ndaddr,
@@ -6481,7 +6490,7 @@ pf_translate_compat(struct pf_test_ctx *ctx)
pf_change_a(&pd->dst->v4.s_addr,
pd->ip_sum,
nk->addr[pd->didx].v4.s_addr, 0);
- PF_ACPY(&pd->ndaddr, pd->dst, pd->af);
+ pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
}
break;
#endif /* INET */
@@ -6489,14 +6498,17 @@ pf_translate_compat(struct pf_test_ctx *ctx)
case AF_INET6:
if (PF_ANEQ(&pd->nsaddr,
&nk->addr[pd->sidx], AF_INET6)) {
- PF_ACPY(&pd->nsaddr, &nk->addr[pd->sidx], pd->af);
- PF_ACPY(pd->src, &nk->addr[pd->sidx], pd->af);
+ pf_addrcpy(&pd->nsaddr, &nk->addr[pd->sidx],
+ pd->af);
+ pf_addrcpy(pd->src, &nk->addr[pd->sidx], pd->af);
}
if (PF_ANEQ(&pd->ndaddr,
&nk->addr[pd->didx], AF_INET6)) {
- PF_ACPY(&pd->ndaddr, &nk->addr[pd->didx], pd->af);
- PF_ACPY(pd->dst, &nk->addr[pd->didx], pd->af);
+ pf_addrcpy(&pd->ndaddr, &nk->addr[pd->didx],
+ pd->af);
+ pf_addrcpy(pd->dst, &nk->addr[pd->didx],
+ pd->af);
}
break;
#endif /* INET6 */
@@ -7015,8 +7027,8 @@ pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason)
bzero(&key, sizeof(key));
key.af = pd->af;
key.proto = pd->virtual_proto;
- PF_ACPY(&key.addr[pd->sidx], pd->src, key.af);
- PF_ACPY(&key.addr[pd->didx], pd->dst, key.af);
+ pf_addrcpy(&key.addr[pd->sidx], pd->src, key.af);
+ pf_addrcpy(&key.addr[pd->didx], pd->dst, key.af);
key.port[pd->sidx] = pd->osport;
key.port[pd->didx] = pd->odport;
@@ -7207,8 +7219,8 @@ pf_test_state(struct pf_kstate **state, struct pf_pdesc *pd, u_short *reason)
}
if (afto) {
- PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af);
- PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af);
+ pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
+ pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
pd->naf = nk->af;
action = PF_AFRT;
}
@@ -7502,13 +7514,13 @@ again:
key.af = j->pd.af;
key.proto = IPPROTO_SCTP;
if (j->pd.dir == PF_IN) { /* wire side, straight */
- PF_ACPY(&key.addr[0], j->pd.src, key.af);
- PF_ACPY(&key.addr[1], j->pd.dst, key.af);
+ pf_addrcpy(&key.addr[0], j->pd.src, key.af);
+ pf_addrcpy(&key.addr[1], j->pd.dst, key.af);
key.port[0] = j->pd.hdr.sctp.src_port;
key.port[1] = j->pd.hdr.sctp.dest_port;
} else { /* stack side, reverse */
- PF_ACPY(&key.addr[1], j->pd.src, key.af);
- PF_ACPY(&key.addr[0], j->pd.dst, key.af);
+ pf_addrcpy(&key.addr[1], j->pd.src, key.af);
+ pf_addrcpy(&key.addr[0], j->pd.dst, key.af);
key.port[1] = j->pd.hdr.sctp.src_port;
key.port[0] = j->pd.hdr.sctp.dest_port;
}
@@ -7904,8 +7916,10 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
#endif /* INET6 */
}
if (afto) {
- PF_ACPY(&pd->nsaddr, &nk->addr[sidx], nk->af);
- PF_ACPY(&pd->ndaddr, &nk->addr[didx], nk->af);
+ pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
+ nk->af);
+ pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
+ nk->af);
pd->naf = nk->af;
return (PF_AFRT);
}
@@ -8037,8 +8051,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
key.af = pd2.af;
key.proto = IPPROTO_TCP;
- PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
- PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
+ pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
key.port[pd2.sidx] = th->th_sport;
key.port[pd2.didx] = th->th_dport;
@@ -8141,9 +8155,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
&nk->addr[didx], pd->af,
nk->af))
return (PF_DROP);
- PF_ACPY(&pd->nsaddr, &nk->addr[pd2.sidx],
- nk->af);
- PF_ACPY(&pd->ndaddr,
+ pf_addrcpy(&pd->nsaddr,
+ &nk->addr[pd2.sidx], nk->af);
+ pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
if (nk->af == AF_INET) {
pd->proto = IPPROTO_ICMP;
@@ -8232,8 +8246,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
key.af = pd2.af;
key.proto = IPPROTO_UDP;
- PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
- PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
+ pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
key.port[pd2.sidx] = uh->uh_sport;
key.port[pd2.didx] = uh->uh_dport;
@@ -8276,9 +8290,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
&nk->addr[didx], pd->af,
nk->af))
return (PF_DROP);
- PF_ACPY(&pd->nsaddr,
+ pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
- PF_ACPY(&pd->ndaddr,
+ pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
if (nk->af == AF_INET) {
pd->proto = IPPROTO_ICMP;
@@ -8364,8 +8378,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
key.af = pd2.af;
key.proto = IPPROTO_SCTP;
- PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
- PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
+ pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
key.port[pd2.sidx] = sh->src_port;
key.port[pd2.didx] = sh->dest_port;
@@ -8431,9 +8445,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
sh->src_port = nk->port[sidx];
sh->dest_port = nk->port[didx];
m_copyback(pd2.m, pd2.off, sizeof(*sh), (c_caddr_t)sh);
- PF_ACPY(&pd->nsaddr,
+ pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
- PF_ACPY(&pd->ndaddr,
+ pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
if (nk->af == AF_INET) {
pd->proto = IPPROTO_ICMP;
@@ -8574,9 +8588,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
iih->icmp_id = nk->port[iidx];
m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
(c_caddr_t)iih);
- PF_ACPY(&pd->nsaddr,
+ pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
- PF_ACPY(&pd->ndaddr,
+ pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
/*
* IPv4 becomes IPv6 so we must copy
@@ -8702,9 +8716,9 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
iih->icmp6_id = nk->port[iidx];
m_copyback(pd2.m, pd2.off,
sizeof(struct icmp6_hdr), (c_caddr_t)iih);
- PF_ACPY(&pd->nsaddr,
+ pf_addrcpy(&pd->nsaddr,
&nk->addr[pd2.sidx], nk->af);
- PF_ACPY(&pd->ndaddr,
+ pf_addrcpy(&pd->ndaddr,
&nk->addr[pd2.didx], nk->af);
pd->naf = nk->af;
return (PF_AFRT);
@@ -8746,8 +8760,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
key.af = pd2.af;
key.proto = pd2.proto;
- PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
- PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+ pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
+ pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
key.port[0] = key.port[1] = 0;
action = pf_find_state(&pd2, &key, state);
@@ -9283,7 +9297,8 @@ pf_route6(struct pf_krule *r, struct ifnet *oifp,
bzero(&dst, sizeof(dst));
dst.sin6_family = AF_INET6;
dst.sin6_len = sizeof(dst);
- PF_ACPY((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr, AF_INET6);
+ pf_addrcpy((struct pf_addr *)&dst.sin6_addr, &pd->act.rt_addr,
+ AF_INET6);
if (pd->dir == PF_IN) {
if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
@@ -10083,8 +10098,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
pd->src = (struct pf_addr *)&h->ip_src;
pd->dst = (struct pf_addr *)&h->ip_dst;
- PF_ACPY(&pd->osrc, pd->src, af);
- PF_ACPY(&pd->odst, pd->dst, af);
+ pf_addrcpy(&pd->osrc, pd->src, af);
+ pf_addrcpy(&pd->odst, pd->dst, af);
pd->ip_sum = &h->ip_sum;
pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
pd->ttl = h->ip_ttl;
@@ -10121,8 +10136,8 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
h = mtod(pd->m, struct ip6_hdr *);
pd->src = (struct pf_addr *)&h->ip6_src;
pd->dst = (struct pf_addr *)&h->ip6_dst;
- PF_ACPY(&pd->osrc, pd->src, af);
- PF_ACPY(&pd->odst, pd->dst, af);
+ pf_addrcpy(&pd->osrc, pd->src, af);
+ pf_addrcpy(&pd->odst, pd->dst, af);
pd->ip_sum = NULL;
pd->tos = IPV6_DSCP(h);
pd->ttl = h->ip6_hlim;
diff --git a/sys/netpfil/pf/pf_if.c b/sys/netpfil/pf/pf_if.c
index 389b74d09d37..e2200c15c704 100644
--- a/sys/netpfil/pf/pf_if.c
+++ b/sys/netpfil/pf/pf_if.c
@@ -522,7 +522,7 @@ pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af)
case 0:
return (0);
case 1:
- return (PF_MATCHA(0, &dyn->pfid_addr4,
+ return (pf_match_addr(0, &dyn->pfid_addr4,
&dyn->pfid_mask4, a, AF_INET));
default:
return (pfr_match_addr(dyn->pfid_kt, a, AF_INET));
@@ -535,7 +535,7 @@ pfi_match_addr(struct pfi_dynaddr *dyn, struct pf_addr *a, sa_family_t af)
case 0:
return (0);
case 1:
- return (PF_MATCHA(0, &dyn->pfid_addr6,
+ return (pf_match_addr(0, &dyn->pfid_addr6,
&dyn->pfid_mask6, a, AF_INET6));
default:
return (pfr_match_addr(dyn->pfid_kt, a, AF_INET6));
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index 05a7e1311ad8..357b2be194a5 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -615,7 +615,7 @@ pf_free_rule(struct pf_krule *rule)
pfi_kkif_unref(rule->kif);
if (rule->rcv_kif)
pfi_kkif_unref(rule->rcv_kif);
- pf_kanchor_remove(rule);
+ pf_remove_kanchor(rule);
pf_empty_kpool(&rule->rdr.list);
pf_empty_kpool(&rule->nat.list);
pf_empty_kpool(&rule->route.list);
@@ -2350,15 +2350,17 @@ relock_DIOCKILLSTATES:
if (psk->psk_proto && psk->psk_proto != sk->proto)
continue;
- if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr,
+ if (! pf_match_addr(psk->psk_src.neg,
+ &psk->psk_src.addr.v.a.addr,
&psk->psk_src.addr.v.a.mask, srcaddr, sk->af))
continue;
- if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr,
+ if (! pf_match_addr(psk->psk_dst.neg,
+ &psk->psk_dst.addr.v.a.addr,
&psk->psk_dst.addr.v.a.mask, dstaddr, sk->af))
continue;
- if (! PF_MATCHA(psk->psk_rt_addr.neg,
+ if (! pf_match_addr(psk->psk_rt_addr.neg,
&psk->psk_rt_addr.addr.v.a.addr,
&psk->psk_rt_addr.addr.v.a.mask,
&s->act.rt_addr, sk->af))
@@ -2398,10 +2400,10 @@ relock_DIOCKILLSTATES:
match_key.af = s->key[idx]->af;
match_key.proto = s->key[idx]->proto;
- PF_ACPY(&match_key.addr[0],
+ pf_addrcpy(&match_key.addr[0],
&s->key[idx]->addr[1], match_key.af);
match_key.port[0] = s->key[idx]->port[1];
- PF_ACPY(&match_key.addr[1],
+ pf_addrcpy(&match_key.addr[1],
&s->key[idx]->addr[0], match_key.af);
match_key.port[1] = s->key[idx]->port[0];
}
@@ -2738,7 +2740,7 @@ pf_ioctl_get_rulesets(struct pfioc_ruleset *pr)
return (ENOENT);
}
pr->nr = 0;
- if (ruleset->anchor == NULL) {
+ if (ruleset == &pf_main_ruleset) {
/* XXX kludge for pf_main_ruleset */
RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
if (anchor->parent == NULL)
@@ -2770,7 +2772,7 @@ pf_ioctl_get_ruleset(struct pfioc_ruleset *pr)
}
pr->name[0] = 0;
- if (ruleset->anchor == NULL) {
+ if (ruleset == &pf_main_ruleset) {
/* XXX kludge for pf_main_ruleset */
RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
if (anchor->parent == NULL && nr++ == pr->nr) {
@@ -4152,9 +4154,9 @@ DIOCGETSTATESV2_full:
bzero(&key, sizeof(key));
key.af = pnl->af;
key.proto = pnl->proto;
- PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af);
+ pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af);
key.port[sidx] = pnl->sport;
- PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af);
+ pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af);
key.port[didx] = pnl->dport;
state = pf_find_state_all(&key, direction, &m);
@@ -4166,9 +4168,11 @@ DIOCGETSTATESV2_full:
error = E2BIG; /* more than one state */
} else {
sk = state->key[sidx];
- PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af);
+ pf_addrcpy(&pnl->rsaddr,
+ &sk->addr[sidx], sk->af);
pnl->rsport = sk->port[sidx];
- PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af);
+ pf_addrcpy(&pnl->rdaddr,
+ &sk->addr[didx], sk->af);
pnl->rdport = sk->port[didx];
PF_STATE_UNLOCK(state);
}
@@ -4606,7 +4610,7 @@ DIOCGETSTATESV2_full:
}
pool->cur = TAILQ_FIRST(&pool->list);
- PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af);
+ pf_addrcpy(&pool->counter, &pool->cur->addr.v.a.addr, pca->af);
PF_RULES_WUNLOCK();
break;
@@ -6024,11 +6028,11 @@ pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
PF_HASHROW_LOCK(sh);
LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
if (psnk == NULL ||
- (PF_MATCHA(psnk->psnk_src.neg,
+ (pf_match_addr(psnk->psnk_src.neg,
&psnk->psnk_src.addr.v.a.addr,
&psnk->psnk_src.addr.v.a.mask,
&sn->addr, sn->af) &&
- PF_MATCHA(psnk->psnk_dst.neg,
+ pf_match_addr(psnk->psnk_dst.neg,
&psnk->psnk_dst.addr.v.a.addr,
&psnk->psnk_dst.addr.v.a.mask,
&sn->raddr, sn->af))) {
@@ -6132,10 +6136,10 @@ relock_DIOCCLRSTATES:
match_key.af = s->key[idx]->af;
match_key.proto = s->key[idx]->proto;
- PF_ACPY(&match_key.addr[0],
+ pf_addrcpy(&match_key.addr[0],
&s->key[idx]->addr[1], match_key.af);
match_key.port[0] = s->key[idx]->port[1];
- PF_ACPY(&match_key.addr[1],
+ pf_addrcpy(&match_key.addr[1],
&s->key[idx]->addr[0], match_key.af);
match_key.port[1] = s->key[idx]->port[0];
}
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
index 5e7865e4fac5..308d76c46e5b 100644
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -319,12 +319,14 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r,
bzero(&udp_source, sizeof(udp_source));
udp_source.af = pd->af;
- PF_ACPY(&udp_source.addr, &pd->nsaddr, pd->af);
+ pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af);
udp_source.port = pd->nsport;
if (udp_mapping) {
*udp_mapping = pf_udp_mapping_find(&udp_source);
if (*udp_mapping) {
- PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af);
+ pf_addrcpy(naddr,
+ &(*udp_mapping)->endpoints[1].addr,
+ pd->af);
*nport = (*udp_mapping)->endpoints[1].port;
/* Try to find a src_node as per pf_map_addr(). */
if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR &&
@@ -369,12 +371,13 @@ pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r,
key.proto = pd->proto;
do {
- PF_ACPY(&key.addr[didx], &pd->ndaddr, key.af);
- PF_ACPY(&key.addr[sidx], naddr, key.af);
+ pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
+ pf_addrcpy(&key.addr[sidx], naddr, key.af);
key.port[didx] = pd->ndport;
if (udp_mapping && *udp_mapping)
- PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af);
+ pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr,
+ pd->af);
/*
* port search; start random, step;
@@ -591,10 +594,10 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
switch (rpool->opts & PF_POOL_TYPEMASK) {
case PF_POOL_NONE:
- PF_ACPY(naddr, raddr, af);
+ pf_addrcpy(naddr, raddr, af);
break;
case PF_POOL_BITMASK:
- PF_POOLMASK(naddr, raddr, rmask, saddr, af);
+ pf_poolmask(naddr, raddr, rmask, saddr, af);
break;
case PF_POOL_RANDOM:
if (rpool->cur->addr.type == PF_ADDR_TABLE) {
@@ -609,7 +612,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
reason = PFRES_MAPFAILED;
goto done_pool_mtx; /* unsupported */
}
- PF_ACPY(naddr, &rpool->counter, af);
+ pf_addrcpy(naddr, &rpool->counter, af);
} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
if (cnt == 0)
@@ -623,7 +626,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
reason = PFRES_MAPFAILED;
goto done_pool_mtx; /* unsupported */
}
- PF_ACPY(naddr, &rpool->counter, af);
+ pf_addrcpy(naddr, &rpool->counter, af);
} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
switch (af) {
#ifdef INET
@@ -654,12 +657,12 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
break;
#endif /* INET6 */
}
- PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
- PF_ACPY(init_addr, naddr, af);
+ pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
+ pf_addrcpy(init_addr, naddr, af);
} else {
- PF_AINC(&rpool->counter, af);
- PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
+ pf_addr_inc(&rpool->counter, af);
+ pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
}
break;
case PF_POOL_SRCHASH:
@@ -680,7 +683,7 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
reason = PFRES_MAPFAILED;
goto done_pool_mtx; /* unsupported */
}
- PF_ACPY(naddr, &rpool->counter, af);
+ pf_addrcpy(naddr, &rpool->counter, af);
} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
cnt = rpool->cur->addr.p.dyn->pfid_kt->pfrkt_cnt;
if (cnt == 0)
@@ -694,9 +697,9 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
reason = PFRES_MAPFAILED;
goto done_pool_mtx; /* unsupported */
}
- PF_ACPY(naddr, &rpool->counter, af);
+ pf_addrcpy(naddr, &rpool->counter, af);
} else {
- PF_POOLMASK(naddr, raddr, rmask,
+ pf_poolmask(naddr, raddr, rmask,
(struct pf_addr *)&hash, af);
}
break;
@@ -743,14 +746,14 @@ pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
} else {
raddr = &rpool->cur->addr.v.a.addr;
rmask = &rpool->cur->addr.v.a.mask;
- PF_ACPY(&rpool->counter, raddr, af);
+ pf_addrcpy(&rpool->counter, raddr, af);
}
get_addr:
- PF_ACPY(naddr, &rpool->counter, af);
+ pf_addrcpy(naddr, &rpool->counter, af);
if (init_addr != NULL && PF_AZERO(init_addr, af))
- PF_ACPY(init_addr, naddr, af);
- PF_AINC(&rpool->counter, af);
+ pf_addrcpy(init_addr, naddr, af);
+ pf_addr_inc(&rpool->counter, af);
break;
}
}
@@ -798,7 +801,7 @@ pf_map_addr_sn(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
goto done;
}
- PF_ACPY(naddr, &(*sn)->raddr, af);
+ pf_addrcpy(naddr, &(*sn)->raddr, af);
if (nkif)
*nkif = (*sn)->rkif;
if (V_pf_status.debug >= PF_DEBUG_NOISY) {
@@ -948,7 +951,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
reason = PFRES_MAPFAILED;
goto notrans;
}
- PF_POOLMASK(naddr,
+ pf_poolmask(naddr,
&rpool->cur->addr.p.dyn->pfid_addr4,
&rpool->cur->addr.p.dyn->pfid_mask4,
&pd->nsaddr, AF_INET);
@@ -961,7 +964,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
reason = PFRES_MAPFAILED;
goto notrans;
}
- PF_POOLMASK(naddr,
+ pf_poolmask(naddr,
&rpool->cur->addr.p.dyn->pfid_addr6,
&rpool->cur->addr.p.dyn->pfid_mask6,
&pd->nsaddr, AF_INET6);
@@ -969,7 +972,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
#endif /* INET6 */
}
} else
- PF_POOLMASK(naddr,
+ pf_poolmask(naddr,
&rpool->cur->addr.v.a.addr,
&rpool->cur->addr.v.a.mask, &pd->nsaddr,
pd->af);
@@ -983,7 +986,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
reason = PFRES_MAPFAILED;
goto notrans;
}
- PF_POOLMASK(naddr,
+ pf_poolmask(naddr,
&r->src.addr.p.dyn->pfid_addr4,
&r->src.addr.p.dyn->pfid_mask4,
&pd->ndaddr, AF_INET);
@@ -995,7 +998,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
reason = PFRES_MAPFAILED;
goto notrans;
}
- PF_POOLMASK(naddr,
+ pf_poolmask(naddr,
&r->src.addr.p.dyn->pfid_addr6,
&r->src.addr.p.dyn->pfid_mask6,
&pd->ndaddr, AF_INET6);
@@ -1003,7 +1006,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
#endif /* INET6 */
}
} else
- PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
+ pf_poolmask(naddr, &r->src.addr.v.a.addr,
&r->src.addr.v.a.mask, &pd->ndaddr, pd->af);
break;
}
@@ -1018,7 +1021,7 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
if (reason != 0)
goto notrans;
if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
- PF_POOLMASK(naddr, naddr, &rpool->cur->addr.v.a.mask,
+ pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask,
&pd->ndaddr, pd->af);
/* Do not change SCTP ports. */
@@ -1056,9 +1059,9 @@ pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r,
key.af = pd->af;
key.proto = pd->proto;
key.port[0] = pd->nsport;
- PF_ACPY(&key.addr[0], &pd->nsaddr, key.af);
+ pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af);
key.port[1] = nport;
- PF_ACPY(&key.addr[1], naddr, key.af);
+ pf_addrcpy(&key.addr[1], naddr, key.af);
if (!pf_find_state_all_exists(&key, PF_OUT))
break;
@@ -1220,8 +1223,8 @@ pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd)
}
}
- PF_ACPY(&pd->nsaddr, &nsaddr, pd->naf);
- PF_ACPY(&pd->ndaddr, &ndaddr, pd->naf);
+ pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
+ pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);
if (V_pf_status.debug >= PF_DEBUG_MISC) {
printf("pf: af-to %s done, prefixlen %d, ",
diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c
index 381e966eacf1..d5d6dc70255e 100644
--- a/sys/netpfil/pf/pf_nl.c
+++ b/sys/netpfil/pf/pf_nl.c
@@ -1308,9 +1308,9 @@ pf_handle_natlook(struct nlmsghdr *hdr, struct nl_pstate *npt)
key.af = attrs.af;
key.proto = attrs.proto;
- PF_ACPY(&key.addr[sidx], &attrs.src, attrs.af);
+ pf_addrcpy(&key.addr[sidx], &attrs.src, attrs.af);
key.port[sidx] = attrs.sport;
- PF_ACPY(&key.addr[didx], &attrs.dst, attrs.af);
+ pf_addrcpy(&key.addr[didx], &attrs.dst, attrs.af);
key.port[didx] = attrs.dport;
state = pf_find_state_all(&key, attrs.direction, &m);
diff --git a/sys/netpfil/pf/pf_ruleset.c b/sys/netpfil/pf/pf_ruleset.c
index 865c5ecd72d9..2e5165a9900c 100644
--- a/sys/netpfil/pf/pf_ruleset.c
+++ b/sys/netpfil/pf/pf_ruleset.c
@@ -232,7 +232,7 @@ pf_get_leaf_kruleset(char *path, char **path_remainder)
return (ruleset);
}
-struct pf_kanchor *
+static struct pf_kanchor *
pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
{
struct pf_kanchor *anchor, *dup;
@@ -259,8 +259,8 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
if ((dup = RB_INSERT(pf_kanchor_global, &V_pf_anchors, anchor)) !=
NULL) {
- printf("pf_find_or_create_ruleset: RB_INSERT1 "
- "'%s' '%s' collides with '%s' '%s'\n",
+ printf("%s: RB_INSERT1 "
+ "'%s' '%s' collides with '%s' '%s'\n", __func__,
anchor->path, anchor->name, dup->path, dup->name);
rs_free(anchor);
return (NULL);
@@ -270,10 +270,10 @@ pf_create_kanchor(struct pf_kanchor *parent, const char *aname)
anchor->parent = parent;
if ((dup = RB_INSERT(pf_kanchor_node, &parent->children,
anchor)) != NULL) {
- printf("pf_find_or_create_ruleset: "
+ printf("%s: "
"RB_INSERT2 '%s' '%s' collides with "
- "'%s' '%s'\n", anchor->path, anchor->name,
- dup->path, dup->name);
+ "'%s' '%s'\n", __func__, anchor->path,
+ anchor->name, dup->path, dup->name);
RB_REMOVE(pf_kanchor_global, &V_pf_anchors,
anchor);
rs_free(anchor);
@@ -339,7 +339,7 @@ pf_remove_if_empty_kruleset(struct pf_kruleset *ruleset)
int i;
while (ruleset != NULL) {
- if (ruleset == &pf_main_ruleset || ruleset->anchor == NULL ||
+ if (ruleset == &pf_main_ruleset ||
!RB_EMPTY(&ruleset->anchor->children) ||
ruleset->anchor->refcnt > 0 || ruleset->tables > 0 ||
ruleset->topen)
@@ -407,7 +407,7 @@ pf_kanchor_setup(struct pf_krule *r, const struct pf_kruleset *s,
}
ruleset = pf_find_or_create_kruleset(path);
rs_free(path);
- if (ruleset == NULL || ruleset->anchor == NULL) {
+ if (ruleset == NULL || ruleset == &pf_main_ruleset) {
DPFPRINTF("%s: ruleset\n", __func__);
return (1);
}
@@ -432,7 +432,7 @@ pf_kanchor_copyout(const struct pf_kruleset *rs, const struct pf_krule *r,
char a[MAXPATHLEN];
char *p;
int i;
- if (rs->anchor == NULL)
+ if (rs == &pf_main_ruleset)
a[0] = 0;
else
strlcpy(a, rs->anchor->path, MAXPATHLEN);
@@ -444,7 +444,7 @@ pf_kanchor_copyout(const struct pf_kruleset *rs, const struct pf_krule *r,
anchor_call_len);
}
if (strncmp(a, r->anchor->path, strlen(a))) {
- printf("pf_anchor_copyout: '%s' '%s'\n", a,
+ printf("%s: '%s' '%s'\n", __func__, a,
r->anchor->path);
return (1);
}
@@ -525,16 +525,13 @@ done:
}
void
-pf_kanchor_remove(struct pf_krule *r)
+pf_remove_kanchor(struct pf_krule *r)
{
if (r->anchor == NULL)
return;
- if (r->anchor->refcnt <= 0) {
- printf("pf_anchor_remove: broken refcount\n");
- r->anchor = NULL;
- return;
- }
- if (!--r->anchor->refcnt)
+ if (r->anchor->refcnt <= 0)
+ printf("%s: broken refcount\n", __func__);
+ else if (!--r->anchor->refcnt)
pf_remove_if_empty_kruleset(&r->anchor->ruleset);
r->anchor = NULL;
}
diff --git a/sys/netpfil/pf/pf_table.c b/sys/netpfil/pf/pf_table.c
index d5874df3df66..43e4366845a2 100644
--- a/sys/netpfil/pf/pf_table.c
+++ b/sys/netpfil/pf/pf_table.c
@@ -704,7 +704,7 @@ pfr_validate_addr(struct pfr_addr *ad)
return (-1);
if (ad->pfra_not && ad->pfra_not != 1)
return (-1);
- if (ad->pfra_fback)
+ if (ad->pfra_fback != PFR_FB_NONE)
return (-1);
return (0);
}
@@ -2340,16 +2340,16 @@ _next_block:
if (use_counter && !PF_AZERO(counter, af)) {
/* is supplied address within block? */
- if (!PF_MATCHA(0, &cur, &mask, counter, af)) {
+ if (!pf_match_addr(0, &cur, &mask, counter, af)) {
/* no, go to next block in table */
idx++;
use_counter = 0;
goto _next_block;
}
- PF_ACPY(addr, counter, af);
+ pf_addrcpy(addr, counter, af);
} else {
/* use first address of block */
- PF_ACPY(addr, &cur, af);
+ pf_addrcpy(addr, &cur, af);
}
if (!KENTRY_NETWORK(ke)) {
@@ -2358,7 +2358,7 @@ _next_block:
idx++;
goto _next_block;
}
- PF_ACPY(counter, addr, af);
+ pf_addrcpy(counter, addr, af);
*pidx = idx;
pfr_kstate_counter_add(&kt->pfrkt_match, 1);
return (0);
@@ -2382,7 +2382,7 @@ _next_block:
/* lookup return the same block - perfect */
if (filter && filter(af, addr))
goto _next_entry;
- PF_ACPY(counter, addr, af);
+ pf_addrcpy(counter, addr, af);
*pidx = idx;
pfr_kstate_counter_add(&kt->pfrkt_match, 1);
return (0);
@@ -2392,9 +2392,9 @@ _next_entry:
/* we need to increase the counter past the nested block */
pfr_prepare_network(&umask, AF_INET, ke2->pfrke_net);
pfr_sockaddr_to_pf_addr(&umask, &umask_addr);
- PF_POOLMASK(addr, addr, &umask_addr, &pfr_ffaddr, af);
- PF_AINC(addr, af);
- if (!PF_MATCHA(0, &cur, &mask, addr, af)) {
+ pf_poolmask(addr, addr, &umask_addr, &pfr_ffaddr, af);
+ pf_addr_inc(addr, af);
+ if (!pf_match_addr(0, &cur, &mask, addr, af)) {
/* ok, we reached the end of our main block */
/* go to next block in table */
idx++;
diff --git a/sys/powerpc/mpc85xx/mpc85xx_gpio.c b/sys/powerpc/mpc85xx/mpc85xx_gpio.c
index 0f333feb747f..cb96d768adef 100644
--- a/sys/powerpc/mpc85xx/mpc85xx_gpio.c
+++ b/sys/powerpc/mpc85xx/mpc85xx_gpio.c
@@ -226,14 +226,14 @@ mpc85xx_gpio_attach(device_t dev)
return (ENOMEM);
}
+ OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev);
+
sc->busdev = gpiobus_attach_bus(dev);
if (sc->busdev == NULL) {
mpc85xx_gpio_detach(dev);
return (ENOMEM);
}
- OF_device_register_xref(OF_xref_from_node(ofw_bus_get_node(dev)), dev);
-
return (0);
}
diff --git a/sys/riscv/allwinner/files.allwinner b/sys/riscv/allwinner/files.allwinner
index 423a89c10c78..73fa9660e2d2 100644
--- a/sys/riscv/allwinner/files.allwinner
+++ b/sys/riscv/allwinner/files.allwinner
@@ -1,5 +1,6 @@
arm/allwinner/aw_gpio.c optional gpio aw_gpio fdt
+arm/allwinner/aw_rtc.c optional aw_rtc fdt
arm/allwinner/aw_syscon.c optional syscon
arm/allwinner/aw_sid.c optional aw_sid nvmem
arm/allwinner/aw_timer.c optional aw_timer fdt
diff --git a/sys/riscv/conf/std.allwinner b/sys/riscv/conf/std.allwinner
index 1bf6b027a4cb..2b1e0d4e09dc 100644
--- a/sys/riscv/conf/std.allwinner
+++ b/sys/riscv/conf/std.allwinner
@@ -7,6 +7,7 @@ options SOC_ALLWINNER_D1
device aw_ccu # Allwinner clock controller
device aw_gpio # Allwinner GPIO controller
+device aw_rtc # Allwinner Real-time Clock
device aw_sid # Allwinner Secure ID EFUSE
device aw_timer # Allwinner Timer
device aw_usbphy # Allwinner USB PHY
diff --git a/sys/sys/caprights.h b/sys/sys/caprights.h
index 48c75afc62a0..6a5a17eda5ee 100644
--- a/sys/sys/caprights.h
+++ b/sys/sys/caprights.h
@@ -79,6 +79,8 @@ extern const cap_rights_t cap_futimes_rights;
extern const cap_rights_t cap_getpeername_rights;
extern const cap_rights_t cap_getsockopt_rights;
extern const cap_rights_t cap_getsockname_rights;
+extern const cap_rights_t cap_inotify_add_rights;
+extern const cap_rights_t cap_inotify_rm_rights;
extern const cap_rights_t cap_ioctl_rights;
extern const cap_rights_t cap_linkat_source_rights;
extern const cap_rights_t cap_linkat_target_rights;
diff --git a/sys/sys/capsicum.h b/sys/sys/capsicum.h
index d493535454e9..3847c4c73e75 100644
--- a/sys/sys/capsicum.h
+++ b/sys/sys/capsicum.h
@@ -279,11 +279,15 @@
#define CAP_KQUEUE (CAP_KQUEUE_EVENT | CAP_KQUEUE_CHANGE)
+/* Allows operations on inotify descriptors. */
+#define CAP_INOTIFY_ADD CAPRIGHT(1, 0x0000000000200000ULL)
+#define CAP_INOTIFY_RM CAPRIGHT(1, 0x0000000000400000ULL)
+
/* All used bits for index 1. */
-#define CAP_ALL1 CAPRIGHT(1, 0x00000000001FFFFFULL)
+#define CAP_ALL1 CAPRIGHT(1, 0x00000000007FFFFFULL)
/* Available bits for index 1. */
-#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000200000ULL)
+#define CAP_UNUSED1_22 CAPRIGHT(1, 0x0000000000800000ULL)
/* ... */
#define CAP_UNUSED1_57 CAPRIGHT(1, 0x0100000000000000ULL)
diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h
index d770c274d7b7..cab94ac511a5 100644
--- a/sys/sys/exterr_cat.h
+++ b/sys/sys/exterr_cat.h
@@ -16,6 +16,8 @@
#define EXTERR_KTRACE 3 /* To allow inclusion of this
file into kern_ktrace.c */
#define EXTERR_CAT_FUSE 4
+#define EXTERR_CAT_INOTIFY 5
+#define EXTERR_CAT_GENIO 6
#endif
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 284d523147b6..63313926c4f0 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -71,6 +71,7 @@ struct nameidata;
#define DTYPE_PROCDESC 12 /* process descriptor */
#define DTYPE_EVENTFD 13 /* eventfd */
#define DTYPE_TIMERFD 14 /* timerfd */
+#define DTYPE_INOTIFY 15 /* inotify descriptor */
#ifdef _KERNEL
diff --git a/sys/sys/inotify.h b/sys/sys/inotify.h
new file mode 100644
index 000000000000..65dc5dba43f3
--- /dev/null
+++ b/sys/sys/inotify.h
@@ -0,0 +1,150 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+#ifndef _INOTIFY_H_
+#define _INOTIFY_H_
+
+#include <sys/_types.h>
+
+/* Flags for inotify_init1(). */
+#define IN_NONBLOCK 0x00000004 /* O_NONBLOCK */
+#define IN_CLOEXEC 0x00100000 /* O_CLOEXEC */
+
+struct inotify_event {
+ int wd;
+ __uint32_t mask;
+ __uint32_t cookie;
+ __uint32_t len;
+ char name[0];
+};
+
+/* Events, set in the mask field. */
+#define IN_ACCESS 0x00000001
+#define IN_MODIFY 0x00000002
+#define IN_ATTRIB 0x00000004
+#define IN_CLOSE_WRITE 0x00000008
+#define IN_CLOSE_NOWRITE 0x00000010
+#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE)
+#define IN_OPEN 0x00000020
+#define IN_MOVED_FROM 0x00000040
+#define IN_MOVED_TO 0x00000080
+#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO)
+#define IN_CREATE 0x00000100
+#define IN_DELETE 0x00000200
+#define IN_DELETE_SELF 0x00000400
+#define IN_MOVE_SELF 0x00000800
+#define IN_ALL_EVENTS 0x00000fff
+
+/* Events report only for entries in a watched dir, not the dir itself. */
+#define _IN_DIR_EVENTS (IN_CLOSE_WRITE | IN_DELETE | IN_MODIFY | \
+ IN_MOVED_FROM | IN_MOVED_TO)
+
+#ifdef _KERNEL
+/*
+ * An unlink that's done as part of a rename only records IN_DELETE if the
+ * unlinked vnode itself is watched, and not when the containing directory is
+ * watched.
+ */
+#define _IN_MOVE_DELETE 0x40000000
+/*
+ * Inode link count changes only trigger IN_ATTRIB events if the inode itself is
+ * watched, and not when the containing directory is watched.
+ */
+#define _IN_ATTRIB_LINKCOUNT 0x80000000
+#endif
+
+/* Flags, set in the mask field. */
+#define IN_ONLYDIR 0x01000000
+#define IN_DONT_FOLLOW 0x02000000
+#define IN_EXCL_UNLINK 0x04000000
+#define IN_MASK_CREATE 0x10000000
+#define IN_MASK_ADD 0x20000000
+#define IN_ONESHOT 0x80000000
+#define _IN_ALL_FLAGS (IN_ONLYDIR | IN_DONT_FOLLOW | \
+ IN_EXCL_UNLINK | IN_MASK_CREATE | \
+ IN_MASK_ADD | IN_ONESHOT)
+
+/* Flags returned by the kernel. */
+#define IN_UNMOUNT 0x00002000
+#define IN_Q_OVERFLOW 0x00004000
+#define IN_IGNORED 0x00008000
+#define IN_ISDIR 0x40000000
+#define _IN_ALL_RETFLAGS (IN_Q_OVERFLOW | IN_UNMOUNT | IN_IGNORED | \
+ IN_ISDIR)
+
+#define _IN_ALIGN _Alignof(struct inotify_event)
+#define _IN_NAMESIZE(namelen) \
+ ((namelen) == 0 ? 0 : __align_up((namelen) + 1, _IN_ALIGN))
+
+#ifdef _KERNEL
+struct componentname;
+struct file;
+struct inotify_softc;
+struct thread;
+struct vnode;
+
+int inotify_create_file(struct thread *, struct file *, int, int *);
+void inotify_log(struct vnode *, const char *, size_t, int, __uint32_t);
+
+int kern_inotify_rm_watch(int, uint32_t, struct thread *);
+int kern_inotify_add_watch(int, int, const char *, uint32_t,
+ struct thread *);
+
+void vn_inotify(struct vnode *, struct vnode *, struct componentname *, int,
+ uint32_t);
+int vn_inotify_add_watch(struct vnode *, struct inotify_softc *,
+ __uint32_t, __uint32_t *, struct thread *);
+void vn_inotify_revoke(struct vnode *);
+
+/* Log an inotify event. */
+#define INOTIFY(vp, ev) do { \
+ if (__predict_false((vn_irflag_read(vp) & (VIRF_INOTIFY | \
+ VIRF_INOTIFY_PARENT)) != 0)) \
+ VOP_INOTIFY((vp), NULL, NULL, (ev), 0); \
+} while (0)
+
+/* Log an inotify event using a specific name for the vnode. */
+#define INOTIFY_NAME(vp, dvp, cnp, ev) do { \
+ if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0 || \
+ (vn_irflag_read(dvp) & VIRF_INOTIFY) != 0)) \
+ VOP_INOTIFY((vp), (dvp), (cnp), (ev), 0); \
+} while (0)
+
+extern __uint32_t inotify_rename_cookie;
+
+#define INOTIFY_MOVE(vp, fdvp, fcnp, tvp, tdvp, tcnp) do { \
+ if (__predict_false((vn_irflag_read(fdvp) & VIRF_INOTIFY) != 0 || \
+ (vn_irflag_read(tdvp) & VIRF_INOTIFY) != 0 || \
+ (vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) { \
+ __uint32_t cookie; \
+ \
+ cookie = atomic_fetchadd_32(&inotify_rename_cookie, 1); \
+ VOP_INOTIFY((vp), (fdvp), (fcnp), IN_MOVED_FROM, cookie); \
+ VOP_INOTIFY((vp), (tdvp), (tcnp), IN_MOVED_TO, cookie); \
+ } \
+ if ((tvp) != NULL) \
+ INOTIFY_NAME((tvp), (tdvp), (tcnp), _IN_MOVE_DELETE); \
+} while (0)
+
+#define INOTIFY_REVOKE(vp) do { \
+ if (__predict_false((vn_irflag_read(vp) & VIRF_INOTIFY) != 0)) \
+ vn_inotify_revoke((vp)); \
+} while (0)
+
+#else
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int inotify_init(void);
+int inotify_init1(int flags);
+int inotify_add_watch(int fd, const char *pathname, __uint32_t mask);
+int inotify_add_watch_at(int fd, int dfd, const char *pathname,
+ __uint32_t mask);
+int inotify_rm_watch(int fd, int wd);
+__END_DECLS
+#endif /* !_KERNEL */
+
+#endif /* !_INOTIFY_H_ */
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index a6f858e02395..f6480b173a5c 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -267,6 +267,7 @@ struct mount {
int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */
int mnt_upper_pending; /* (i) # of pending ops on mnt_uppers */
struct lock mnt_explock; /* vfs_export walkers lock */
+ struct lock mnt_renamelock; /* renames and O_RESOLVE_BENEATH */
TAILQ_HEAD(, mount_upper_node) mnt_uppers; /* (i) upper mounts over us */
TAILQ_HEAD(, mount_upper_node) mnt_notify; /* (i) upper mounts for notification */
STAILQ_ENTRY(mount) mnt_taskqueue_link; /* (d) our place in deferred unmount list */
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index 5c245235ace5..6008d83f729d 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -108,7 +108,12 @@ struct nameidata {
* through the VOP interface.
*/
struct componentname ni_cnd;
+
+ /* Serving RBENEATH. */
struct nameicap_tracker_head ni_cap_tracker;
+ struct vnode *ni_rbeneath_dpp;
+ struct mount *ni_nctrack_mnt;
+
/*
* Private helper data for UFS, must be at the end. See
* NDINIT_PREFILL().
@@ -235,6 +240,10 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
panic("namei data not inited"); \
if (((arg)->ni_debugflags & NAMEI_DBG_HADSTARTDIR) != 0) \
panic("NDREINIT on namei data with NAMEI_DBG_HADSTARTDIR"); \
+ if ((arg)->ni_nctrack_mnt != NULL) \
+ panic("NDREINIT on namei data with leaked ni_nctrack_mnt"); \
+ if (!TAILQ_EMPTY(&(arg)->ni_cap_tracker)) \
+ panic("NDREINIT on namei data with leaked ni_cap_tracker"); \
(arg)->ni_debugflags = NAMEI_DBG_INITED; \
}
#else
@@ -259,6 +268,9 @@ do { \
_ndp->ni_resflags = 0; \
filecaps_init(&_ndp->ni_filecaps); \
_ndp->ni_rightsneeded = _rightsp; \
+ _ndp->ni_rbeneath_dpp = NULL; \
+ _ndp->ni_nctrack_mnt = NULL; \
+ TAILQ_INIT(&_ndp->ni_cap_tracker); \
} while (0)
#define NDREINIT(ndp) do { \
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index b15dace8cfa0..61411890c85b 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -122,6 +122,8 @@ struct uidinfo {
long ui_kqcnt; /* (b) number of kqueues */
long ui_umtxcnt; /* (b) number of shared umtxs */
long ui_pipecnt; /* (b) consumption of pipe buffers */
+ long ui_inotifycnt; /* (b) number of inotify descriptors */
+ long ui_inotifywatchcnt; /* (b) number of inotify watches */
uid_t ui_uid; /* (a) uid */
u_int ui_ref; /* (b) reference count */
#ifdef RACCT
@@ -144,6 +146,8 @@ int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to,
int chgptscnt(struct uidinfo *uip, int diff, rlim_t maxval);
int chgumtxcnt(struct uidinfo *uip, int diff, rlim_t maxval);
int chgpipecnt(struct uidinfo *uip, int diff, rlim_t max);
+int chginotifycnt(struct uidinfo *uip, int diff, rlim_t maxval);
+int chginotifywatchcnt(struct uidinfo *uip, int diff, rlim_t maxval);
int kern_proc_setrlimit(struct thread *td, struct proc *p, u_int which,
struct rlimit *limp);
struct plimit
diff --git a/sys/sys/specialfd.h b/sys/sys/specialfd.h
index dc4d88ce689f..0b79c841d149 100644
--- a/sys/sys/specialfd.h
+++ b/sys/sys/specialfd.h
@@ -30,6 +30,7 @@
enum specialfd_type {
SPECIALFD_EVENTFD = 1,
+ SPECIALFD_INOTIFY = 2,
};
struct specialfd_eventfd {
@@ -37,4 +38,8 @@ struct specialfd_eventfd {
int flags;
};
+struct specialfd_inotify {
+ int flags;
+};
+
#endif /* !_SYS_SPECIALFD_H_ */
diff --git a/sys/sys/syscall.h b/sys/sys/syscall.h
index 68406a2dfc29..eec923d0b82e 100644
--- a/sys/sys/syscall.h
+++ b/sys/sys/syscall.h
@@ -529,4 +529,6 @@
#define SYS_fchroot 590
#define SYS_setcred 591
#define SYS_exterrctl 592
-#define SYS_MAXSYSCALL 593
+#define SYS_inotify_add_watch_at 593
+#define SYS_inotify_rm_watch 594
+#define SYS_MAXSYSCALL 595
diff --git a/sys/sys/syscall.mk b/sys/sys/syscall.mk
index 9a90a63f35a3..547242a73277 100644
--- a/sys/sys/syscall.mk
+++ b/sys/sys/syscall.mk
@@ -434,4 +434,6 @@ MIASM = \
getrlimitusage.o \
fchroot.o \
setcred.o \
- exterrctl.o
+ exterrctl.o \
+ inotify_add_watch_at.o \
+ inotify_rm_watch.o
diff --git a/sys/sys/sysproto.h b/sys/sys/sysproto.h
index 94da81c84d25..94b5a0a7a95e 100644
--- a/sys/sys/sysproto.h
+++ b/sys/sys/sysproto.h
@@ -1891,6 +1891,16 @@ struct exterrctl_args {
char flags_l_[PADL_(u_int)]; u_int flags; char flags_r_[PADR_(u_int)];
char ptr_l_[PADL_(void *)]; void * ptr; char ptr_r_[PADR_(void *)];
};
+struct inotify_add_watch_at_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char dfd_l_[PADL_(int)]; int dfd; char dfd_r_[PADR_(int)];
+ char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)];
+ char mask_l_[PADL_(uint32_t)]; uint32_t mask; char mask_r_[PADR_(uint32_t)];
+};
+struct inotify_rm_watch_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char wd_l_[PADL_(int)]; int wd; char wd_r_[PADR_(int)];
+};
int sys_exit(struct thread *, struct exit_args *);
int sys_fork(struct thread *, struct fork_args *);
int sys_read(struct thread *, struct read_args *);
@@ -2293,6 +2303,8 @@ int sys_getrlimitusage(struct thread *, struct getrlimitusage_args *);
int sys_fchroot(struct thread *, struct fchroot_args *);
int sys_setcred(struct thread *, struct setcred_args *);
int sys_exterrctl(struct thread *, struct exterrctl_args *);
+int sys_inotify_add_watch_at(struct thread *, struct inotify_add_watch_at_args *);
+int sys_inotify_rm_watch(struct thread *, struct inotify_rm_watch_args *);
#ifdef COMPAT_43
@@ -3275,6 +3287,8 @@ int freebsd13_swapoff(struct thread *, struct freebsd13_swapoff_args *);
#define SYS_AUE_fchroot AUE_NULL
#define SYS_AUE_setcred AUE_SETCRED
#define SYS_AUE_exterrctl AUE_NULL
+#define SYS_AUE_inotify_add_watch_at AUE_INOTIFY
+#define SYS_AUE_inotify_rm_watch AUE_INOTIFY
#undef PAD_
#undef PADL_
diff --git a/sys/sys/user.h b/sys/sys/user.h
index f94a91ca1238..103236b6ed1b 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -265,6 +265,7 @@ struct user {
#define KF_TYPE_DEV 12
#define KF_TYPE_EVENTFD 13
#define KF_TYPE_TIMERFD 14
+#define KF_TYPE_INOTIFY 15
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -456,6 +457,10 @@ struct kinfo_file {
int32_t kf_kqueue_count;
int32_t kf_kqueue_state;
} kf_kqueue;
+ struct {
+ uint64_t kf_inotify_npending;
+ uint64_t kf_inotify_nbpending;
+ } kf_inotify;
} kf_un;
};
uint16_t kf_status; /* Status flags. */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index bed20f607339..3ed469bdce6d 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -86,11 +86,13 @@ enum vgetstate {
* it from v_data. If non-null, this area is freed in getnewvnode().
*/
-struct namecache;
struct cache_fpl;
+struct inotify_watch;
+struct namecache;
struct vpollinfo {
struct mtx vpi_lock; /* lock to protect below */
+ TAILQ_HEAD(, inotify_watch) vpi_inotify; /* list of inotify watchers */
struct selinfo vpi_selinfo; /* identity of poller(s) */
short vpi_events; /* what they are looking for */
short vpi_revents; /* what has happened */
@@ -248,6 +250,9 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes");
#define VIRF_CROSSMP 0x0010 /* Cross-mp vnode, no locking */
#define VIRF_NAMEDDIR 0x0020 /* Named attribute directory */
#define VIRF_NAMEDATTR 0x0040 /* Named attribute */
+#define VIRF_INOTIFY 0x0080 /* This vnode is being watched */
+#define VIRF_INOTIFY_PARENT 0x0100 /* A parent of this vnode may be being
+ watched */
#define VI_UNUSED0 0x0001 /* unused */
#define VI_MOUNT 0x0002 /* Mount in progress */
@@ -667,6 +672,7 @@ char *cache_symlink_alloc(size_t size, int flags);
void cache_symlink_free(char *string, size_t size);
int cache_symlink_resolve(struct cache_fpl *fpl, const char *string,
size_t len);
+void cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie);
void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp);
@@ -869,8 +875,10 @@ int vop_stdfsync(struct vop_fsync_args *);
int vop_stdgetwritemount(struct vop_getwritemount_args *);
int vop_stdgetpages(struct vop_getpages_args *);
int vop_stdinactive(struct vop_inactive_args *);
-int vop_stdioctl(struct vop_ioctl_args *);
int vop_stdneed_inactive(struct vop_need_inactive_args *);
+int vop_stdinotify(struct vop_inotify_args *);
+int vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *);
+int vop_stdioctl(struct vop_ioctl_args *);
int vop_stdkqfilter(struct vop_kqfilter_args *);
int vop_stdlock(struct vop_lock1_args *);
int vop_stdunlock(struct vop_unlock_args *);
@@ -910,9 +918,12 @@ int dead_read(struct vop_read_args *ap);
int dead_write(struct vop_write_args *ap);
/* These are called from within the actual VOPS. */
+void vop_allocate_post(void *a, int rc);
+void vop_copy_file_range_post(void *ap, int rc);
void vop_close_post(void *a, int rc);
void vop_create_pre(void *a);
void vop_create_post(void *a, int rc);
+void vop_deallocate_post(void *a, int rc);
void vop_whiteout_pre(void *a);
void vop_whiteout_post(void *a, int rc);
void vop_deleteextattr_pre(void *a);
@@ -1020,9 +1031,12 @@ void vop_rename_fail(struct vop_rename_args *ap);
#define VOP_WRITE_POST(ap, ret) \
noffset = (ap)->a_uio->uio_offset; \
- if (noffset > ooffset && !VN_KNLIST_EMPTY((ap)->a_vp)) { \
- VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE \
- | (noffset > osize ? NOTE_EXTEND : 0)); \
+ if (noffset > ooffset) { \
+ if (VN_KNLIST_EMPTY((ap)->a_vp)) { \
+ VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE | \
+ (noffset > osize ? NOTE_EXTEND : 0)); \
+ } \
+ INOTIFY((ap)->a_vp, IN_MODIFY); \
}
#define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__)
diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk
index d23c2af9bd9a..e829105197cc 100644
--- a/sys/tools/vnode_if.awk
+++ b/sys/tools/vnode_if.awk
@@ -193,6 +193,7 @@ if (cfile) {
printc(common_head \
"#include <sys/param.h>\n" \
"#include <sys/event.h>\n" \
+ "#include <sys/inotify.h>\n" \
"#include <sys/kernel.h>\n" \
"#include <sys/mount.h>\n" \
"#include <sys/sdt.h>\n" \
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 891e490a7031..75f5fe716c31 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1012,7 +1012,6 @@ ffs_mountfs(struct vnode *odevvp, struct mount *mp, struct thread *td)
else
ump->um_check_blkno = NULL;
mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
- sx_init(&ump->um_checkpath_lock, "uchpth");
fs->fs_ronly = ronly;
fs->fs_active = NULL;
mp->mnt_data = ump;
@@ -1182,7 +1181,6 @@ out:
}
if (ump != NULL) {
mtx_destroy(UFS_MTX(ump));
- sx_destroy(&ump->um_checkpath_lock);
if (mp->mnt_gjprovider != NULL) {
free(mp->mnt_gjprovider, M_UFSMNT);
mp->mnt_gjprovider = NULL;
@@ -1306,7 +1304,6 @@ ffs_unmount(struct mount *mp, int mntflags)
vrele(ump->um_odevvp);
dev_rel(ump->um_dev);
mtx_destroy(UFS_MTX(ump));
- sx_destroy(&ump->um_checkpath_lock);
if (mp->mnt_gjprovider != NULL) {
free(mp->mnt_gjprovider, M_UFSMNT);
mp->mnt_gjprovider = NULL;
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index eaf37c58756b..3f9c95e934fc 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1412,7 +1412,6 @@ ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target,
vp = tvp = ITOV(target);
mp = vp->v_mount;
*wait_ino = 0;
- sx_assert(&VFSTOUFS(mp)->um_checkpath_lock, SA_XLOCKED);
if (target->i_number == source_ino)
return (EEXIST);
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 9aea01e70951..74cb094bdfe4 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1273,9 +1273,9 @@ ufs_rename(
struct mount *mp;
ino_t ino;
seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s;
- bool checkpath_locked, want_seqc_end;
+ bool want_seqc_end;
- checkpath_locked = want_seqc_end = false;
+ want_seqc_end = false;
endoff = 0;
mp = tdvp->v_mount;
@@ -1427,10 +1427,6 @@ relock:
}
vfs_ref(mp);
MPASS(!want_seqc_end);
- if (checkpath_locked) {
- sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock);
- checkpath_locked = false;
- }
VOP_UNLOCK(fdvp);
VOP_UNLOCK(fvp);
vref(tdvp);
@@ -1484,8 +1480,6 @@ relock:
if (error)
goto unlockout;
- sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock);
- checkpath_locked = true;
error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
&ino);
/*
@@ -1493,8 +1487,6 @@ relock:
* everything else and VGET before restarting.
*/
if (ino) {
- sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock);
- checkpath_locked = false;
VOP_UNLOCK(fdvp);
VOP_UNLOCK(fvp);
VOP_UNLOCK(tdvp);
@@ -1574,9 +1566,6 @@ relock:
vn_seqc_write_end(fdvp);
want_seqc_end = false;
vfs_ref(mp);
- MPASS(checkpath_locked);
- sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock);
- checkpath_locked = false;
VOP_UNLOCK(fdvp);
VOP_UNLOCK(fvp);
vref(tdvp);
@@ -1763,9 +1752,6 @@ unlockout:
vn_seqc_write_end(fdvp);
}
- if (checkpath_locked)
- sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock);
-
vput(fdvp);
vput(fvp);
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
index 5c7fa11dae6a..d33b01e4425e 100644
--- a/sys/ufs/ufs/ufsmount.h
+++ b/sys/ufs/ufs/ufsmount.h
@@ -97,8 +97,6 @@ struct ufsmount {
uint64_t um_maxsymlinklen; /* (c) max size of short
symlink */
struct mtx um_lock; /* (c) Protects ufsmount & fs */
- struct sx um_checkpath_lock; /* (c) Protects ufs_checkpath()
- result */
struct mount_softdeps *um_softdep; /* (c) softdep mgmt structure */
struct vnode *um_quotas[MAXQUOTAS]; /* (q) pointer to quota files */
struct ucred *um_cred[MAXQUOTAS]; /* (q) quota file access cred */
diff --git a/sys/x86/linux/linux_dummy_x86.c b/sys/x86/linux/linux_dummy_x86.c
index ae1d23e811e7..221f5dbf5ba3 100644
--- a/sys/x86/linux/linux_dummy_x86.c
+++ b/sys/x86/linux/linux_dummy_x86.c
@@ -46,7 +46,5 @@ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
DUMMY(sysfs);
DUMMY(quotactl);
-/* Linux 2.6.13: */
-DUMMY(inotify_init);
/* Linux 2.6.22: */
DUMMY(signalfd);
diff --git a/tests/ci/tools/freebsdci b/tests/ci/tools/freebsdci
index f0030fe00aba..7b4ce9669ab2 100755
--- a/tests/ci/tools/freebsdci
+++ b/tests/ci/tools/freebsdci
@@ -34,6 +34,7 @@ rcvar=freebsdci_enable
start_cmd="firstboot_ci_run"
stop_cmd=":"
os_arch=$(uname -p)
+parallelism=$(nproc)
tardev=/dev/vtbd1
metadir=/meta
istar=$(file -s ${tardev} | grep "POSIX tar archive" | wc -l)
@@ -74,7 +75,7 @@ full_tests()
tar xvf ${tardev} -C ${metadir}
cd /usr/tests
set +e
- kyua test
+ kyua -v parallelism=${parallelism} test
rc=$?
set -e
if [ ${rc} -ne 0 ] && [ ${rc} -ne 1 ]; then
diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile
index 8cc7beade3f3..26c0013696c7 100644
--- a/tests/sys/kern/Makefile
+++ b/tests/sys/kern/Makefile
@@ -19,6 +19,7 @@ ATF_TESTS_C+= kern_descrip_test
TEST_METADATA.kern_descrip_test+= is_exclusive="true"
ATF_TESTS_C+= fdgrowtable_test
ATF_TESTS_C+= jail_lookup_root
+ATF_TESTS_C+= inotify_test
ATF_TESTS_C+= kill_zombie
.if ${MK_OPENSSL} != "no"
ATF_TESTS_C+= ktls_test
@@ -85,6 +86,7 @@ LIBADD.sys_getrandom+= c
LIBADD.sys_getrandom+= pthread
LIBADD.ptrace_test+= pthread
LIBADD.unix_seqpacket_test+= pthread
+LIBADD.inotify_test+= util
LIBADD.kcov+= pthread
CFLAGS.ktls_test+= -DOPENSSL_API_COMPAT=0x10100000L
LIBADD.ktls_test+= crypto util
diff --git a/tests/sys/kern/inotify_test.c b/tests/sys/kern/inotify_test.c
new file mode 100644
index 000000000000..ed7cef5d148c
--- /dev/null
+++ b/tests/sys/kern/inotify_test.c
@@ -0,0 +1,862 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Klara, Inc.
+ */
+
+#include <sys/capsicum.h>
+#include <sys/filio.h>
+#include <sys/inotify.h>
+#include <sys/ioccom.h>
+#include <sys/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/un.h>
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <mntopts.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <atf-c.h>
+
+static const char *
+ev2name(int event)
+{
+ switch (event) {
+ case IN_ACCESS:
+ return ("IN_ACCESS");
+ case IN_ATTRIB:
+ return ("IN_ATTRIB");
+ case IN_CLOSE_WRITE:
+ return ("IN_CLOSE_WRITE");
+ case IN_CLOSE_NOWRITE:
+ return ("IN_CLOSE_NOWRITE");
+ case IN_CREATE:
+ return ("IN_CREATE");
+ case IN_DELETE:
+ return ("IN_DELETE");
+ case IN_DELETE_SELF:
+ return ("IN_DELETE_SELF");
+ case IN_MODIFY:
+ return ("IN_MODIFY");
+ case IN_MOVE_SELF:
+ return ("IN_MOVE_SELF");
+ case IN_MOVED_FROM:
+ return ("IN_MOVED_FROM");
+ case IN_MOVED_TO:
+ return ("IN_MOVED_TO");
+ case IN_OPEN:
+ return ("IN_OPEN");
+ default:
+ return (NULL);
+ }
+}
+
+static void
+close_checked(int fd)
+{
+ ATF_REQUIRE(close(fd) == 0);
+}
+
+/*
+ * Make sure that no other events are pending, and close the inotify descriptor.
+ */
+static void
+close_inotify(int fd)
+{
+ int n;
+
+ ATF_REQUIRE(ioctl(fd, FIONREAD, &n) == 0);
+ ATF_REQUIRE(n == 0);
+ close_checked(fd);
+}
+
+static uint32_t
+consume_event_cookie(int ifd, int wd, int event, int flags, const char *name)
+{
+ struct inotify_event *ev;
+ size_t evsz, namelen;
+ ssize_t n;
+ uint32_t cookie;
+
+ /* Only read one record. */
+ namelen = name == NULL ? 0 : strlen(name);
+ evsz = sizeof(*ev) + _IN_NAMESIZE(namelen);
+ ev = malloc(evsz);
+ ATF_REQUIRE(ev != NULL);
+
+ n = read(ifd, ev, evsz);
+ ATF_REQUIRE_MSG(n >= 0, "failed to read event %s", ev2name(event));
+ ATF_REQUIRE((size_t)n >= sizeof(*ev));
+ ATF_REQUIRE((size_t)n == sizeof(*ev) + ev->len);
+ ATF_REQUIRE((size_t)n == evsz);
+
+ ATF_REQUIRE_MSG((ev->mask & IN_ALL_EVENTS) == event,
+ "expected event %#x, got %#x", event, ev->mask);
+ ATF_REQUIRE_MSG((ev->mask & _IN_ALL_RETFLAGS) == flags,
+ "expected flags %#x, got %#x", flags, ev->mask);
+ ATF_REQUIRE_MSG(ev->wd == wd,
+ "expected wd %d, got %d", wd, ev->wd);
+ ATF_REQUIRE_MSG(name == NULL || strcmp(name, ev->name) == 0,
+ "expected name '%s', got '%s'", name, ev->name);
+ cookie = ev->cookie;
+ if ((ev->mask & (IN_MOVED_FROM | IN_MOVED_TO)) == 0)
+ ATF_REQUIRE(cookie == 0);
+ free(ev);
+ return (cookie);
+}
+
+/*
+ * Read an event from the inotify file descriptor and check that it
+ * matches the expected values.
+ */
+static void
+consume_event(int ifd, int wd, int event, int flags, const char *name)
+{
+ (void)consume_event_cookie(ifd, wd, event, flags, name);
+}
+
+static int
+inotify(int flags)
+{
+ int ifd;
+
+ ifd = inotify_init1(flags);
+ ATF_REQUIRE(ifd != -1);
+ return (ifd);
+}
+
+static void
+mount_nullfs(char *dir, char *src)
+{
+ struct iovec *iov;
+ char errmsg[1024];
+ int error, iovlen;
+
+ iov = NULL;
+ iovlen = 0;
+
+ build_iovec(&iov, &iovlen, "fstype", "nullfs", (size_t)-1);
+ build_iovec(&iov, &iovlen, "fspath", dir, (size_t)-1);
+ build_iovec(&iov, &iovlen, "target", src, (size_t)-1);
+ build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg));
+
+ errmsg[0] = '\0';
+ error = nmount(iov, iovlen, 0);
+ ATF_REQUIRE_MSG(error == 0,
+ "mount nullfs %s %s: %s", src, dir,
+ errmsg[0] == '\0' ? strerror(errno) : errmsg);
+
+ free_iovec(&iov, &iovlen);
+}
+
+static void
+mount_tmpfs(const char *dir)
+{
+ struct iovec *iov;
+ char errmsg[1024];
+ int error, iovlen;
+
+ iov = NULL;
+ iovlen = 0;
+
+ build_iovec(&iov, &iovlen, "fstype", "tmpfs", (size_t)-1);
+ build_iovec(&iov, &iovlen, "fspath", __DECONST(char *, dir),
+ (size_t)-1);
+ build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg));
+
+ errmsg[0] = '\0';
+ error = nmount(iov, iovlen, 0);
+ ATF_REQUIRE_MSG(error == 0,
+ "mount tmpfs %s: %s", dir,
+ errmsg[0] == '\0' ? strerror(errno) : errmsg);
+
+ free_iovec(&iov, &iovlen);
+}
+
+static int
+watch_file(int ifd, int events, char *path)
+{
+ int fd, wd;
+
+ strncpy(path, "test.XXXXXX", PATH_MAX);
+ fd = mkstemp(path);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+
+ wd = inotify_add_watch(ifd, path, events);
+ ATF_REQUIRE(wd != -1);
+
+ return (wd);
+}
+
+static int
+watch_dir(int ifd, int events, char *path)
+{
+ char *p;
+ int wd;
+
+ strlcpy(path, "test.XXXXXX", PATH_MAX);
+ p = mkdtemp(path);
+ ATF_REQUIRE(p == path);
+
+ wd = inotify_add_watch(ifd, path, events);
+ ATF_REQUIRE(wd != -1);
+
+ return (wd);
+}
+
+/*
+ * Verify that Capsicum restrictions are applied as expected.
+ */
+ATF_TC_WITHOUT_HEAD(inotify_capsicum);
+ATF_TC_BODY(inotify_capsicum, tc)
+{
+ int error, dfd, ifd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+ ATF_REQUIRE(ifd != -1);
+
+ dfd = open(".", O_RDONLY | O_DIRECTORY);
+ ATF_REQUIRE(dfd != -1);
+
+ error = mkdirat(dfd, "testdir", 0755);
+ ATF_REQUIRE(error == 0);
+
+ error = cap_enter();
+ ATF_REQUIRE(error == 0);
+
+ /*
+ * Plain inotify_add_watch() is disallowed.
+ */
+ wd = inotify_add_watch(ifd, ".", IN_DELETE_SELF);
+ ATF_REQUIRE_ERRNO(ECAPMODE, wd == -1);
+ wd = inotify_add_watch_at(ifd, dfd, "testdir", IN_DELETE_SELF);
+ ATF_REQUIRE(wd >= 0);
+
+ /*
+ * Generate a record and consume it.
+ */
+ error = unlinkat(dfd, "testdir", AT_REMOVEDIR);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL);
+ consume_event(ifd, wd, 0, IN_IGNORED, NULL);
+
+ close_checked(dfd);
+ close_inotify(ifd);
+}
+
+/*
+ * Make sure that duplicate, back-to-back events are coalesced.
+ */
+ATF_TC_WITHOUT_HEAD(inotify_coalesce);
+ATF_TC_BODY(inotify_coalesce, tc)
+{
+ char file[PATH_MAX], path[PATH_MAX];
+ int fd, fd1, ifd, n, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ /* Create a directory and watch it. */
+ wd = watch_dir(ifd, IN_OPEN, path);
+ /* Create a file in the directory and open it. */
+ snprintf(file, sizeof(file), "%s/file", path);
+ fd = open(file, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ fd = open(file, O_RDWR);
+ ATF_REQUIRE(fd != -1);
+ fd1 = open(file, O_RDONLY);
+ ATF_REQUIRE(fd1 != -1);
+ close_checked(fd1);
+ close_checked(fd);
+
+ consume_event(ifd, wd, IN_OPEN, 0, "file");
+ ATF_REQUIRE(ioctl(ifd, FIONREAD, &n) == 0);
+ ATF_REQUIRE(n == 0);
+
+ close_inotify(ifd);
+}
+
+/*
+ * Check handling of IN_MASK_CREATE.
+ */
+ATF_TC_WITHOUT_HEAD(inotify_mask_create);
+ATF_TC_BODY(inotify_mask_create, tc)
+{
+ char path[PATH_MAX];
+ int ifd, wd, wd1;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ /* Create a directory and watch it. */
+ wd = watch_dir(ifd, IN_CREATE, path);
+ /* Updating the watch with IN_MASK_CREATE should result in an error. */
+ wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_CREATE);
+ ATF_REQUIRE_ERRNO(EEXIST, wd1 == -1);
+ /* It's an error to specify IN_MASK_ADD with IN_MASK_CREATE. */
+ wd1 = inotify_add_watch(ifd, path, IN_MODIFY | IN_MASK_ADD |
+ IN_MASK_CREATE);
+ ATF_REQUIRE_ERRNO(EINVAL, wd1 == -1);
+ /* Updating the watch without IN_MASK_CREATE should work. */
+ wd1 = inotify_add_watch(ifd, path, IN_MODIFY);
+ ATF_REQUIRE(wd1 != -1);
+ ATF_REQUIRE_EQ(wd, wd1);
+
+ close_inotify(ifd);
+}
+
+/*
+ * Make sure that inotify cooperates with nullfs: if a lower vnode is the
+ * subject of an event, the upper vnode should be notified, and if the upper
+ * vnode is the subject of an event, the lower vnode should be notified.
+ */
+ATF_TC_WITH_CLEANUP(inotify_nullfs);
+ATF_TC_HEAD(inotify_nullfs, tc)
+{
+ atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(inotify_nullfs, tc)
+{
+ char path[PATH_MAX], *p;
+ int dfd, error, fd, ifd, mask, wd;
+
+ mask = IN_CREATE | IN_OPEN;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ strlcpy(path, "./test.XXXXXX", sizeof(path));
+ p = mkdtemp(path);
+ ATF_REQUIRE(p == path);
+
+ error = mkdir("./mnt", 0755);
+ ATF_REQUIRE(error == 0);
+
+ /* Mount the testdir onto ./mnt. */
+ mount_nullfs("./mnt", path);
+
+ wd = inotify_add_watch(ifd, "./mnt", mask);
+ ATF_REQUIRE(wd != -1);
+
+ /* Create a file in the lower directory and open it. */
+ dfd = open(path, O_RDONLY | O_DIRECTORY);
+ ATF_REQUIRE(dfd != -1);
+ fd = openat(dfd, "file", O_RDWR | O_CREAT, 0644);
+ close_checked(fd);
+ close_checked(dfd);
+
+ /* We should see events via the nullfs mount. */
+ consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL);
+ consume_event(ifd, wd, IN_CREATE, 0, "file");
+ consume_event(ifd, wd, IN_OPEN, 0, "file");
+
+ error = inotify_rm_watch(ifd, wd);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, 0, IN_IGNORED, NULL);
+
+ /* Watch the lower directory. */
+ wd = inotify_add_watch(ifd, path, mask);
+ ATF_REQUIRE(wd != -1);
+ /* ... and create a file in the upper directory and open it. */
+ dfd = open("./mnt", O_RDONLY | O_DIRECTORY);
+ ATF_REQUIRE(dfd != -1);
+ fd = openat(dfd, "file2", O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ close_checked(dfd);
+
+ /* We should see events via the lower directory. */
+ consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL);
+ consume_event(ifd, wd, IN_CREATE, 0, "file2");
+ consume_event(ifd, wd, IN_OPEN, 0, "file2");
+
+ close_inotify(ifd);
+}
+ATF_TC_CLEANUP(inotify_nullfs, tc)
+{
+ int error;
+
+ error = unmount("./mnt", 0);
+ if (error != 0) {
+ perror("unmount");
+ exit(1);
+ }
+}
+
+/*
+ * Make sure that exceeding max_events pending events results in an overflow
+ * event.
+ */
+ATF_TC_WITHOUT_HEAD(inotify_queue_overflow);
+ATF_TC_BODY(inotify_queue_overflow, tc)
+{
+ char path[PATH_MAX];
+ size_t size;
+ int error, dfd, ifd, max, wd;
+
+ size = sizeof(max);
+ error = sysctlbyname("vfs.inotify.max_queued_events", &max, &size, NULL,
+ 0);
+ ATF_REQUIRE(error == 0);
+
+ ifd = inotify(IN_NONBLOCK);
+
+ /* Create a directory and watch it for file creation events. */
+ wd = watch_dir(ifd, IN_CREATE, path);
+ dfd = open(path, O_DIRECTORY);
+ ATF_REQUIRE(dfd != -1);
+ /* Generate max+1 file creation events. */
+ for (int i = 0; i < max + 1; i++) {
+ char name[NAME_MAX];
+ int fd;
+
+ (void)snprintf(name, sizeof(name), "file%d", i);
+ fd = openat(dfd, name, O_CREAT | O_RDWR, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ }
+
+ /*
+ * Read our events. We should see files 0..max-1 and then an overflow
+ * event.
+ */
+ for (int i = 0; i < max; i++) {
+ char name[NAME_MAX];
+
+ (void)snprintf(name, sizeof(name), "file%d", i);
+ consume_event(ifd, wd, IN_CREATE, 0, name);
+ }
+
+ /* Look for an overflow event. */
+ consume_event(ifd, -1, 0, IN_Q_OVERFLOW, NULL);
+
+ close_checked(dfd);
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_access_file);
+ATF_TC_BODY(inotify_event_access_file, tc)
+{
+ char path[PATH_MAX], buf[16];
+ off_t nb;
+ ssize_t n;
+ int error, fd, fd1, ifd, s[2], wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_file(ifd, IN_ACCESS, path);
+
+ fd = open(path, O_RDWR);
+ n = write(fd, "test", 4);
+ ATF_REQUIRE(n == 4);
+
+ /* A simple read(2) should generate an access. */
+ ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0);
+ n = read(fd, buf, sizeof(buf));
+ ATF_REQUIRE(n == 4);
+ ATF_REQUIRE(memcmp(buf, "test", 4) == 0);
+ consume_event(ifd, wd, IN_ACCESS, 0, NULL);
+
+ /* copy_file_range(2) should as well. */
+ ATF_REQUIRE(lseek(fd, 0, SEEK_SET) == 0);
+ fd1 = open("sink", O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd1 != -1);
+ n = copy_file_range(fd, NULL, fd1, NULL, 4, 0);
+ ATF_REQUIRE(n == 4);
+ close_checked(fd1);
+ consume_event(ifd, wd, IN_ACCESS, 0, NULL);
+
+ /* As should sendfile(2). */
+ error = socketpair(AF_UNIX, SOCK_STREAM, 0, s);
+ ATF_REQUIRE(error == 0);
+ error = sendfile(fd, s[0], 0, 4, NULL, &nb, 0);
+ ATF_REQUIRE(error == 0);
+ ATF_REQUIRE(nb == 4);
+ consume_event(ifd, wd, IN_ACCESS, 0, NULL);
+ close_checked(s[0]);
+ close_checked(s[1]);
+
+ close_checked(fd);
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_access_dir);
+ATF_TC_BODY(inotify_event_access_dir, tc)
+{
+ char root[PATH_MAX], path[PATH_MAX];
+ struct dirent *ent;
+ DIR *dir;
+ int error, ifd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_dir(ifd, IN_ACCESS, root);
+ snprintf(path, sizeof(path), "%s/dir", root);
+ error = mkdir(path, 0755);
+ ATF_REQUIRE(error == 0);
+
+ /* Read an entry and generate an access. */
+ dir = opendir(path);
+ ATF_REQUIRE(dir != NULL);
+ ent = readdir(dir);
+ ATF_REQUIRE(ent != NULL);
+ ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 ||
+ strcmp(ent->d_name, "..") == 0);
+ ATF_REQUIRE(closedir(dir) == 0);
+ consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, "dir");
+
+ /*
+ * Reading the watched directory should generate an access event.
+ * This is contrary to Linux's inotify man page, which states that
+ * IN_ACCESS is only generated for accesses to objects in a watched
+ * directory.
+ */
+ dir = opendir(root);
+ ATF_REQUIRE(dir != NULL);
+ ent = readdir(dir);
+ ATF_REQUIRE(ent != NULL);
+ ATF_REQUIRE(strcmp(ent->d_name, ".") == 0 ||
+ strcmp(ent->d_name, "..") == 0);
+ ATF_REQUIRE(closedir(dir) == 0);
+ consume_event(ifd, wd, IN_ACCESS, IN_ISDIR, NULL);
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_attrib);
+ATF_TC_BODY(inotify_event_attrib, tc)
+{
+ char path[PATH_MAX];
+ int error, ifd, fd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_file(ifd, IN_ATTRIB, path);
+
+ fd = open(path, O_RDWR);
+ ATF_REQUIRE(fd != -1);
+ error = fchmod(fd, 0600);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_ATTRIB, 0, NULL);
+
+ error = fchown(fd, getuid(), getgid());
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_ATTRIB, 0, NULL);
+
+ close_checked(fd);
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_close_nowrite);
+ATF_TC_BODY(inotify_event_close_nowrite, tc)
+{
+ char file[PATH_MAX], file1[PATH_MAX], dir[PATH_MAX];
+ int ifd, fd, wd1, wd2;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd1 = watch_dir(ifd, IN_CLOSE_NOWRITE, dir);
+ wd2 = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, file);
+
+ fd = open(dir, O_DIRECTORY);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd1, IN_CLOSE_NOWRITE, IN_ISDIR, NULL);
+
+ fd = open(file, O_RDONLY);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd2, IN_CLOSE_NOWRITE, 0, NULL);
+
+ snprintf(file1, sizeof(file1), "%s/file", dir);
+ fd = open(file1, O_RDONLY | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd1, IN_CLOSE_NOWRITE, 0, "file");
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_close_write);
+ATF_TC_BODY(inotify_event_close_write, tc)
+{
+ char path[PATH_MAX];
+ int ifd, fd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_file(ifd, IN_CLOSE_NOWRITE | IN_CLOSE_WRITE, path);
+
+ fd = open(path, O_RDWR);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_CLOSE_WRITE, 0, NULL);
+
+ close_inotify(ifd);
+}
+
+/* Verify that various operations in a directory generate IN_CREATE events. */
+ATF_TC_WITHOUT_HEAD(inotify_event_create);
+ATF_TC_BODY(inotify_event_create, tc)
+{
+ struct sockaddr_un sun;
+ char path[PATH_MAX], path1[PATH_MAX], root[PATH_MAX];
+ ssize_t n;
+ int error, ifd, ifd1, fd, s, wd, wd1;
+ char b;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_dir(ifd, IN_CREATE, root);
+
+ /* Regular file. */
+ snprintf(path, sizeof(path), "%s/file", root);
+ fd = open(path, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ /*
+ * Make sure we get an event triggered by the fd used to create the
+ * file.
+ */
+ ifd1 = inotify(IN_NONBLOCK);
+ wd1 = inotify_add_watch(ifd1, root, IN_MODIFY);
+ b = 42;
+ n = write(fd, &b, sizeof(b));
+ ATF_REQUIRE(n == sizeof(b));
+ close_checked(fd);
+ consume_event(ifd, wd, IN_CREATE, 0, "file");
+ consume_event(ifd1, wd1, IN_MODIFY, 0, "file");
+ close_inotify(ifd1);
+
+ /* Hard link. */
+ snprintf(path1, sizeof(path1), "%s/link", root);
+ error = link(path, path1);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_CREATE, 0, "link");
+
+ /* Directory. */
+ snprintf(path, sizeof(path), "%s/dir", root);
+ error = mkdir(path, 0755);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_CREATE, IN_ISDIR, "dir");
+
+ /* Symbolic link. */
+ snprintf(path1, sizeof(path1), "%s/symlink", root);
+ error = symlink(path, path1);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_CREATE, 0, "symlink");
+
+ /* FIFO. */
+ snprintf(path, sizeof(path), "%s/fifo", root);
+ error = mkfifo(path, 0644);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_CREATE, 0, "fifo");
+
+ /* Binding a socket. */
+ s = socket(AF_UNIX, SOCK_STREAM, 0);
+ memset(&sun, 0, sizeof(sun));
+ sun.sun_family = AF_UNIX;
+ sun.sun_len = sizeof(sun);
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "%s/socket", root);
+ error = bind(s, (struct sockaddr *)&sun, sizeof(sun));
+ ATF_REQUIRE(error == 0);
+ close_checked(s);
+ consume_event(ifd, wd, IN_CREATE, 0, "socket");
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_delete);
+ATF_TC_BODY(inotify_event_delete, tc)
+{
+ char root[PATH_MAX], path[PATH_MAX], file[PATH_MAX];
+ int error, fd, ifd, wd, wd2;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_dir(ifd, IN_DELETE | IN_DELETE_SELF, root);
+
+ snprintf(path, sizeof(path), "%s/file", root);
+ fd = open(path, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ error = unlink(path);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_DELETE, 0, "file");
+ close_checked(fd);
+
+ /*
+ * Make sure that renaming over a file generates a delete event when and
+ * only when that file is watched.
+ */
+ fd = open(path, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ wd2 = inotify_add_watch(ifd, path, IN_DELETE | IN_DELETE_SELF);
+ ATF_REQUIRE(wd2 != -1);
+ snprintf(file, sizeof(file), "%s/file2", root);
+ fd = open(file, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ error = rename(file, path);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd2, IN_DELETE_SELF, 0, NULL);
+ consume_event(ifd, wd2, 0, IN_IGNORED, NULL);
+
+ error = unlink(path);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_DELETE, 0, "file");
+ error = rmdir(root);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd, IN_DELETE_SELF, IN_ISDIR, NULL);
+ consume_event(ifd, wd, 0, IN_IGNORED, NULL);
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_move);
+ATF_TC_BODY(inotify_event_move, tc)
+{
+ char dir1[PATH_MAX], dir2[PATH_MAX], path1[PATH_MAX], path2[PATH_MAX];
+ char path3[PATH_MAX];
+ int error, ifd, fd, wd1, wd2, wd3;
+ uint32_t cookie1, cookie2;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd1 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir1);
+ wd2 = watch_dir(ifd, IN_MOVE | IN_MOVE_SELF, dir2);
+
+ snprintf(path1, sizeof(path1), "%s/file", dir1);
+ fd = open(path1, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ snprintf(path2, sizeof(path2), "%s/file2", dir2);
+ error = rename(path1, path2);
+ ATF_REQUIRE(error == 0);
+ cookie1 = consume_event_cookie(ifd, wd1, IN_MOVED_FROM, 0, "file");
+ cookie2 = consume_event_cookie(ifd, wd2, IN_MOVED_TO, 0, "file2");
+ ATF_REQUIRE_MSG(cookie1 == cookie2,
+ "expected cookie %u, got %u", cookie1, cookie2);
+
+ snprintf(path2, sizeof(path2), "%s/dir", dir2);
+ error = rename(dir1, path2);
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd1, IN_MOVE_SELF, IN_ISDIR, NULL);
+ consume_event(ifd, wd2, IN_MOVED_TO, IN_ISDIR, "dir");
+
+ wd3 = watch_file(ifd, IN_MOVE_SELF, path3);
+ error = rename(path3, "foo");
+ ATF_REQUIRE(error == 0);
+ consume_event(ifd, wd3, IN_MOVE_SELF, 0, NULL);
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITHOUT_HEAD(inotify_event_open);
+ATF_TC_BODY(inotify_event_open, tc)
+{
+ char root[PATH_MAX], path[PATH_MAX];
+ int error, ifd, fd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ wd = watch_dir(ifd, IN_OPEN, root);
+
+ snprintf(path, sizeof(path), "%s/file", root);
+ fd = open(path, O_RDWR | O_CREAT, 0644);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_OPEN, 0, "file");
+
+ fd = open(path, O_PATH);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_OPEN, 0, "file");
+
+ fd = open(root, O_DIRECTORY);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL);
+
+ snprintf(path, sizeof(path), "%s/fifo", root);
+ error = mkfifo(path, 0644);
+ ATF_REQUIRE(error == 0);
+ fd = open(path, O_RDWR);
+ ATF_REQUIRE(fd != -1);
+ close_checked(fd);
+ consume_event(ifd, wd, IN_OPEN, 0, "fifo");
+
+ close_inotify(ifd);
+}
+
+ATF_TC_WITH_CLEANUP(inotify_event_unmount);
+ATF_TC_HEAD(inotify_event_unmount, tc)
+{
+ atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(inotify_event_unmount, tc)
+{
+ int error, fd, ifd, wd;
+
+ ifd = inotify(IN_NONBLOCK);
+
+ error = mkdir("./root", 0755);
+ ATF_REQUIRE(error == 0);
+
+ mount_tmpfs("./root");
+
+ error = mkdir("./root/dir", 0755);
+ ATF_REQUIRE(error == 0);
+ wd = inotify_add_watch(ifd, "./root/dir", IN_OPEN);
+ ATF_REQUIRE(wd >= 0);
+
+ fd = open("./root/dir", O_RDONLY | O_DIRECTORY);
+ ATF_REQUIRE(fd != -1);
+ consume_event(ifd, wd, IN_OPEN, IN_ISDIR, NULL);
+ close_checked(fd);
+
+ /* A regular unmount should fail, as inotify holds a vnode reference. */
+ error = unmount("./root", 0);
+ ATF_REQUIRE_ERRNO(EBUSY, error == -1);
+ error = unmount("./root", MNT_FORCE);
+ ATF_REQUIRE_MSG(error == 0,
+ "unmounting ./root failed: %s", strerror(errno));
+
+ consume_event(ifd, wd, 0, IN_UNMOUNT, NULL);
+ consume_event(ifd, wd, 0, IN_IGNORED, NULL);
+
+ close_inotify(ifd);
+}
+ATF_TC_CLEANUP(inotify_event_unmount, tc)
+{
+ (void)unmount("./root", MNT_FORCE);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+ /* Tests for the inotify syscalls. */
+ ATF_TP_ADD_TC(tp, inotify_capsicum);
+ ATF_TP_ADD_TC(tp, inotify_coalesce);
+ ATF_TP_ADD_TC(tp, inotify_mask_create);
+ ATF_TP_ADD_TC(tp, inotify_nullfs);
+ ATF_TP_ADD_TC(tp, inotify_queue_overflow);
+ /* Tests for the various inotify event types. */
+ ATF_TP_ADD_TC(tp, inotify_event_access_file);
+ ATF_TP_ADD_TC(tp, inotify_event_access_dir);
+ ATF_TP_ADD_TC(tp, inotify_event_attrib);
+ ATF_TP_ADD_TC(tp, inotify_event_close_nowrite);
+ ATF_TP_ADD_TC(tp, inotify_event_close_write);
+ ATF_TP_ADD_TC(tp, inotify_event_create);
+ ATF_TP_ADD_TC(tp, inotify_event_delete);
+ ATF_TP_ADD_TC(tp, inotify_event_move);
+ ATF_TP_ADD_TC(tp, inotify_event_open);
+ ATF_TP_ADD_TC(tp, inotify_event_unmount);
+ return (atf_no_error());
+}
diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c
index 9cc22d382de5..17ed43b55c5a 100644
--- a/usr.bin/kdump/kdump.c
+++ b/usr.bin/kdump/kdump.c
@@ -46,6 +46,7 @@
#include <sys/ktrace.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
+#include <sys/inotify.h>
#include <sys/poll.h>
#include <sys/socket.h>
#include <sys/stat.h>
@@ -105,6 +106,7 @@ static void ktrcsw(struct ktr_csw *);
static void ktrcsw_old(struct ktr_csw_old *);
static void ktruser(int, void *);
static void ktrcaprights(cap_rights_t *);
+static void ktrinotify(struct inotify_event *);
static void ktritimerval(struct itimerval *it);
static void ktrsockaddr(struct sockaddr *);
static void ktrsplice(struct splice *);
@@ -1861,6 +1863,14 @@ ktrtimeval(struct timeval *tv)
}
static void
+ktrinotify(struct inotify_event *ev)
+{
+ printf(
+ "inotify { .wd = %d, .mask = %#x, .cookie = %u, .len = %u, .name = %s }\n",
+ ev->wd, ev->mask, ev->cookie, ev->len, ev->name);
+}
+
+static void
ktritimerval(struct itimerval *it)
{
@@ -2128,6 +2138,17 @@ ktrstruct(char *buf, size_t buflen)
goto invalid;
memcpy(&rights, data, datalen);
ktrcaprights(&rights);
+ } else if (strcmp(name, "inotify") == 0) {
+ struct inotify_event *ev;
+
+ if (datalen < sizeof(struct inotify_event) ||
+ datalen > sizeof(struct inotify_event) + NAME_MAX + 1)
+ goto invalid;
+ ev = malloc(datalen);
+ if (ev == NULL)
+ err(1, "malloc");
+ memcpy(ev, data, datalen);
+ ktrinotify(ev);
} else if (strcmp(name, "itimerval") == 0) {
if (datalen != sizeof(struct itimerval))
goto invalid;
diff --git a/usr.bin/procstat/procstat.1 b/usr.bin/procstat/procstat.1
index cc775ffe133b..1e05e235e619 100644
--- a/usr.bin/procstat/procstat.1
+++ b/usr.bin/procstat/procstat.1
@@ -377,6 +377,8 @@ eventfd
fifo
.It h
shared memory
+.It i
+inotify descriptor
.It k
kqueue
.It m
@@ -862,6 +864,7 @@ procstat: procstat_getprocs()
.Xr sockstat 1 ,
.Xr cap_enter 2 ,
.Xr cap_rights_limit 2 ,
+.Xr inotify 2 ,
.Xr mlock 2 ,
.Xr mlockall 2 ,
.Xr libprocstat 3 ,
diff --git a/usr.bin/procstat/procstat_files.c b/usr.bin/procstat/procstat_files.c
index d61cf1693053..aa4850632aa7 100644
--- a/usr.bin/procstat/procstat_files.c
+++ b/usr.bin/procstat/procstat_files.c
@@ -226,6 +226,10 @@ static struct cap_desc {
{ CAP_BINDAT, "ba" },
{ CAP_CONNECTAT, "ca" },
+ /* Inotify descriptor rights. */
+ { CAP_INOTIFY_ADD, "ina" },
+ { CAP_INOTIFY_RM, "inr" },
+
/* Aliases and defines that combine multiple rights. */
{ CAP_PREAD, "prd" },
{ CAP_PWRITE, "pwr" },
@@ -416,6 +420,11 @@ procstat_files(struct procstat *procstat, struct kinfo_proc *kipp)
xo_emit("{eq:fd_type/eventfd}");
break;
+ case PS_FST_TYPE_INOTIFY:
+ str = "i";
+ xo_emit("{eq:fd_type/inotify}");
+ break;
+
case PS_FST_TYPE_NONE:
str = "?";
xo_emit("{eq:fd_type/none}");
diff --git a/usr.bin/sockstat/sockstat.c b/usr.bin/sockstat/sockstat.c
index 52243910a31c..1a24ff67c321 100644
--- a/usr.bin/sockstat/sockstat.c
+++ b/usr.bin/sockstat/sockstat.c
@@ -951,7 +951,7 @@ formataddr(struct sockaddr_storage *ss, char *buf, size_t bufsize)
}
if (addrstr[0] == '\0') {
error = cap_getnameinfo(capnet, sstosa(ss), ss->ss_len,
- addrstr, sizeof(addrstr), buf, bufsize, NI_NUMERICHOST);
+ addrstr, sizeof(addrstr), NULL, 0, NI_NUMERICHOST);
if (error)
errx(1, "cap_getnameinfo()");
}
diff --git a/usr.bin/truss/syscall.h b/usr.bin/truss/syscall.h
index d79ef882cff0..47d973326dfb 100644
--- a/usr.bin/truss/syscall.h
+++ b/usr.bin/truss/syscall.h
@@ -99,6 +99,7 @@ enum Argtype {
Getfsstatmode,
Idtype,
Ioctl,
+ Inotifyflags,
Itimerwhich,
Kldsymcmd,
Kldunloadflags,
diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c
index 47d6aef8f6ff..656d642e1f19 100644
--- a/usr.bin/truss/syscalls.c
+++ b/usr.bin/truss/syscalls.c
@@ -31,7 +31,6 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
/*
* This file has routines used to print out system calls and their
* arguments.
@@ -316,6 +315,9 @@ static const struct syscall_decode decoded_syscalls[] = {
{ Ptr | OUT, 3 }, { Ptr | OUT, 4 } } },
{ .name = "gettimeofday", .ret_type = 1, .nargs = 2,
.args = { { Timeval | OUT, 0 }, { Ptr, 1 } } },
+ { .name = "inotify_add_watch_at", .ret_type = 1, .nargs = 4,
+ .args = { { Int, 0 }, { Atfd, 1 }, { Name | IN, 2 },
+ { Inotifyflags, 3 } } },
{ .name = "ioctl", .ret_type = 1, .nargs = 3,
.args = { { Int, 0 }, { Ioctl, 1 }, { Ptr, 2 } } },
{ .name = "kevent", .ret_type = 1, .nargs = 6,
@@ -2447,6 +2449,9 @@ print_arg(struct syscall_arg *sc, syscallarg_t *args, syscallarg_t *retval,
print_integer_arg(sysdecode_getfsstat_mode, fp,
args[sc->offset]);
break;
+ case Inotifyflags:
+ print_mask_arg(sysdecode_inotifyflags, fp, args[sc->offset]);
+ break;
case Itimerwhich:
print_integer_arg(sysdecode_itimer, fp, args[sc->offset]);
break;