aboutsummaryrefslogtreecommitdiff
path: root/sys/kern
diff options
context:
space:
mode:
Diffstat (limited to 'sys/kern')
-rw-r--r--sys/kern/imgact_elf.c2
-rw-r--r--sys/kern/kern_descrip.c38
-rw-r--r--sys/kern/kern_event.c136
-rw-r--r--sys/kern/kern_jail.c15
-rw-r--r--sys/kern/kern_time.c4
-rw-r--r--sys/kern/subr_boot.c2
-rw-r--r--sys/kern/subr_early.c2
-rw-r--r--sys/kern/subr_hash.c404
-rw-r--r--sys/kern/subr_module.c13
-rw-r--r--sys/kern/subr_sbuf.c4
-rw-r--r--sys/kern/subr_ticks.S2
-rw-r--r--sys/kern/sys_timerfd.c44
-rw-r--r--sys/kern/uipc_ktls.c75
-rw-r--r--sys/kern/uipc_shm.c2
-rw-r--r--sys/kern/uipc_socket.c2
-rw-r--r--sys/kern/vfs_aio.c6
-rw-r--r--sys/kern/vfs_syscalls.c5
-rw-r--r--sys/kern/vfs_vnops.c22
18 files changed, 575 insertions, 203 deletions
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index 7410efca4807..c91fd8089487 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -2714,6 +2714,7 @@ __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep)
int structsize;
p = arg;
+ structsize = sizeof(Elf_Auxinfo);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, AT_COUNT * sizeof(Elf_Auxinfo),
@@ -2727,7 +2728,6 @@ __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep)
sbuf_delete(sb);
*sizep = size;
} else {
- structsize = sizeof(Elf_Auxinfo);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PHOLD(p);
proc_getauxv(curthread, p, sb);
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 2fa0621bdfca..48303926759b 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -2006,6 +2006,21 @@ fdgrowtable(struct filedesc *fdp, int nfd)
NDSLOTTYPE *nmap, *omap;
KASSERT(fdp->fd_nfiles > 0, ("zero-length file table"));
+ KASSERT(fdp->fd_nfiles >= NDFILE, ("file table of length %d shorter "
+ "than NDFILE (%d)", fdp->fd_nfiles, NDFILE));
+ KASSERT(fdp->fd_nfiles == NDFILE || fdp->fd_nfiles % NDENTRIES == 0,
+ ("file table of length %d should be multiple of NDENTRIES (%zu)",
+ fdp->fd_nfiles, NDENTRIES));
+ KASSERT((fdp->fd_nfiles == NDFILE) == ((intptr_t)fdp->fd_files -
+ offsetof(struct filedesc0, fd_dfiles) == (intptr_t)fdp -
+ offsetof(struct filedesc0, fd_fd)), ("file table of length %d "
+ "should have %s table", fdp->fd_nfiles, fdp->fd_nfiles == NDFILE ?
+ "initial" : "dynamic"));
+ KASSERT((NDSLOTS(fdp->fd_nfiles) <= NDSLOTS(NDFILE)) == ((intptr_t)
+ fdp->fd_map - offsetof(struct filedesc0, fd_dmap) == (intptr_t)fdp -
+ offsetof(struct filedesc0, fd_fd)), ("file table of length %d "
+ "should have %s map", fdp->fd_nfiles, NDSLOTS(fdp->fd_nfiles) <=
+ NDSLOTS(NDFILE) ? "initial" : "dynamic"));
/* save old values */
onfiles = fdp->fd_nfiles;
@@ -2035,9 +2050,19 @@ fdgrowtable(struct filedesc *fdp, int nfd)
onfiles * sizeof(ntable->fdt_ofiles[0]));
/*
- * Allocate a new map only if the old is not large enough. It will
- * grow at a slower rate than the table as it can map more
- * entries than the table can hold.
+ * Allocate a new map only if the old one is not large enough.
+ *
+ * The initial struct filedesc0 object contains a table and map sized
+	 * for NDFILE (20) entries which means the initial map can accommodate
+ * up to NDENTRIES (32 or 64) before requiring reallocation.
+ *
+ * As the new table size (nnfiles) is always rounded up to a multiple
+ * of NDENTRIES, the map will be fully utilised following the first
+ * enlargement, whether it is still the initial map (which will be the
+	 * case if nnfiles == NDENTRIES) or a new one that has been
+ * allocated (which will be the case if nnfiles == X*NDENTRIES for some
+ * X > 1). In either case, subsequent enlargements will always allocate
+ * a new map to go along with the new table.
*/
if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC,
@@ -2045,6 +2070,8 @@ fdgrowtable(struct filedesc *fdp, int nfd)
/* copy over the old data and update the pointer */
memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap));
fdp->fd_map = nmap;
+ } else {
+ nmap = NULL;
}
/*
@@ -2085,9 +2112,10 @@ fdgrowtable(struct filedesc *fdp, int nfd)
/*
* The map does not have the same possibility of threads still
* holding references to it. So always free it as long as it
- * does not reference the original static allocation.
+ * does not reference the original static allocation and a new
+ * map was allocated.
*/
- if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
+ if (nmap != NULL && NDSLOTS(onfiles) > NDSLOTS(NDFILE))
free(omap, M_FILEDESC);
}
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index e8e670d39d09..8c7a0949f024 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -28,7 +28,6 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include "opt_ktrace.h"
#include "opt_kqueue.h"
@@ -1740,9 +1739,11 @@ findkn:
KQ_LOCK(kq);
if (kev->ident < kq->kq_knlistsize) {
- SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link)
+ SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) {
+ MPASS(kn->kn_kq == kq);
if (kev->filter == kn->kn_filter)
break;
+ }
}
} else {
if ((kev->flags & EV_ADD) == EV_ADD) {
@@ -1768,10 +1769,12 @@ findkn:
list = &kq->kq_knhash[
KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
- SLIST_FOREACH(kn, list, kn_link)
+ SLIST_FOREACH(kn, list, kn_link) {
+ MPASS(kn->kn_kq == kq);
if (kev->ident == kn->kn_id &&
kev->filter == kn->kn_filter)
break;
+ }
}
}
@@ -1803,15 +1806,22 @@ findkn:
error = ENOMEM;
goto done;
}
+
+ /*
+ * Now that the kqueue is locked, make sure the fd
+ * didn't change out from under us.
+ */
+ if (fops->f_isfd &&
+ fget_noref_unlocked(td->td_proc->p_fd,
+ kev->ident) != fp) {
+ KQ_UNLOCK(kq);
+ tkn = kn;
+ error = EBADF;
+ goto done;
+ }
kn->kn_fp = fp;
kn->kn_kq = kq;
kn->kn_fop = fops;
- /*
- * apply reference counts to knote structure, and
- * do not release it at the end of this routine.
- */
- fops = NULL;
- fp = NULL;
kn->kn_sfflags = kev->fflags;
kn->kn_sdata = kev->data;
@@ -1832,6 +1842,16 @@ findkn:
goto done;
}
+ /*
+ * We transfer ownership of fops/fp to the knote
+ * structure and avoid releasing them at the end of
+ * this routine, now that all of the remaining exit
+ * paths will knote_drop() to release the reference
+ * counts we held on them above.
+ */
+ fops = NULL;
+ fp = NULL;
+
if ((error = kn->kn_fop->f_attach(kn)) != 0) {
knote_drop_detached(kn, td);
goto done;
@@ -2001,10 +2021,11 @@ kqueue_expand(struct kqueue *kq, const struct filterops *fops, uintptr_t ident,
to_free = NULL;
if (fops->f_isfd) {
fd = ident;
- if (kq->kq_knlistsize <= fd) {
- size = kq->kq_knlistsize;
- while (size <= fd)
+ size = atomic_load_int(&kq->kq_knlistsize);
+ if (size <= fd) {
+ do {
size += KQEXTENT;
+ } while (size <= fd);
list = malloc(size * sizeof(*list), M_KQUEUE, mflag);
if (list == NULL)
return ENOMEM;
@@ -2012,7 +2033,7 @@ kqueue_expand(struct kqueue *kq, const struct filterops *fops, uintptr_t ident,
if ((kq->kq_state & KQ_CLOSING) != 0) {
to_free = list;
error = EBADF;
- } else if (kq->kq_knlistsize > fd) {
+ } else if (kq->kq_knlistsize >= size) {
to_free = list;
} else {
if (kq->kq_knlist != NULL) {
@@ -2027,6 +2048,7 @@ kqueue_expand(struct kqueue *kq, const struct filterops *fops, uintptr_t ident,
kq->kq_knlistsize = size;
kq->kq_knlist = list;
}
+ MPASS(error != 0 || kq->kq_knlistsize > fd);
KQ_UNLOCK(kq);
}
} else {
@@ -2603,6 +2625,8 @@ knlist_add(struct knlist *knl, struct knote *kn, int islocked)
KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn));
KASSERT((kn->kn_status & KN_DETACHED) != 0,
("knote %p was not detached", kn));
+ KASSERT(kn->kn_knlist == NULL,
+ ("knote %p was already on knlist %p", kn, kn->kn_knlist));
if (!islocked)
knl->kl_lock(knl->kl_lockarg);
SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext);
@@ -2625,6 +2649,8 @@ knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked,
KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn));
KASSERT((kn->kn_status & KN_DETACHED) == 0,
("knote %p was already detached", kn));
+ KASSERT(kn->kn_knlist == knl,
+ ("knote %p was not on knlist %p", kn, knl));
if (!knlislocked)
knl->kl_lock(knl->kl_lockarg);
SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext);
@@ -2766,31 +2792,39 @@ knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn)
KNL_ASSERT_LOCKED(knl);
else {
KNL_ASSERT_UNLOCKED(knl);
-again: /* need to reacquire lock since we have dropped it */
knl->kl_lock(knl->kl_lockarg);
}
- SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
- kq = kn->kn_kq;
- KQ_LOCK(kq);
- if (kn_in_flux(kn)) {
- KQ_UNLOCK(kq);
- continue;
- }
- knlist_remove_kq(knl, kn, 1, 1);
- if (killkn) {
- kn_enter_flux(kn);
- KQ_UNLOCK(kq);
- knote_drop_detached(kn, td);
- } else {
- /* Make sure cleared knotes disappear soon */
- kn->kn_flags |= EV_EOF | EV_ONESHOT;
- KQ_UNLOCK(kq);
+ for (;;) {
+ /*
+ * Each pass removes as many knotes as we can before dropping
+ * into FLUXWAIT. Active knotes are simply detached and either
+ * freed or converted to one-shot, as the attached subject is
+ * essentially disappearing.
+ */
+ SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) {
+ kq = kn->kn_kq;
+ KQ_LOCK(kq);
+ if (kn_in_flux(kn)) {
+ KQ_UNLOCK(kq);
+ continue;
+ }
+ knlist_remove_kq(knl, kn, 1, 1);
+ if (killkn) {
+ kn_enter_flux(kn);
+ KQ_UNLOCK(kq);
+ knote_drop_detached(kn, td);
+ } else {
+ /* Make sure cleared knotes disappear soon */
+ kn->kn_flags |= EV_EOF | EV_ONESHOT;
+ KQ_UNLOCK(kq);
+ }
+ kq = NULL;
}
- kq = NULL;
- }
- if (!SLIST_EMPTY(&knl->kl_list)) {
+ if (SLIST_EMPTY(&knl->kl_list))
+ break;
+
/* there are still in flux knotes remaining */
kn = SLIST_FIRST(&knl->kl_list);
kq = kn->kn_kq;
@@ -2800,7 +2834,7 @@ again: /* need to reacquire lock since we have dropped it */
kq->kq_state |= KQ_FLUXWAIT;
msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0);
kq = NULL;
- goto again;
+ knl->kl_lock(knl->kl_lockarg);
}
if (islocked)
@@ -2822,7 +2856,6 @@ knote_fdclose(struct thread *td, int fd)
struct filedesc *fdp = td->td_proc->p_fd;
struct kqueue *kq;
struct knote *kn;
- int influx;
FILEDESC_XLOCK_ASSERT(fdp);
@@ -2832,22 +2865,25 @@ knote_fdclose(struct thread *td, int fd)
*/
TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) {
KQ_LOCK(kq);
+ if (kq->kq_knlistsize <= fd ||
+ SLIST_EMPTY(&kq->kq_knlist[fd])) {
+ KQ_UNLOCK(kq);
+ continue;
+ }
-again:
- influx = 0;
- while (kq->kq_knlistsize > fd &&
- (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
+ while ((kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) {
if (kn_in_flux(kn)) {
- /* someone else might be waiting on our knote */
- if (influx)
- wakeup(kq);
+ /*
+ * Wait for this knote to stabilize, it could be
+ * the case that it's in the process of being
+				 * dropped anyway.
+ */
kq->kq_state |= KQ_FLUXWAIT;
msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
- goto again;
+ continue;
}
kn_enter_flux(kn);
KQ_UNLOCK(kq);
- influx = 1;
knote_drop(kn, td);
KQ_LOCK(kq);
}
@@ -2862,6 +2898,7 @@ knote_attach(struct knote *kn, struct kqueue *kq)
KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn));
KQ_OWNED(kq);
+ MPASS(kn->kn_kq == kq);
if ((kq->kq_state & KQ_CLOSING) != 0)
return (EBADF);
@@ -2910,13 +2947,13 @@ knote_drop_detached(struct knote *kn, struct thread *td)
msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0);
}
+ MPASS(kn->kn_kq == kq);
if (kn->kn_fop->f_isfd)
list = &kq->kq_knlist[kn->kn_id];
else
list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
- if (!SLIST_EMPTY(list))
- SLIST_REMOVE(list, kn, knote, kn_link);
+ SLIST_REMOVE(list, kn, knote, kn_link);
if (kn->kn_status & KN_QUEUED)
knote_dequeue(kn);
KQ_UNLOCK_FLUX(kq);
@@ -3054,6 +3091,7 @@ kqueue_fork_copy_knote(struct kqueue *kq1, struct knote *kn, struct proc *p1,
kn1->kn_status |= KN_DETACHED;
kn1->kn_status &= ~KN_QUEUED;
kn1->kn_kq = kq1;
+ kn1->kn_knlist = NULL;
error = fop->f_copy(kn1, p1);
if (error != 0) {
knote_free(kn1);
@@ -3086,6 +3124,7 @@ kqueue_fork_copy_list(struct klist *knlist, struct knote *marker,
KQ_OWNED(kq);
kn = SLIST_FIRST(knlist);
while (kn != NULL) {
+ MPASS(kn->kn_kq == kq);
if ((kn->kn_status & KN_DETACHED) != 0 ||
(kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0)) {
kn = SLIST_NEXT(kn, kn_link);
@@ -3119,6 +3158,7 @@ kqueue_fork_copy(struct filedesc *fdp, struct file *fp, struct file *fp1,
kq = kq1->kq_forksrc;
marker = knote_alloc(M_WAITOK);
marker->kn_status = KN_MARKER;
+ marker->kn_kq = kq;
KQ_LOCK(kq);
for (i = 0; i < kq->kq_knlistsize; i++) {
@@ -3133,7 +3173,7 @@ kqueue_fork_copy(struct filedesc *fdp, struct file *fp, struct file *fp1,
}
kqueue_release(kq, 1);
kq1->kq_forksrc = NULL;
- KQ_UNLOCK(kq);
+ KQ_UNLOCK_FLUX(kq);
knote_free(marker);
return (error);
@@ -3230,6 +3270,7 @@ kern_proc_kqueue_report(struct sbuf *s, struct proc *p, int kq_fd,
KQ_LOCK(kq);
for (i = 0; i < kq->kq_knlistsize; i++) {
SLIST_FOREACH(kn, &kq->kq_knlist[i], kn_link) {
+ MPASS(kn->kn_kq == kq);
error = kern_proc_kqueue_report_one(s, p, kq_fd,
kq, kn, compat32);
if (error != 0)
@@ -3240,6 +3281,7 @@ kern_proc_kqueue_report(struct sbuf *s, struct proc *p, int kq_fd,
goto out;
for (i = 0; i <= kq->kq_knhashmask; i++) {
SLIST_FOREACH(kn, &kq->kq_knhash[i], kn_link) {
+ MPASS(kn->kn_kq == kq);
error = kern_proc_kqueue_report_one(s, p, kq_fd,
kq, kn, compat32);
if (error != 0)
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
index 369b6aca926c..bc80adb91cd6 100644
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -4385,6 +4385,7 @@ prison_priv_check(struct ucred *cred, int priv)
case PRIV_NET_SETIFVNET:
case PRIV_NET_SETIFFIB:
case PRIV_NET_OVPN:
+ case PRIV_NET_GENEVE:
case PRIV_NET_ME:
case PRIV_NET_WG:
@@ -4736,6 +4737,14 @@ prison_priv_check(struct ucred *cred, int priv)
else
return (EPERM);
+ case PRIV_VMM_CREATE:
+ case PRIV_VMM_DESTROY:
+ /*
+ * Jailed root can create and destroy VMs; the vmm module
+ * additionally checks for the allow.vmm flag.
+ */
+ return (0);
+
case PRIV_VMM_PPTDEV:
/*
* Allow jailed root to manage passthrough devices. vmm(4) also
@@ -4988,6 +4997,10 @@ sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS)
return (0);
}
+/*
+ * Do not add more here. Use SYSCTL_JAIL_PARAM (allow flags for jails)
+ * instead.
+ */
SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I",
@@ -5015,7 +5028,7 @@ SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed,
SYSCTL_PROC(_security_jail, OID_AUTO, mlock_allowed,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
NULL, PR_ALLOW_MLOCK, sysctl_jail_default_allow, "I",
- "Processes in jail can lock/unlock physical pages in memory");
+ "Processes in jail can lock/unlock physical pages in memory (deprecated)");
static int
sysctl_jail_default_level(SYSCTL_HANDLER_ARGS)
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index 0c16045ca610..82c2f7367ab2 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -601,7 +601,9 @@ kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
} while (error == 0 && is_abs_real && td->td_rtcgen == 0);
td->td_rtcgen = 0;
if (error != EWOULDBLOCK) {
- if (TIMESEL(&sbtt, tmp))
+ if (precise)
+ sbtt = sbinuptime();
+ else if (TIMESEL(&sbtt, tmp))
sbtt += tc_tick_sbt;
if (sbtt >= sbt)
return (0);
diff --git a/sys/kern/subr_boot.c b/sys/kern/subr_boot.c
index b721abf7013c..00c8e66617b8 100644
--- a/sys/kern/subr_boot.c
+++ b/sys/kern/subr_boot.c
@@ -53,7 +53,7 @@
#define GETENV(k) kern_getenv(k)
#define FREE(v) freeenv(v)
#else /* Boot loader */
-#define SETENV(k, v) setenv(k, v, 1)
+#define SETENV(k, v) boot_setenv(k, v)
#define GETENV(k) getenv(k)
#define FREE(v)
#endif
diff --git a/sys/kern/subr_early.c b/sys/kern/subr_early.c
index 62d271f56d75..ae06e4b4cbd2 100644
--- a/sys/kern/subr_early.c
+++ b/sys/kern/subr_early.c
@@ -26,8 +26,6 @@
* SUCH DAMAGE.
*/
-#include <sys/param.h>
-#include <sys/types.h>
#include <sys/systm.h>
#include <machine/cpu.h>
diff --git a/sys/kern/subr_hash.c b/sys/kern/subr_hash.c
index 23bb205909b1..e74ad825966e 100644
--- a/sys/kern/subr_hash.c
+++ b/sys/kern/subr_hash.c
@@ -1,6 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
+ * Copyright (c) 2026 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
@@ -37,6 +38,329 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
+#include <sys/ck.h>
+#include <sys/queue.h>
+#include <sys/mutex.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/sx.h>
+#include <sys/hash.h>
+
+#define ASSERT_NOPAD(head, lock) _Static_assert( \
+ sizeof(head ## _HEAD(, foo)) + sizeof(struct lock) == \
+ sizeof(struct { head ## _HEAD(, foo) h; struct lock l; }), \
+ "Structure of " #head "_HEAD and " #lock " has padding")
+ASSERT_NOPAD(LIST, mtx);
+ASSERT_NOPAD(CK_LIST, mtx);
+ASSERT_NOPAD(SLIST, mtx);
+ASSERT_NOPAD(CK_SLIST, mtx);
+ASSERT_NOPAD(STAILQ, mtx);
+ASSERT_NOPAD(CK_STAILQ, mtx);
+ASSERT_NOPAD(TAILQ, mtx);
+ASSERT_NOPAD(LIST, rwlock);
+ASSERT_NOPAD(CK_LIST, rwlock);
+ASSERT_NOPAD(SLIST, rwlock);
+ASSERT_NOPAD(CK_SLIST, rwlock);
+ASSERT_NOPAD(STAILQ, rwlock);
+ASSERT_NOPAD(CK_STAILQ, rwlock);
+ASSERT_NOPAD(TAILQ, rwlock);
+ASSERT_NOPAD(LIST, sx);
+ASSERT_NOPAD(CK_LIST, sx);
+ASSERT_NOPAD(SLIST, sx);
+ASSERT_NOPAD(CK_SLIST, sx);
+ASSERT_NOPAD(STAILQ, sx);
+ASSERT_NOPAD(CK_STAILQ, sx);
+ASSERT_NOPAD(TAILQ, sx);
+ASSERT_NOPAD(LIST, rmlock);
+ASSERT_NOPAD(CK_LIST, rmlock);
+ASSERT_NOPAD(SLIST, rmlock);
+ASSERT_NOPAD(CK_SLIST, rmlock);
+ASSERT_NOPAD(STAILQ, rmlock);
+ASSERT_NOPAD(CK_STAILQ, rmlock);
+ASSERT_NOPAD(TAILQ, rmlock);
+ASSERT_NOPAD(LIST, rmslock);
+ASSERT_NOPAD(CK_LIST, rmslock);
+ASSERT_NOPAD(SLIST, rmslock);
+ASSERT_NOPAD(CK_SLIST, rmslock);
+ASSERT_NOPAD(STAILQ, rmslock);
+ASSERT_NOPAD(CK_STAILQ, rmslock);
+ASSERT_NOPAD(TAILQ, rmslock);
+#undef ASSERT_NOPAD
+
+static inline void
+hashalloc_sizes(struct hashalloc_args *args, size_t *hdrsize, size_t *loffset)
+{
+ switch (args->head) {
+ case HASH_HEAD_LIST:
+ *loffset = sizeof(LIST_HEAD(, foo));
+ break;
+ case HASH_HEAD_CK_LIST:
+ *loffset = sizeof(CK_LIST_HEAD(, foo));
+ break;
+ case HASH_HEAD_SLIST:
+ *loffset = sizeof(SLIST_HEAD(, foo));
+ break;
+ case HASH_HEAD_CK_SLIST:
+ *loffset = sizeof(CK_SLIST_HEAD(, foo));
+ break;
+ case HASH_HEAD_STAILQ:
+ *loffset = sizeof(STAILQ_HEAD(, foo));
+ break;
+ case HASH_HEAD_CK_STAILQ:
+ *loffset = sizeof(CK_STAILQ_HEAD(, foo));
+ break;
+ case HASH_HEAD_TAILQ:
+ *loffset = sizeof(TAILQ_HEAD(, foo));
+ break;
+ }
+
+ switch (args->lock) {
+ case HASH_LOCK_NONE:
+ *hdrsize = *loffset;
+ break;
+ case HASH_LOCK_MTX:
+ *hdrsize = *loffset + sizeof(struct mtx);
+ break;
+ case HASH_LOCK_RWLOCK:
+ *hdrsize = *loffset + sizeof(struct rwlock);
+ break;
+ case HASH_LOCK_SX:
+ *hdrsize = *loffset + sizeof(struct sx);
+ break;
+ case HASH_LOCK_RMLOCK:
+ *hdrsize = *loffset + sizeof(struct rmlock);
+ break;
+ case HASH_LOCK_RMSLOCK:
+ *hdrsize = *loffset + sizeof(struct rmslock);
+ break;
+ }
+
+ if (args->hdrsize > 0) {
+ MPASS(args->hdrsize >= *hdrsize);
+ *hdrsize = args->hdrsize;
+ } else
+ args->hdrsize = *hdrsize;
+}
+
+void *
+hashalloc(struct hashalloc_args *args)
+{
+ static const int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021,
+ 1531, 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653,
+ 7159, 7673, 8191, 12281, 16381, 24571, 32749 };
+ void *mem;
+ size_t size, hdrsize, loffset;
+ u_int i;
+
+ MPASS(args->version == 0);
+ MPASS(args->size > 0);
+
+ switch (args->type) {
+ case HASH_TYPE_POWER2:
+ for (size = 1; size <= args->size; size <<= 1)
+ continue;
+ size >>= 1;
+ break;
+ case HASH_TYPE_PRIME:
+ for (i = nitems(primes) - 1; args->size < primes[i]; i--)
+ ;
+ size = primes[i];
+ break;
+ }
+
+ hashalloc_sizes(args, &hdrsize, &loffset);
+
+ mem = malloc(size * hdrsize, args->mtype, args->mflags);
+ if (mem == NULL) {
+ args->error = ENOMEM;
+ return (NULL);
+ }
+
+ switch (args->lock) {
+ case HASH_LOCK_NONE:
+ break;
+ case HASH_LOCK_MTX:
+ MPASS(args->lname != NULL);
+ if ((args->mflags & M_ZERO) == 0)
+ args->lopts |= MTX_NEW;
+ break;
+ case HASH_LOCK_RWLOCK:
+ MPASS(args->lname != NULL);
+ if ((args->mflags & M_ZERO) == 0)
+ args->lopts |= RW_NEW;
+ break;
+ case HASH_LOCK_SX:
+ MPASS(args->lname != NULL);
+ if ((args->mflags & M_ZERO) == 0)
+ args->lopts |= SX_NEW;
+ break;
+ case HASH_LOCK_RMLOCK:
+ MPASS(args->lname != NULL);
+ if ((args->mflags & M_ZERO) == 0)
+ args->lopts |= RM_NEW;
+ break;
+ case HASH_LOCK_RMSLOCK:
+ MPASS(args->lname != NULL);
+ break;
+ }
+
+ for (i = 0; i < size; i++) {
+ void *slot;
+
+ slot = (char *)mem + i * hdrsize;
+ switch (args->head) {
+ case HASH_HEAD_LIST:
+ LIST_INIT((LIST_HEAD(, foo) *)slot);
+ break;
+ case HASH_HEAD_CK_LIST:
+ CK_LIST_INIT((CK_LIST_HEAD(, foo) *)slot);
+ break;
+ case HASH_HEAD_SLIST:
+ SLIST_INIT((SLIST_HEAD(, foo) *)slot);
+ break;
+ case HASH_HEAD_CK_SLIST:
+ CK_SLIST_INIT((CK_SLIST_HEAD(, foo) *)slot);
+ break;
+ case HASH_HEAD_STAILQ:
+ STAILQ_INIT((STAILQ_HEAD(, foo) *)slot);
+ break;
+ case HASH_HEAD_CK_STAILQ:
+ CK_STAILQ_INIT((CK_STAILQ_HEAD(, foo) *)slot);
+ break;
+ case HASH_HEAD_TAILQ:
+ TAILQ_INIT((TAILQ_HEAD(, foo) *)slot);
+ break;
+ }
+
+ slot = (char *)slot + loffset;
+ switch (args->lock) {
+ case HASH_LOCK_NONE:
+ break;
+ case HASH_LOCK_MTX:
+ mtx_init((struct mtx *)slot, args->lname, NULL,
+ args->lopts);
+ break;
+ case HASH_LOCK_RWLOCK:
+ rw_init_flags((struct rwlock *)slot, args->lname,
+ args->lopts);
+ break;
+ case HASH_LOCK_SX:
+ sx_init_flags((struct sx *)slot, args->lname,
+ args->lopts);
+ break;
+ case HASH_LOCK_RMLOCK:
+ rm_init_flags((struct rmlock *)slot, args->lname,
+ args->lopts);
+ break;
+ case HASH_LOCK_RMSLOCK:
+ rms_init((struct rmslock *)slot, args->lname);
+ break;
+ }
+
+ if (args->ctor != NULL) {
+ slot = (char *)mem + i * hdrsize;
+ if ((args->error = args->ctor(slot)) != 0) {
+ slot = (char *)slot + loffset;
+ switch (args->lock) {
+ case HASH_LOCK_NONE:
+ break;
+ case HASH_LOCK_MTX:
+ mtx_destroy((struct mtx *)slot);
+ break;
+ case HASH_LOCK_RWLOCK:
+ rw_destroy((struct rwlock *)slot);
+ break;
+ case HASH_LOCK_SX:
+ sx_destroy((struct sx *)slot);
+ break;
+ case HASH_LOCK_RMLOCK:
+ rm_destroy((struct rmlock *)slot);
+ break;
+ case HASH_LOCK_RMSLOCK:
+ rms_destroy((struct rmslock *)slot);
+ break;
+ }
+ args->size = i;
+ hashfree(mem, args);
+ return (NULL);
+ }
+ }
+ }
+
+ args->size = size;
+ return (mem);
+}
+
+void
+hashfree(void *mem, struct hashalloc_args *args)
+{
+ size_t hdrsize, loffset;
+
+ if (__predict_false(mem == NULL))
+ return;
+
+ hashalloc_sizes(args, &hdrsize, &loffset);
+
+ for (u_int i = 0; i < args->size; i++) {
+#ifdef INVARIANTS
+ static const char msg[] =
+ "%s: hashtbl %p not empty (malloc type %s)";
+#endif
+#define HPASS(exp) KASSERT(exp, (msg, __func__, mem, args->mtype->ks_shortdesc))
+ void *slot;
+
+ slot = (char *)mem + i * hdrsize;
+ if (args->dtor != NULL)
+ args->dtor(slot);
+ switch (args->head) {
+ case HASH_HEAD_LIST:
+ HPASS(LIST_EMPTY((LIST_HEAD(, foo) *)slot));
+ break;
+ case HASH_HEAD_CK_LIST:
+ HPASS(CK_LIST_EMPTY((CK_LIST_HEAD(, foo) *)slot));
+ break;
+ case HASH_HEAD_SLIST:
+ HPASS(SLIST_EMPTY((SLIST_HEAD(, foo) *)slot));
+ break;
+ case HASH_HEAD_CK_SLIST:
+ HPASS(CK_SLIST_EMPTY((CK_SLIST_HEAD(, foo) *)slot));
+ break;
+ case HASH_HEAD_STAILQ:
+ HPASS(STAILQ_EMPTY((STAILQ_HEAD(, foo) *)slot));
+ break;
+ case HASH_HEAD_CK_STAILQ:
+ HPASS(CK_STAILQ_EMPTY((CK_STAILQ_HEAD(, foo) *)slot));
+ break;
+ case HASH_HEAD_TAILQ:
+ HPASS(TAILQ_EMPTY((TAILQ_HEAD(, foo) *)slot));
+ break;
+ }
+#undef HPASS
+
+ slot = (char *)slot + loffset;
+ switch (args->lock) {
+ case HASH_LOCK_NONE:
+ break;
+ case HASH_LOCK_MTX:
+ mtx_destroy((struct mtx *)slot);
+ break;
+ case HASH_LOCK_RWLOCK:
+ rw_destroy((struct rwlock *)slot);
+ break;
+ case HASH_LOCK_SX:
+ sx_destroy((struct sx *)slot);
+ break;
+ case HASH_LOCK_RMLOCK:
+ rm_destroy((struct rmlock *)slot);
+ break;
+ case HASH_LOCK_RMSLOCK:
+ rms_destroy((struct rmslock *)slot);
+ break;
+ }
+ }
+
+ free(mem, args->mtype);
+}
static __inline int
hash_mflags(int flags)
@@ -52,26 +376,17 @@ void *
hashinit_flags(int elements, struct malloc_type *type, u_long *hashmask,
int flags)
{
- long hashsize, i;
- LIST_HEAD(generic, generic) *hashtbl;
-
- KASSERT(elements > 0, ("%s: bad elements", __func__));
- /* Exactly one of HASH_WAITOK and HASH_NOWAIT must be set. */
- KASSERT((flags & HASH_WAITOK) ^ (flags & HASH_NOWAIT),
- ("Bad flags (0x%x) passed to hashinit_flags", flags));
+ struct hashalloc_args args = {
+ .size = elements,
+ .mtype = type,
+ .mflags = hash_mflags(flags),
+ };
+ void *rv;
- for (hashsize = 1; hashsize <= elements; hashsize <<= 1)
- continue;
- hashsize >>= 1;
-
- hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type,
- hash_mflags(flags));
- if (hashtbl != NULL) {
- for (i = 0; i < hashsize; i++)
- LIST_INIT(&hashtbl[i]);
- *hashmask = hashsize - 1;
- }
- return (hashtbl);
+ rv = hashalloc(&args);
+ if (rv != NULL)
+ *hashmask = args.size - 1;
+ return (rv);
}
/*
@@ -87,20 +402,14 @@ hashinit(int elements, struct malloc_type *type, u_long *hashmask)
void
hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
{
- LIST_HEAD(generic, generic) *hashtbl, *hp;
+ struct hashalloc_args args = {
+ .size = hashmask + 1,
+ .mtype = type,
+ };
- hashtbl = vhashtbl;
- for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
- KASSERT(LIST_EMPTY(hp), ("%s: hashtbl %p not empty "
- "(malloc type %s)", __func__, hashtbl, type->ks_shortdesc));
- free(hashtbl, type);
+ hashfree(vhashtbl, &args);
}
-static const int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531,
- 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143,
- 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 };
-#define NPRIMES nitems(primes)
-
/*
* General routine to allocate a prime number sized hash table with control of
* memory flags.
@@ -108,31 +417,18 @@ static const int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531,
void *
phashinit_flags(int elements, struct malloc_type *type, u_long *nentries, int flags)
{
- long hashsize, i;
- LIST_HEAD(generic, generic) *hashtbl;
-
- KASSERT(elements > 0, ("%s: bad elements", __func__));
- /* Exactly one of HASH_WAITOK and HASH_NOWAIT must be set. */
- KASSERT((flags & HASH_WAITOK) ^ (flags & HASH_NOWAIT),
- ("Bad flags (0x%x) passed to phashinit_flags", flags));
-
- for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
- i++;
- if (i == NPRIMES)
- break;
- hashsize = primes[i];
- }
- hashsize = primes[i - 1];
-
- hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type,
- hash_mflags(flags));
- if (hashtbl == NULL)
- return (NULL);
+ struct hashalloc_args args = {
+ .size = elements,
+ .mtype = type,
+ .type = HASH_TYPE_PRIME,
+ .mflags = hash_mflags(flags),
+ };
+ void *rv;
- for (i = 0; i < hashsize; i++)
- LIST_INIT(&hashtbl[i]);
- *nentries = hashsize;
- return (hashtbl);
+ rv = hashalloc(&args);
+ if (rv != NULL)
+ *nentries = args.size;
+ return (rv);
}
/*
diff --git a/sys/kern/subr_module.c b/sys/kern/subr_module.c
index f8b6770e3189..92f22206f8cf 100644
--- a/sys/kern/subr_module.c
+++ b/sys/kern/subr_module.c
@@ -307,6 +307,7 @@ preload_bootstrap_relocate(vm_offset_t offset)
case MODINFO_ADDR:
case MODINFO_METADATA|MODINFOMD_FONT:
case MODINFO_METADATA|MODINFOMD_SPLASH:
+ case MODINFO_METADATA|MODINFOMD_SHTDWNSPLASH:
case MODINFO_METADATA|MODINFOMD_SSYM:
case MODINFO_METADATA|MODINFOMD_ESYM:
ptr = (vm_offset_t *)(curp + (sizeof(uint32_t) * 2));
@@ -439,11 +440,19 @@ preload_modinfo_type(struct sbuf *sbp, int type)
case MODINFOMD_SPLASH:
sbuf_cat(sbp, "MODINFOMD_SPLASH");
break;
+ case MODINFOMD_SHTDWNSPLASH:
+ sbuf_cat(sbp, "MODINFOMD_SHTDWNSPLASH");
+ break;
#ifdef MODINFOMD_BOOT_HARTID
case MODINFOMD_BOOT_HARTID:
sbuf_cat(sbp, "MODINFOMD_BOOT_HARTID");
break;
#endif
+#ifdef MODINFOMD_EFI_ARCH
+ case MODINFOMD_EFI_ARCH:
+ sbuf_cat(sbp, "MODINFOMD_EFI_ARCH");
+ break;
+#endif
default:
sbuf_cat(sbp, "unrecognized metadata type");
}
@@ -465,6 +474,9 @@ preload_modinfo_value(struct sbuf *sbp, uint32_t *bptr, int type, int len)
case MODINFO_NAME:
case MODINFO_TYPE:
case MODINFO_ARGS:
+#ifdef MODINFOMD_EFI_ARCH
+ case MODINFO_METADATA | MODINFOMD_EFI_ARCH:
+#endif
sbuf_printf(sbp, "%s", (char *)bptr);
break;
case MODINFO_SIZE:
@@ -495,6 +507,7 @@ preload_modinfo_value(struct sbuf *sbp, uint32_t *bptr, int type, int len)
#endif
case MODINFO_METADATA | MODINFOMD_FONT:
case MODINFO_METADATA | MODINFOMD_SPLASH:
+ case MODINFO_METADATA | MODINFOMD_SHTDWNSPLASH:
sbuf_print_vmoffset(sbp, *(vm_offset_t *)bptr);
break;
case MODINFO_METADATA | MODINFOMD_HOWTO:
diff --git a/sys/kern/subr_sbuf.c b/sys/kern/subr_sbuf.c
index 27e18c114afd..c5673e871df4 100644
--- a/sys/kern/subr_sbuf.c
+++ b/sys/kern/subr_sbuf.c
@@ -124,8 +124,8 @@ _assert_sbuf_state(const char *fun, struct sbuf *s, int state)
{
KASSERT((s->s_flags & SBUF_FINISHED) == state,
- ("%s called with %sfinished or corrupt sbuf", fun,
- (state ? "un" : "")));
+ ("%s called with %sfinished or corrupt sbuf %p { s_flags %#010x }, "
+ "state %#010x", fun, (state ? "un" : ""), s, s->s_flags, state));
}
#define assert_sbuf_integrity(s) _assert_sbuf_integrity(__func__, (s))
diff --git a/sys/kern/subr_ticks.S b/sys/kern/subr_ticks.S
index 5cb994293d91..29f44c7b2f78 100644
--- a/sys/kern/subr_ticks.S
+++ b/sys/kern/subr_ticks.S
@@ -40,3 +40,5 @@ ticks =ticksl + TICKS_OFFSET
.type jiffies, %object
jiffies = ticksl
.size jiffies, __SIZEOF_LONG__
+
+ .section .note.GNU-stack,"",%progbits
diff --git a/sys/kern/sys_timerfd.c b/sys/kern/sys_timerfd.c
index 565ab3ad6ee6..22e6a30faa21 100644
--- a/sys/kern/sys_timerfd.c
+++ b/sys/kern/sys_timerfd.c
@@ -118,6 +118,14 @@ timerfd_getboottime(struct timespec *ts)
TIMEVAL_TO_TIMESPEC(&tv, ts);
}
+static void
+timerfd_wakeup(struct timerfd *tfd)
+{
+ wakeup(&tfd->tfd_count);
+ selwakeup(&tfd->tfd_sel);
+ KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
+}
+
/*
* Call when a discontinuous jump has occured in CLOCK_REALTIME and
* update timerfd's cached boottime. A jump can be triggered using
@@ -165,13 +173,15 @@ timerfd_jumped(void)
&diff, &tfd->tfd_time.it_value);
if (callout_stop(&tfd->tfd_callout) == 1) {
callout_schedule_sbt(&tfd->tfd_callout,
- tstosbt(tfd->tfd_time.it_value),
+ tstosbt_sat(tfd->tfd_time.it_value),
0, C_ABSOLUTE);
}
}
}
tfd->tfd_boottim = boottime;
+ if ((tfd->tfd_jumped & TFD_JUMPED) != 0)
+ timerfd_wakeup(tfd);
mtx_unlock(&tfd->tfd_lock);
}
mtx_unlock(&timerfd_list_lock);
@@ -280,7 +290,7 @@ filt_timerfdread(struct knote *kn, long hint)
mtx_assert(&tfd->tfd_lock, MA_OWNED);
kn->kn_data = (int64_t)tfd->tfd_count;
- return (tfd->tfd_count > 0);
+ return (tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ);
}
static const struct filterops timerfd_rfiltops = {
@@ -393,32 +403,32 @@ static void
timerfd_expire(void *arg)
{
struct timerfd *tfd = (struct timerfd *)arg;
- struct timespec uptime;
+ sbintime_t exp, interval, now, next, diff;
++tfd->tfd_count;
tfd->tfd_expired = true;
if (timespecisset(&tfd->tfd_time.it_interval)) {
+ exp = tstosbt_sat(tfd->tfd_time.it_value);
+ interval = tstosbt_sat(tfd->tfd_time.it_interval);
+ now = sbinuptime();
+ next = now > SBT_MAX - interval ? SBT_MAX : now + interval;
+
/* Count missed events. */
- nanouptime(&uptime);
- if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) {
- timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime);
- tfd->tfd_count += tstosbt(uptime) /
- tstosbt(tfd->tfd_time.it_interval);
+ if (now > exp) {
+ diff = now - exp;
+ tfd->tfd_count += diff / interval;
+ next -= diff % interval;
}
- timespecadd(&tfd->tfd_time.it_value,
- &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value);
- callout_schedule_sbt(&tfd->tfd_callout,
- tstosbt(tfd->tfd_time.it_value),
- 0, C_ABSOLUTE);
+
+ callout_schedule_sbt(&tfd->tfd_callout, next, 0, C_ABSOLUTE);
+ tfd->tfd_time.it_value = sbttots(next);
} else {
/* Single shot timer. */
callout_deactivate(&tfd->tfd_callout);
timespecclear(&tfd->tfd_time.it_value);
}
- wakeup(&tfd->tfd_count);
- selwakeup(&tfd->tfd_sel);
- KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
+ timerfd_wakeup(tfd);
}
int
@@ -551,7 +561,7 @@ kern_timerfd_settime(struct thread *td, int fd, int flags,
&tfd->tfd_time.it_value);
}
callout_reset_sbt(&tfd->tfd_callout,
- tstosbt(tfd->tfd_time.it_value),
+ tstosbt_sat(tfd->tfd_time.it_value),
0, timerfd_expire, tfd, C_ABSOLUTE);
} else {
callout_stop(&tfd->tfd_callout);
diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
index 35009ad77722..4c3a4085b8db 100644
--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c
@@ -870,21 +870,15 @@ ktls_clone_session(struct ktls_session *tls, int direction)
static int
ktls_try_toe(struct socket *so, struct ktls_session *tls, int direction)
{
- struct inpcb *inp;
- struct tcpcb *tp;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
int error;
- inp = so->so_pcb;
INP_WLOCK(inp);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- return (ECONNRESET);
- }
- if (inp->inp_socket == NULL) {
+ if (tp->t_flags & TF_DISCONNECTED) {
INP_WUNLOCK(inp);
return (ECONNRESET);
}
- tp = intotcpcb(inp);
if (!(tp->t_flags & TF_TOE)) {
INP_WUNLOCK(inp);
return (EOPNOTSUPP);
@@ -923,19 +917,14 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force,
union if_snd_tag_alloc_params params;
struct ifnet *ifp;
struct nhop_object *nh;
- struct tcpcb *tp;
+ struct tcpcb *tp = intotcpcb(inp);
int error;
INP_RLOCK(inp);
- if (inp->inp_flags & INP_DROPPED) {
+ if (tp->t_flags & TF_DISCONNECTED) {
INP_RUNLOCK(inp);
return (ECONNRESET);
}
- if (inp->inp_socket == NULL) {
- INP_RUNLOCK(inp);
- return (ECONNRESET);
- }
- tp = intotcpcb(inp);
/*
* Check administrative controls on ifnet TLS to determine if
@@ -1027,11 +1016,7 @@ ktls_alloc_rcv_tag(struct inpcb *inp, struct ktls_session *tls,
return (ENXIO);
INP_RLOCK(inp);
- if (inp->inp_flags & INP_DROPPED) {
- INP_RUNLOCK(inp);
- return (ECONNRESET);
- }
- if (inp->inp_socket == NULL) {
+ if (intotcpcb(inp)->t_flags & TF_DISCONNECTED) {
INP_RUNLOCK(inp);
return (ECONNRESET);
}
@@ -1506,23 +1491,15 @@ ktls_get_rx_mode(struct socket *so, int *modep)
int
ktls_get_rx_sequence(struct inpcb *inp, uint32_t *tcpseq, uint64_t *tlsseq)
{
- struct socket *so;
- struct tcpcb *tp;
+ struct socket *so = inp->inp_socket;
+ struct tcpcb *tp = intotcpcb(inp);
INP_RLOCK(inp);
- so = inp->inp_socket;
- if (__predict_false(so == NULL)) {
- INP_RUNLOCK(inp);
- return (EINVAL);
- }
- if (inp->inp_flags & INP_DROPPED) {
+ if (tp->t_flags & TF_DISCONNECTED) {
INP_RUNLOCK(inp);
return (ECONNRESET);
}
- tp = intotcpcb(inp);
- MPASS(tp != NULL);
-
SOCKBUF_LOCK(&so->so_rcv);
*tcpseq = tp->rcv_nxt - so->so_rcv.sb_tlscc;
*tlsseq = so->so_rcv.sb_tls_seqno;
@@ -1697,7 +1674,7 @@ ktls_reset_receive_tag(void *context, int pending)
ifp = NULL;
INP_RLOCK(inp);
- if (inp->inp_flags & INP_DROPPED) {
+ if (intotcpcb(inp)->t_flags & TF_DISCONNECTED) {
INP_RUNLOCK(inp);
goto out;
}
@@ -1818,9 +1795,9 @@ ktls_reset_send_tag(void *context, int pending)
} else {
NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
- if (!(inp->inp_flags & INP_DROPPED)) {
- tp = intotcpcb(inp);
- CURVNET_SET(inp->inp_vnet);
+ tp = intotcpcb(inp);
+ if (!(tp->t_flags & TF_DISCONNECTED)) {
+ CURVNET_SET(inp->inp_socket->so_vnet);
tp = tcp_drop(tp, ECONNABORTED);
CURVNET_RESTORE();
if (tp != NULL) {
@@ -2461,26 +2438,19 @@ ktls_resync_ifnet(struct socket *so, uint32_t tls_len, uint64_t tls_rcd_num)
{
union if_snd_tag_modify_params params;
struct m_snd_tag *mst;
- struct inpcb *inp;
- struct tcpcb *tp;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
mst = so->so_rcv.sb_tls_info->snd_tag;
if (__predict_false(mst == NULL))
return (EINVAL);
- inp = sotoinpcb(so);
- if (__predict_false(inp == NULL))
- return (EINVAL);
-
INP_RLOCK(inp);
- if (inp->inp_flags & INP_DROPPED) {
+ if (tp->t_flags & TF_DISCONNECTED) {
INP_RUNLOCK(inp);
return (ECONNRESET);
}
- tp = intotcpcb(inp);
- MPASS(tp != NULL);
-
/* Get the TCP sequence number of the next valid TLS header. */
SOCKBUF_LOCK(&so->so_rcv);
params.tls_rx.tls_hdr_tcp_sn =
@@ -2500,13 +2470,12 @@ ktls_drop(struct socket *so, int error)
{
struct epoch_tracker et;
struct inpcb *inp = sotoinpcb(so);
- struct tcpcb *tp;
+ struct tcpcb *tp = intotcpcb(inp);
NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
- if (!(inp->inp_flags & INP_DROPPED)) {
- tp = intotcpcb(inp);
- CURVNET_SET(inp->inp_vnet);
+ if (!(tp->t_flags & TF_DISCONNECTED)) {
+ CURVNET_SET(inp->inp_socket->so_vnet);
tp = tcp_drop(tp, error);
CURVNET_RESTORE();
if (tp != NULL)
@@ -3372,7 +3341,8 @@ ktls_disable_ifnet_help(void *context, int pending __unused)
INP_WLOCK(inp);
so = inp->inp_socket;
MPASS(so != NULL);
- if (inp->inp_flags & INP_DROPPED) {
+ tp = intotcpcb(inp);
+ if (tp->t_flags & TF_DISCONNECTED) {
goto out;
}
@@ -3383,8 +3353,7 @@ ktls_disable_ifnet_help(void *context, int pending __unused)
if (err == 0) {
counter_u64_add(ktls_ifnet_disable_ok, 1);
/* ktls_set_tx_mode() drops inp wlock, so recheck flags */
- if ((inp->inp_flags & INP_DROPPED) == 0 &&
- (tp = intotcpcb(inp)) != NULL &&
+ if ((tp->t_flags & TF_DISCONNECTED) == 0 &&
tp->t_fb->tfb_hwtls_change != NULL)
(*tp->t_fb->tfb_hwtls_change)(tp, 0);
} else {
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index fe3feab4149f..0ad5be2e8d71 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -2149,7 +2149,7 @@ sysctl_posix_shm_list(SYSCTL_HANDLER_ARGS)
{
struct shm_mapping *shmm;
struct sbuf sb;
- struct kinfo_file kif;
+ struct kinfo_file kif = {};
u_long i;
int error, error2;
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 45290c29f629..3debec547a80 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -1768,7 +1768,7 @@ so_splice(struct socket *so, struct socket *so2, struct splice *splice)
return (error);
}
SOCK_SENDBUF_LOCK(so2);
- if (so->so_snd.sb_tls_info != NULL) {
+ if (so2->so_snd.sb_tls_info != NULL) {
SOCK_SENDBUF_UNLOCK(so2);
SOCK_UNLOCK(so2);
mtx_lock(&sp->mtx);
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index 2a790237d30e..da0e36fc1ec5 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -2668,8 +2668,7 @@ filt_aiodetach(struct knote *kn)
knl = &kn->kn_ptr.p_aio->klist;
knl->kl_lock(knl->kl_lockarg);
- if (!knlist_empty(knl))
- knlist_remove(knl, kn, 1);
+ knlist_remove(knl, kn, 1);
knl->kl_unlock(knl->kl_lockarg);
}
@@ -2718,8 +2717,7 @@ filt_liodetach(struct knote *kn)
knl = &kn->kn_ptr.p_lio->klist;
knl->kl_lock(knl->kl_lockarg);
- if (!knlist_empty(knl))
- knlist_remove(knl, kn, 1);
+ knlist_remove(knl, kn, 1);
knl->kl_unlock(knl->kl_lockarg);
}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 06500909589e..0c4eeb584d41 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1168,12 +1168,15 @@ openflags(int *flagsp)
{
int flags;
+ flags = *flagsp;
+ if ((flags & ~FUSERALLOWED) != 0)
+ return (EINVAL);
+
/*
* Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
* may be specified. On the other hand, for O_PATH any mode
* except O_EXEC is ignored.
*/
- flags = *flagsp;
if ((flags & O_PATH) != 0) {
flags &= ~O_ACCMODE;
} else if ((flags & O_EXEC) != 0) {
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index ea8f8437b743..4061b2272193 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -2078,26 +2078,22 @@ vn_closefile(struct file *fp, struct thread *td)
* suspension is over, and then proceed.
*/
static int
-vn_start_write_refed(struct mount *mp, int flags, bool mplocked)
+vn_start_write_refed(struct mount *mp, int flags)
{
struct mount_pcpu *mpcpu;
int error, mflags;
- if (__predict_true(!mplocked) && (flags & V_XSLEEP) == 0 &&
- vfs_op_thread_enter(mp, mpcpu)) {
+ if ((flags & V_XSLEEP) == 0 && vfs_op_thread_enter(mp, mpcpu)) {
MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) == 0);
vfs_mp_count_add_pcpu(mpcpu, writeopcount, 1);
vfs_op_thread_exit(mp, mpcpu);
return (0);
}
- if (mplocked)
- mtx_assert(MNT_MTX(mp), MA_OWNED);
- else
- MNT_ILOCK(mp);
-
error = 0;
+ MNT_ILOCK(mp);
+
/*
* Check on status of suspension.
*/
@@ -2165,7 +2161,7 @@ vn_start_write(struct vnode *vp, struct mount **mpp, int flags)
if (vp == NULL)
vfs_ref(mp);
- error = vn_start_write_refed(mp, flags, false);
+ error = vn_start_write_refed(mp, flags);
if (error != 0 && (flags & V_NOWAIT) == 0)
*mpp = NULL;
return (error);
@@ -2373,10 +2369,12 @@ vfs_write_resume(struct mount *mp, int flags)
if ((flags & VR_NO_SUSPCLR) == 0)
VFS_SUSP_CLEAN(mp);
vfs_op_exit(mp);
- } else if ((flags & VR_START_WRITE) != 0) {
- MNT_REF(mp);
- vn_start_write_refed(mp, 0, true);
} else {
+ if ((flags & VR_START_WRITE) != 0) {
+ MNT_REF(mp);
+ mp->mnt_writeopcount++;
+ }
+
MNT_IUNLOCK(mp);
}
}