diff options
Diffstat (limited to 'sys')
| -rw-r--r-- | sys/cam/ata/ata_da.c | 72 | ||||
| -rw-r--r-- | sys/cam/scsi/scsi_da.c | 21 | ||||
| -rw-r--r-- | sys/cddl/boot/zfs/zfsimpl.h | 15 | ||||
| -rw-r--r-- | sys/dev/mlx5/mlx5_en/en_hw_tls.h | 3 | ||||
| -rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c | 53 | ||||
| -rw-r--r-- | sys/dev/mlx5/mlx5_en/mlx5_en_main.c | 3 | ||||
| -rw-r--r-- | sys/dev/mmc/mmc_fdt_helpers.c | 11 | ||||
| -rw-r--r-- | sys/fs/fuse/fuse_vnops.c | 6 | ||||
| -rw-r--r-- | sys/fs/nfsclient/nfs_clvnops.c | 8 | ||||
| -rw-r--r-- | sys/geom/geom_subr.c | 4 | ||||
| -rw-r--r-- | sys/kern/kern_jail.c | 7 | ||||
| -rw-r--r-- | sys/kern/vfs_bio.c | 2 | ||||
| -rw-r--r-- | sys/modules/iwlwifi/Makefile | 4 | ||||
| -rw-r--r-- | sys/net/if_tuntap.c | 63 | ||||
| -rw-r--r-- | sys/netpfil/ipfilter/netinet/ip_htable.c | 43 | ||||
| -rw-r--r-- | sys/netpfil/ipfilter/netinet/ip_htable.h | 2 | ||||
| -rw-r--r-- | sys/netpfil/pf/pf_nl.c | 88 | ||||
| -rw-r--r-- | sys/netpfil/pf/pf_nl.h | 2 | ||||
| -rw-r--r-- | sys/sys/exterr_cat.h | 1 |
19 files changed, 371 insertions, 37 deletions
diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 08747cd59131..9434756b87f9 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -2328,15 +2328,38 @@ adastart(struct cam_periph *periph, union ccb *start_ccb) { struct ada_softc *softc = (struct ada_softc *)periph->softc; struct ccb_ataio *ataio = &start_ccb->ataio; + uint32_t priority = start_ccb->ccb_h.pinfo.priority; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("adastart\n")); + /* + * When we're running the state machine, we should only accept DEV CCBs. + * When we're doing normal I/O we should only accept NORMAL CCBs. + * + * While in the state machine, we carefully single step the queue, but + * there's no protection for 'extra' calls to xpt_schedule() at the + * wrong priority. Guard against that so that we filter any CCBs that + * are offered at the wrong priority. This avoids generating requests + * that are at normal priority. +` */ + if ((softc->state != ADA_STATE_NORMAL && priority != CAM_PRIORITY_DEV) || + (softc->state == ADA_STATE_NORMAL && priority != CAM_PRIORITY_NORMAL)) { + xpt_print(periph->path, "Bad priority for state %d prio %d\n", + softc->state, priority); + xpt_release_ccb(start_ccb); + return; + } + switch (softc->state) { case ADA_STATE_NORMAL: { struct bio *bp; uint8_t tag_code; + KASSERT(priority == CAM_PRIORITY_NORMAL, + ("Expected priority %d, found %d in state normal", + CAM_PRIORITY_NORMAL, priority)); + bp = cam_iosched_next_bio(softc->cam_iosched); if (bp == NULL) { xpt_release_ccb(start_ccb); @@ -2555,6 +2578,11 @@ out: case ADA_STATE_RAHEAD: case ADA_STATE_WCACHE: { + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state %s", + CAM_PRIORITY_DEV, priority, + softc->state == ADA_STATE_RAHEAD ? "rahead" : "wcache")); + cam_fill_ataio(ataio, 1, adadone, @@ -2581,6 +2609,10 @@ out: { struct ata_gp_log_dir *log_dir; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state logdir", + CAM_PRIORITY_DEV, priority)); + if ((softc->flags & ADA_FLAG_CAN_LOG) == 0) { adaprobedone(periph, start_ccb); break; @@ -2615,6 +2647,10 @@ out: { struct ata_identify_log_pages *id_dir; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state iddir", + CAM_PRIORITY_DEV, priority)); + id_dir = malloc(sizeof(*id_dir), M_ATADA, M_NOWAIT | M_ZERO); if (id_dir == NULL) { xpt_print(periph->path, "Couldn't malloc id_dir " @@ -2643,6 +2679,10 @@ out: { struct ata_identify_log_sup_cap *sup_cap; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state sup_cap", + CAM_PRIORITY_DEV, priority)); + sup_cap = malloc(sizeof(*sup_cap), M_ATADA, M_NOWAIT|M_ZERO); if (sup_cap == NULL) { xpt_print(periph->path, "Couldn't malloc sup_cap " @@ -2671,6 +2711,10 @@ out: { struct ata_zoned_info_log *ata_zone; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in state zone", + CAM_PRIORITY_DEV, priority)); + ata_zone = malloc(sizeof(*ata_zone), M_ATADA, M_NOWAIT|M_ZERO); if (ata_zone == NULL) { xpt_print(periph->path, "Couldn't malloc ata_zone " @@ -2896,6 +2940,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) struct bio *bp; int error; + KASSERT(priority == CAM_PRIORITY_NORMAL, + ("Expected priority %d, found %d for normal I/O", + CAM_PRIORITY_NORMAL, priority)); + cam_periph_lock(periph); bp = (struct bio *)done_ccb->ccb_h.ccb_bp; if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { @@ -3000,6 +3048,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) } case ADA_CCB_RAHEAD: { + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state rahead", + CAM_PRIORITY_DEV, priority)); + if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { if (adaerror(done_ccb, 0, 0) == ERESTART) { /* Drop freeze taken due to CAM_DEV_QFREEZE */ @@ -3023,6 +3075,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) } case ADA_CCB_WCACHE: { + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state wcache", + CAM_PRIORITY_DEV, priority)); + if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) { if (adaerror(done_ccb, 0, 0) == ERESTART) { /* Drop freeze taken due to CAM_DEV_QFREEZE */ @@ -3054,6 +3110,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) { int error; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state logdir", + CAM_PRIORITY_DEV, priority)); + if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { error = 0; softc->valid_logdir_len = 0; @@ -3123,6 +3183,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) case ADA_CCB_IDDIR: { int error; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state iddir", + CAM_PRIORITY_DEV, priority)); + if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { off_t entries_offset, max_entries; error = 0; @@ -3208,6 +3272,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) case ADA_CCB_SUP_CAP: { int error; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state sup_cap", + CAM_PRIORITY_DEV, priority)); + if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { uint32_t valid_len; size_t needed_size; @@ -3312,6 +3380,10 @@ adadone(struct cam_periph *periph, union ccb *done_ccb) case ADA_CCB_ZONE: { int error; + KASSERT(priority == CAM_PRIORITY_DEV, + ("Expected priority %d, found %d in ccb state zone", + CAM_PRIORITY_DEV, priority)); + if ((ataio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) { struct ata_zoned_info_log *zi_log; uint32_t valid_len; diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index c0c0be12856b..773a786d08f7 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -3369,12 +3369,33 @@ static void dastart(struct cam_periph *periph, union ccb *start_ccb) { struct da_softc *softc; + uint32_t priority = start_ccb->ccb_h.pinfo.priority; cam_periph_assert(periph, MA_OWNED); softc = (struct da_softc *)periph->softc; CAM_DEBUG(periph->path, CAM_DEBUG_TRACE, ("dastart\n")); + /* + * When we're running the state machine, we should only accept DEV CCBs. + * When we're doing normal I/O we should only accept NORMAL CCBs. + * + * While in the state machine, we carefully single step the queue, but + * there's no protection for 'extra' calls to xpt_schedule() at the + * wrong priority. Guard against that so that we filter any CCBs that + * are offered at the wrong priority. This avoids generating requests + * that are at normal priority. In addition, though we can't easily + * enforce it, one must not transition to the NORMAL state via the + * skipstate mechanism. +` */ + if ((softc->state != DA_STATE_NORMAL && priority != CAM_PRIORITY_DEV) || + (softc->state == DA_STATE_NORMAL && priority != CAM_PRIORITY_NORMAL)) { + xpt_print(periph->path, "Bad priority for state %d prio %d\n", + softc->state, priority); + xpt_release_ccb(start_ccb); + return; + } + skipstate: switch (softc->state) { case DA_STATE_NORMAL: diff --git a/sys/cddl/boot/zfs/zfsimpl.h b/sys/cddl/boot/zfs/zfsimpl.h index c9de1fe4c391..d3ae3c32635d 100644 --- a/sys/cddl/boot/zfs/zfsimpl.h +++ b/sys/cddl/boot/zfs/zfsimpl.h @@ -94,6 +94,7 @@ typedef enum { B_FALSE, B_TRUE } boolean_t; #define P2END(x, align) (-(~(x) & -(align))) #define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align))) #define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1) +#define IS_P2ALIGNED(v, a) ((((uintptr_t)(v)) & ((uintptr_t)(a) - 1)) == 0) /* * General-purpose 32-bit and 64-bit bitfield encodings. @@ -498,19 +499,7 @@ typedef struct zio_eck { * Gang block headers are self-checksumming and contain an array * of block pointers. */ -#define SPA_GANGBLOCKSIZE SPA_MINBLOCKSIZE -#define SPA_GBH_NBLKPTRS ((SPA_GANGBLOCKSIZE - \ - sizeof (zio_eck_t)) / sizeof (blkptr_t)) -#define SPA_GBH_FILLER ((SPA_GANGBLOCKSIZE - \ - sizeof (zio_eck_t) - \ - (SPA_GBH_NBLKPTRS * sizeof (blkptr_t))) /\ - sizeof (uint64_t)) - -typedef struct zio_gbh { - blkptr_t zg_blkptr[SPA_GBH_NBLKPTRS]; - uint64_t zg_filler[SPA_GBH_FILLER]; - zio_eck_t zg_tail; -} zio_gbh_phys_t; +#define SPA_OLD_GANGBLOCKSIZE SPA_MINBLOCKSIZE #define VDEV_RAIDZ_MAXPARITY 3 diff --git a/sys/dev/mlx5/mlx5_en/en_hw_tls.h b/sys/dev/mlx5/mlx5_en/en_hw_tls.h index d637314e040e..cd57d2ac5f72 100644 --- a/sys/dev/mlx5/mlx5_en/en_hw_tls.h +++ b/sys/dev/mlx5/mlx5_en/en_hw_tls.h @@ -82,6 +82,8 @@ struct mlx5e_tls { struct sysctl_ctx_list ctx; struct mlx5e_tls_stats stats; struct workqueue_struct *wq; + struct workqueue_struct *prealloc_wq; + struct work_struct prealloc_work; uma_zone_t zone; uint32_t max_resources; /* max number of resources */ int zone_max; @@ -92,6 +94,7 @@ struct mlx5e_tls { int mlx5e_tls_init(struct mlx5e_priv *); void mlx5e_tls_cleanup(struct mlx5e_priv *); int mlx5e_sq_tls_xmit(struct mlx5e_sq *, struct mlx5e_xmit_args *, struct mbuf **); +void mlx5e_tls_prealloc_tags(struct mlx5e_priv *priv); if_snd_tag_alloc_t mlx5e_tls_snd_tag_alloc; diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c index 6c83de5f3580..851316ccfcd7 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c @@ -80,23 +80,39 @@ static const char *mlx5e_tls_stats_desc[] = { }; static void mlx5e_tls_work(struct work_struct *); +static void mlx5e_tls_prealloc_work(struct work_struct *); /* - * Expand the tls tag UMA zone in a sleepable context + * Expand the tls tag UMA zone in an async context */ static void -mlx5e_prealloc_tags(struct mlx5e_priv *priv, int nitems) +mlx5e_tls_prealloc_work(struct work_struct *work) { + struct mlx5e_priv *priv; + struct mlx5e_tls *ptls; struct mlx5e_tls_tag **tags; - int i; + int i, nitems; + + ptls = container_of(work, struct mlx5e_tls, prealloc_work); + priv = container_of(ptls, struct mlx5e_priv, tls); + nitems = ptls->zone_max; tags = malloc(sizeof(tags[0]) * nitems, - M_MLX5E_TLS, M_WAITOK); - for (i = 0; i < nitems; i++) - tags[i] = uma_zalloc(priv->tls.zone, M_WAITOK); + M_MLX5E_TLS, M_WAITOK | M_ZERO); + for (i = 0; i < nitems; i++) { + tags[i] = uma_zalloc(priv->tls.zone, M_NOWAIT); + /* + * If the allocation fails, its likely we are competing + * with real consumers of tags and the zone is full, + * so exit the loop, and release the tags like we would + * if we allocated all "nitems" + */ + if (tags[i] == NULL) + break; + } __compiler_membar(); - for (i = 0; i < nitems; i++) + for (i = 0; i < nitems && tags[i] != NULL; i++) uma_zfree(priv->tls.zone, tags[i]); free(tags, M_MLX5E_TLS); } @@ -244,8 +260,6 @@ mlx5e_tls_init(struct mlx5e_priv *priv) } uma_zone_set_max(ptls->zone, ptls->zone_max); - if (prealloc_tags != 0) - mlx5e_prealloc_tags(priv, ptls->zone_max); for (x = 0; x != MLX5E_TLS_STATS_NUM; x++) ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK); @@ -271,6 +285,23 @@ mlx5e_tls_init(struct mlx5e_priv *priv) } void +mlx5e_tls_prealloc_tags(struct mlx5e_priv *priv) +{ + struct mlx5e_tls *ptls = &priv->tls; + int prealloc_tags = 0; + + if (ptls->prealloc_wq != NULL) + return; + + TUNABLE_INT_FETCH("hw.mlx5.tls_prealloc_tags", &prealloc_tags); + if (prealloc_tags == 0) + return; + ptls->prealloc_wq = create_singlethread_workqueue("mlx5-tls-prealloc_wq"); + INIT_WORK(&ptls->prealloc_work, mlx5e_tls_prealloc_work); + queue_work(ptls->prealloc_wq, &ptls->prealloc_work); +} + +void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tls *ptls = &priv->tls; @@ -280,6 +311,10 @@ mlx5e_tls_cleanup(struct mlx5e_priv *priv) return; ptls->init = 0; + if (ptls->prealloc_wq != NULL) { + flush_workqueue(ptls->prealloc_wq); + destroy_workqueue(ptls->prealloc_wq); + } flush_workqueue(ptls->wq); sysctl_ctx_free(&ptls->ctx); uma_zdestroy(ptls->zone); diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c index f83506bda1aa..ee9c53bb0a60 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -3335,6 +3335,9 @@ mlx5e_open_locked(if_t ifp) mlx5e_update_carrier(priv); + if ((if_getcapenable(ifp) & (IFCAP_TXTLS4 | IFCAP_TXTLS6)) != 0) + mlx5e_tls_prealloc_tags(priv); + return (0); err_close_channels: diff --git a/sys/dev/mmc/mmc_fdt_helpers.c b/sys/dev/mmc/mmc_fdt_helpers.c index aed85dab55f4..980785464a00 100644 --- a/sys/dev/mmc/mmc_fdt_helpers.c +++ b/sys/dev/mmc/mmc_fdt_helpers.c @@ -160,6 +160,17 @@ cd_setup(struct mmc_helper *helper, phandle_t node) } /* + * If the device has no card-detection, treat it as non-removable. + * This could be improved by polling for detection. + */ + if (helper->props & MMC_PROP_BROKEN_CD) { + helper->cd_disabled = true; + if (bootverbose) + device_printf(dev, "Broken card-detect\n"); + return; + } + + /* * If there is no cd-gpios property, then presumably the hardware * PRESENT_STATE register and interrupts will reflect card state * properly, and there's nothing more for us to do. Our get_present() diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index 6c79e646d2f3..ef5aee5de34c 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -625,7 +625,7 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) return (EROFS); if (fsess_not_impl(mp, FUSE_FALLOCATE)) - return (EXTERROR(EINVAL, "This server does not implement " + return (EXTERROR(EOPNOTSUPP, "This server does not implement " "FUSE_FALLOCATE")); io.uio_offset = *offset; @@ -656,14 +656,14 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) if (err == ENOSYS) { fsess_set_notimpl(mp, FUSE_FALLOCATE); - err = EXTERROR(EINVAL, "This server does not implement " + err = EXTERROR(EOPNOTSUPP, "This server does not implement " "FUSE_ALLOCATE"); } else if (err == EOPNOTSUPP) { /* * The file system server does not support FUSE_FALLOCATE with * the supplied mode for this particular file. */ - err = EXTERROR(EINVAL, "This file can't be pre-allocated"); + err = EXTERROR(EOPNOTSUPP, "This file can't be pre-allocated"); } else if (!err) { *offset += *len; *len = 0; diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 795a8d106051..193d8b6cd5eb 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -3896,11 +3896,15 @@ nfs_allocate(struct vop_allocate_args *ap) mtx_lock(&nmp->nm_mtx); nmp->nm_privflag |= NFSMNTP_NOALLOCATE; mtx_unlock(&nmp->nm_mtx); - error = EINVAL; + error = EOPNOTSUPP; } } else { + /* + * Pre-v4.2 NFS server that doesn't support it, or a newer + * NFS server that has indicated that it doesn't support it. + */ mtx_unlock(&nmp->nm_mtx); - error = EINVAL; + error = EOPNOTSUPP; } if (attrflag != 0) { ret = nfscl_loadattrcache(&vp, &nfsva, NULL, 0, 1); diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c index c70d55c6c321..c5dce730da79 100644 --- a/sys/geom/geom_subr.c +++ b/sys/geom/geom_subr.c @@ -38,9 +38,11 @@ #include <sys/cdefs.h> #include "opt_ddb.h" +#define EXTERR_CATEGORY EXTERR_CAT_GEOM #include <sys/param.h> #include <sys/systm.h> #include <sys/devicestat.h> +#include <sys/exterrvar.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/bio.h> @@ -1674,6 +1676,8 @@ DB_SHOW_COMMAND(bio, db_show_bio) db_printf(" caller2: %p\n", bp->bio_caller2); db_printf(" bio_from: %p\n", bp->bio_from); db_printf(" bio_to: %p\n", bp->bio_to); + if ((bp->bio_flags & BIO_EXTERR) != 0) + exterr_db_print(&bp->bio_exterr); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) db_printf(" bio_track_bp: %p\n", bp->bio_track_bp); diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 523b7e314a10..26a994ef0c32 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -1065,8 +1065,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) * than duplicate it under a different name. */ error = vfs_buildopts(optuio, &opts); - if (error) + if (error) { + opts = NULL; goto done_free; + } cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); if (!cuflags) { @@ -2331,7 +2333,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags) (void)kern_close(td, jfd_out); if (g_path != NULL) free(g_path, M_TEMP); - vfs_freeopts(opts); + if (opts != NULL) + vfs_freeopts(opts); prison_free(mypr); return (error); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 22b7fe8d059a..880cc6b99951 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -5529,6 +5529,8 @@ DB_SHOW_COMMAND(buffer, db_show_buffer) db_printf("\n"); } BUF_LOCKPRINTINFO(bp); + if ((bp->b_ioflags & BIO_EXTERR) != 0) + exterr_db_print(&bp->b_exterr); #if defined(FULL_BUF_TRACKING) db_printf("b_io_tracking: b_io_tcnt = %u\n", bp->b_io_tcnt); diff --git a/sys/modules/iwlwifi/Makefile b/sys/modules/iwlwifi/Makefile index 5d4830537a0b..6fe64a611900 100644 --- a/sys/modules/iwlwifi/Makefile +++ b/sys/modules/iwlwifi/Makefile @@ -91,7 +91,7 @@ CFLAGS+= -DCONFIG_IWLWIFI_DEVICE_TRACING=1 #CFLAGS+= -DCONFIG_THERMAL=1 #CFLAGS+= -DCONFIG_EFI=1 -# XXX-BZ how to do this just for pcie/drv.c (and gcc vs. clang)? -CFLAGS += -Wno-override-init -Wno-initializer-overrides +CWARNFLAGS.clang.drv.c+= -Wno-initializer-overrides +CWARNFLAGS.drv.c+= -Wno-override-init ${CWARNFLAGS.${COMPILER_TYPE}.${.IMPSRC:T}} .include <bsd.kmod.mk> diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c index 56bb90cce9bc..0dc3a58f6ae6 100644 --- a/sys/net/if_tuntap.c +++ b/sys/net/if_tuntap.c @@ -138,6 +138,7 @@ struct tuntap_softc { #define TUN_READY (TUN_OPEN | TUN_INITED) pid_t tun_pid; /* owning pid */ + struct epoch_context tun_epoch_ctx; struct ifnet *tun_ifp; /* the interface */ struct sigio *tun_sigio; /* async I/O info */ struct tuntap_driver *tun_drv; /* appropriate driver */ @@ -630,6 +631,18 @@ out: CURVNET_RESTORE(); } +static void +tunfree(struct epoch_context *ctx) +{ + struct tuntap_softc *tp; + + tp = __containerof(ctx, struct tuntap_softc, tun_epoch_ctx); + + /* Any remaining resources that would be needed by a concurrent open. */ + mtx_destroy(&tp->tun_mtx); + free(tp, M_TUN); +} + static int tun_destroy(struct tuntap_softc *tp, bool may_intr) { @@ -649,7 +662,7 @@ tun_destroy(struct tuntap_softc *tp, bool may_intr) error = cv_wait_sig(&tp->tun_cv, &tp->tun_mtx); else cv_wait(&tp->tun_cv, &tp->tun_mtx); - if (error != 0) { + if (error != 0 && tp->tun_busy != 0) { tp->tun_flags &= ~TUN_DYING; TUN_UNLOCK(tp); return (error); @@ -663,8 +676,18 @@ tun_destroy(struct tuntap_softc *tp, bool may_intr) TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); - /* destroy_dev will take care of any alias. */ - destroy_dev(tp->tun_dev); + /* + * destroy_dev will take care of any alias. For transient tunnels, + * we're being called from close(2) so we can't destroy it ourselves + * without deadlocking, but we already know that we can cleanup + * everything else and just continue to prevent it from being reopened. + */ + if ((tp->tun_flags & TUN_TRANSIENT) != 0) { + atomic_store_ptr(&tp->tun_dev->si_drv1, tp->tun_dev); + destroy_dev_sched(tp->tun_dev); + } else { + destroy_dev(tp->tun_dev); + } seldrain(&tp->tun_rsel); knlist_clear(&tp->tun_rsel.si_note, 0); knlist_destroy(&tp->tun_rsel.si_note); @@ -679,9 +702,8 @@ tun_destroy(struct tuntap_softc *tp, bool may_intr) sx_xunlock(&tun_ioctl_sx); free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit); if_free(TUN2IFP(tp)); - mtx_destroy(&tp->tun_mtx); cv_destroy(&tp->tun_cv); - free(tp, M_TUN); + NET_EPOCH_CALL(tunfree, &tp->tun_epoch_ctx); CURVNET_RESTORE(); return (0); @@ -742,9 +764,11 @@ tun_uninit(const void *unused __unused) mtx_unlock(&tunmtx); for (i = 0; i < nitems(tuntap_drivers); ++i) { drv = &tuntap_drivers[i]; + destroy_dev_drain(&drv->cdevsw); delete_unrhdr(drv->unrhdr); clone_cleanup(&drv->clones); } + NET_EPOCH_DRAIN_CALLBACKS(); mtx_destroy(&tunmtx); } SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL); @@ -1104,19 +1128,43 @@ out: static int tunopen(struct cdev *dev, int flag, int mode, struct thread *td) { + struct epoch_tracker et; struct ifnet *ifp; struct tuntap_softc *tp; + void *p; int error __diagused, tunflags; + /* + * Transient tunnels do deferred destroy of the tun device but want + * to immediately cleanup state, so they clobber si_drv1 to avoid a + * use-after-free in case someone does happen to open it in the interim. + * We avoid using NULL to be able to distinguish from an uninitialized + * cdev. + * + * We use the net epoch here to let a concurrent tun_destroy() schedule + * freeing our tuntap_softc, in case we entered here and loaded si_drv1 + * before it was swapped out. If we managed to load this while it was + * still a softc, then the concurrent tun_destroy() hasn't yet scheduled + * it to be free- that will take place sometime after the epoch we just + * entered, so we can safely use it. + */ + NET_EPOCH_ENTER(et); + p = atomic_load_ptr(&dev->si_drv1); + if (p == dev) { + NET_EPOCH_EXIT(et); + return (ENXIO); + } + tunflags = 0; CURVNET_SET(TD_TO_VNET(td)); error = tuntap_name2info(dev->si_name, NULL, &tunflags); if (error != 0) { CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); return (error); /* Shouldn't happen */ } - tp = dev->si_drv1; + tp = p; KASSERT(tp != NULL, ("si_drv1 should have been initialized at creation")); @@ -1124,14 +1172,17 @@ tunopen(struct cdev *dev, int flag, int mode, struct thread *td) if ((tp->tun_flags & TUN_INITED) == 0) { TUN_UNLOCK(tp); CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); return (ENXIO); } if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) { TUN_UNLOCK(tp); CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); return (EBUSY); } + NET_EPOCH_EXIT(et); error = tun_busy_locked(tp); KASSERT(error == 0, ("Must be able to busy an unopen tunnel")); ifp = TUN2IFP(tp); diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.c b/sys/netpfil/ipfilter/netinet/ip_htable.c index 3f765cfab947..5f5c04732d69 100644 --- a/sys/netpfil/ipfilter/netinet/ip_htable.c +++ b/sys/netpfil/ipfilter/netinet/ip_htable.c @@ -96,6 +96,8 @@ typedef struct ipf_htable_softc_s { u_long ipf_nhtnodes[LOOKUP_POOL_SZ]; iphtable_t *ipf_htables[LOOKUP_POOL_SZ]; iphtent_t *ipf_node_explist; + ipftuneable_t *ipf_htable_tune; + u_int ipf_htable_size_max; } ipf_htable_softc_t; ipf_lookup_t ipf_htable_backend = { @@ -122,6 +124,18 @@ ipf_lookup_t ipf_htable_backend = { }; +static ipftuneable_t ipf_htable_tuneables[] = { + { { (void *)offsetof(ipf_htable_softc_t, ipf_htable_size_max) }, + "htable_size_max", 1, 0x7fffffff, + stsizeof(ipf_htable_softc_t, ipf_htable_size_max), + 0, NULL, NULL }, + { { NULL }, + NULL, 0, 0, + 0, + 0, NULL, NULL } +}; + + /* ------------------------------------------------------------------------ */ /* Function: ipf_htable_soft_create */ /* Returns: void * - NULL = failure, else pointer to local context */ @@ -142,6 +156,18 @@ ipf_htable_soft_create(ipf_main_softc_t *softc) bzero((char *)softh, sizeof(*softh)); + softh->ipf_htable_tune = ipf_tune_array_copy(softh, + sizeof(ipf_htable_tuneables), + ipf_htable_tuneables); + if (softh->ipf_htable_tune == NULL) { + ipf_htable_soft_destroy(softc, softh); + return (NULL); + } + if (ipf_tune_array_link(softc, softh->ipf_htable_tune) == -1) { + ipf_htable_soft_destroy(softc, softh); + return (NULL); + } + return (softh); } @@ -160,6 +186,12 @@ ipf_htable_soft_destroy(ipf_main_softc_t *softc, void *arg) { ipf_htable_softc_t *softh = arg; + if (softh->ipf_htable_tune != NULL) { + ipf_tune_array_unlink(softc, softh->ipf_htable_tune); + KFREES(softh->ipf_htable_tune, sizeof(ipf_htable_tuneables)); + softh->ipf_htable_tune = NULL; + } + KFREE(softh); } @@ -179,6 +211,8 @@ ipf_htable_soft_init(ipf_main_softc_t *softc, void *arg) bzero((char *)softh, sizeof(*softh)); + softh->ipf_htable_size_max = IPHTABLE_MAX_SIZE; + return (0); } @@ -327,6 +361,15 @@ ipf_htable_create(ipf_main_softc_t *softc, void *arg, iplookupop_t *op) iph->iph_name[sizeof(iph->iph_name) - 1] = '\0'; } + if ((iph->iph_size == 0) || + (iph->iph_size > softh->ipf_htable_size_max)) { + IPFERROR(30027); + return (EINVAL); + } + if (iph->iph_size > ( SIZE_MAX / sizeof(*iph->iph_table))) { + IPFERROR(30028); + return (EINVAL); + } KMALLOCS(iph->iph_table, iphtent_t **, iph->iph_size * sizeof(*iph->iph_table)); if (iph->iph_table == NULL) { diff --git a/sys/netpfil/ipfilter/netinet/ip_htable.h b/sys/netpfil/ipfilter/netinet/ip_htable.h index 55c289e57ff6..3a8782ccd4b2 100644 --- a/sys/netpfil/ipfilter/netinet/ip_htable.h +++ b/sys/netpfil/ipfilter/netinet/ip_htable.h @@ -55,6 +55,8 @@ typedef struct iphtable_s { char iph_name[FR_GROUPLEN]; /* hash table number */ } iphtable_t; +#define IPHTABLE_MAX_SIZE 1024 + /* iph_type */ #define IPHASH_LOOKUP 0 #define IPHASH_GROUPMAP 1 diff --git a/sys/netpfil/pf/pf_nl.c b/sys/netpfil/pf/pf_nl.c index 21d4db1b8478..993981a9c0de 100644 --- a/sys/netpfil/pf/pf_nl.c +++ b/sys/netpfil/pf/pf_nl.c @@ -2246,6 +2246,87 @@ pf_handle_table_set_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) return (error); } +static int +nlattr_add_pfr_addr(struct nl_writer *nw, int attr, const struct pfr_addr *a) +{ + int off = nlattr_add_nested(nw, attr); + if (off == 0) + return (false); + + nlattr_add_u32(nw, PFR_A_AF, a->pfra_af); + nlattr_add_u8(nw, PFR_A_NET, a->pfra_net); + nlattr_add_bool(nw, PFR_A_NOT, a->pfra_not); + nlattr_add_in6_addr(nw, PFR_A_ADDR, &a->pfra_u._pfra_ip6addr); + + nlattr_set_len(nw, off); + + return (true); +} + +static int +pf_handle_table_get_addrs(struct nlmsghdr *hdr, struct nl_pstate *npt) +{ + struct pfioc_table attrs = { 0 }; + struct pfr_addr *pfras; + struct nl_writer *nw = npt->nw; + struct genlmsghdr *ghdr_new; + int size = 0; + int error; + + PF_RULES_RLOCK_TRACKER; + + error = nl_parse_nlmsg(hdr, &table_addr_parser, npt, &attrs); + if (error != 0) + return (error); + + PF_RULES_RLOCK(); + /* Get required size. */ + error = pfr_get_addrs(&attrs.pfrio_table, NULL, + &size, attrs.pfrio_flags | PFR_FLAG_USERIOCTL); + if (error != 0) { + PF_RULES_RUNLOCK(); + return (error); + } + pfras = mallocarray(size, sizeof(struct pfr_addr), M_PF, + M_NOWAIT | M_ZERO); + if (pfras == NULL) { + PF_RULES_RUNLOCK(); + return (ENOMEM); + } + /* Now get the addresses. */ + error = pfr_get_addrs(&attrs.pfrio_table, pfras, + &size, attrs.pfrio_flags | PFR_FLAG_USERIOCTL); + PF_RULES_RUNLOCK(); + if (error != 0) + goto out; + + for (int i = 0; i < size; i++) { + if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) { + nlmsg_abort(nw); + error = ENOMEM; + goto out; + } + ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); + ghdr_new->cmd = PFNL_CMD_TABLE_GET_ADDR; + ghdr_new->version = 0; + ghdr_new->reserved = 0; + + if (i == 0) + nlattr_add_u32(nw, PF_TA_ADDR_COUNT, size); + + nlattr_add_pfr_addr(nw, PF_TA_ADDR, &pfras[i]); + if (!nlmsg_end(nw)) { + nlmsg_abort(nw); + error = ENOMEM; + goto out; + } + } + +out: + free(pfras, M_PF); + return (error); +} + static const struct nlhdr_parser *all_parsers[] = { &state_parser, &addrule_parser, @@ -2504,6 +2585,13 @@ static const struct genl_cmd pf_cmds[] = { .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_HASPOL, .cmd_priv = PRIV_NETINET_PF, }, + { + .cmd_num = PFNL_CMD_TABLE_GET_ADDR, + .cmd_name = "TABLE_GET_ADDRS", + .cmd_cb = pf_handle_table_get_addrs, + .cmd_flags = GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, + .cmd_priv = PRIV_NETINET_PF, + }, }; void diff --git a/sys/netpfil/pf/pf_nl.h b/sys/netpfil/pf/pf_nl.h index d1538ab4ff5b..e1eb3e628df5 100644 --- a/sys/netpfil/pf/pf_nl.h +++ b/sys/netpfil/pf/pf_nl.h @@ -70,6 +70,7 @@ enum { PFNL_CMD_TABLE_ADD_ADDR = 32, PFNL_CMD_TABLE_DEL_ADDR = 33, PFNL_CMD_TABLE_SET_ADDR = 34, + PFNL_CMD_TABLE_GET_ADDR = 35, __PFNL_CMD_MAX, }; #define PFNL_CMD_MAX (__PFNL_CMD_MAX -1) @@ -485,6 +486,7 @@ enum pf_table_addrs_t { PF_TA_NBR_ADDED = 4, /* u32 */ PF_TA_NBR_DELETED = 5, /* u32 */ PF_TA_NBR_CHANGED = 6, /* u32 */ + PF_TA_ADDR_COUNT = 7, /* u32 */ }; #ifdef _KERNEL diff --git a/sys/sys/exterr_cat.h b/sys/sys/exterr_cat.h index 34a4b9f86694..318e774542ca 100644 --- a/sys/sys/exterr_cat.h +++ b/sys/sys/exterr_cat.h @@ -23,6 +23,7 @@ #define EXTERR_CAT_VFSSYSCALL 9 #define EXTERR_CAT_VFSBIO 10 #define EXTERR_CAT_GEOMVFS 11 +#define EXTERR_CAT_GEOM 12 #endif |
