diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c | 244 | ||||
-rw-r--r-- | sys/contrib/openzfs/module/zfs/arc.c | 32 | ||||
-rw-r--r-- | sys/contrib/openzfs/module/zfs/dmu_zfetch.c | 10 | ||||
-rw-r--r-- | sys/contrib/openzfs/module/zfs/vdev.c | 2 | ||||
-rw-r--r-- | sys/contrib/openzfs/module/zfs/vdev_queue.c | 2 | ||||
-rw-r--r-- | sys/contrib/openzfs/module/zfs/vdev_removal.c | 4 | ||||
-rw-r--r-- | sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg | 4 | ||||
-rw-r--r-- | sys/dev/nvme/nvme_private.h | 6 | ||||
-rw-r--r-- | sys/geom/part/g_part.c | 14 | ||||
-rw-r--r-- | sys/net80211/ieee80211.c | 28 | ||||
-rw-r--r-- | sys/net80211/ieee80211_crypto.c | 87 | ||||
-rw-r--r-- | sys/netinet/tcp_lro.c | 9 | ||||
-rw-r--r-- | sys/vm/uma_core.c | 22 | ||||
-rw-r--r-- | sys/x86/x86/mca.c | 232 |
14 files changed, 562 insertions, 134 deletions
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c index 393bfaa65ff5..ace2360c032d 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c @@ -188,6 +188,11 @@ param_set_arc_max(SYSCTL_HANDLER_ARGS) return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, + CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + NULL, 0, param_set_arc_max, "LU", + "Maximum ARC size in bytes (LEGACY)"); + int param_set_arc_min(SYSCTL_HANDLER_ARGS) { @@ -212,6 +217,11 @@ param_set_arc_min(SYSCTL_HANDLER_ARGS) return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, + CTLTYPE_ULONG | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + NULL, 0, param_set_arc_min, "LU", + "Minimum ARC size in bytes (LEGACY)"); + extern uint_t zfs_arc_free_target; int @@ -235,6 +245,16 @@ param_set_arc_free_target(SYSCTL_HANDLER_ARGS) return (0); } +/* + * NOTE: This sysctl is CTLFLAG_RW not CTLFLAG_RWTUN due to its dependency on + * pagedaemon initialization. + */ +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, + CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, + NULL, 0, param_set_arc_free_target, "IU", + "Desired number of free pages below which ARC triggers reclaim" + " (LEGACY)"); + int param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) { @@ -253,6 +273,187 @@ param_set_arc_no_grow_shift(SYSCTL_HANDLER_ARGS) return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_no_grow_shift, + CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + NULL, 0, param_set_arc_no_grow_shift, "I", + "log2(fraction of ARC which must be free to allow growing) (LEGACY)"); + +extern uint64_t l2arc_write_max; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_max, + CTLFLAG_RWTUN, &l2arc_write_max, 0, + "Max write bytes per interval (LEGACY)"); + +extern uint64_t l2arc_write_boost; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_write_boost, + CTLFLAG_RWTUN, &l2arc_write_boost, 0, + "Extra write bytes during device warmup (LEGACY)"); + +extern uint64_t l2arc_headroom; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom, + CTLFLAG_RWTUN, &l2arc_headroom, 0, + "Number of max device writes to precache (LEGACY)"); + +extern uint64_t l2arc_headroom_boost; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_headroom_boost, + CTLFLAG_RWTUN, &l2arc_headroom_boost, 0, + "Compressed l2arc_headroom multiplier (LEGACY)"); + +extern uint64_t l2arc_feed_secs; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_secs, + CTLFLAG_RWTUN, &l2arc_feed_secs, 0, + "Seconds between L2ARC writing (LEGACY)"); + +extern uint64_t l2arc_feed_min_ms; + +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, l2arc_feed_min_ms, + CTLFLAG_RWTUN, &l2arc_feed_min_ms, 0, + "Min feed interval in milliseconds (LEGACY)"); + +extern int l2arc_noprefetch; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_noprefetch, + CTLFLAG_RWTUN, &l2arc_noprefetch, 0, + "Skip caching prefetched buffers (LEGACY)"); + +extern int l2arc_feed_again; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_feed_again, + CTLFLAG_RWTUN, &l2arc_feed_again, 0, + "Turbo L2ARC warmup (LEGACY)"); + +extern int l2arc_norw; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, l2arc_norw, + CTLFLAG_RWTUN, &l2arc_norw, 0, + "No reads during writes (LEGACY)"); + +static int +param_get_arc_state_size(SYSCTL_HANDLER_ARGS) +{ + arc_state_t *state = (arc_state_t *)arg1; + int64_t val; + + val = zfs_refcount_count(&state->arcs_size[ARC_BUFC_DATA]) + + zfs_refcount_count(&state->arcs_size[ARC_BUFC_METADATA]); + return (sysctl_handle_64(oidp, &val, 0, req)); +} + +extern arc_state_t ARC_anon; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, anon_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_anon, 0, param_get_arc_state_size, "Q", + "size of anonymous state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_metadata_esize, CTLFLAG_RD, + &ARC_anon.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in anonymous state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, anon_data_esize, CTLFLAG_RD, + &ARC_anon.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in anonymous state"); + +extern arc_state_t ARC_mru; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_mru, 0, param_get_arc_state_size, "Q", + "size of mru state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_metadata_esize, CTLFLAG_RD, + &ARC_mru.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in mru state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_data_esize, CTLFLAG_RD, + &ARC_mru.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in mru state"); + +extern arc_state_t ARC_mru_ghost; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, mru_ghost_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_mru_ghost, 0, param_get_arc_state_size, "Q", + "size of mru ghost state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_metadata_esize, CTLFLAG_RD, + &ARC_mru_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in mru ghost state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mru_ghost_data_esize, CTLFLAG_RD, + &ARC_mru_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in mru ghost state"); + +extern arc_state_t ARC_mfu; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_mfu, 0, param_get_arc_state_size, "Q", + "size of mfu state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_metadata_esize, CTLFLAG_RD, + &ARC_mfu.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in mfu state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_data_esize, CTLFLAG_RD, + &ARC_mfu.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in mfu state"); + +extern arc_state_t ARC_mfu_ghost; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, mfu_ghost_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_mfu_ghost, 0, param_get_arc_state_size, "Q", + "size of mfu ghost state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_metadata_esize, CTLFLAG_RD, + &ARC_mfu_ghost.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in mfu ghost state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, mfu_ghost_data_esize, CTLFLAG_RD, + &ARC_mfu_ghost.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in mfu ghost state"); + +extern arc_state_t ARC_uncached; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, uncached_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_uncached, 0, param_get_arc_state_size, "Q", + "size of uncached state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_metadata_esize, CTLFLAG_RD, + &ARC_uncached.arcs_esize[ARC_BUFC_METADATA].rc_count, 0, + "size of evictable metadata in uncached state"); +SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, uncached_data_esize, CTLFLAG_RD, + &ARC_uncached.arcs_esize[ARC_BUFC_DATA].rc_count, 0, + "size of evictable data in uncached state"); + +extern arc_state_t ARC_l2c_only; + +SYSCTL_PROC(_vfs_zfs, OID_AUTO, l2c_only_size, + CTLTYPE_S64 | CTLFLAG_RD | CTLFLAG_MPSAFE, + &ARC_l2c_only, 0, param_get_arc_state_size, "Q", + "size of l2c_only state"); + +/* dbuf.c */ + +/* dmu.c */ + +/* dmu_zfetch.c */ + +SYSCTL_NODE(_vfs_zfs, OID_AUTO, zfetch, CTLFLAG_RW, 0, "ZFS ZFETCH (LEGACY)"); + +extern uint32_t zfetch_max_distance; + +SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_distance, + CTLFLAG_RWTUN, &zfetch_max_distance, 0, + "Max bytes to prefetch per stream (LEGACY)"); + +extern uint32_t zfetch_max_idistance; + +SYSCTL_UINT(_vfs_zfs_zfetch, OID_AUTO, max_idistance, + CTLFLAG_RWTUN, &zfetch_max_idistance, 0, + "Max bytes to prefetch indirects for per stream (LEGACY)"); + +/* dsl_pool.c */ + +/* dnode.c */ + +/* dsl_scan.c */ + /* metaslab.c */ int @@ -313,6 +514,19 @@ SYSCTL_UINT(_vfs_zfs, OID_AUTO, condense_pct, "Condense on-disk spacemap when it is more than this many percents" " of in-memory counterpart"); +extern uint_t zfs_remove_max_segment; + +SYSCTL_UINT(_vfs_zfs, OID_AUTO, remove_max_segment, + CTLFLAG_RWTUN, &zfs_remove_max_segment, 0, + "Largest contiguous segment ZFS will attempt to allocate when removing" + " a device"); + +extern int zfs_removal_suspend_progress; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, removal_suspend_progress, + CTLFLAG_RWTUN, &zfs_removal_suspend_progress, 0, + "Ensures certain actions can happen while in the middle of a removal"); + /* * Minimum size which forces the dynamic allocator to change * it's allocation strategy. Once the space map cannot satisfy @@ -535,6 +749,12 @@ param_set_min_auto_ashift(SYSCTL_HANDLER_ARGS) return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, min_auto_ashift, + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + &zfs_vdev_min_auto_ashift, sizeof (zfs_vdev_min_auto_ashift), + param_set_min_auto_ashift, "IU", + "Min ashift used when creating new top-level vdev. (LEGACY)"); + int param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) { @@ -554,6 +774,13 @@ param_set_max_auto_ashift(SYSCTL_HANDLER_ARGS) return (0); } +SYSCTL_PROC(_vfs_zfs, OID_AUTO, max_auto_ashift, + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + &zfs_vdev_max_auto_ashift, sizeof (zfs_vdev_max_auto_ashift), + param_set_max_auto_ashift, "IU", + "Max ashift used when optimizing for logical -> physical sector size on" + " new top-level vdevs. (LEGACY)"); + /* * Since the DTL space map of a vdev is not expected to have a lot of * entries, we default its block size to 4K. @@ -575,6 +802,23 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, standard_sm_blksz, CTLFLAG_RDTUN, &zfs_vdev_standard_sm_blksz, 0, "Block size for standard space map. Power of 2 greater than 4096."); +extern int vdev_validate_skip; + +SYSCTL_INT(_vfs_zfs, OID_AUTO, validate_skip, + CTLFLAG_RDTUN, &vdev_validate_skip, 0, + "Enable to bypass vdev_validate()."); + +/* vdev_mirror.c */ + +/* vdev_queue.c */ + +extern uint_t zfs_vdev_max_active; + +SYSCTL_UINT(_vfs_zfs, OID_AUTO, top_maxinflight, + CTLFLAG_RWTUN, &zfs_vdev_max_active, 0, + "The maximum number of I/Os of all types active for each device." + " (LEGACY)"); + /* zio.c */ SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, exclude_metadata, diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 591e2dade59e..b677f90280d7 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -486,13 +486,13 @@ static taskq_t *arc_flush_taskq; static uint_t zfs_arc_evict_threads = 0; /* The 7 states: */ -static arc_state_t ARC_anon; -/* */ arc_state_t ARC_mru; -static arc_state_t ARC_mru_ghost; -/* */ arc_state_t ARC_mfu; -static arc_state_t ARC_mfu_ghost; -static arc_state_t ARC_l2c_only; -static arc_state_t ARC_uncached; +arc_state_t ARC_anon; +arc_state_t ARC_mru; +arc_state_t ARC_mru_ghost; +arc_state_t ARC_mfu; +arc_state_t ARC_mfu_ghost; +arc_state_t ARC_l2c_only; +arc_state_t ARC_uncached; arc_stats_t arc_stats = { { "hits", KSTAT_DATA_UINT64 }, @@ -832,15 +832,15 @@ typedef struct arc_async_flush { #define L2ARC_FEED_TYPES 4 /* L2ARC Performance Tunables */ -static uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ -static uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ -static uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ -static uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; -static uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ -static uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ -static int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ -static int l2arc_feed_again = B_TRUE; /* turbo warmup */ -static int l2arc_norw = B_FALSE; /* no reads during writes */ +uint64_t l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */ +uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */ +uint64_t l2arc_headroom = L2ARC_HEADROOM; /* # of dev writes */ +uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST; +uint64_t l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */ +uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */ +int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */ +int l2arc_feed_again = B_TRUE; /* turbo warmup */ +int l2arc_norw = B_FALSE; /* no reads during writes */ static uint_t l2arc_meta_percent = 33; /* limit on headers size */ /* diff --git a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c index 3d3a9c713568..51165d0bf723 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c +++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c @@ -57,19 +57,19 @@ static unsigned int zfetch_max_sec_reap = 2; /* min bytes to prefetch per stream (default 2MB) */ static unsigned int zfetch_min_distance = 2 * 1024 * 1024; /* max bytes to prefetch per stream (default 8MB) */ -static unsigned int zfetch_max_distance = 8 * 1024 * 1024; +unsigned int zfetch_max_distance = 8 * 1024 * 1024; #else /* min bytes to prefetch per stream (default 4MB) */ static unsigned int zfetch_min_distance = 4 * 1024 * 1024; /* max bytes to prefetch per stream (default 64MB) */ -static unsigned int zfetch_max_distance = 64 * 1024 * 1024; +unsigned int zfetch_max_distance = 64 * 1024 * 1024; #endif /* max bytes to prefetch indirects for per stream (default 128MB) */ -static unsigned int zfetch_max_idistance = 128 * 1024 * 1024; +unsigned int zfetch_max_idistance = 128 * 1024 * 1024; /* max request reorder distance within a stream (default 16MB) */ -static unsigned int zfetch_max_reorder = 16 * 1024 * 1024; +unsigned int zfetch_max_reorder = 16 * 1024 * 1024; /* Max log2 fraction of holes in a stream */ -static unsigned int zfetch_hole_shift = 2; +unsigned int zfetch_hole_shift = 2; typedef struct zfetch_stats { kstat_named_t zfetchstat_hits; diff --git a/sys/contrib/openzfs/module/zfs/vdev.c b/sys/contrib/openzfs/module/zfs/vdev.c index 654e034de9e1..c8d7280387a2 100644 --- a/sys/contrib/openzfs/module/zfs/vdev.c +++ b/sys/contrib/openzfs/module/zfs/vdev.c @@ -100,7 +100,7 @@ static uint_t zfs_vdev_default_ms_shift = 29; /* upper limit for metaslab size (16G) */ static uint_t zfs_vdev_max_ms_shift = 34; -static int vdev_validate_skip = B_FALSE; +int vdev_validate_skip = B_FALSE; /* * Since the DTL space map of a vdev is not expected to have a lot of diff --git a/sys/contrib/openzfs/module/zfs/vdev_queue.c b/sys/contrib/openzfs/module/zfs/vdev_queue.c index e69e5598939e..c12713b107bf 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_queue.c +++ b/sys/contrib/openzfs/module/zfs/vdev_queue.c @@ -122,7 +122,7 @@ * The maximum number of i/os active to each device. Ideally, this will be >= * the sum of each queue's max_active. */ -static uint_t zfs_vdev_max_active = 1000; +uint_t zfs_vdev_max_active = 1000; /* * Per-queue limits on the number of i/os active to each device. If the diff --git a/sys/contrib/openzfs/module/zfs/vdev_removal.c b/sys/contrib/openzfs/module/zfs/vdev_removal.c index 2ce0121324ad..2f7a739da241 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_removal.c +++ b/sys/contrib/openzfs/module/zfs/vdev_removal.c @@ -105,7 +105,7 @@ static const uint_t zfs_remove_max_copy_bytes = 64 * 1024 * 1024; * * See also the accessor function spa_remove_max_segment(). */ -static uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE; +uint_t zfs_remove_max_segment = SPA_MAXBLOCKSIZE; /* * Ignore hard IO errors during device removal. When set if a device @@ -137,7 +137,7 @@ uint_t vdev_removal_max_span = 32 * 1024; * This is used by the test suite so that it can ensure that certain * actions happen while in the middle of a removal. */ -static int zfs_removal_suspend_progress = 0; +int zfs_removal_suspend_progress = 0; #define VDEV_REMOVAL_ZAP_OBJS "lzap" diff --git a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg index 54b50c9dba77..127ea188f17f 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg +++ b/sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg @@ -76,8 +76,8 @@ READ_SIT_OUT_SECS vdev.read_sit_out_secs vdev_read_sit_out_secs SIT_OUT_CHECK_INTERVAL vdev.raidz_outlier_check_interval_ms vdev_raidz_outlier_check_interval_ms SIT_OUT_INSENSITIVITY vdev.raidz_outlier_insensitivity vdev_raidz_outlier_insensitivity REBUILD_SCRUB_ENABLED rebuild_scrub_enabled zfs_rebuild_scrub_enabled -REMOVAL_SUSPEND_PROGRESS vdev.removal_suspend_progress zfs_removal_suspend_progress -REMOVE_MAX_SEGMENT vdev.remove_max_segment zfs_remove_max_segment +REMOVAL_SUSPEND_PROGRESS removal_suspend_progress zfs_removal_suspend_progress +REMOVE_MAX_SEGMENT remove_max_segment zfs_remove_max_segment RESILVER_MIN_TIME_MS resilver_min_time_ms zfs_resilver_min_time_ms RESILVER_DEFER_PERCENT resilver_defer_percent zfs_resilver_defer_percent SCAN_LEGACY scan_legacy zfs_scan_legacy diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 52f9e12f8f9a..52e9fcbbebcd 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -463,13 +463,13 @@ static __inline void nvme_completion_poll(struct nvme_completion_poll_status *status) { int timeout = ticks + 10 * hz; - sbintime_t delta_t = SBT_1US; + sbintime_t delta = SBT_1US; while (!atomic_load_acq_int(&status->done)) { if (timeout - ticks < 0) panic("NVME polled command failed to complete within 10s."); - pause_sbt("nvme", delta_t, 0, C_PREL(1)); - delta_t = min(SBT_1MS, delta_t * 3 / 2); + pause_sbt("nvme", delta, 0, C_PREL(1)); + delta = min(SBT_1MS, delta + delta / 2); } } diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index 4c0d0c3aa902..1e4236507fa4 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -122,13 +122,13 @@ struct g_part_alias_list { { "ntfs", G_PART_ALIAS_MS_NTFS }, { "openbsd-data", G_PART_ALIAS_OPENBSD_DATA }, { "prep-boot", G_PART_ALIAS_PREP_BOOT }, - { "solaris-boot", G_PART_ALIAS_SOLARIS_BOOT }, - { "solaris-root", G_PART_ALIAS_SOLARIS_ROOT }, - { "solaris-swap", G_PART_ALIAS_SOLARIS_SWAP }, - { "solaris-backup", G_PART_ALIAS_SOLARIS_BACKUP }, - { "solaris-var", G_PART_ALIAS_SOLARIS_VAR }, - { "solaris-home", G_PART_ALIAS_SOLARIS_HOME }, - { "solaris-altsec", G_PART_ALIAS_SOLARIS_ALTSEC }, + { "solaris-boot", G_PART_ALIAS_SOLARIS_BOOT }, + { "solaris-root", G_PART_ALIAS_SOLARIS_ROOT }, + { "solaris-swap", G_PART_ALIAS_SOLARIS_SWAP }, + { "solaris-backup", G_PART_ALIAS_SOLARIS_BACKUP }, + { "solaris-var", G_PART_ALIAS_SOLARIS_VAR }, + { "solaris-home", G_PART_ALIAS_SOLARIS_HOME }, + { "solaris-altsec", G_PART_ALIAS_SOLARIS_ALTSEC }, { "solaris-reserved", G_PART_ALIAS_SOLARIS_RESERVED }, { "u-boot-env", G_PART_ALIAS_U_BOOT_ENV }, { "vmware-reserved", G_PART_ALIAS_VMRESERVED }, diff --git a/sys/net80211/ieee80211.c b/sys/net80211/ieee80211.c index 2b7cf635b9f5..1299f86ebdc7 100644 --- a/sys/net80211/ieee80211.c +++ b/sys/net80211/ieee80211.c @@ -2689,13 +2689,18 @@ ieee80211_channel_type_char(const struct ieee80211_channel *c) return 'f'; } -/* - * Determine whether the given key in the given VAP is a global key. +/** + * @brief Determine whether the given key in the given VAP is a global key. + * * (key index 0..3, shared between all stations on a VAP.) * * This is either a WEP key or a GROUP key. * * Note this will NOT return true if it is a IGTK key. + * + * @param vap the current VAP + * @param key ieee80211_key to use/check + * @returns true if it's a global/WEP key, false otherwise */ bool ieee80211_is_key_global(const struct ieee80211vap *vap, @@ -2705,8 +2710,23 @@ ieee80211_is_key_global(const struct ieee80211vap *vap, key < &vap->iv_nw_keys[IEEE80211_WEP_NKID]); } -/* - * Determine whether the given key in the given VAP is a unicast key. +/** + * @brief Determine whether the given key in the given VAP is a unicast key. + * + * This only returns true if it's a unicast key. + * + * Note: For now net80211 only supports a single unicast key, stored in + * an ieee80211_node entry. + * + * Code should use this to know if it's a unicast key and then call + * ieee80211_crypto_get_keyid() to get the 802.11 key ID (0..3 for + * unicast/global keys, 4..5 for IGTK keys.) Since the unicast + * and global key indexes "overlap", callers will need to check + * both the type and id. + * + * @param vap the current VAP + * @param key ieee80211_key to use/check + * @returns true if the key is a unicast key, false if it is not */ bool ieee80211_is_key_unicast(const struct ieee80211vap *vap, diff --git a/sys/net80211/ieee80211_crypto.c b/sys/net80211/ieee80211_crypto.c index 1e63ca46f28f..566f0b2e0c23 100644 --- a/sys/net80211/ieee80211_crypto.c +++ b/sys/net80211/ieee80211_crypto.c @@ -611,11 +611,15 @@ ieee80211_crypto_setkey(struct ieee80211vap *vap, struct ieee80211_key *key) return dev_key_set(vap, key); } -/* - * Return index if the key is a WEP key (0..3); -1 otherwise. +/** + * @brief Return index if the key is a WEP key (0..3); -1 otherwise. * * This is different to "get_keyid" which defaults to returning * 0 for unicast keys; it assumes that it won't be used for WEP. + * + * @param vap the current VAP + * @param k ieee80211_key to check + * @returns 0..3 if it's a global/WEP key, -1 otherwise. */ int ieee80211_crypto_get_key_wepidx(const struct ieee80211vap *vap, @@ -628,8 +632,18 @@ ieee80211_crypto_get_key_wepidx(const struct ieee80211vap *vap, return (-1); } -/* - * Note: only supports a single unicast key (0). +/** + * @brief Return the index of a unicast, global or IGTK key. + * + * Return the index of a key. For unicast keys the index is 0..1. + * For global/WEP keys it's 0..3. For IGTK keys its 4..5. + * + * TODO: support >1 unicast key + * TODO: support IGTK keys + * + * @param vap the current VAP + * @param k ieee80211_key to check + * @returns 0..3 for a WEP/global key, 0..1 for unicast key, 4..5 for IGTK key */ uint8_t ieee80211_crypto_get_keyid(struct ieee80211vap *vap, struct ieee80211_key *k) @@ -641,6 +655,19 @@ ieee80211_crypto_get_keyid(struct ieee80211vap *vap, struct ieee80211_key *k) return (0); } +/** + * @param Return the key to use for encrypting an mbuf frame to a node + * + * This routine chooses a suitable key used to encrypt the given frame with. + * It doesn't do the encryption; it only chooses the key. If a key is not + * available then the routine will return NULL. + * + * It's up to the caller to enforce whether a key is absolutely required or not. + * + * @param ni The ieee80211_node to send the frame to + * @param m the mbuf to encrypt + * @returns the ieee80211_key to encrypt with, or NULL if there's no suitable key + */ struct ieee80211_key * ieee80211_crypto_get_txkey(struct ieee80211_node *ni, struct mbuf *m) { @@ -676,8 +703,28 @@ ieee80211_crypto_get_txkey(struct ieee80211_node *ni, struct mbuf *m) return &ni->ni_ucastkey; } -/* - * Add privacy headers appropriate for the specified key. +/** + * @brief Privacy encapsulate and encrypt the given mbuf. + * + * This routine handles the mechanics of encryption - expanding the + * mbuf to add privacy headers, IV, ICV, MIC, MMIC, and then encrypts + * the given mbuf if required. + * + * This should be called by the driver in its TX path as part of + * encapsulation before passing frames to the hardware/firmware + * queues. + * + * Drivers/hardware which does its own entirely offload path + * should still call this for completeness - it indicates to the + * driver that the frame itself should be encrypted. + * + * The driver should have set capability bits in the attach / + * key allocation path to disable various encapsulation/encryption + * features. + * + * @param ni ieee80211_node for this frame + * @param mbuf mbuf to modify + * @returns the key used if the frame is to be encrypted, NULL otherwise */ struct ieee80211_key * ieee80211_crypto_encap(struct ieee80211_node *ni, struct mbuf *m) @@ -693,9 +740,31 @@ ieee80211_crypto_encap(struct ieee80211_node *ni, struct mbuf *m) return NULL; } -/* - * Validate and strip privacy headers (and trailer) for a - * received frame that has the WEP/Privacy bit set. +/** + * @brief Decapsulate and validate an encrypted frame. + * + * This handles an encrypted frame (one with the privacy bit set.) + * It also obeys the key / config / receive packet flags for how + * the driver says its already been processed. + * + * Unlike ieee80211_crypto_encap(), this isn't called in the driver. + * Instead, drivers passed the potentially decrypted frame - fully, + * partial, or not at all - and net80211 will call this as appropriate. + * + * This handles NICs (like ath(4)) which have a variable size between + * the 802.11 header and 802.11 payload due to DMA alignment / encryption + * engine concerns. + * + * If the frame was decrypted and validated successfully then 1 is returned + * and the mbuf can be treated as an 802.11 frame. If it is not decrypted + * successfully or it was decrypted but failed validation/checks, then + * 0 is returned. + * + * @param ni ieee80211_node for received frame + * @param m mbuf frame to receive + * @param hdrlen length of the 802.11 header, including trailing null bytes + * @param key pointer to ieee80211_key that will be set if appropriate + * @returns 0 if the frame wasn't decrypted/validated, 1 if decrypted/validated. */ int ieee80211_crypto_decap(struct ieee80211_node *ni, struct mbuf *m, int hdrlen, diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c index 64efa4bf060f..9b5baf115855 100644 --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -1475,10 +1475,11 @@ tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb) } /* create sequence number */ - lc->lro_mbuf_data[lc->lro_mbuf_count].seq = - (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | - (((uint64_t)mb->m_pkthdr.flowid) << 24) | - ((uint64_t)lc->lro_mbuf_count); + lc->lro_mbuf_data[lc->lro_mbuf_count].seq = lc->lro_mbuf_count; + if (M_HASHTYPE_ISHASH(mb)) + lc->lro_mbuf_data[lc->lro_mbuf_count].seq |= + (((uint64_t)M_HASHTYPE_GET(mb)) << 56) | + (((uint64_t)mb->m_pkthdr.flowid) << 24); /* enter mbuf */ lc->lro_mbuf_data[lc->lro_mbuf_count].mb = mb; diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 679b2e20e88b..b80b5cc781f7 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -4009,21 +4009,15 @@ restart: /* * Use the keg's policy if upper layers haven't already specified a * domain (as happens with first-touch zones). - * - * To avoid races we run the iterator with the keg lock held, but that - * means that we cannot allow the vm_domainset layer to sleep. Thus, - * clear M_WAITOK and handle low memory conditions locally. */ rr = rdomain == UMA_ANYDOMAIN; + aflags = flags; if (rr) { - aflags = (flags & ~M_WAITOK) | M_NOWAIT; if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &aflags) != 0) return (NULL); - } else { - aflags = flags; + } else domain = rdomain; - } for (;;) { slab = keg_fetch_free_slab(keg, domain, rr, flags); @@ -4053,13 +4047,8 @@ restart: if ((flags & M_WAITOK) == 0) break; vm_wait_domain(domain); - } else if (vm_domainset_iter_policy(&di, &domain) != 0) { - if ((flags & M_WAITOK) != 0) { - vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0); - goto restart; - } + } else if (vm_domainset_iter_policy(&di, &domain) != 0) break; - } } /* @@ -5245,7 +5234,7 @@ uma_prealloc(uma_zone_t zone, int items) KEG_GET(zone, keg); slabs = howmany(items, keg->uk_ipers); while (slabs-- > 0) { - aflags = M_NOWAIT; + aflags = M_WAITOK; if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &aflags) != 0) panic("%s: Domainset is empty", __func__); @@ -5266,7 +5255,8 @@ uma_prealloc(uma_zone_t zone, int items) break; } if (vm_domainset_iter_policy(&di, &domain) != 0) - vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask, 0); + panic("%s: Cannot allocate from any domain", + __func__); } } } diff --git a/sys/x86/x86/mca.c b/sys/x86/x86/mca.c index 4b40f343ac90..735efe307215 100644 --- a/sys/x86/x86/mca.c +++ b/sys/x86/x86/mca.c @@ -46,9 +46,11 @@ #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/sbuf.h> #include <sys/sched.h> #include <sys/smp.h> #include <sys/sysctl.h> +#include <sys/syslog.h> #include <sys/systm.h> #include <sys/taskqueue.h> #include <machine/intr_machdep.h> @@ -135,6 +137,11 @@ SYSCTL_INT(_hw_mca, OID_AUTO, fake_bank, CTLFLAG_RW, "Bank to use for artificial MCAs (testing purpose only)"); #endif +static bool mca_uselog = false; +SYSCTL_BOOL(_hw_mca, OID_AUTO, uselog, CTLFLAG_RWTUN, &mca_uselog, 0, + "Should the system send non-fatal machine check errors to the log " + "(instead of the console)?"); + static STAILQ_HEAD(, mca_internal) mca_freelist; static int mca_freecount; static STAILQ_HEAD(, mca_internal) mca_records; @@ -147,12 +154,40 @@ static struct timeout_task mca_scan_task; static struct mtx mca_lock; static bool mca_startup_done = false; -/* Statistics on number of MCA events by type, updated atomically. */ +/* Static buffer to compose messages while in an interrupt context. */ +static char mca_msg_buf[1024]; +static struct mtx mca_msg_buf_lock; + +/* Statistics on number of MCA events by type, updated with the mca_lock. */ static uint64_t mca_stats[MCA_T_COUNT]; SYSCTL_OPAQUE(_hw_mca, OID_AUTO, stats, CTLFLAG_RD | CTLFLAG_SKIP, mca_stats, MCA_T_COUNT * sizeof(mca_stats[0]), "S", "Array of MCA events by type"); +/* Variables to track and control message rate limiting. */ +static struct timeval mca_last_log_time; +static struct timeval mca_log_interval; +static int mca_log_skipped; + +static int +sysctl_mca_log_interval(SYSCTL_HANDLER_ARGS) +{ + int error; + u_int val; + + val = mca_log_interval.tv_sec; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + mca_log_interval.tv_sec = val; + return (0); +} +SYSCTL_PROC(_hw_mca, OID_AUTO, log_interval, + CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, &mca_log_interval, 0, + sysctl_mca_log_interval, "IU", + "Minimum number of seconds between logging correctable MCAs" + " (0 = no limit)"); + static unsigned int mca_ia32_ctl_reg(int bank) { @@ -448,98 +483,111 @@ mca_mute(const struct mca_record *rec) /* Dump details about a single machine check. */ static void -mca_log(const struct mca_record *rec) +mca_log(enum scan_mode mode, const struct mca_record *rec, bool fatal) { + int error, numskipped; uint16_t mca_error; enum mca_stat_types event_type; + struct sbuf sb; + bool uncor, using_shared_buf; if (mca_mute(rec)) return; - if (!log_corrected && (rec->mr_status & MC_STATUS_UC) == 0 && - (!tes_supported(rec->mr_mcg_cap) || + uncor = (rec->mr_status & MC_STATUS_UC) != 0; + + if (!log_corrected && !uncor && (!tes_supported(rec->mr_mcg_cap) || ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) != 0x2)) return; - printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, + /* Try to use an allocated buffer when not in an interrupt context. */ + if (mode == POLLED && sbuf_new(&sb, NULL, 512, SBUF_AUTOEXTEND) != NULL) + using_shared_buf = false; + else { + using_shared_buf = true; + mtx_lock_spin(&mca_msg_buf_lock); + sbuf_new(&sb, mca_msg_buf, sizeof(mca_msg_buf), SBUF_FIXEDLEN); + } + + sbuf_printf(&sb, "MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank, (long long)rec->mr_status); - printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n", + sbuf_printf(&sb, "MCA: Global Cap 0x%016llx, Status 0x%016llx\n", (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status); - printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor, - rec->mr_cpu_id, rec->mr_apic_id); - printf("MCA: CPU %d ", rec->mr_cpu); + sbuf_printf(&sb, "MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", + cpu_vendor, rec->mr_cpu_id, rec->mr_apic_id); + sbuf_printf(&sb, "MCA: CPU %d ", rec->mr_cpu); if (rec->mr_status & MC_STATUS_UC) - printf("UNCOR "); + sbuf_printf(&sb, "UNCOR "); else { - printf("COR "); + sbuf_printf(&sb, "COR "); if (cmci_supported(rec->mr_mcg_cap)) - printf("(%lld) ", ((long long)rec->mr_status & + sbuf_printf(&sb, "(%lld) ", ((long long)rec->mr_status & MC_STATUS_COR_COUNT) >> 38); if (tes_supported(rec->mr_mcg_cap)) { switch ((rec->mr_status & MC_STATUS_TES_STATUS) >> 53) { case 0x1: - printf("(Green) "); + sbuf_printf(&sb, "(Green) "); break; case 0x2: - printf("(Yellow) "); + sbuf_printf(&sb, "(Yellow) "); break; } } } if (rec->mr_status & MC_STATUS_EN) - printf("EN "); + sbuf_printf(&sb, "EN "); if (rec->mr_status & MC_STATUS_PCC) - printf("PCC "); + sbuf_printf(&sb, "PCC "); if (ser_supported(rec->mr_mcg_cap)) { if (rec->mr_status & MC_STATUS_S) - printf("S "); + sbuf_printf(&sb, "S "); if (rec->mr_status & MC_STATUS_AR) - printf("AR "); + sbuf_printf(&sb, "AR "); } if (rec->mr_status & MC_STATUS_OVER) - printf("OVER "); + sbuf_printf(&sb, "OVER "); mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; event_type = MCA_T_COUNT; switch (mca_error) { /* Simple error codes. */ case 0x0000: - printf("no error"); + sbuf_printf(&sb, "no error"); event_type = MCA_T_NONE; break; case 0x0001: - printf("unclassified error"); + sbuf_printf(&sb, "unclassified error"); event_type = MCA_T_UNCLASSIFIED; break; case 0x0002: - printf("ucode ROM parity error"); + sbuf_printf(&sb, "ucode ROM parity error"); event_type = MCA_T_UCODE_ROM_PARITY; break; case 0x0003: - printf("external error"); + sbuf_printf(&sb, "external error"); event_type = MCA_T_EXTERNAL; break; case 0x0004: - printf("FRC error"); + sbuf_printf(&sb, "FRC error"); event_type = MCA_T_FRC; break; case 0x0005: - printf("internal parity error"); + sbuf_printf(&sb, "internal parity error"); event_type = MCA_T_INTERNAL_PARITY; break; case 0x0006: - printf("SMM handler code access violation"); + sbuf_printf(&sb, "SMM handler code access violation"); event_type = MCA_T_SMM_HANDLER; break; case 0x0400: - printf("internal timer error"); + sbuf_printf(&sb, "internal timer error"); event_type = MCA_T_INTERNAL_TIMER; break; case 0x0e0b: - printf("generic I/O error"); + sbuf_printf(&sb, "generic I/O error"); event_type = MCA_T_GENERIC_IO; if (rec->mr_cpu_vendor_id == CPU_VENDOR_INTEL && (rec->mr_status & MC_STATUS_MISCV)) { - printf(" (pci%d:%d:%d:%d)", + sbuf_printf(&sb, " (pci%d:%d:%d:%d)", (int)((rec->mr_misc & MC_MISC_PCIE_SEG) >> 32), (int)((rec->mr_misc & MC_MISC_PCIE_BUS) >> 24), (int)((rec->mr_misc & MC_MISC_PCIE_SLOT) >> 19), @@ -548,7 +596,8 @@ mca_log(const struct mca_record *rec) break; default: if ((mca_error & 0xfc00) == 0x0400) { - printf("internal error %x", mca_error & 0x03ff); + sbuf_printf(&sb, "internal error %x", + mca_error & 0x03ff); event_type = MCA_T_INTERNAL; break; } @@ -557,14 +606,16 @@ mca_log(const struct mca_record *rec) /* Memory hierarchy error. */ if ((mca_error & 0xeffc) == 0x000c) { - printf("%s memory error", mca_error_level(mca_error)); + sbuf_printf(&sb, "%s memory error", + mca_error_level(mca_error)); event_type = MCA_T_MEMORY; break; } /* TLB error. */ if ((mca_error & 0xeff0) == 0x0010) { - printf("%sTLB %s error", mca_error_ttype(mca_error), + sbuf_printf(&sb, "%sTLB %s error", + mca_error_ttype(mca_error), mca_error_level(mca_error)); event_type = MCA_T_TLB; break; @@ -572,19 +623,19 @@ mca_log(const struct mca_record *rec) /* Memory controller error. */ if ((mca_error & 0xef80) == 0x0080) { - printf("%s channel ", mca_error_mmtype(mca_error, - &event_type)); + sbuf_printf(&sb, "%s channel ", + mca_error_mmtype(mca_error, &event_type)); if ((mca_error & 0x000f) != 0x000f) - printf("%d", mca_error & 0x000f); + sbuf_printf(&sb, "%d", mca_error & 0x000f); else - printf("??"); - printf(" memory error"); + sbuf_printf(&sb, "??"); + sbuf_printf(&sb, " memory error"); break; } /* Cache error. */ if ((mca_error & 0xef00) == 0x0100) { - printf("%sCACHE %s %s error", + sbuf_printf(&sb, "%sCACHE %s %s error", mca_error_ttype(mca_error), mca_error_level(mca_error), mca_error_request(mca_error)); @@ -594,77 +645,129 @@ mca_log(const struct mca_record *rec) /* Extended memory error. */ if ((mca_error & 0xef80) == 0x0280) { - printf("%s channel ", mca_error_mmtype(mca_error, - &event_type)); + sbuf_printf(&sb, "%s channel ", + mca_error_mmtype(mca_error, &event_type)); if ((mca_error & 0x000f) != 0x000f) - printf("%d", mca_error & 0x000f); + sbuf_printf(&sb, "%d", mca_error & 0x000f); else - printf("??"); - printf(" extended memory error"); + sbuf_printf(&sb, "??"); + sbuf_printf(&sb, " extended memory error"); break; } /* Bus and/or Interconnect error. */ if ((mca_error & 0xe800) == 0x0800) { - printf("BUS%s ", mca_error_level(mca_error)); + sbuf_printf(&sb, "BUS%s ", mca_error_level(mca_error)); event_type = MCA_T_BUS; switch ((mca_error & 0x0600) >> 9) { case 0: - printf("Source"); + sbuf_printf(&sb, "Source"); break; case 1: - printf("Responder"); + sbuf_printf(&sb, "Responder"); break; case 2: - printf("Observer"); + sbuf_printf(&sb, "Observer"); break; default: - printf("???"); + sbuf_printf(&sb, "???"); break; } - printf(" %s ", mca_error_request(mca_error)); + sbuf_printf(&sb, " %s ", mca_error_request(mca_error)); switch ((mca_error & 0x000c) >> 2) { case 0: - printf("Memory"); + sbuf_printf(&sb, "Memory"); break; case 2: - printf("I/O"); + sbuf_printf(&sb, "I/O"); break; case 3: - printf("Other"); + sbuf_printf(&sb, "Other"); break; default: - printf("???"); + sbuf_printf(&sb, "???"); break; } if (mca_error & 0x0100) - printf(" timed out"); + sbuf_printf(&sb, " timed out"); break; } - printf("unknown error %x", mca_error); + sbuf_printf(&sb, "unknown error %x", mca_error); event_type = MCA_T_UNKNOWN; break; } - printf("\n"); + sbuf_printf(&sb, "\n"); if (rec->mr_status & MC_STATUS_ADDRV) { - printf("MCA: Address 0x%llx", (long long)rec->mr_addr); + sbuf_printf(&sb, "MCA: Address 0x%llx", + (long long)rec->mr_addr); if (ser_supported(rec->mr_mcg_cap) && (rec->mr_status & MC_STATUS_MISCV)) { - printf(" (Mode: %s, LSB: %d)", + sbuf_printf(&sb, " (Mode: %s, LSB: %d)", mca_addres_mode(rec->mr_misc), (int)(rec->mr_misc & MC_MISC_RA_LSB)); } - printf("\n"); + sbuf_printf(&sb, "\n"); } if (rec->mr_status & MC_STATUS_MISCV) - printf("MCA: Misc 0x%llx\n", (long long)rec->mr_misc); + sbuf_printf(&sb, "MCA: Misc 0x%llx\n", (long long)rec->mr_misc); + if (event_type < 0 || event_type >= MCA_T_COUNT) { KASSERT(0, ("%s: invalid event type (%d)", __func__, event_type)); event_type = MCA_T_UNKNOWN; } - atomic_add_64(&mca_stats[event_type], 1); + numskipped = 0; + if (!fatal && !uncor) { + /* + * Update statistics and check the rate limit for + * correctable errors. The rate limit is only applied + * after the system records a reasonable number of errors + * of the same type. The goal is to reduce the impact of + * the system seeing and attempting to log a burst of + * similar errors, which (especially when printed to the + * console) can be expensive. + */ + mtx_lock_spin(&mca_lock); + mca_stats[event_type]++; + if (mca_log_interval.tv_sec > 0 && mca_stats[event_type] > 50 && + ratecheck(&mca_last_log_time, &mca_log_interval) == 0) { + mca_log_skipped++; + mtx_unlock_spin(&mca_lock); + goto done; + } + numskipped = mca_log_skipped; + mca_log_skipped = 0; + mtx_unlock_spin(&mca_lock); + } + + error = sbuf_finish(&sb); + if (fatal || !mca_uselog) { + if (numskipped > 0) + printf("MCA: %d events skipped due to rate limit\n", + numskipped); + if (error) + printf("MCA: error logging message (sbuf error %d)\n", + error); + else + sbuf_putbuf(&sb); + } else { + if (numskipped > 0) + log(LOG_ERR, + "MCA: %d events skipped due to rate limit\n", + numskipped); + if (error) + log(LOG_ERR, + "MCA: error logging message (sbuf error %d)\n", + error); + else + log(uncor ? LOG_CRIT : LOG_ERR, "%s", sbuf_data(&sb)); + } + +done: + sbuf_delete(&sb); + if (using_shared_buf) + mtx_unlock_spin(&mca_msg_buf_lock); } static bool @@ -825,7 +928,7 @@ mca_record_entry(enum scan_mode mode, const struct mca_record *record) if (rec == NULL) { mtx_unlock_spin(&mca_lock); printf("MCA: Unable to allocate space for an event.\n"); - mca_log(record); + mca_log(mode, record, false); return; } STAILQ_REMOVE_HEAD(&mca_freelist, link); @@ -982,7 +1085,7 @@ mca_scan(enum scan_mode mode, bool *recoverablep) if (*recoverablep) mca_record_entry(mode, &rec); else - mca_log(&rec); + mca_log(mode, &rec, true); } #ifdef DEV_APIC @@ -1066,7 +1169,7 @@ mca_process_records(enum scan_mode mode) mtx_unlock_spin(&mca_lock); STAILQ_FOREACH(mca, &tmplist, link) - mca_log(&mca->rec); + mca_log(mode, &mca->rec, false); mtx_lock_spin(&mca_lock); while ((mca = STAILQ_FIRST(&tmplist)) != NULL) { @@ -1231,6 +1334,7 @@ mca_setup(uint64_t mcg_cap) mca_banks = mcg_cap & MCG_CAP_COUNT; mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); + mtx_init(&mca_msg_buf_lock, "mca_msg_buf", NULL, MTX_SPIN); STAILQ_INIT(&mca_records); STAILQ_INIT(&mca_pending); mca_tq = taskqueue_create_fast("mca", M_WAITOK, |