blob: 3357ae2e35106a8c70f338d123adbd9306f41c25 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
|
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2023, Klara Inc.
#
# DESCRIPTION:
# Verify that delay events from multiple vdevs doesnt degrade
#
# STRATEGY:
# 1. Create a pool with a 3 disk raidz vdev
# 2. Inject slow io errors
# 3. Verify that ZED detects slow I/Os but doesn't degrade any vdevs
#
. $STF_SUITE/include/libtest.shlib
TESTDIR="$TEST_BASE_DIR/zed_slow_io"
VDEV1="$TEST_BASE_DIR/vdevfile1.$$"
VDEV2="$TEST_BASE_DIR/vdevfile2.$$"
VDEV3="$TEST_BASE_DIR/vdevfile3.$$"
VDEV4="$TEST_BASE_DIR/vdevfile4.$$"
VDEVS="$VDEV1 $VDEV2 $VDEV3 $VDEV4"
TESTPOOL="slow_io_pool"
FILEPATH="$TESTDIR/slow_io.testfile"
OLD_SLOW_IO=$(get_tunable ZIO_SLOW_IO_MS)
OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
verify_runnable "both"
function cleanup
{
log_must zinject -c all
# if pool still exists then something failed so log additional info
if poolexists $TESTPOOL ; then
log_note "$(zpool status -s $TESTPOOL)"
echo "=================== zed log search ==================="
grep "Diagnosis Engine" $ZEDLET_DIR/zed.log
destroy_pool $TESTPOOL
fi
log_must zed_stop
log_must rm -f $VDEVS
log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
}
function start_slow_io
{
for vdev in $VDEVS
do
log_must zpool set slow_io_n=4 $TESTPOOL $vdev
log_must zpool set slow_io_t=60 $TESTPOOL $vdev
done
zpool sync
log_must set_tunable64 ZIO_SLOW_IO_MS 10
log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1000
for vdev in $VDEVS
do
log_must zinject -d $vdev -D10:1 $TESTPOOL
done
zpool sync
}
function stop_slow_io
{
log_must set_tunable64 ZIO_SLOW_IO_MS $OLD_SLOW_IO
log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
log_must zinject -c all
}
function multiple_slow_vdevs_test
{
log_must truncate -s 1G $VDEVS
default_raidz_setup_noexit $VDEVS
log_must zpool events -c
log_must zfs set compression=off $TESTPOOL
log_must zfs set primarycache=none $TESTPOOL
log_must zfs set recordsize=4K $TESTPOOL
log_must dd if=/dev/urandom of=$FILEPATH bs=1M count=20
zpool sync
#
# Read the file with slow io injected on the disks
# This will cause multiple errors on each disk to trip ZED SERD
#
# pool: slow_io_pool
# state: ONLINE
# config:
#
# NAME STATE READ WRITE CKSUM SLOW
# slow_io_pool ONLINE 0 0 0 -
# raidz1-0 ONLINE 0 0 0 -
# /var/tmp/vdevfile1.499278 ONLINE 0 0 0 113
# /var/tmp/vdevfile2.499278 ONLINE 0 0 0 109
# /var/tmp/vdevfile3.499278 ONLINE 0 0 0 96
# /var/tmp/vdevfile4.499278 ONLINE 0 0 0 109
#
start_slow_io
dd if=$FILEPATH of=/dev/null bs=1M count=20 2>/dev/null
stop_slow_io
# count events available for processing
typeset -i i=0
typeset -i events=0
while [[ $i -lt 60 ]]; do
events=$(zpool events | grep "ereport\.fs\.zfs.delay" | wc -l)
[[ $events -ge "50" ]] && break
i=$((i+1))
sleep 1
done
log_note "$events delay events found"
if [[ $events -lt "50" ]]; then
log_note "bailing: not enough events to complete the test"
destroy_pool $TESTPOOL
return
fi
#
# give slow ZED a chance to process the delay events
#
typeset -i i=0
typeset -i skips=0
while [[ $i -lt 75 ]]; do
skips=$(grep "retiring case" \
$ZEDLET_DIR/zed.log | wc -l)
[[ $skips -gt "0" ]] && break
i=$((i+1))
sleep 1
done
log_note $skips degrade skips in ZED log after $i seconds
[ $skips -gt "0" ] || log_fail "expecting to see skips"
degrades=$(grep "zpool_vdev_degrade" $ZEDLET_DIR/zed.log | wc -l)
log_note $degrades vdev degrades in ZED log
[ $degrades -eq "0" ] || \
log_fail "expecting no degrade events, found $degrades"
destroy_pool $TESTPOOL
}
log_assert "Test ZED slow io across multiple vdevs"
log_onexit cleanup
log_must zed_events_drain
log_must zed_start
multiple_slow_vdevs_test
log_pass "Test ZED slow io across multiple vdevs"
|