diff options
Diffstat (limited to 'lib/libc/tests/regex/multibyte.sh')
-rwxr-xr-x | lib/libc/tests/regex/multibyte.sh | 93 |
1 files changed, 93 insertions, 0 deletions
diff --git a/lib/libc/tests/regex/multibyte.sh b/lib/libc/tests/regex/multibyte.sh new file mode 100755 index 000000000000..18323f500a2b --- /dev/null +++ b/lib/libc/tests/regex/multibyte.sh @@ -0,0 +1,93 @@ +atf_test_case bmpat +bmpat_head() +{ + atf_set "descr" "Check matching multibyte characters (PR153502)" +} +bmpat_body() +{ + export LC_CTYPE="C.UTF-8" + + printf 'é' | atf_check -o "inline:é" \ + sed -ne '/^.$/p' + printf 'éé' | atf_check -o "inline:éé" \ + sed -ne '/^..$/p' + printf 'aéa' | atf_check -o "inline:aéa" \ + sed -ne '/a.a/p' + printf 'aéa'| atf_check -o "inline:aéa" \ + sed -ne '/a.*a/p' + printf 'aaéaa' | atf_check -o "inline:aaéaa" \ + sed -ne '/aa.aa/p' + printf 'aéaéa' | atf_check -o "inline:aéaéa" \ + sed -ne '/a.a.a/p' + printf 'éa' | atf_check -o "inline:éa" \ + sed -ne '/.a/p' + printf 'aéaa' | atf_check -o "inline:aéaa" \ + sed -ne '/a.aa/p' + printf 'éaé' | atf_check -o "inline:éaé" \ + sed -ne '/.a./p' +} + +atf_test_case icase +icase_head() +{ + atf_set "descr" "Check case-insensitive matching for characters 128-255" +} +icase_body() +{ + export LC_CTYPE="C.UTF-8" + + a=$(printf '\302\265\n') # U+00B5 + b=$(printf '\316\234\n') # U+039C + c=$(printf '\316\274\n') # U+03BC + + echo $b | atf_check -o "inline:$b\n" sed -ne "/$a/Ip" + echo $c | atf_check -o "inline:$c\n" sed -ne "/$a/Ip" +} + +atf_test_case mbset cleanup +mbset_head() +{ + atf_set "descr" "Check multibyte sets matching" +} +mbset_body() +{ + export LC_CTYPE="C.UTF-8" + + # This involved an erroneously implemented optimization which reduces + # single-element sets to an exact match with a single codepoint. + # Match sets record small-codepoint characters in a bitmap and + # large-codepoint characters in an array; the optimization would falsely + # trigger if either the bitmap or the array was a singleton, ignoring + # the members of the other side of the set. + # + # To exercise this, we construct sets which have one member of one side + # and one or more of the other, and verify that all members can be + # found. + printf "a" > mbset; atf_check -o not-empty sed -ne '/[aà]/p' mbset + printf "à" > mbset; atf_check -o not-empty sed -ne '/[aà]/p' mbset + printf "a" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset + printf "à" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset + printf "á" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset + printf "à" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset + printf "a" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset + printf "b" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset + printf "a" > mbset; atf_check -o not-empty sed -Ene '/[aà]/p' mbset + printf "à" > mbset; atf_check -o not-empty sed -Ene '/[aà]/p' mbset + printf "a" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset + printf "à" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset + printf "á" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset + printf "à" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset + printf "a" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset + printf "b" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset +} +mbset_cleanup() +{ + rm -f mbset +} + +atf_init_test_cases() +{ + atf_add_test_case bmpat + atf_add_test_case icase + atf_add_test_case mbset +} |