diff options
Diffstat (limited to 'lib/libc/tests/regex/multibyte.sh')
| -rwxr-xr-x | lib/libc/tests/regex/multibyte.sh | 93 | 
1 files changed, 93 insertions, 0 deletions
| diff --git a/lib/libc/tests/regex/multibyte.sh b/lib/libc/tests/regex/multibyte.sh new file mode 100755 index 000000000000..18323f500a2b --- /dev/null +++ b/lib/libc/tests/regex/multibyte.sh @@ -0,0 +1,93 @@ +atf_test_case bmpat +bmpat_head() +{ +	atf_set "descr" "Check matching multibyte characters (PR153502)" +} +bmpat_body() +{ +	export LC_CTYPE="C.UTF-8" + +	printf 'é' | atf_check -o "inline:é" \ +	    sed -ne '/^.$/p' +	printf 'éé' | atf_check -o "inline:éé" \ +	    sed -ne '/^..$/p' +	printf 'aéa' | atf_check -o "inline:aéa" \ +	    sed -ne '/a.a/p' +	printf 'aéa'| atf_check -o "inline:aéa" \ +	    sed -ne '/a.*a/p' +	printf 'aaéaa' | atf_check -o "inline:aaéaa" \ +	    sed -ne '/aa.aa/p' +	printf 'aéaéa' | atf_check -o "inline:aéaéa" \ +	    sed -ne '/a.a.a/p' +	printf 'éa' | atf_check -o "inline:éa" \ +	    sed -ne '/.a/p' +	printf 'aéaa' | atf_check -o "inline:aéaa" \ +	    sed -ne '/a.aa/p' +	printf 'éaé' | atf_check -o "inline:éaé" \ +	    sed -ne '/.a./p' +} + +atf_test_case icase +icase_head() +{ +	atf_set "descr" "Check case-insensitive matching for characters 128-255" +} +icase_body() +{ +	export LC_CTYPE="C.UTF-8" + +	a=$(printf '\302\265\n')	# U+00B5 +	b=$(printf '\316\234\n')	# U+039C +	c=$(printf '\316\274\n')	# U+03BC + +	echo $b | atf_check -o "inline:$b\n" sed -ne "/$a/Ip" +	echo $c | atf_check -o "inline:$c\n" sed -ne "/$a/Ip" +} + +atf_test_case mbset cleanup +mbset_head() +{ +	atf_set "descr" "Check multibyte sets matching" +} +mbset_body() +{ +	export LC_CTYPE="C.UTF-8" + +	# This involved an erroneously implemented optimization which reduces +	# single-element sets to an exact match with a single codepoint. +	# Match sets record small-codepoint characters in a bitmap and +	# large-codepoint characters in an array; the optimization would falsely +	# trigger if either the bitmap or the array was a singleton, ignoring +	# the members of the other side of the set. +	# +	# To exercise this, we construct sets which have one member of one side +	# and one or more of the other, and verify that all members can be +	# found. +	printf "a" > mbset; atf_check -o not-empty sed -ne '/[aà]/p' mbset +	printf "à" > mbset; atf_check -o not-empty sed -ne '/[aà]/p' mbset +	printf "a" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset +	printf "à" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset +	printf "á" > mbset; atf_check -o not-empty sed -ne '/[aàá]/p' mbset +	printf "à" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset +	printf "a" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset +	printf "b" > mbset; atf_check -o not-empty sed -ne '/[abà]/p' mbset +	printf "a" > mbset; atf_check -o not-empty sed -Ene '/[aà]/p' mbset +	printf "à" > mbset; atf_check -o not-empty sed -Ene '/[aà]/p' mbset +	printf "a" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset +	printf "à" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset +	printf "á" > mbset; atf_check -o not-empty sed -Ene '/[aàá]/p' mbset +	printf "à" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset +	printf "a" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset +	printf "b" > mbset; atf_check -o not-empty sed -Ene '/[abà]/p' mbset +} +mbset_cleanup() +{ +	rm -f mbset +} + +atf_init_test_cases() +{ +	atf_add_test_case bmpat +	atf_add_test_case icase +	atf_add_test_case mbset +} | 
