1 files changed, 324 insertions, 143 deletions
diff --git a/libarchive/test/test_archive_string_conversion.c b/libarchive/test/test_archive_string_conversion.c
index 8b833ea6f705d..fea141d4ab0d1 100644
--- a/libarchive/test/test_archive_string_conversion.c
+++ b/libarchive/test/test_archive_string_conversion.c
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2011 Michihiro NAKAJIMA
+ * Copyright (c) 2011-2012 Michihiro NAKAJIMA
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,7 @@ __FBSDID("$FreeBSD$");
 Execute the following to rebuild the data for this program:
    tail -n +36 test_archive_string_conversion.c | /bin/sh
 #
-# This requires http://unicode.org/Public/UNIDATA/NormalizationTest.txt
+# This requires http://unicode.org/Public/6.0.0/ucd/NormalizationTest.txt
 #
 if="NormalizationTest.txt"
 if [ ! -f ${if} ]; then
@@ -158,7 +158,7 @@ unicode_to_wc(wchar_t *wp, uint32_t uc)
  */
 static int
 scan_unicode_pattern(char *out, wchar_t *wout, char *u16be, char *u16le,
-    const char *pattern, int exclude_mac_nfd)
+    const char *pattern, int mac_nfd)
 {
 	unsigned uc = 0;
 	const char *p = pattern;
@@ -166,6 +166,7 @@ scan_unicode_pattern(char *out, wchar_t *wout, char *u16be, char *u16le,
 	wchar_t *owp = wout;
 	char *op16be = u16be;
 	char *op16le = u16le;
+	int ret = 0;
 
 	for (;;) {
 		if (*p >= '0' && *p <= '9')
@@ -173,14 +174,31 @@ scan_unicode_pattern(char *out, wchar_t *wout, char *u16be, char *u16le,
 		else if (*p >= 'A' && *p <= 'F')
 			uc = (uc << 4) + (*p - 'A' + 0x0a);
 		else {
-			if (exclude_mac_nfd) {
+			if (mac_nfd && op == out) {
 				/*
 				 * These are not converted to NFD on Mac OS.
+ 				 * U+2000 - U+2FFF
+				 * U+F900 - U+FAFF
+				 * U+2F800 - U+2FAFF
 				 */
-				if ((uc >= 0x2000 && uc <= 0x2FFF) ||
-				    (uc >= 0xF900 && uc <= 0xFAFF) ||
-				    (uc >= 0x2F800 && uc <= 0x2FAFF))
-					return (-1);
+				switch (uc) {
+				case 0x2194: case 0x219A: case 0x219B:
+				case 0x21AE: case 0x21CD: case 0x21CE:
+				case 0x21CF: case 0x2204: case 0x2209:
+				case 0x220C: case 0x2224: case 0x2226:
+				case 0x2241: case 0x2244: case 0x2247:
+				case 0x2249: case 0x2260: case 0x2262:
+				case 0x226D: case 0x226E: case 0x226F:
+				case 0x2270: case 0x2271: case 0x2274:
+				case 0x2275: case 0x2276: case 0x2278:
+				case 0x2279: case 0x227A: case 0x227B:
+				case 0x2280: case 0x2281: case 0x2284:
+				case 0x2285: case 0x2288: case 0x2289:
+				case 0x22AC: case 0x22AD: case 0x22AE:
+				case 0x22AF: case 0x22E0: case 0x22E1:
+				case 0x22E2: case 0x22E3: case 0x22EA:
+				case 0x22EB: case 0x22EC: case 0x22ED:
+				
 				/*
 				 * Those code points are not converted to
 				 * NFD on Mac OS. I do not know the reason
@@ -190,9 +208,10 @@ scan_unicode_pattern(char *out, wchar_t *wout, char *u16be, char *u16le,
 				 *   1109C  ==> 1109B 110BA
 				 *   110AB  ==> 110A5 110BA
 				 */
-				if (uc == 0x1109A || uc == 0x1109C ||
-				    uc == 0x110AB)
-					return (-1);
+				case 0x1109A: case 0x1109C: case 0x110AB:
+					ret = 1;
+					break;
+				}
 			}
 			op16be += unicode_to_utf16be(op16be, uc);
 			op16le += unicode_to_utf16le(op16le, uc);
@@ -211,7 +230,7 @@ scan_unicode_pattern(char *out, wchar_t *wout, char *u16be, char *u16le,
 		}
 		p++;
 	}
-	return (0);
+	return (ret);
 }
 
 static int
@@ -230,27 +249,26 @@ is_wc_unicode(void)
  * On other platforms, the characters to be Form C.
  */
 static void
-test_archive_string_normalization(void)
+test_archive_string_normalization_nfc(const char *testdata)
 {
 	struct archive *a, *a2;
-	struct archive_entry *ae;
 	struct archive_string utf8;
 	struct archive_mstring mstr;
 	struct archive_string_conv *f_sconv8, *t_sconv8;
 	struct archive_string_conv *f_sconv16be, *f_sconv16le;
 	FILE *fp;
 	char buff[512];
-	static const char reffile[] = "test_archive_string_conversion.txt.Z";
-	ssize_t size;
 	int line = 0;
 	int locale_is_utf8, wc_is_unicode;
+	int sconv_opt = SCONV_SET_OPT_NORMALIZATION_C;
 
 	locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
 	wc_is_unicode = is_wc_unicode();
 	/* If it doesn't exist, just warn and return. */
 	if (!locale_is_utf8 && !wc_is_unicode) {
-		skipping("invalid encoding tests require a suitable locale;"
-		    " en_US.UTF-8 not available on this system");
+		skipping("A test of string normalization for NFC requires "
+		    "a suitable locale; en_US.UTF-8 not available on this "
+		    "system");
 		return;
 	}
 
@@ -258,27 +276,9 @@ test_archive_string_normalization(void)
 	memset(&mstr, 0, sizeof(mstr));
 
 	/*
-	 * Extract a test pattern file.
-	 */
-	extract_reference_file(reffile);
-	assert((a = archive_read_new()) != NULL);
-	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
-	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
-        assertEqualIntA(a, ARCHIVE_OK,
-            archive_read_open_filename(a, reffile, 512));
-
-	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
-	assert((fp = fopen("testdata.txt", "w")) != NULL);
-	while ((size = archive_read_data(a, buff, 512)) > 0)
-		fwrite(buff, 1, size, fp);
-	fclose(fp);
-
-	/* Open a test pattern file. */
-	assert((fp = fopen("testdata.txt", "r")) != NULL);
-
-	/*
 	 * Create string conversion objects.
 	 */
+	assert((a = archive_read_new()) != NULL);
 	assertA(NULL != (f_sconv8 =
 	    archive_string_conversion_from_charset(a, "UTF-8", 0)));
 	assertA(NULL != (f_sconv16be =
@@ -289,13 +289,18 @@ test_archive_string_normalization(void)
 	assertA(NULL != (t_sconv8 =
 	    archive_string_conversion_to_charset(a2, "UTF-8", 0)));
 	if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
-	    t_sconv8 == NULL || fp == NULL) {
+	    t_sconv8 == NULL) {
 		/* We cannot continue this test. */
-		if (fp != NULL)
-			fclose(fp);
 		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
 		return;
 	}
+	archive_string_conversion_set_opt(f_sconv8, sconv_opt);
+	archive_string_conversion_set_opt(f_sconv16be, sconv_opt);
+	archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
+	archive_string_conversion_set_opt(t_sconv8, sconv_opt);
+
+	/* Open a test pattern file. */
+	assert((fp = fopen(testdata, "r")) != NULL);
 
 	/*
 	 * Read test data.
@@ -311,6 +316,9 @@ test_archive_string_normalization(void)
 		char utf16le_nfc[80], utf16le_nfd[80];
 		wchar_t wc_nfc[40], wc_nfd[40];
 		char *e, *p;
+		const wchar_t *wp;
+		const char *mp;
+		size_t mplen;
 
 		line++;
 		if (buff[0] == '#')
@@ -332,74 +340,22 @@ test_archive_string_normalization(void)
 		nfd[sizeof(nfd)-1] = '\0';
 
 		/*
-		 * Convert an NFC pattern to UTF-8 bytes.
+		 * Get an NFC patterns.
 		 */
-#if defined(__APPLE__)
-		if (scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc, utf16le_nfc,
-		    nfc, 1) != 0)
-			continue;
-#else
 		scan_unicode_pattern(utf8_nfc, wc_nfc, utf16be_nfc, utf16le_nfc,
 		    nfc, 0);
-#endif
 
 		/*
-		 * Convert an NFD pattern to UTF-8 bytes.
+		 * Get an NFD patterns.
 		 */
 		scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
 		    nfd, 0);
 
 		if (locale_is_utf8) {
-#if defined(__APPLE__)
-			/*
-			 * Normalize an NFC string for import.
-			 */
-			assertEqualInt(0, archive_strcpy_in_locale(
-			    &utf8, utf8_nfc, f_sconv8));
-			failure("NFC(%s) should be converted to NFD(%s):%d",
-			    nfc, nfd, line);
-			assertEqualUTF8String(utf8_nfd, utf8.s);
-
-			/*
-			 * Normalize an NFD string for import.
-			 */
-			assertEqualInt(0, archive_strcpy_in_locale(
-			    &utf8, utf8_nfd, f_sconv8));
-			failure("NFD(%s) should not be any changed:%d",
-			    nfd, line);
-			assertEqualUTF8String(utf8_nfd, utf8.s);
-
-			/*
-			 * Copy an NFD string for export.
-			 */
-			assertEqualInt(0, archive_strcpy_in_locale(
-			    &utf8, utf8_nfd, t_sconv8));
-			failure("NFD(%s) should not be any changed:%d",
-			    nfd, line);
-			assertEqualUTF8String(utf8_nfd, utf8.s);
-
-			/*
-			 * Normalize an NFC string in UTF-16BE for import.
-			 */
-			assertEqualInt(0, archive_strncpy_in_locale(
-			    &utf8, utf16be_nfc, 100000, f_sconv16be));
-			failure("NFC(%s) should be converted to NFD(%s):%d",
-			    nfc, nfd, line);
-			assertEqualUTF8String(utf8_nfd, utf8.s);
-
-			/*
-			 * Normalize an NFC string in UTF-16LE for import.
-			 */
-			assertEqualInt(0, archive_strncpy_in_locale(
-			    &utf8, utf16le_nfc, 100000, f_sconv16le));
-			failure("NFC(%s) should be converted to NFD(%s):%d",
-			    nfc, nfd, line);
-			assertEqualUTF8String(utf8_nfd, utf8.s);
-#else
 			/*
 			 * Normalize an NFD string for import.
 			 */
-			assertEqualInt(0, archive_strcpy_in_locale(
+			assertEqualInt(0, archive_strcpy_l(
 			    &utf8, utf8_nfd, f_sconv8));
 			failure("NFD(%s) should be converted to NFC(%s):%d",
 			    nfd, nfc, line);
@@ -408,7 +364,7 @@ test_archive_string_normalization(void)
 			/*
 			 * Normalize an NFC string for import.
 			 */
-			assertEqualInt(0, archive_strcpy_in_locale(
+			assertEqualInt(0, archive_strcpy_l(
 			    &utf8, utf8_nfc, f_sconv8));
 			failure("NFC(%s) should not be any changed:%d",
 			    nfc, line);
@@ -417,7 +373,7 @@ test_archive_string_normalization(void)
 			/*
 			 * Copy an NFC string for export.
 			 */
-			assertEqualInt(0, archive_strcpy_in_locale(
+			assertEqualInt(0, archive_strcpy_l(
 			    &utf8, utf8_nfc, t_sconv8));
 			failure("NFC(%s) should not be any changed:%d",
 			    nfc, line);
@@ -426,7 +382,7 @@ test_archive_string_normalization(void)
 			/*
 			 * Normalize an NFD string in UTF-16BE for import.
 			 */
-			assertEqualInt(0, archive_strncpy_in_locale(
+			assertEqualInt(0, archive_strncpy_l(
 			    &utf8, utf16be_nfd, 100000, f_sconv16be));
 			failure("NFD(%s) should be converted to NFC(%s):%d",
 			    nfd, nfc, line);
@@ -435,12 +391,11 @@ test_archive_string_normalization(void)
 			/*
 			 * Normalize an NFD string in UTF-16LE for import.
 			 */
-			assertEqualInt(0, archive_strncpy_in_locale(
+			assertEqualInt(0, archive_strncpy_l(
 			    &utf8, utf16le_nfd, 100000, f_sconv16le));
 			failure("NFD(%s) should be converted to NFC(%s):%d",
 			    nfd, nfc, line);
 			assertEqualUTF8String(utf8_nfc, utf8.s);
-#endif
 		}
 
 		/*
@@ -451,99 +406,299 @@ test_archive_string_normalization(void)
 		 * locale UTF-8.
 		 */
 		if (locale_is_utf8 || wc_is_unicode) {
-			const wchar_t *wp;
-			const char *mp;
-			size_t mplen;
-
-#if defined(__APPLE__)
 			/*
 			 * Normalize an NFD string in UTF-8 for import.
 			 */
 			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-			    &mstr, utf8_nfc, 100000, f_sconv8));
+			    &mstr, utf8_nfd, 100000, f_sconv8));
 			assertEqualInt(0,
 			    archive_mstring_get_wcs(a, &mstr, &wp));
-			failure("UTF-8 NFC(%s) should be converted "
-			    "to WCS NFD(%s):%d", nfc, nfd, line);
-			assertEqualWString(wc_nfd, wp);
+			failure("UTF-8 NFD(%s) should be converted "
+			    "to WCS NFC(%s):%d", nfd, nfc, line);
+			assertEqualWString(wc_nfc, wp);
 
 			/*
 			 * Normalize an NFD string in UTF-16BE for import.
 			 */
 			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-			    &mstr, utf16be_nfc, 100000, f_sconv16be));
+			    &mstr, utf16be_nfd, 100000, f_sconv16be));
 			assertEqualInt(0,
 			    archive_mstring_get_wcs(a, &mstr, &wp));
-			failure("UTF-16BE NFC(%s) should be converted "
-			    "to WCS NFD(%s):%d", nfc, nfd, line);
-			assertEqualWString(wc_nfd, wp);
+			failure("UTF-8 NFD(%s) should be converted "
+			    "to WCS NFC(%s):%d", nfd, nfc, line);
+			assertEqualWString(wc_nfc, wp);
 
 			/*
 			 * Normalize an NFD string in UTF-16LE for import.
 			 */
 			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-			    &mstr, utf16le_nfc, 100000, f_sconv16le));
+			    &mstr, utf16le_nfd, 100000, f_sconv16le));
 			assertEqualInt(0,
 			    archive_mstring_get_wcs(a, &mstr, &wp));
-			failure("UTF-16LE NFC(%s) should be converted "
-			    "to WCS NFD(%s):%d", nfc, nfd, line);
-			assertEqualWString(wc_nfd, wp);
+			failure("UTF-8 NFD(%s) should be converted "
+			    "to WCS NFC(%s):%d", nfd, nfc, line);
+			assertEqualWString(wc_nfc, wp);
 
 			/*
-			 * Copy an NFD wide-string for export.
+			 * Copy an NFC wide-string for export.
 			 */
-			assertEqualInt(0, archive_mstring_copy_wcs(
-			    &mstr, wc_nfd));
+			assertEqualInt(0,
+			    archive_mstring_copy_wcs(&mstr, wc_nfc));
 			assertEqualInt(0, archive_mstring_get_mbs_l(
 			    &mstr, &mp, &mplen, t_sconv8));
-			failure("WCS NFD(%s) should be UTF-8 NFD:%d"
-			    ,nfd, line);
-			assertEqualUTF8String(utf8_nfd, mp);
-#else
+			failure("WCS NFC(%s) should be UTF-8 NFC:%d"
+			    ,nfc, line);
+			assertEqualUTF8String(utf8_nfc, mp);
+		}
+	}
+
+	archive_string_free(&utf8);
+	archive_mstring_clean(&mstr);
+	fclose(fp);
+	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+	assertEqualInt(ARCHIVE_OK, archive_write_free(a2));
+}
+
+static void
+test_archive_string_normalization_mac_nfd(const char *testdata)
+{
+	struct archive *a, *a2;
+	struct archive_string utf8;
+	struct archive_mstring mstr;
+	struct archive_string_conv *f_sconv8, *t_sconv8;
+	struct archive_string_conv *f_sconv16be, *f_sconv16le;
+	FILE *fp;
+	char buff[512];
+	int line = 0;
+	int locale_is_utf8, wc_is_unicode;
+	int sconv_opt = SCONV_SET_OPT_NORMALIZATION_D;
+
+	locale_is_utf8 = (NULL != setlocale(LC_ALL, "en_US.UTF-8"));
+	wc_is_unicode = is_wc_unicode();
+	/* If it doesn't exist, just warn and return. */
+	if (!locale_is_utf8 && !wc_is_unicode) {
+		skipping("A test of string normalization for NFD requires "
+		    "a suitable locale; en_US.UTF-8 not available on this "
+		    "system");
+		return;
+	}
+
+	archive_string_init(&utf8);
+	memset(&mstr, 0, sizeof(mstr));
+
+	/*
+	 * Create string conversion objects.
+	 */
+	assert((a = archive_read_new()) != NULL);
+	assertA(NULL != (f_sconv8 =
+	    archive_string_conversion_from_charset(a, "UTF-8", 0)));
+	assertA(NULL != (f_sconv16be =
+	    archive_string_conversion_from_charset(a, "UTF-16BE", 0)));
+	assertA(NULL != (f_sconv16le =
+	    archive_string_conversion_from_charset(a, "UTF-16LE", 0)));
+	assert((a2 = archive_write_new()) != NULL);
+	assertA(NULL != (t_sconv8 =
+	    archive_string_conversion_to_charset(a2, "UTF-8", 0)));
+	if (f_sconv8 == NULL || f_sconv16be == NULL || f_sconv16le == NULL ||
+	    t_sconv8 == NULL) {
+		/* We cannot continue this test. */
+		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+		return;
+	}
+	archive_string_conversion_set_opt(f_sconv8, sconv_opt);
+	archive_string_conversion_set_opt(f_sconv16be, sconv_opt);
+	archive_string_conversion_set_opt(f_sconv16le, sconv_opt);
+	archive_string_conversion_set_opt(t_sconv8, sconv_opt);
+
+	/* Open a test pattern file. */
+	assert((fp = fopen(testdata, "r")) != NULL);
+
+	/*
+	 * Read test data.
+	 *  Test data format:
+	 *     <NFC Unicode pattern> ';' <NFD Unicode pattern> '\n'
+	 *  Unicode pattern format:
+	 *     [0-9A-F]{4,5}([ ][0-9A-F]{4,5}){0,}
+	 */
+	while (fgets(buff, sizeof(buff), fp) != NULL) {
+		char nfc[80], nfd[80];
+		char utf8_nfc[80], utf8_nfd[80];
+		char utf16be_nfc[80], utf16be_nfd[80];
+		char utf16le_nfc[80], utf16le_nfd[80];
+		wchar_t wc_nfc[40], wc_nfd[40];
+		char *e, *p;
+		const wchar_t *wp;
+		const char *mp;
+		size_t mplen;
+		int should_be_nfc;
+
+		line++;
+		if (buff[0] == '#')
+			continue;
+		p = strchr(buff, ';');
+		if (p == NULL)
+			continue;
+		*p++ = '\0';
+		/* Copy an NFC pattern */
+		strncpy(nfc, buff, sizeof(nfc)-1);
+		nfc[sizeof(nfc)-1] = '\0';
+		e = p;
+		p = strchr(p, '\n');
+		if (p == NULL)
+			continue;
+		*p = '\0';
+		/* Copy an NFD pattern */
+		strncpy(nfd, e, sizeof(nfd)-1);
+		nfd[sizeof(nfd)-1] = '\0';
+
+		/*
+		 * Get an NFC patterns.
+		 */
+		should_be_nfc = scan_unicode_pattern(utf8_nfc, wc_nfc,
+			utf16be_nfc, utf16le_nfc, nfc, 1);
+
+		/*
+		 * Get an NFD patterns.
+		 */
+		scan_unicode_pattern(utf8_nfd, wc_nfd, utf16be_nfd, utf16le_nfd,
+		    nfd, 0);
+
+		if (locale_is_utf8) {
+			/*
+			 * Normalize an NFC string for import.
+			 */
+			assertEqualInt(0, archive_strcpy_l(
+			    &utf8, utf8_nfc, f_sconv8));
+			if (should_be_nfc) {
+				failure("NFC(%s) should not be converted to"
+				    " NFD(%s):%d", nfc, nfd, line);
+				assertEqualUTF8String(utf8_nfc, utf8.s);
+			} else {
+				failure("NFC(%s) should be converted to"
+				    " NFD(%s):%d", nfc, nfd, line);
+				assertEqualUTF8String(utf8_nfd, utf8.s);
+			}
+
+			/*
+			 * Normalize an NFD string for import.
+			 */
+			assertEqualInt(0, archive_strcpy_l(
+			    &utf8, utf8_nfd, f_sconv8));
+			failure("NFD(%s) should not be any changed:%d",
+			    nfd, line);
+			assertEqualUTF8String(utf8_nfd, utf8.s);
+
+			/*
+			 * Copy an NFD string for export.
+			 */
+			assertEqualInt(0, archive_strcpy_l(
+			    &utf8, utf8_nfd, t_sconv8));
+			failure("NFD(%s) should not be any changed:%d",
+			    nfd, line);
+			assertEqualUTF8String(utf8_nfd, utf8.s);
+
+			/*
+			 * Normalize an NFC string in UTF-16BE for import.
+			 */
+			assertEqualInt(0, archive_strncpy_l(
+			    &utf8, utf16be_nfc, 100000, f_sconv16be));
+			if (should_be_nfc) {
+				failure("NFC(%s) should not be converted to"
+				    " NFD(%s):%d", nfc, nfd, line);
+				assertEqualUTF8String(utf8_nfc, utf8.s);
+			} else {
+				failure("NFC(%s) should be converted to"
+				    " NFD(%s):%d", nfc, nfd, line);
+				assertEqualUTF8String(utf8_nfd, utf8.s);
+			}
+
+			/*
+			 * Normalize an NFC string in UTF-16LE for import.
+			 */
+			assertEqualInt(0, archive_strncpy_l(
+			    &utf8, utf16le_nfc, 100000, f_sconv16le));
+			if (should_be_nfc) {
+				failure("NFC(%s) should not be converted to"
+				    " NFD(%s):%d", nfc, nfd, line);
+				assertEqualUTF8String(utf8_nfc, utf8.s);
+			} else {
+				failure("NFC(%s) should be converted to"
+				    " NFD(%s):%d", nfc, nfd, line);
+				assertEqualUTF8String(utf8_nfd, utf8.s);
+			}
+		}
+
+		/*
+		 * Test for archive_mstring interface.
+		 * In specific, Windows platform UTF-16BE is directly
+		 * converted to/from wide-character to avoid the effect of
+		 * current locale since windows platform cannot make
+		 * locale UTF-8.
+		 */
+		if (locale_is_utf8 || wc_is_unicode) {
 			/*
 			 * Normalize an NFD string in UTF-8 for import.
 			 */
 			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-			    &mstr, utf8_nfd, 100000, f_sconv8));
+			    &mstr, utf8_nfc, 100000, f_sconv8));
 			assertEqualInt(0,
 			    archive_mstring_get_wcs(a, &mstr, &wp));
-			failure("UTF-8 NFD(%s) should be converted "
-			    "to WCS NFC(%s):%d", nfd, nfc, line);
-			assertEqualWString(wc_nfc, wp);
+			if (should_be_nfc) {
+				failure("UTF-8 NFC(%s) should not be converted "
+				    "to WCS NFD(%s):%d", nfc, nfd, line);
+				assertEqualWString(wc_nfc, wp);
+			} else {
+				failure("UTF-8 NFC(%s) should be converted "
+				    "to WCS NFD(%s):%d", nfc, nfd, line);
+				assertEqualWString(wc_nfd, wp);
+			}
 
 			/*
 			 * Normalize an NFD string in UTF-16BE for import.
 			 */
 			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-			    &mstr, utf16be_nfd, 100000, f_sconv16be));
+			    &mstr, utf16be_nfc, 100000, f_sconv16be));
 			assertEqualInt(0,
 			    archive_mstring_get_wcs(a, &mstr, &wp));
-			failure("UTF-8 NFD(%s) should be converted "
-			    "to WCS NFC(%s):%d", nfd, nfc, line);
-			assertEqualWString(wc_nfc, wp);
+			if (should_be_nfc) {
+				failure("UTF-16BE NFC(%s) should not be "
+				    "converted to WCS NFD(%s):%d",
+				    nfc, nfd, line);
+				assertEqualWString(wc_nfc, wp);
+			} else {
+				failure("UTF-16BE NFC(%s) should be converted "
+				    "to WCS NFD(%s):%d", nfc, nfd, line);
+				assertEqualWString(wc_nfd, wp);
+			}
 
 			/*
 			 * Normalize an NFD string in UTF-16LE for import.
 			 */
 			assertEqualInt(0, archive_mstring_copy_mbs_len_l(
-			    &mstr, utf16le_nfd, 100000, f_sconv16le));
+			    &mstr, utf16le_nfc, 100000, f_sconv16le));
 			assertEqualInt(0,
 			    archive_mstring_get_wcs(a, &mstr, &wp));
-			failure("UTF-8 NFD(%s) should be converted "
-			    "to WCS NFC(%s):%d", nfd, nfc, line);
-			assertEqualWString(wc_nfc, wp);
+			if (should_be_nfc) {
+				failure("UTF-16LE NFC(%s) should not be "
+				    "converted to WCS NFD(%s):%d",
+				    nfc, nfd, line);
+				assertEqualWString(wc_nfc, wp);
+			} else {
+				failure("UTF-16LE NFC(%s) should be converted "
+				    "to WCS NFD(%s):%d", nfc, nfd, line);
+				assertEqualWString(wc_nfd, wp);
+			}
 
 			/*
-			 * Copy an NFC wide-string for export.
+			 * Copy an NFD wide-string for export.
 			 */
 			assertEqualInt(0, archive_mstring_copy_wcs(
-			    &mstr, wc_nfc));
+			    &mstr, wc_nfd));
 			assertEqualInt(0, archive_mstring_get_mbs_l(
 			    &mstr, &mp, &mplen, t_sconv8));
-			failure("WCS NFC(%s) should be UTF-8 NFC:%d"
-			    ,nfc, line);
-			assertEqualUTF8String(utf8_nfc, mp);
-#endif
+			failure("WCS NFD(%s) should be UTF-8 NFD:%d"
+			    ,nfd, line);
+			assertEqualUTF8String(utf8_nfd, mp);
 		}
 	}
 
@@ -624,6 +779,32 @@ test_archive_string_canonicalization(void)
 
 DEFINE_TEST(test_archive_string_conversion)
 {
-	test_archive_string_normalization();
+	static const char reffile[] = "test_archive_string_conversion.txt.Z";
+	static const char testdata[] = "testdata.txt";
+	struct archive *a;
+	struct archive_entry *ae;
+	char buff[512];
+	ssize_t size;
+	FILE *fp;
+
+	/*
+	 * Extract a test pattern file.
+	 */
+	extract_reference_file(reffile);
+	assert((a = archive_read_new()) != NULL);
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
+        assertEqualIntA(a, ARCHIVE_OK,
+            archive_read_open_filename(a, reffile, 512));
+
+	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+	assert((fp = fopen(testdata, "w")) != NULL);
+	while ((size = archive_read_data(a, buff, 512)) > 0)
+		fwrite(buff, 1, size, fp);
+	fclose(fp);
+	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+
+	test_archive_string_normalization_nfc(testdata);
+	test_archive_string_normalization_mac_nfd(testdata);
 	test_archive_string_canonicalization();
 }