diff options
-rw-r--r-- | tools/tools/locale/Makefile | 202 | ||||
-rw-r--r-- | tools/tools/locale/README | 83 | ||||
-rw-r--r-- | tools/tools/locale/etc/final-maps/map.UTF-8 | 554 | ||||
-rw-r--r-- | tools/tools/locale/patch/patch-UnicodeData.txt | 29 | ||||
-rwxr-xr-x | tools/tools/locale/tools/cldr2def.pl | 5 | ||||
-rwxr-xr-x | tools/tools/locale/tools/convert_map.pl | 5 | ||||
-rwxr-xr-x | tools/tools/locale/tools/finalize | 38 | ||||
-rwxr-xr-x | tools/tools/locale/tools/utf8-rollup.pl | 31 |
8 files changed, 826 insertions, 121 deletions
diff --git a/tools/tools/locale/Makefile b/tools/tools/locale/Makefile index 492ff714feb7..aab6246649fc 100644 --- a/tools/tools/locale/Makefile +++ b/tools/tools/locale/Makefile @@ -7,19 +7,30 @@ # # Modified by John Marino to suit DragonFly needs # +.if ${.CURDIR} == ${.OBJDIR} +.error Do make obj first. +.endif -.OBJDIR: . +LOCALESRCDIR?= ${DESTDIR}/usr/src/share +TMPDIR?= /tmp -.if !defined(UNIDIR) -.error UNIDIR is not set -.endif -PASSON= UNIDIR="${UNIDIR}" +BASEDIR= ${.CURDIR} +ETCDIR= ${BASEDIR}/etc +TOOLSDIR= ${BASEDIR}/tools +PATCHDIR= ${BASEDIR}/patch +UNIDIR= ${.OBJDIR:tA}/unicode -ETCDIR= ${.CURDIR}/etc +PKGS= openjdk8 \ + apache-ant \ + p5-XML-Parser \ + p5-Tie-IxHash \ + p5-Text-Iconv +tools-test: + pkg info -e ${PKGS} + @echo tools ok. KNOWN= monetdef numericdef msgdef colldef ctypedef # timedef TYPES?= ${KNOWN} -LOCALE_DESTDIR?= /tmp/generated-locales/ COLLATION_SPECIAL?= \ cs_CZ ISO8859-2 \ @@ -44,65 +55,80 @@ COLLATION_SPECIAL?= \ .for area enc in ${COLLATION_SPECIAL} COLLATIONS_SPECIAL_ENV+= ${area}.${enc} .endfor -PASSON+= COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" +SETENV= env -i \ + PATH="${PATH}" \ + TMPDIR="${TMPDIR}" \ + COLLATIONS_SPECIAL="${COLLATIONS_SPECIAL_ENV}" \ + UNIDIR="${UNIDIR}" \ + BASEDIR="${BASEDIR}" \ + TOOLSDIR="${TOOLSDIR}" \ + ETCDIR="${ETCDIR}" + +all: posix build afterbuild +.ORDER: posix build afterbuild + +afterbuild: build + @echo "" + @find . -name *failed -all: .for t in ${TYPES} . if ${KNOWN:M${t}} - test -d ${t} || mkdir ${t} - make build-${t} +build: build-${t} +.ORDER: build-${t} afterbuild . endif .endfor - @echo "" - @find . -name *failed +diff: .for t in ${TYPES} +. if ${KNOWN:M${t}} +diff: diff-${t} +diff-${t}: + -/usr/bin/diff -ruN -x Makefile -x Makefile.depend \ + ${LOCALESRCDIR}/${t} ${t} +. endif +.endfor + +install: +.for t in ${TYPES} +. if ${KNOWN:M${t}} install: install-${t} install-${t}: -. 
if ${KNOWN:M${t}} - rm -rf ${.CURDIR}/${t}.draft - rm -f ${.CURDIR}/../../../share/${t}/Makefile - rm -f ${.CURDIR}/../../../share/${t}/*.src - mv ${.CURDIR}/${t}/* ${.CURDIR}/../../../share/${t}/ + cd ${LOCALESRCDIR}/${t} && \ + rm -f Makefile *.src && \ + cd ${.OBJDIR} && \ + install -c ${t}/* ${LOCALESRCDIR}/${t} . endif .endfor post-install: .for t in ${TYPES} . if ${KNOWN:M${t}} - (cd ${.CURDIR}/../../../share/${t} && \ - make && make install && make clean) + cd ${LOCALSRCDIR}/${t} && \ + make && make install && make clean . endif .endfor .for t in ${TYPES} -gen-${t}: - mkdir -p ${t} ${t}.draft - perl -I tools tools/cldr2def.pl \ - --unidir=$$(realpath ${UNIDIR}) \ - --etc=$$(realpath ${ETCDIR}) \ +CLEANDIRS+= ${t} ${t}.draft +${t}: + mkdir -p ${t} ${t}.draft && \ + perl -I ${TOOLSDIR} ${TOOLSDIR}/cldr2def.pl \ + --unidir=${UNIDIR:tA} \ + --etc=${ETCDIR:tA} \ --type=${t} -build-${t}: gen-${t} - env ${PASSON} tools/finalize ${t} +build-${t}: ${t} + ${SETENV} OUTBASEDIR="${.OBJDIR}/${t}" ${TOOLSDIR}/finalize ${t} .endfor -gen-ctypedef: ctype-rollup -static-colldef: gen-colldef +static-colldef: colldef build-colldef: static-colldef static-colldef: .for area enc in ${COLLATION_SPECIAL} - awk -f tools/extract-colldef.awk ${UNIDIR}/posix/${area}.${enc}.src > \ - colldef.draft/${area}.${enc}.src -.endfor - -ctype-rollup: - perl -I tools tools/utf8-rollup.pl --unidir=$$(realpath ${UNIDIR}) - -clean: -.for t in ${TYPES} - rm -rf ${t} ${t}.draft +colldef.draft/${area}.${enc}.src: posix/${area}.${enc}.src + awk -f ${TOOLSDIR}/extract-colldef.awk \ + ${.ALLSRC} > ${.TARGET} || (rm -f ${.TARGET} && false) .endfor BASE_LOCALES_OF_INTEREST?= \ @@ -143,37 +169,73 @@ ENCODINGS= Big5 \ KOI8-U \ SJIS \ US-ASCII \ - UTF-8 \ - - -POSIX: -.if exists (${UNIDIR}/tools/java/cldr.jar) - mkdir -p ${UNIDIR}/posix -. for area in ${BASE_LOCALES_OF_INTEREST} -. 
if !exists(${UNIDIR}/posix/${area}.UTF-8.src) - java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \ - org.unicode.cldr.posix.GeneratePOSIX \ - -d ${UNIDIR}/posix -m ${area} -c UTF-8 -. endif -. endfor -. for area encoding in ${COLLATION_SPECIAL} -. if !exists(${UNIDIR}/posix/${area}.${encoding}.src) - java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \ - org.unicode.cldr.posix.GeneratePOSIX \ - -d ${UNIDIR}/posix -m ${area} -c ${encoding} -. endif -. endfor -. for enc in ${ENCODINGS} -. if !exists(${UNIDIR}/posix/${enc}.cm) - java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar \ - org.unicode.cldr.posix.GenerateCharmap \ - -d ${UNIDIR}/posix -c ${enc} -. endif -. endfor + UTF-8 + +# CLDR files +CLDRFILES_CORE= https://unicode.org/Public/cldr/35/core.zip +CLDRFILES_KEY= https://unicode.org/Public/cldr/35/keyboards.zip +CLDRFILES_TOOLS=https://unicode.org/Public/cldr/35/tools.zip +CLDRFILES_UCD= http://www.unicode.org/Public/zipped/latest/UCD.zip + +# fetch and extract targets +${UNIDIR}: + mkdir -p ${UNIDIR} +.for N in CORE KEY TOOLS UCD +${CLDRFILES_${N}:T}: + fetch ${CLDRFILES_${N}} +fetch: ${CLDRFILES_${N}:T} +extract-${CLDRFILES_${N}:T}:: ${CLDRFILES_${N}:T} ${UNIDIR} + cd ${UNIDIR} && unzip -o ../${CLDRFILES_${N}:T} +extract: extract-${CLDRFILES_${N}:T} +.endfor +patch:: +.if exists(${PATCHDIR}) + cd ${UNIDIR} && cat ${PATCHDIR}/patch-* | patch +.endif + +.if !exists(${UNIDIR}/tools/java/cldr.jar) +.ORDER: extract patch +build-tools: extract patch tools-test ${UNIDIR} + cd ${UNIDIR}/tools/java && ${SETENV} ant all jar .else - @echo "Please install CLDR toolset for the desired release" - @echo "It should go at ${UNIDIR}/tools" +build-tools: + @echo cldr.jar is ready. 
.endif -clean-POSIX: - rm -f ${UNIDIR}/posix/* +JAVA_CLDR= java -DCLDR_DIR=${UNIDIR:Q} -jar ${UNIDIR}/tools/java/cldr.jar + +posix: posixcm post-posixcm posixsrc posixcol +.ORDER: posixcm post-posixcm posixsrc posixcol +${UNIDIR}/posix: + ln -s -f ../posix ${.TARGET} +clean-posix: + rm -rf posix ${UNIDIR}/posix +post-posixcm: ${UNIDIR}/posix + perl -I ${TOOLSDIR} ${TOOLSDIR}/utf8-rollup.pl \ + --unidir=${UNIDIR} +.for enc in ${ENCODINGS} +posixcm: build-tools posix/${enc}.cm +.ORDER: build-tools posix/${enc}.cm +posix/${enc}.cm: + mkdir -p posix && \ + ${JAVA_CLDR} org.unicode.cldr.posix.GenerateCharmap \ + -d posix -c ${enc} +.endfor +.for area in ${BASE_LOCALES_OF_INTEREST} +posixsrc: build-tools posix/${area}.UTF-8.src +.ORDER: build-tools posix/${area}.UTF-8.src +posix/${area}.UTF-8.src: + mkdir -p posix && \ + ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \ + -d posix -m ${area} -c UTF-8 +.endfor +.for area encoding in ${COLLATION_SPECIAL} +posixcol: build-tools posix/${area}.${encoding}.src +.ORDER: build-tools posix/${area}.${encoding}.src +posix/${area}.${encoding}.src: + mkdir -p posix && \ + ${JAVA_CLDR} org.unicode.cldr.posix.GeneratePOSIX \ + -d posix -m ${area} -c ${encoding} +.endfor + +.include <bsd.obj.mk> diff --git a/tools/tools/locale/README b/tools/tools/locale/README index 4badae846927..8dc8c2a25fc1 100644 --- a/tools/tools/locale/README +++ b/tools/tools/locale/README @@ -1,31 +1,58 @@ # $FreeBSD$ -To generate the locales: - -Tools needed: - java (openjdk >= 8) - perl - converters/p5-Text-Iconv - devel/p5-Tie-IxHash - textproc/p5-XML-Parser - -1. Fetch CLDR data from: http://unicode.org/Public/cldr/. You need all of the -core.zip, keyboards.zip, and tools.zip. -2. Fetch unidata (UCD.zip) from http://www.unicode.org/Public/zipped/latest. -3. Extract: - mkdir -p ~/unicode - cd ~/unicode - unzip ~/core.zip - unzip ~/keyboards.zip - unzip ~/tools.zip - unzip ~/UCD.zip -4. Export variable: - UNIDIR=~/unicode; export UNIDIR -5. 
Build the CLDR tools: - cd $UNIDIR/tools/java - ant jar -6. Build POSIX data files from CLDR data: - make POSIX -7. Build and install new locale data: - make +Files in this directory are used to generate locale source files +from files in CLDR (Unicode Common Locale Data Repository). + +To generate the files, do the following: + + cd /usr/src/tools/tools/locale + make obj (mandatory) + make -j16 (-jN recommended) + make diff (check if the changes are reasonable) + make install + +"make" downloads the necessary files, builds them, and installs the +results into /usr/src/share/* as source files for locales. + +More details are as follows: + +Variables: + LOCALESRCDIR + Destination path for the generated locale files. + Default: $DESTDIR/usr/src/share. + TMPDIR + Temporary directory. + Default: /tmp + +Targets: + make obj + Create a temporary directory for building. + + make clean + Clean up the obj directories. + + make cleandir + Remove the obj directories completely. + + make tools-test + Check if necessary tools are installed or not. + If something is missing, install it. + + make fetch + Download necessary files from CLDR. + + make build-tools + Build a tool to generate locale source files. + + make posix + Build POSIX locale source files. + + make build + Build locale files. + + make diff + Run diff(1) on the build results against $LOCALESRCDIR. + make install + Install the build results into $LOCALESRCDIR. 
+[EOF] diff --git a/tools/tools/locale/etc/final-maps/map.UTF-8 b/tools/tools/locale/etc/final-maps/map.UTF-8 index 031b8545982d..ad0dfd2c7de6 100644 --- a/tools/tools/locale/etc/final-maps/map.UTF-8 +++ b/tools/tools/locale/etc/final-maps/map.UTF-8 @@ -2969,6 +2969,7 @@ CHARMAP <TELUGU_DIGIT_SEVEN> \xE0\xB1\xAD <TELUGU_DIGIT_EIGHT> \xE0\xB1\xAE <TELUGU_DIGIT_NINE> \xE0\xB1\xAF +<TELUGU_SIGN_SIDDHAM> \xE0\xB1\xB7 <TELUGU_FRACTION_DIGIT_ZERO_FOR_ODD_POWERS_OF_FOUR> \xE0\xB1\xB8 <TELUGU_FRACTION_DIGIT_ONE_FOR_ODD_POWERS_OF_FOUR> \xE0\xB1\xB9 <TELUGU_FRACTION_DIGIT_TWO_FOR_ODD_POWERS_OF_FOUR> \xE0\xB1\xBA @@ -3363,14 +3364,24 @@ CHARMAP <LAO_LETTER_KO> \xE0\xBA\x81 <LAO_LETTER_KHO_SUNG> \xE0\xBA\x82 <LAO_LETTER_KHO_TAM> \xE0\xBA\x84 +<LAO_LETTER_PALI_GHA> \xE0\xBA\x86 <LAO_LETTER_NGO> \xE0\xBA\x87 <LAO_LETTER_CO> \xE0\xBA\x88 +<LAO_LETTER_PALI_CHA> \xE0\xBA\x89 <LAO_LETTER_SO_TAM> \xE0\xBA\x8A +<LAO_LETTER_PALI_JHA> \xE0\xBA\x8C <LAO_LETTER_NYO> \xE0\xBA\x8D +<LAO_LETTER_PALI_NYA> \xE0\xBA\x8E +<LAO_LETTER_PALI_TTA> \xE0\xBA\x8F +<LAO_LETTER_PALI_TTHA> \xE0\xBA\x90 +<LAO_LETTER_PALI_DDA> \xE0\xBA\x91 +<LAO_LETTER_PALI_DDHA> \xE0\xBA\x92 +<LAO_LETTER_PALI_NNA> \xE0\xBA\x93 <LAO_LETTER_DO> \xE0\xBA\x94 <LAO_LETTER_TO> \xE0\xBA\x95 <LAO_LETTER_THO_SUNG> \xE0\xBA\x96 <LAO_LETTER_THO_TAM> \xE0\xBA\x97 +<LAO_LETTER_PALI_DHA> \xE0\xBA\x98 <LAO_LETTER_NO> \xE0\xBA\x99 <LAO_LETTER_BO> \xE0\xBA\x9A <LAO_LETTER_PO> \xE0\xBA\x9B @@ -3378,13 +3389,17 @@ CHARMAP <LAO_LETTER_FO_TAM> \xE0\xBA\x9D <LAO_LETTER_PHO_TAM> \xE0\xBA\x9E <LAO_LETTER_FO_SUNG> \xE0\xBA\x9F +<LAO_LETTER_PALI_BHA> \xE0\xBA\xA0 <LAO_LETTER_MO> \xE0\xBA\xA1 <LAO_LETTER_YO> \xE0\xBA\xA2 <LAO_LETTER_LO_LING> \xE0\xBA\xA3 <LAO_LETTER_LO_LOOT> \xE0\xBA\xA5 <LAO_LETTER_WO> \xE0\xBA\xA7 +<LAO_LETTER_SANSKRIT_SHA> \xE0\xBA\xA8 +<LAO_LETTER_SANSKRIT_SSA> \xE0\xBA\xA9 <LAO_LETTER_SO_SUNG> \xE0\xBA\xAA <LAO_LETTER_HO_SUNG> \xE0\xBA\xAB +<LAO_LETTER_PALI_LLA> \xE0\xBA\xAC <LAO_LETTER_O> \xE0\xBA\xAD <LAO_LETTER_HO_TAM> 
\xE0\xBA\xAE <LAO_ELLIPSIS> \xE0\xBA\xAF @@ -3398,6 +3413,7 @@ CHARMAP <LAO_VOWEL_SIGN_YY> \xE0\xBA\xB7 <LAO_VOWEL_SIGN_U> \xE0\xBA\xB8 <LAO_VOWEL_SIGN_UU> \xE0\xBA\xB9 +<LAO_SIGN_PALI_VIRAMA> \xE0\xBA\xBA <LAO_VOWEL_SIGN_MAI_KON> \xE0\xBA\xBB <LAO_SEMIVOWEL_SIGN_LO> \xE0\xBA\xBC <LAO_SEMIVOWEL_SIGN_NYO> \xE0\xBA\xBD @@ -6656,6 +6672,7 @@ CHARMAP <VEDIC_SIGN_ATIKRAMA> \xE1\xB3\xB7 <VEDIC_TONE_RING_ABOVE> \xE1\xB3\xB8 <VEDIC_TONE_DOUBLE_RING_ABOVE> \xE1\xB3\xB9 +<VEDIC_SIGN_DOUBLE_ANUSVARA_ANTARGOMUKHA> \xE1\xB3\xBA <LATIN_LETTER_SMALL_CAPITAL_A> \xE1\xB4\x80 <LATIN_LETTER_SMALL_CAPITAL_AE> \xE1\xB4\x81 <LATIN_SMALL_LETTER_TURNED_AE> \xE1\xB4\x82 @@ -10325,6 +10342,7 @@ CHARMAP <BLACK_MEDIUM_DOWN-POINTING_TRIANGLE_CENTRED> \xE2\xAF\x86 <BLACK_MEDIUM_LEFT-POINTING_TRIANGLE_CENTRED> \xE2\xAF\x87 <BLACK_MEDIUM_RIGHT-POINTING_TRIANGLE_CENTRED> \xE2\xAF\x88 +<NEPTUNE_FORM_TWO> \xE2\xAF\x89 <TOP_HALF_BLACK_CIRCLE> \xE2\xAF\x8A <BOTTOM_HALF_BLACK_CIRCLE> \xE2\xAF\x8B <LIGHT_FOUR_POINTED_BLACK_CUSP> \xE2\xAF\x8C @@ -10378,6 +10396,7 @@ CHARMAP <DOUBLED_SYMBOL> \xE2\xAF\xBC <PASSED_SYMBOL> \xE2\xAF\xBD <REVERSED_RIGHT_ANGLE> \xE2\xAF\xBE +<HELLSCHREIBER_PAUSE_SYMBOL> \xE2\xAF\xBF <GLAGOLITIC_CAPITAL_LETTER_AZU> \xE2\xB0\x80 <GLAGOLITIC_CAPITAL_LETTER_BUKY> \xE2\xB0\x81 <GLAGOLITIC_CAPITAL_LETTER_VEDE> \xE2\xB0\x82 @@ -10916,6 +10935,7 @@ CHARMAP <MEDIEVAL_COMMA> \xE2\xB9\x8C <PARAGRAPHUS_MARK> \xE2\xB9\x8D <PUNCTUS_ELEVATUS_MARK> \xE2\xB9\x8E +<CORNISH_VERSE_DIVIDER> \xE2\xB9\x8F <CJK_RADICAL_REPEAT> \xE2\xBA\x80 <CJK_RADICAL_CLIFF> \xE2\xBA\x81 <CJK_RADICAL_SECOND_ONE> \xE2\xBA\x82 @@ -41812,6 +41832,17 @@ CHARMAP <LATIN_SMALL_LETTER_OMEGA> \xEA\x9E\xB7 <LATIN_CAPITAL_LETTER_U_WITH_STROKE> \xEA\x9E\xB8 <LATIN_SMALL_LETTER_U_WITH_STROKE> \xEA\x9E\xB9 +<LATIN_CAPITAL_LETTER_GLOTTAL_A> \xEA\x9E\xBA +<LATIN_SMALL_LETTER_GLOTTAL_A> \xEA\x9E\xBB +<LATIN_CAPITAL_LETTER_GLOTTAL_I> \xEA\x9E\xBC +<LATIN_SMALL_LETTER_GLOTTAL_I> \xEA\x9E\xBD +<LATIN_CAPITAL_LETTER_GLOTTAL_U> 
\xEA\x9E\xBE +<LATIN_SMALL_LETTER_GLOTTAL_U> \xEA\x9E\xBF +<LATIN_CAPITAL_LETTER_ANGLICANA_W> \xEA\x9F\x82 +<LATIN_SMALL_LETTER_ANGLICANA_W> \xEA\x9F\x83 +<LATIN_CAPITAL_LETTER_C_WITH_PALATAL_HOOK> \xEA\x9F\x84 +<LATIN_CAPITAL_LETTER_S_WITH_HOOK> \xEA\x9F\x85 +<LATIN_CAPITAL_LETTER_Z_WITH_PALATAL_HOOK> \xEA\x9F\x86 <LATIN_EPIGRAPHIC_LETTER_SIDEWAYS_I> \xEA\x9F\xB7 <MODIFIER_LETTER_CAPITAL_H_WITH_STROKE> \xEA\x9F\xB8 <MODIFIER_LETTER_SMALL_LIGATURE_OE> \xEA\x9F\xB9 @@ -42577,6 +42608,8 @@ CHARMAP <LATIN_SMALL_LETTER_UO> \xEA\xAD\xA3 <LATIN_SMALL_LETTER_INVERTED_ALPHA> \xEA\xAD\xA4 <GREEK_LETTER_SMALL_CAPITAL_OMEGA> \xEA\xAD\xA5 +<LATIN_SMALL_LETTER_DZ_DIGRAPH_WITH_RETROFLEX_HOOK> \xEA\xAD\xA6 +<LATIN_SMALL_LETTER_TS_DIGRAPH_WITH_RETROFLEX_HOOK> \xEA\xAD\xA7 <CHEROKEE_SMALL_LETTER_A> \xEA\xAD\xB0 <CHEROKEE_SMALL_LETTER_E> \xEA\xAD\xB1 <CHEROKEE_SMALL_LETTER_I> \xEA\xAD\xB2 @@ -64381,6 +64414,29 @@ CHARMAP <SOGDIAN_PUNCTUATION_CIRCLE_WITH_DOT> \xF0\x90\xBD\x97 <SOGDIAN_PUNCTUATION_TWO_CIRCLES_WITH_DOTS> \xF0\x90\xBD\x98 <SOGDIAN_PUNCTUATION_HALF_CIRCLE_WITH_DOT> \xF0\x90\xBD\x99 +<ELYMAIC_LETTER_ALEPH> \xF0\x90\xBF\xA0 +<ELYMAIC_LETTER_BETH> \xF0\x90\xBF\xA1 +<ELYMAIC_LETTER_GIMEL> \xF0\x90\xBF\xA2 +<ELYMAIC_LETTER_DALETH> \xF0\x90\xBF\xA3 +<ELYMAIC_LETTER_HE> \xF0\x90\xBF\xA4 +<ELYMAIC_LETTER_WAW> \xF0\x90\xBF\xA5 +<ELYMAIC_LETTER_ZAYIN> \xF0\x90\xBF\xA6 +<ELYMAIC_LETTER_HETH> \xF0\x90\xBF\xA7 +<ELYMAIC_LETTER_TETH> \xF0\x90\xBF\xA8 +<ELYMAIC_LETTER_YODH> \xF0\x90\xBF\xA9 +<ELYMAIC_LETTER_KAPH> \xF0\x90\xBF\xAA +<ELYMAIC_LETTER_LAMEDH> \xF0\x90\xBF\xAB +<ELYMAIC_LETTER_MEM> \xF0\x90\xBF\xAC +<ELYMAIC_LETTER_NUN> \xF0\x90\xBF\xAD +<ELYMAIC_LETTER_SAMEKH> \xF0\x90\xBF\xAE +<ELYMAIC_LETTER_AYIN> \xF0\x90\xBF\xAF +<ELYMAIC_LETTER_PE> \xF0\x90\xBF\xB0 +<ELYMAIC_LETTER_SADHE> \xF0\x90\xBF\xB1 +<ELYMAIC_LETTER_QOPH> \xF0\x90\xBF\xB2 +<ELYMAIC_LETTER_RESH> \xF0\x90\xBF\xB3 +<ELYMAIC_LETTER_SHIN> \xF0\x90\xBF\xB4 +<ELYMAIC_LETTER_TAW> \xF0\x90\xBF\xB5 
+<ELYMAIC_LIGATURE_ZAYIN-YODH> \xF0\x90\xBF\xB6 <BRAHMI_SIGN_CANDRABINDU> \xF0\x91\x80\x80 <BRAHMI_SIGN_ANUSVARA> \xF0\x91\x80\x81 <BRAHMI_SIGN_VISARGA> \xF0\x91\x80\x82 @@ -65163,6 +65219,7 @@ CHARMAP <NEWA_PLACEHOLDER_MARK> \xF0\x91\x91\x9B <NEWA_INSERTION_SIGN> \xF0\x91\x91\x9D <NEWA_SANDHI_MARK> \xF0\x91\x91\x9E +<NEWA_LETTER_VEDIC_ANUSVARA> \xF0\x91\x91\x9F <TIRHUTA_ANJI> \xF0\x91\x92\x80 <TIRHUTA_LETTER_A> \xF0\x91\x92\x81 <TIRHUTA_LETTER_AA> \xF0\x91\x92\x82 @@ -65485,6 +65542,7 @@ CHARMAP <TAKRI_VOWEL_SIGN_AU> \xF0\x91\x9A\xB5 <TAKRI_SIGN_VIRAMA> \xF0\x91\x9A\xB6 <TAKRI_SIGN_NUKTA> \xF0\x91\x9A\xB7 +<TAKRI_LETTER_ARCHAIC_KHA> \xF0\x91\x9A\xB8 <TAKRI_DIGIT_ZERO> \xF0\x91\x9B\x80 <TAKRI_DIGIT_ONE> \xF0\x91\x9B\x81 <TAKRI_DIGIT_TWO> \xF0\x91\x9B\x82 @@ -65697,6 +65755,71 @@ CHARMAP <WARANG_CITI_NUMBER_EIGHTY> \xF0\x91\xA3\xB1 <WARANG_CITI_NUMBER_NINETY> \xF0\x91\xA3\xB2 <WARANG_CITI_OM> \xF0\x91\xA3\xBF +<NANDINAGARI_LETTER_A> \xF0\x91\xA6\xA0 +<NANDINAGARI_LETTER_AA> \xF0\x91\xA6\xA1 +<NANDINAGARI_LETTER_I> \xF0\x91\xA6\xA2 +<NANDINAGARI_LETTER_II> \xF0\x91\xA6\xA3 +<NANDINAGARI_LETTER_U> \xF0\x91\xA6\xA4 +<NANDINAGARI_LETTER_UU> \xF0\x91\xA6\xA5 +<NANDINAGARI_LETTER_VOCALIC_R> \xF0\x91\xA6\xA6 +<NANDINAGARI_LETTER_VOCALIC_RR> \xF0\x91\xA6\xA7 +<NANDINAGARI_LETTER_E> \xF0\x91\xA6\xAA +<NANDINAGARI_LETTER_AI> \xF0\x91\xA6\xAB +<NANDINAGARI_LETTER_O> \xF0\x91\xA6\xAC +<NANDINAGARI_LETTER_AU> \xF0\x91\xA6\xAD +<NANDINAGARI_LETTER_KA> \xF0\x91\xA6\xAE +<NANDINAGARI_LETTER_KHA> \xF0\x91\xA6\xAF +<NANDINAGARI_LETTER_GA> \xF0\x91\xA6\xB0 +<NANDINAGARI_LETTER_GHA> \xF0\x91\xA6\xB1 +<NANDINAGARI_LETTER_NGA> \xF0\x91\xA6\xB2 +<NANDINAGARI_LETTER_CA> \xF0\x91\xA6\xB3 +<NANDINAGARI_LETTER_CHA> \xF0\x91\xA6\xB4 +<NANDINAGARI_LETTER_JA> \xF0\x91\xA6\xB5 +<NANDINAGARI_LETTER_JHA> \xF0\x91\xA6\xB6 +<NANDINAGARI_LETTER_NYA> \xF0\x91\xA6\xB7 +<NANDINAGARI_LETTER_TTA> \xF0\x91\xA6\xB8 +<NANDINAGARI_LETTER_TTHA> \xF0\x91\xA6\xB9 +<NANDINAGARI_LETTER_DDA> \xF0\x91\xA6\xBA 
+<NANDINAGARI_LETTER_DDHA> \xF0\x91\xA6\xBB +<NANDINAGARI_LETTER_NNA> \xF0\x91\xA6\xBC +<NANDINAGARI_LETTER_TA> \xF0\x91\xA6\xBD +<NANDINAGARI_LETTER_THA> \xF0\x91\xA6\xBE +<NANDINAGARI_LETTER_DA> \xF0\x91\xA6\xBF +<NANDINAGARI_LETTER_DHA> \xF0\x91\xA7\x80 +<NANDINAGARI_LETTER_NA> \xF0\x91\xA7\x81 +<NANDINAGARI_LETTER_PA> \xF0\x91\xA7\x82 +<NANDINAGARI_LETTER_PHA> \xF0\x91\xA7\x83 +<NANDINAGARI_LETTER_BA> \xF0\x91\xA7\x84 +<NANDINAGARI_LETTER_BHA> \xF0\x91\xA7\x85 +<NANDINAGARI_LETTER_MA> \xF0\x91\xA7\x86 +<NANDINAGARI_LETTER_YA> \xF0\x91\xA7\x87 +<NANDINAGARI_LETTER_RA> \xF0\x91\xA7\x88 +<NANDINAGARI_LETTER_LA> \xF0\x91\xA7\x89 +<NANDINAGARI_LETTER_VA> \xF0\x91\xA7\x8A +<NANDINAGARI_LETTER_SHA> \xF0\x91\xA7\x8B +<NANDINAGARI_LETTER_SSA> \xF0\x91\xA7\x8C +<NANDINAGARI_LETTER_SA> \xF0\x91\xA7\x8D +<NANDINAGARI_LETTER_HA> \xF0\x91\xA7\x8E +<NANDINAGARI_LETTER_LLA> \xF0\x91\xA7\x8F +<NANDINAGARI_LETTER_RRA> \xF0\x91\xA7\x90 +<NANDINAGARI_VOWEL_SIGN_AA> \xF0\x91\xA7\x91 +<NANDINAGARI_VOWEL_SIGN_I> \xF0\x91\xA7\x92 +<NANDINAGARI_VOWEL_SIGN_II> \xF0\x91\xA7\x93 +<NANDINAGARI_VOWEL_SIGN_U> \xF0\x91\xA7\x94 +<NANDINAGARI_VOWEL_SIGN_UU> \xF0\x91\xA7\x95 +<NANDINAGARI_VOWEL_SIGN_VOCALIC_R> \xF0\x91\xA7\x96 +<NANDINAGARI_VOWEL_SIGN_VOCALIC_RR> \xF0\x91\xA7\x97 +<NANDINAGARI_VOWEL_SIGN_E> \xF0\x91\xA7\x9A +<NANDINAGARI_VOWEL_SIGN_AI> \xF0\x91\xA7\x9B +<NANDINAGARI_VOWEL_SIGN_O> \xF0\x91\xA7\x9C +<NANDINAGARI_VOWEL_SIGN_AU> \xF0\x91\xA7\x9D +<NANDINAGARI_SIGN_ANUSVARA> \xF0\x91\xA7\x9E +<NANDINAGARI_SIGN_VISARGA> \xF0\x91\xA7\x9F +<NANDINAGARI_SIGN_VIRAMA> \xF0\x91\xA7\xA0 +<NANDINAGARI_SIGN_AVAGRAHA> \xF0\x91\xA7\xA1 +<NANDINAGARI_SIGN_SIDDHAM> \xF0\x91\xA7\xA2 +<NANDINAGARI_HEADSTROKE> \xF0\x91\xA7\xA3 +<NANDINAGARI_VOWEL_SIGN_PRISHTHAMATRA_E> \xF0\x91\xA7\xA4 <ZANABAZAR_SQUARE_LETTER_A> \xF0\x91\xA8\x80 <ZANABAZAR_SQUARE_VOWEL_SIGN_I> \xF0\x91\xA8\x81 <ZANABAZAR_SQUARE_VOWEL_SIGN_UE> \xF0\x91\xA8\x82 @@ -65821,6 +65944,8 @@ CHARMAP <SOYOMBO_LETTER_SA> \xF0\x91\xAA\x81 
<SOYOMBO_LETTER_HA> \xF0\x91\xAA\x82 <SOYOMBO_LETTER_KSSA> \xF0\x91\xAA\x83 +<SOYOMBO_SIGN_JIHVAMULIYA> \xF0\x91\xAA\x84 +<SOYOMBO_SIGN_UPADHMANIYA> \xF0\x91\xAA\x85 <SOYOMBO_CLUSTER-INITIAL_LETTER_RA> \xF0\x91\xAA\x86 <SOYOMBO_CLUSTER-INITIAL_LETTER_LA> \xF0\x91\xAA\x87 <SOYOMBO_CLUSTER-INITIAL_LETTER_SHA> \xF0\x91\xAA\x88 @@ -66235,6 +66360,57 @@ CHARMAP <MAKASAR_VOWEL_SIGN_O> \xF0\x91\xBB\xB6 <MAKASAR_PASSIMBANG> \xF0\x91\xBB\xB7 <MAKASAR_END_OF_SECTION> \xF0\x91\xBB\xB8 +<TAMIL_FRACTION_ONE_THREE-HUNDRED-AND-TWENTIETH> \xF0\x91\xBF\x80 +<TAMIL_FRACTION_ONE_ONE-HUNDRED-AND-SIXTIETH> \xF0\x91\xBF\x81 +<TAMIL_FRACTION_ONE_EIGHTIETH> \xF0\x91\xBF\x82 +<TAMIL_FRACTION_ONE_SIXTY-FOURTH> \xF0\x91\xBF\x83 +<TAMIL_FRACTION_ONE_FORTIETH> \xF0\x91\xBF\x84 +<TAMIL_FRACTION_ONE_THIRTY-SECOND> \xF0\x91\xBF\x85 +<TAMIL_FRACTION_THREE_EIGHTIETHS> \xF0\x91\xBF\x86 +<TAMIL_FRACTION_THREE_SIXTY-FOURTHS> \xF0\x91\xBF\x87 +<TAMIL_FRACTION_ONE_TWENTIETH> \xF0\x91\xBF\x88 +<TAMIL_FRACTION_ONE_SIXTEENTH-1> \xF0\x91\xBF\x89 +<TAMIL_FRACTION_ONE_SIXTEENTH-2> \xF0\x91\xBF\x8A +<TAMIL_FRACTION_ONE_TENTH> \xF0\x91\xBF\x8B +<TAMIL_FRACTION_ONE_EIGHTH> \xF0\x91\xBF\x8C +<TAMIL_FRACTION_THREE_TWENTIETHS> \xF0\x91\xBF\x8D +<TAMIL_FRACTION_THREE_SIXTEENTHS> \xF0\x91\xBF\x8E +<TAMIL_FRACTION_ONE_FIFTH> \xF0\x91\xBF\x8F +<TAMIL_FRACTION_ONE_QUARTER> \xF0\x91\xBF\x90 +<TAMIL_FRACTION_ONE_HALF-1> \xF0\x91\xBF\x91 +<TAMIL_FRACTION_ONE_HALF-2> \xF0\x91\xBF\x92 +<TAMIL_FRACTION_THREE_QUARTERS> \xF0\x91\xBF\x93 +<TAMIL_FRACTION_DOWNSCALING_FACTOR_KIIZH> \xF0\x91\xBF\x94 +<TAMIL_SIGN_NEL> \xF0\x91\xBF\x95 +<TAMIL_SIGN_CEVITU> \xF0\x91\xBF\x96 +<TAMIL_SIGN_AAZHAAKKU> \xF0\x91\xBF\x97 +<TAMIL_SIGN_UZHAKKU> \xF0\x91\xBF\x98 +<TAMIL_SIGN_MUUVUZHAKKU> \xF0\x91\xBF\x99 +<TAMIL_SIGN_KURUNI> \xF0\x91\xBF\x9A +<TAMIL_SIGN_PATHAKKU> \xF0\x91\xBF\x9B +<TAMIL_SIGN_MUKKURUNI> \xF0\x91\xBF\x9C +<TAMIL_SIGN_KAACU> \xF0\x91\xBF\x9D +<TAMIL_SIGN_PANAM> \xF0\x91\xBF\x9E +<TAMIL_SIGN_PON> \xF0\x91\xBF\x9F 
+<TAMIL_SIGN_VARAAKAN> \xF0\x91\xBF\xA0 +<TAMIL_SIGN_PAARAM> \xF0\x91\xBF\xA1 +<TAMIL_SIGN_KUZHI> \xF0\x91\xBF\xA2 +<TAMIL_SIGN_VELI> \xF0\x91\xBF\xA3 +<TAMIL_WET_CULTIVATION_SIGN> \xF0\x91\xBF\xA4 +<TAMIL_DRY_CULTIVATION_SIGN> \xF0\x91\xBF\xA5 +<TAMIL_LAND_SIGN> \xF0\x91\xBF\xA6 +<TAMIL_SALT_PAN_SIGN> \xF0\x91\xBF\xA7 +<TAMIL_TRADITIONAL_CREDIT_SIGN> \xF0\x91\xBF\xA8 +<TAMIL_TRADITIONAL_NUMBER_SIGN> \xF0\x91\xBF\xA9 +<TAMIL_CURRENT_SIGN> \xF0\x91\xBF\xAA +<TAMIL_AND_ODD_SIGN> \xF0\x91\xBF\xAB +<TAMIL_SPENT_SIGN> \xF0\x91\xBF\xAC +<TAMIL_TOTAL_SIGN> \xF0\x91\xBF\xAD +<TAMIL_IN_POSSESSION_SIGN> \xF0\x91\xBF\xAE +<TAMIL_STARTING_FROM_SIGN> \xF0\x91\xBF\xAF +<TAMIL_SIGN_MUTHALIYA> \xF0\x91\xBF\xB0 +<TAMIL_SIGN_VAKAIYARAA> \xF0\x91\xBF\xB1 +<TAMIL_PUNCTUATION_END_OF_TEXT> \xF0\x91\xBF\xBF <CUNEIFORM_SIGN_A> \xF0\x92\x80\x80 <CUNEIFORM_SIGN_A_TIMES_A> \xF0\x92\x80\x81 <CUNEIFORM_SIGN_A_TIMES_BAD> \xF0\x92\x80\x82 @@ -68540,6 +68716,15 @@ CHARMAP <EGYPTIAN_HIEROGLYPH_AA030> \xF0\x93\x90\xAC <EGYPTIAN_HIEROGLYPH_AA031> \xF0\x93\x90\xAD <EGYPTIAN_HIEROGLYPH_AA032> \xF0\x93\x90\xAE +<EGYPTIAN_HIEROGLYPH_VERTICAL_JOINER> \xF0\x93\x90\xB0 +<EGYPTIAN_HIEROGLYPH_HORIZONTAL_JOINER> \xF0\x93\x90\xB1 +<EGYPTIAN_HIEROGLYPH_INSERT_AT_TOP_START> \xF0\x93\x90\xB2 +<EGYPTIAN_HIEROGLYPH_INSERT_AT_BOTTOM_START> \xF0\x93\x90\xB3 +<EGYPTIAN_HIEROGLYPH_INSERT_AT_TOP_END> \xF0\x93\x90\xB4 +<EGYPTIAN_HIEROGLYPH_INSERT_AT_BOTTOM_END> \xF0\x93\x90\xB5 +<EGYPTIAN_HIEROGLYPH_OVERLAY_MIDDLE> \xF0\x93\x90\xB6 +<EGYPTIAN_HIEROGLYPH_BEGIN_SEGMENT> \xF0\x93\x90\xB7 +<EGYPTIAN_HIEROGLYPH_END_SEGMENT> \xF0\x93\x90\xB8 <ANATOLIAN_HIEROGLYPH_A001> \xF0\x94\x90\x80 <ANATOLIAN_HIEROGLYPH_A002> \xF0\x94\x90\x81 <ANATOLIAN_HIEROGLYPH_A003> \xF0\x94\x90\x82 @@ -70058,6 +70243,13 @@ CHARMAP <MIAO_LETTER_WA> \xF0\x96\xBD\x82 <MIAO_LETTER_AH> \xF0\x96\xBD\x83 <MIAO_LETTER_HHA> \xF0\x96\xBD\x84 +<MIAO_LETTER_BRI> \xF0\x96\xBD\x85 +<MIAO_LETTER_SYI> \xF0\x96\xBD\x86 +<MIAO_LETTER_DZYI> \xF0\x96\xBD\x87 
+<MIAO_LETTER_TE> \xF0\x96\xBD\x88 +<MIAO_LETTER_TSE> \xF0\x96\xBD\x89 +<MIAO_LETTER_RTE> \xF0\x96\xBD\x8A +<MIAO_SIGN_CONSONANT_MODIFIER_BAR> \xF0\x96\xBD\x8F <MIAO_LETTER_NASALIZATION> \xF0\x96\xBD\x90 <MIAO_SIGN_ASPIRATION> \xF0\x96\xBD\x91 <MIAO_SIGN_REFORMED_VOICING> \xF0\x96\xBD\x92 @@ -70105,6 +70297,15 @@ CHARMAP <MIAO_VOWEL_SIGN_OU> \xF0\x96\xBD\xBC <MIAO_VOWEL_SIGN_N> \xF0\x96\xBD\xBD <MIAO_VOWEL_SIGN_NG> \xF0\x96\xBD\xBE +<MIAO_VOWEL_SIGN_UOG> \xF0\x96\xBD\xBF +<MIAO_VOWEL_SIGN_YUI> \xF0\x96\xBE\x80 +<MIAO_VOWEL_SIGN_OG> \xF0\x96\xBE\x81 +<MIAO_VOWEL_SIGN_OER> \xF0\x96\xBE\x82 +<MIAO_VOWEL_SIGN_VW> \xF0\x96\xBE\x83 +<MIAO_VOWEL_SIGN_IG> \xF0\x96\xBE\x84 +<MIAO_VOWEL_SIGN_EA> \xF0\x96\xBE\x85 +<MIAO_VOWEL_SIGN_IONG> \xF0\x96\xBE\x86 +<MIAO_VOWEL_SIGN_UI> \xF0\x96\xBE\x87 <MIAO_TONE_RIGHT> \xF0\x96\xBE\x8F <MIAO_TONE_TOP_RIGHT> \xF0\x96\xBE\x90 <MIAO_TONE_ABOVE> \xF0\x96\xBE\x91 @@ -70124,6 +70325,8 @@ CHARMAP <MIAO_LETTER_REFORMED_TONE-8> \xF0\x96\xBE\x9F <TANGUT_ITERATION_MARK> \xF0\x96\xBF\xA0 <NUSHU_ITERATION_MARK> \xF0\x96\xBF\xA1 +<OLD_CHINESE_HOOK_MARK> \xF0\x96\xBF\xA2 +<OLD_CHINESE_ITERATION_MARK> \xF0\x96\xBF\xA3 <TANGUT_IDEOGRAPH-17000> \xF0\x97\x80\x80 <TANGUT_IDEOGRAPH-17001> \xF0\x97\x80\x81 <TANGUT_IDEOGRAPH-17002> \xF0\x97\x80\x82 @@ -76254,6 +76457,12 @@ CHARMAP <TANGUT_IDEOGRAPH-187EF> \xF0\x98\x9F\xAF <TANGUT_IDEOGRAPH-187F0> \xF0\x98\x9F\xB0 <TANGUT_IDEOGRAPH-187F1> \xF0\x98\x9F\xB1 +<TANGUT_IDEOGRAPH-187F2> \xF0\x98\x9F\xB2 +<TANGUT_IDEOGRAPH-187F3> \xF0\x98\x9F\xB3 +<TANGUT_IDEOGRAPH-187F4> \xF0\x98\x9F\xB4 +<TANGUT_IDEOGRAPH-187F5> \xF0\x98\x9F\xB5 +<TANGUT_IDEOGRAPH-187F6> \xF0\x98\x9F\xB6 +<TANGUT_IDEOGRAPH-187F7> \xF0\x98\x9F\xB7 <TANGUT_COMPONENT-001> \xF0\x98\xA0\x80 <TANGUT_COMPONENT-002> \xF0\x98\xA0\x81 <TANGUT_COMPONENT-003> \xF0\x98\xA0\x82 @@ -77296,6 +77505,13 @@ CHARMAP <HENTAIGANA_LETTER_WO-7> \xF0\x9B\x84\x9C <HENTAIGANA_LETTER_N-MU-MO-1> \xF0\x9B\x84\x9D <HENTAIGANA_LETTER_N-MU-MO-2> \xF0\x9B\x84\x9E 
+<HIRAGANA_LETTER_SMALL_WI> \xF0\x9B\x85\x90 +<HIRAGANA_LETTER_SMALL_WE> \xF0\x9B\x85\x91 +<HIRAGANA_LETTER_SMALL_WO> \xF0\x9B\x85\x92 +<KATAKANA_LETTER_SMALL_WI> \xF0\x9B\x85\xA4 +<KATAKANA_LETTER_SMALL_WE> \xF0\x9B\x85\xA5 +<KATAKANA_LETTER_SMALL_WO> \xF0\x9B\x85\xA6 +<KATAKANA_LETTER_SMALL_N> \xF0\x9B\x85\xA7 <NUSHU_CHARACTER-1B170> \xF0\x9B\x85\xB0 <NUSHU_CHARACTER-1B171> \xF0\x9B\x85\xB1 <NUSHU_CHARACTER-1B172> \xF0\x9B\x85\xB2 @@ -80224,6 +80440,136 @@ CHARMAP <COMBINING_GLAGOLITIC_LETTER_BIG_YUS> \xF0\x9E\x80\xA8 <COMBINING_GLAGOLITIC_LETTER_IOTATED_BIG_YUS> \xF0\x9E\x80\xA9 <COMBINING_GLAGOLITIC_LETTER_FITA> \xF0\x9E\x80\xAA +<NYIAKENG_PUACHUE_HMONG_LETTER_MA> \xF0\x9E\x84\x80 +<NYIAKENG_PUACHUE_HMONG_LETTER_TSA> \xF0\x9E\x84\x81 +<NYIAKENG_PUACHUE_HMONG_LETTER_NTA> \xF0\x9E\x84\x82 +<NYIAKENG_PUACHUE_HMONG_LETTER_TA> \xF0\x9E\x84\x83 +<NYIAKENG_PUACHUE_HMONG_LETTER_HA> \xF0\x9E\x84\x84 +<NYIAKENG_PUACHUE_HMONG_LETTER_NA> \xF0\x9E\x84\x85 +<NYIAKENG_PUACHUE_HMONG_LETTER_XA> \xF0\x9E\x84\x86 +<NYIAKENG_PUACHUE_HMONG_LETTER_NKA> \xF0\x9E\x84\x87 +<NYIAKENG_PUACHUE_HMONG_LETTER_CA> \xF0\x9E\x84\x88 +<NYIAKENG_PUACHUE_HMONG_LETTER_LA> \xF0\x9E\x84\x89 +<NYIAKENG_PUACHUE_HMONG_LETTER_SA> \xF0\x9E\x84\x8A +<NYIAKENG_PUACHUE_HMONG_LETTER_ZA> \xF0\x9E\x84\x8B +<NYIAKENG_PUACHUE_HMONG_LETTER_NCA> \xF0\x9E\x84\x8C +<NYIAKENG_PUACHUE_HMONG_LETTER_NTSA> \xF0\x9E\x84\x8D +<NYIAKENG_PUACHUE_HMONG_LETTER_KA> \xF0\x9E\x84\x8E +<NYIAKENG_PUACHUE_HMONG_LETTER_DA> \xF0\x9E\x84\x8F +<NYIAKENG_PUACHUE_HMONG_LETTER_NYA> \xF0\x9E\x84\x90 +<NYIAKENG_PUACHUE_HMONG_LETTER_NRA> \xF0\x9E\x84\x91 +<NYIAKENG_PUACHUE_HMONG_LETTER_VA> \xF0\x9E\x84\x92 +<NYIAKENG_PUACHUE_HMONG_LETTER_NTXA> \xF0\x9E\x84\x93 +<NYIAKENG_PUACHUE_HMONG_LETTER_TXA> \xF0\x9E\x84\x94 +<NYIAKENG_PUACHUE_HMONG_LETTER_FA> \xF0\x9E\x84\x95 +<NYIAKENG_PUACHUE_HMONG_LETTER_RA> \xF0\x9E\x84\x96 +<NYIAKENG_PUACHUE_HMONG_LETTER_QA> \xF0\x9E\x84\x97 +<NYIAKENG_PUACHUE_HMONG_LETTER_YA> \xF0\x9E\x84\x98 
+<NYIAKENG_PUACHUE_HMONG_LETTER_NQA> \xF0\x9E\x84\x99 +<NYIAKENG_PUACHUE_HMONG_LETTER_PA> \xF0\x9E\x84\x9A +<NYIAKENG_PUACHUE_HMONG_LETTER_XYA> \xF0\x9E\x84\x9B +<NYIAKENG_PUACHUE_HMONG_LETTER_NPA> \xF0\x9E\x84\x9C +<NYIAKENG_PUACHUE_HMONG_LETTER_DLA> \xF0\x9E\x84\x9D +<NYIAKENG_PUACHUE_HMONG_LETTER_NPLA> \xF0\x9E\x84\x9E +<NYIAKENG_PUACHUE_HMONG_LETTER_HAH> \xF0\x9E\x84\x9F +<NYIAKENG_PUACHUE_HMONG_LETTER_MLA> \xF0\x9E\x84\xA0 +<NYIAKENG_PUACHUE_HMONG_LETTER_PLA> \xF0\x9E\x84\xA1 +<NYIAKENG_PUACHUE_HMONG_LETTER_GA> \xF0\x9E\x84\xA2 +<NYIAKENG_PUACHUE_HMONG_LETTER_RRA> \xF0\x9E\x84\xA3 +<NYIAKENG_PUACHUE_HMONG_LETTER_A> \xF0\x9E\x84\xA4 +<NYIAKENG_PUACHUE_HMONG_LETTER_AA> \xF0\x9E\x84\xA5 +<NYIAKENG_PUACHUE_HMONG_LETTER_I> \xF0\x9E\x84\xA6 +<NYIAKENG_PUACHUE_HMONG_LETTER_U> \xF0\x9E\x84\xA7 +<NYIAKENG_PUACHUE_HMONG_LETTER_O> \xF0\x9E\x84\xA8 +<NYIAKENG_PUACHUE_HMONG_LETTER_OO> \xF0\x9E\x84\xA9 +<NYIAKENG_PUACHUE_HMONG_LETTER_E> \xF0\x9E\x84\xAA +<NYIAKENG_PUACHUE_HMONG_LETTER_EE> \xF0\x9E\x84\xAB +<NYIAKENG_PUACHUE_HMONG_LETTER_W> \xF0\x9E\x84\xAC +<NYIAKENG_PUACHUE_HMONG_TONE-B> \xF0\x9E\x84\xB0 +<NYIAKENG_PUACHUE_HMONG_TONE-M> \xF0\x9E\x84\xB1 +<NYIAKENG_PUACHUE_HMONG_TONE-J> \xF0\x9E\x84\xB2 +<NYIAKENG_PUACHUE_HMONG_TONE-V> \xF0\x9E\x84\xB3 +<NYIAKENG_PUACHUE_HMONG_TONE-S> \xF0\x9E\x84\xB4 +<NYIAKENG_PUACHUE_HMONG_TONE-G> \xF0\x9E\x84\xB5 +<NYIAKENG_PUACHUE_HMONG_TONE-D> \xF0\x9E\x84\xB6 +<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_PERSON> \xF0\x9E\x84\xB7 +<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_THING> \xF0\x9E\x84\xB8 +<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_LOCATION> \xF0\x9E\x84\xB9 +<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_ANIMAL> \xF0\x9E\x84\xBA +<NYIAKENG_PUACHUE_HMONG_SIGN_FOR_INVERTEBRATE> \xF0\x9E\x84\xBB +<NYIAKENG_PUACHUE_HMONG_SIGN_XW_XW> \xF0\x9E\x84\xBC +<NYIAKENG_PUACHUE_HMONG_SYLLABLE_LENGTHENER> \xF0\x9E\x84\xBD +<NYIAKENG_PUACHUE_HMONG_DIGIT_ZERO> \xF0\x9E\x85\x80 +<NYIAKENG_PUACHUE_HMONG_DIGIT_ONE> \xF0\x9E\x85\x81 +<NYIAKENG_PUACHUE_HMONG_DIGIT_TWO> \xF0\x9E\x85\x82 
+<NYIAKENG_PUACHUE_HMONG_DIGIT_THREE> \xF0\x9E\x85\x83 +<NYIAKENG_PUACHUE_HMONG_DIGIT_FOUR> \xF0\x9E\x85\x84 +<NYIAKENG_PUACHUE_HMONG_DIGIT_FIVE> \xF0\x9E\x85\x85 +<NYIAKENG_PUACHUE_HMONG_DIGIT_SIX> \xF0\x9E\x85\x86 +<NYIAKENG_PUACHUE_HMONG_DIGIT_SEVEN> \xF0\x9E\x85\x87 +<NYIAKENG_PUACHUE_HMONG_DIGIT_EIGHT> \xF0\x9E\x85\x88 +<NYIAKENG_PUACHUE_HMONG_DIGIT_NINE> \xF0\x9E\x85\x89 +<NYIAKENG_PUACHUE_HMONG_LOGOGRAM_NYAJ> \xF0\x9E\x85\x8E +<NYIAKENG_PUACHUE_HMONG_CIRCLED_CA> \xF0\x9E\x85\x8F +<WANCHO_LETTER_AA> \xF0\x9E\x8B\x80 +<WANCHO_LETTER_A> \xF0\x9E\x8B\x81 +<WANCHO_LETTER_BA> \xF0\x9E\x8B\x82 +<WANCHO_LETTER_CA> \xF0\x9E\x8B\x83 +<WANCHO_LETTER_DA> \xF0\x9E\x8B\x84 +<WANCHO_LETTER_GA> \xF0\x9E\x8B\x85 +<WANCHO_LETTER_YA> \xF0\x9E\x8B\x86 +<WANCHO_LETTER_PHA> \xF0\x9E\x8B\x87 +<WANCHO_LETTER_LA> \xF0\x9E\x8B\x88 +<WANCHO_LETTER_NA> \xF0\x9E\x8B\x89 +<WANCHO_LETTER_PA> \xF0\x9E\x8B\x8A +<WANCHO_LETTER_TA> \xF0\x9E\x8B\x8B +<WANCHO_LETTER_THA> \xF0\x9E\x8B\x8C +<WANCHO_LETTER_FA> \xF0\x9E\x8B\x8D +<WANCHO_LETTER_SA> \xF0\x9E\x8B\x8E +<WANCHO_LETTER_SHA> \xF0\x9E\x8B\x8F +<WANCHO_LETTER_JA> \xF0\x9E\x8B\x90 +<WANCHO_LETTER_ZA> \xF0\x9E\x8B\x91 +<WANCHO_LETTER_WA> \xF0\x9E\x8B\x92 +<WANCHO_LETTER_VA> \xF0\x9E\x8B\x93 +<WANCHO_LETTER_KA> \xF0\x9E\x8B\x94 +<WANCHO_LETTER_O> \xF0\x9E\x8B\x95 +<WANCHO_LETTER_AU> \xF0\x9E\x8B\x96 +<WANCHO_LETTER_RA> \xF0\x9E\x8B\x97 +<WANCHO_LETTER_MA> \xF0\x9E\x8B\x98 +<WANCHO_LETTER_KHA> \xF0\x9E\x8B\x99 +<WANCHO_LETTER_HA> \xF0\x9E\x8B\x9A +<WANCHO_LETTER_E> \xF0\x9E\x8B\x9B +<WANCHO_LETTER_I> \xF0\x9E\x8B\x9C +<WANCHO_LETTER_NGA> \xF0\x9E\x8B\x9D +<WANCHO_LETTER_U> \xF0\x9E\x8B\x9E +<WANCHO_LETTER_LLHA> \xF0\x9E\x8B\x9F +<WANCHO_LETTER_TSA> \xF0\x9E\x8B\xA0 +<WANCHO_LETTER_TRA> \xF0\x9E\x8B\xA1 +<WANCHO_LETTER_ONG> \xF0\x9E\x8B\xA2 +<WANCHO_LETTER_AANG> \xF0\x9E\x8B\xA3 +<WANCHO_LETTER_ANG> \xF0\x9E\x8B\xA4 +<WANCHO_LETTER_ING> \xF0\x9E\x8B\xA5 +<WANCHO_LETTER_ON> \xF0\x9E\x8B\xA6 +<WANCHO_LETTER_EN> \xF0\x9E\x8B\xA7 
+<WANCHO_LETTER_AAN> \xF0\x9E\x8B\xA8 +<WANCHO_LETTER_NYA> \xF0\x9E\x8B\xA9 +<WANCHO_LETTER_UEN> \xF0\x9E\x8B\xAA +<WANCHO_LETTER_YIH> \xF0\x9E\x8B\xAB +<WANCHO_TONE_TUP> \xF0\x9E\x8B\xAC +<WANCHO_TONE_TUPNI> \xF0\x9E\x8B\xAD +<WANCHO_TONE_KOI> \xF0\x9E\x8B\xAE +<WANCHO_TONE_KOINI> \xF0\x9E\x8B\xAF +<WANCHO_DIGIT_ZERO> \xF0\x9E\x8B\xB0 +<WANCHO_DIGIT_ONE> \xF0\x9E\x8B\xB1 +<WANCHO_DIGIT_TWO> \xF0\x9E\x8B\xB2 +<WANCHO_DIGIT_THREE> \xF0\x9E\x8B\xB3 +<WANCHO_DIGIT_FOUR> \xF0\x9E\x8B\xB4 +<WANCHO_DIGIT_FIVE> \xF0\x9E\x8B\xB5 +<WANCHO_DIGIT_SIX> \xF0\x9E\x8B\xB6 +<WANCHO_DIGIT_SEVEN> \xF0\x9E\x8B\xB7 +<WANCHO_DIGIT_EIGHT> \xF0\x9E\x8B\xB8 +<WANCHO_DIGIT_NINE> \xF0\x9E\x8B\xB9 +<WANCHO_NGUN_SIGN> \xF0\x9E\x8B\xBF <MENDE_KIKAKUI_SYLLABLE_M001_KI> \xF0\x9E\xA0\x80 <MENDE_KIKAKUI_SYLLABLE_M002_KA> \xF0\x9E\xA0\x81 <MENDE_KIKAKUI_SYLLABLE_M003_KU> \xF0\x9E\xA0\x82 @@ -80512,6 +80858,7 @@ CHARMAP <ADLAM_CONSONANT_MODIFIER> \xF0\x9E\xA5\x88 <ADLAM_GEMINATE_CONSONANT_MODIFIER> \xF0\x9E\xA5\x89 <ADLAM_NUKTA> \xF0\x9E\xA5\x8A +<ADLAM_NASALIZATION_MARK> \xF0\x9E\xA5\x8B <ADLAM_DIGIT_ZERO> \xF0\x9E\xA5\x90 <ADLAM_DIGIT_ONE> \xF0\x9E\xA5\x91 <ADLAM_DIGIT_TWO> \xF0\x9E\xA5\x92 @@ -80592,6 +80939,67 @@ CHARMAP <INDIC_SIYAQ_NUMBER_ALTERNATE_TWO> \xF0\x9E\xB2\xB2 <INDIC_SIYAQ_NUMBER_ALTERNATE_TEN_THOUSAND> \xF0\x9E\xB2\xB3 <INDIC_SIYAQ_ALTERNATE_LAKH_MARK> \xF0\x9E\xB2\xB4 +<OTTOMAN_SIYAQ_NUMBER_ONE> \xF0\x9E\xB4\x81 +<OTTOMAN_SIYAQ_NUMBER_TWO> \xF0\x9E\xB4\x82 +<OTTOMAN_SIYAQ_NUMBER_THREE> \xF0\x9E\xB4\x83 +<OTTOMAN_SIYAQ_NUMBER_FOUR> \xF0\x9E\xB4\x84 +<OTTOMAN_SIYAQ_NUMBER_FIVE> \xF0\x9E\xB4\x85 +<OTTOMAN_SIYAQ_NUMBER_SIX> \xF0\x9E\xB4\x86 +<OTTOMAN_SIYAQ_NUMBER_SEVEN> \xF0\x9E\xB4\x87 +<OTTOMAN_SIYAQ_NUMBER_EIGHT> \xF0\x9E\xB4\x88 +<OTTOMAN_SIYAQ_NUMBER_NINE> \xF0\x9E\xB4\x89 +<OTTOMAN_SIYAQ_NUMBER_TEN> \xF0\x9E\xB4\x8A +<OTTOMAN_SIYAQ_NUMBER_TWENTY> \xF0\x9E\xB4\x8B +<OTTOMAN_SIYAQ_NUMBER_THIRTY> \xF0\x9E\xB4\x8C +<OTTOMAN_SIYAQ_NUMBER_FORTY> \xF0\x9E\xB4\x8D 
+<OTTOMAN_SIYAQ_NUMBER_FIFTY> \xF0\x9E\xB4\x8E +<OTTOMAN_SIYAQ_NUMBER_SIXTY> \xF0\x9E\xB4\x8F +<OTTOMAN_SIYAQ_NUMBER_SEVENTY> \xF0\x9E\xB4\x90 +<OTTOMAN_SIYAQ_NUMBER_EIGHTY> \xF0\x9E\xB4\x91 +<OTTOMAN_SIYAQ_NUMBER_NINETY> \xF0\x9E\xB4\x92 +<OTTOMAN_SIYAQ_NUMBER_ONE_HUNDRED> \xF0\x9E\xB4\x93 +<OTTOMAN_SIYAQ_NUMBER_TWO_HUNDRED> \xF0\x9E\xB4\x94 +<OTTOMAN_SIYAQ_NUMBER_THREE_HUNDRED> \xF0\x9E\xB4\x95 +<OTTOMAN_SIYAQ_NUMBER_FOUR_HUNDRED> \xF0\x9E\xB4\x96 +<OTTOMAN_SIYAQ_NUMBER_FIVE_HUNDRED> \xF0\x9E\xB4\x97 +<OTTOMAN_SIYAQ_NUMBER_SIX_HUNDRED> \xF0\x9E\xB4\x98 +<OTTOMAN_SIYAQ_NUMBER_SEVEN_HUNDRED> \xF0\x9E\xB4\x99 +<OTTOMAN_SIYAQ_NUMBER_EIGHT_HUNDRED> \xF0\x9E\xB4\x9A +<OTTOMAN_SIYAQ_NUMBER_NINE_HUNDRED> \xF0\x9E\xB4\x9B +<OTTOMAN_SIYAQ_NUMBER_ONE_THOUSAND> \xF0\x9E\xB4\x9C +<OTTOMAN_SIYAQ_NUMBER_TWO_THOUSAND> \xF0\x9E\xB4\x9D +<OTTOMAN_SIYAQ_NUMBER_THREE_THOUSAND> \xF0\x9E\xB4\x9E +<OTTOMAN_SIYAQ_NUMBER_FOUR_THOUSAND> \xF0\x9E\xB4\x9F +<OTTOMAN_SIYAQ_NUMBER_FIVE_THOUSAND> \xF0\x9E\xB4\xA0 +<OTTOMAN_SIYAQ_NUMBER_SIX_THOUSAND> \xF0\x9E\xB4\xA1 +<OTTOMAN_SIYAQ_NUMBER_SEVEN_THOUSAND> \xF0\x9E\xB4\xA2 +<OTTOMAN_SIYAQ_NUMBER_EIGHT_THOUSAND> \xF0\x9E\xB4\xA3 +<OTTOMAN_SIYAQ_NUMBER_NINE_THOUSAND> \xF0\x9E\xB4\xA4 +<OTTOMAN_SIYAQ_NUMBER_TEN_THOUSAND> \xF0\x9E\xB4\xA5 +<OTTOMAN_SIYAQ_NUMBER_TWENTY_THOUSAND> \xF0\x9E\xB4\xA6 +<OTTOMAN_SIYAQ_NUMBER_THIRTY_THOUSAND> \xF0\x9E\xB4\xA7 +<OTTOMAN_SIYAQ_NUMBER_FORTY_THOUSAND> \xF0\x9E\xB4\xA8 +<OTTOMAN_SIYAQ_NUMBER_FIFTY_THOUSAND> \xF0\x9E\xB4\xA9 +<OTTOMAN_SIYAQ_NUMBER_SIXTY_THOUSAND> \xF0\x9E\xB4\xAA +<OTTOMAN_SIYAQ_NUMBER_SEVENTY_THOUSAND> \xF0\x9E\xB4\xAB +<OTTOMAN_SIYAQ_NUMBER_EIGHTY_THOUSAND> \xF0\x9E\xB4\xAC +<OTTOMAN_SIYAQ_NUMBER_NINETY_THOUSAND> \xF0\x9E\xB4\xAD +<OTTOMAN_SIYAQ_MARRATAN> \xF0\x9E\xB4\xAE +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_TWO> \xF0\x9E\xB4\xAF +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_THREE> \xF0\x9E\xB4\xB0 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_FOUR> \xF0\x9E\xB4\xB1 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_FIVE> \xF0\x9E\xB4\xB2 
+<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_SIX> \xF0\x9E\xB4\xB3 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_SEVEN> \xF0\x9E\xB4\xB4 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_EIGHT> \xF0\x9E\xB4\xB5 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_NINE> \xF0\x9E\xB4\xB6 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_TEN> \xF0\x9E\xB4\xB7 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_FOUR_HUNDRED> \xF0\x9E\xB4\xB8 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_SIX_HUNDRED> \xF0\x9E\xB4\xB9 +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_TWO_THOUSAND> \xF0\x9E\xB4\xBA +<OTTOMAN_SIYAQ_ALTERNATE_NUMBER_TEN_THOUSAND> \xF0\x9E\xB4\xBB +<OTTOMAN_SIYAQ_FRACTION_ONE_HALF> \xF0\x9E\xB4\xBC +<OTTOMAN_SIYAQ_FRACTION_ONE_SIXTH> \xF0\x9E\xB4\xBD <ARABIC_MATHEMATICAL_ALEF> \xF0\x9E\xB8\x80 <ARABIC_MATHEMATICAL_BEH> \xF0\x9E\xB8\x81 <ARABIC_MATHEMATICAL_JEEM> \xF0\x9E\xB8\x82 @@ -81066,6 +81474,7 @@ CHARMAP <NEGATIVE_CIRCLED_LATIN_CAPITAL_LETTER_Z> \xF0\x9F\x85\xA9 <RAISED_MC_SIGN> \xF0\x9F\x85\xAA <RAISED_MD_SIGN> \xF0\x9F\x85\xAB +<RAISED_MR_SIGN> \xF0\x9F\x85\xAC <NEGATIVE_SQUARED_LATIN_CAPITAL_LETTER_A> \xF0\x9F\x85\xB0 <NEGATIVE_SQUARED_LATIN_CAPITAL_LETTER_B> \xF0\x9F\x85\xB1 <NEGATIVE_SQUARED_LATIN_CAPITAL_LETTER_C> \xF0\x9F\x85\xB2 @@ -82198,6 +82607,7 @@ CHARMAP <SHOPPING_TROLLEY> \xF0\x9F\x9B\x92 <STUPA> \xF0\x9F\x9B\x93 <PAGODA> \xF0\x9F\x9B\x94 +<HINDU_TEMPLE> \xF0\x9F\x9B\x95 <HAMMER_AND_WRENCH> \xF0\x9F\x9B\xA0 <SHIELD> \xF0\x9F\x9B\xA1 <OIL_DRUM> \xF0\x9F\x9B\xA2 @@ -82221,6 +82631,7 @@ CHARMAP <SLED> \xF0\x9F\x9B\xB7 <FLYING_SAUCER> \xF0\x9F\x9B\xB8 <SKATEBOARD> \xF0\x9F\x9B\xB9 +<AUTO_RICKSHAW> \xF0\x9F\x9B\xBA <ALCHEMICAL_SYMBOL_FOR_QUINTESSENCE> \xF0\x9F\x9C\x80 <ALCHEMICAL_SYMBOL_FOR_AIR> \xF0\x9F\x9C\x81 <ALCHEMICAL_SYMBOL_FOR_FIRE> \xF0\x9F\x9C\x82 @@ -82426,6 +82837,18 @@ CHARMAP <NEGATIVE_CIRCLED_TRIANGLE> \xF0\x9F\x9F\x96 <CIRCLED_SQUARE> \xF0\x9F\x9F\x97 <NEGATIVE_CIRCLED_SQUARE> \xF0\x9F\x9F\x98 +<LARGE_ORANGE_CIRCLE> \xF0\x9F\x9F\xA0 +<LARGE_YELLOW_CIRCLE> \xF0\x9F\x9F\xA1 +<LARGE_GREEN_CIRCLE> \xF0\x9F\x9F\xA2 +<LARGE_PURPLE_CIRCLE> \xF0\x9F\x9F\xA3 
+<LARGE_BROWN_CIRCLE> \xF0\x9F\x9F\xA4 +<LARGE_RED_SQUARE> \xF0\x9F\x9F\xA5 +<LARGE_BLUE_SQUARE> \xF0\x9F\x9F\xA6 +<LARGE_ORANGE_SQUARE> \xF0\x9F\x9F\xA7 +<LARGE_YELLOW_SQUARE> \xF0\x9F\x9F\xA8 +<LARGE_GREEN_SQUARE> \xF0\x9F\x9F\xA9 +<LARGE_PURPLE_SQUARE> \xF0\x9F\x9F\xAA +<LARGE_BROWN_SQUARE> \xF0\x9F\x9F\xAB <LEFTWARDS_ARROW_WITH_SMALL_TRIANGLE_ARROWHEAD> \xF0\x9F\xA0\x80 <UPWARDS_ARROW_WITH_SMALL_TRIANGLE_ARROWHEAD> \xF0\x9F\xA0\x81 <RIGHTWARDS_ARROW_WITH_SMALL_TRIANGLE_ARROWHEAD> \xF0\x9F\xA0\x82 @@ -82586,6 +83009,9 @@ CHARMAP <DOWNWARD_FACING_NOTCHED_HOOK> \xF0\x9F\xA4\x89 <DOWNWARD_FACING_HOOK_WITH_DOT> \xF0\x9F\xA4\x8A <DOWNWARD_FACING_NOTCHED_HOOK_WITH_DOT> \xF0\x9F\xA4\x8B +<WHITE_HEART> \xF0\x9F\xA4\x8D +<BROWN_HEART> \xF0\x9F\xA4\x8E +<PINCHING_HAND> \xF0\x9F\xA4\x8F <ZIPPER-MOUTH_FACE> \xF0\x9F\xA4\x90 <MONEY-MOUTH_FACE> \xF0\x9F\xA4\x91 <FACE_WITH_THERMOMETER> \xF0\x9F\xA4\x92 @@ -82633,6 +83059,7 @@ CHARMAP <WRESTLERS> \xF0\x9F\xA4\xBC <WATER_POLO> \xF0\x9F\xA4\xBD <HANDBALL> \xF0\x9F\xA4\xBE +<DIVING_MASK> \xF0\x9F\xA4\xBF <WILTED_FLOWER> \xF0\x9F\xA5\x80 <DRUM_WITH_DRUMSTICKS> \xF0\x9F\xA5\x81 <CLINKING_GLASSES> \xF0\x9F\xA5\x82 @@ -82682,11 +83109,13 @@ CHARMAP <MOON_CAKE> \xF0\x9F\xA5\xAE <BAGEL> \xF0\x9F\xA5\xAF <SMILING_FACE_WITH_SMILING_EYES_AND_THREE_HEARTS> \xF0\x9F\xA5\xB0 +<YAWNING_FACE> \xF0\x9F\xA5\xB1 <FACE_WITH_PARTY_HORN_AND_PARTY_HAT> \xF0\x9F\xA5\xB3 <FACE_WITH_UNEVEN_EYES_AND_WAVY_MOUTH> \xF0\x9F\xA5\xB4 <OVERHEATED_FACE> \xF0\x9F\xA5\xB5 <FREEZING_FACE> \xF0\x9F\xA5\xB6 <FACE_WITH_PLEADING_EYES> \xF0\x9F\xA5\xBA +<SARI> \xF0\x9F\xA5\xBB <LAB_COAT> \xF0\x9F\xA5\xBC <GOGGLES> \xF0\x9F\xA5\xBD <HIKING_BOOT> \xF0\x9F\xA5\xBE @@ -82726,6 +83155,14 @@ CHARMAP <MICROBE> \xF0\x9F\xA6\xA0 <BADGER> \xF0\x9F\xA6\xA1 <SWAN> \xF0\x9F\xA6\xA2 +<SLOTH> \xF0\x9F\xA6\xA5 +<OTTER> \xF0\x9F\xA6\xA6 +<ORANGUTAN> \xF0\x9F\xA6\xA7 +<SKUNK> \xF0\x9F\xA6\xA8 +<FLAMINGO> \xF0\x9F\xA6\xA9 +<OYSTER> \xF0\x9F\xA6\xAA +<GUIDE_DOG> \xF0\x9F\xA6\xAE 
+<PROBING_CANE> \xF0\x9F\xA6\xAF <EMOJI_COMPONENT_RED_HAIR> \xF0\x9F\xA6\xB0 <EMOJI_COMPONENT_CURLY_HAIR> \xF0\x9F\xA6\xB1 <EMOJI_COMPONENT_BALD> \xF0\x9F\xA6\xB2 @@ -82736,9 +83173,26 @@ CHARMAP <TOOTH> \xF0\x9F\xA6\xB7 <SUPERHERO> \xF0\x9F\xA6\xB8 <SUPERVILLAIN> \xF0\x9F\xA6\xB9 +<SAFETY_VEST> \xF0\x9F\xA6\xBA +<EAR_WITH_HEARING_AID> \xF0\x9F\xA6\xBB +<MOTORIZED_WHEELCHAIR> \xF0\x9F\xA6\xBC +<MANUAL_WHEELCHAIR> \xF0\x9F\xA6\xBD +<MECHANICAL_ARM> \xF0\x9F\xA6\xBE +<MECHANICAL_LEG> \xF0\x9F\xA6\xBF <CHEESE_WEDGE> \xF0\x9F\xA7\x80 <CUPCAKE> \xF0\x9F\xA7\x81 <SALT_SHAKER> \xF0\x9F\xA7\x82 +<BEVERAGE_BOX> \xF0\x9F\xA7\x83 +<GARLIC> \xF0\x9F\xA7\x84 +<ONION> \xF0\x9F\xA7\x85 +<FALAFEL> \xF0\x9F\xA7\x86 +<WAFFLE> \xF0\x9F\xA7\x87 +<BUTTER> \xF0\x9F\xA7\x88 +<MATE_DRINK> \xF0\x9F\xA7\x89 +<ICE_CUBE> \xF0\x9F\xA7\x8A +<STANDING_PERSON> \xF0\x9F\xA7\x8D +<KNEELING_PERSON> \xF0\x9F\xA7\x8E +<DEAF_PERSON> \xF0\x9F\xA7\x8F <FACE_WITH_MONOCLE> \xF0\x9F\xA7\x90 <ADULT> \xF0\x9F\xA7\x91 <CHILD> \xF0\x9F\xA7\x92 @@ -82787,6 +83241,90 @@ CHARMAP <SPONGE> \xF0\x9F\xA7\xBD <RECEIPT> \xF0\x9F\xA7\xBE <NAZAR_AMULET> \xF0\x9F\xA7\xBF +<NEUTRAL_CHESS_KING> \xF0\x9F\xA8\x80 +<NEUTRAL_CHESS_QUEEN> \xF0\x9F\xA8\x81 +<NEUTRAL_CHESS_ROOK> \xF0\x9F\xA8\x82 +<NEUTRAL_CHESS_BISHOP> \xF0\x9F\xA8\x83 +<NEUTRAL_CHESS_KNIGHT> \xF0\x9F\xA8\x84 +<NEUTRAL_CHESS_PAWN> \xF0\x9F\xA8\x85 +<WHITE_CHESS_KNIGHT_ROTATED_FORTY-FIVE_DEGREES> \xF0\x9F\xA8\x86 +<BLACK_CHESS_KNIGHT_ROTATED_FORTY-FIVE_DEGREES> \xF0\x9F\xA8\x87 +<NEUTRAL_CHESS_KNIGHT_ROTATED_FORTY-FIVE_DEGREES> \xF0\x9F\xA8\x88 +<WHITE_CHESS_KING_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x89 +<WHITE_CHESS_QUEEN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8A +<WHITE_CHESS_ROOK_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8B +<WHITE_CHESS_BISHOP_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8C +<WHITE_CHESS_KNIGHT_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8D +<WHITE_CHESS_PAWN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8E +<BLACK_CHESS_KING_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x8F 
+<BLACK_CHESS_QUEEN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x90 +<BLACK_CHESS_ROOK_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x91 +<BLACK_CHESS_BISHOP_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x92 +<BLACK_CHESS_KNIGHT_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x93 +<BLACK_CHESS_PAWN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x94 +<NEUTRAL_CHESS_KING_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x95 +<NEUTRAL_CHESS_QUEEN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x96 +<NEUTRAL_CHESS_ROOK_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x97 +<NEUTRAL_CHESS_BISHOP_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x98 +<NEUTRAL_CHESS_KNIGHT_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x99 +<NEUTRAL_CHESS_PAWN_ROTATED_NINETY_DEGREES> \xF0\x9F\xA8\x9A +<WHITE_CHESS_KNIGHT_ROTATED_ONE_HUNDRED_THIRTY-FIVE_DEGREES> \xF0\x9F\xA8\x9B +<BLACK_CHESS_KNIGHT_ROTATED_ONE_HUNDRED_THIRTY-FIVE_DEGREES> \xF0\x9F\xA8\x9C +<NEUTRAL_CHESS_KNIGHT_ROTATED_ONE_HUNDRED_THIRTY-FIVE_DEGREES> \xF0\x9F\xA8\x9D +<WHITE_CHESS_TURNED_KING> \xF0\x9F\xA8\x9E +<WHITE_CHESS_TURNED_QUEEN> \xF0\x9F\xA8\x9F +<WHITE_CHESS_TURNED_ROOK> \xF0\x9F\xA8\xA0 +<WHITE_CHESS_TURNED_BISHOP> \xF0\x9F\xA8\xA1 +<WHITE_CHESS_TURNED_KNIGHT> \xF0\x9F\xA8\xA2 +<WHITE_CHESS_TURNED_PAWN> \xF0\x9F\xA8\xA3 +<BLACK_CHESS_TURNED_KING> \xF0\x9F\xA8\xA4 +<BLACK_CHESS_TURNED_QUEEN> \xF0\x9F\xA8\xA5 +<BLACK_CHESS_TURNED_ROOK> \xF0\x9F\xA8\xA6 +<BLACK_CHESS_TURNED_BISHOP> \xF0\x9F\xA8\xA7 +<BLACK_CHESS_TURNED_KNIGHT> \xF0\x9F\xA8\xA8 +<BLACK_CHESS_TURNED_PAWN> \xF0\x9F\xA8\xA9 +<NEUTRAL_CHESS_TURNED_KING> \xF0\x9F\xA8\xAA +<NEUTRAL_CHESS_TURNED_QUEEN> \xF0\x9F\xA8\xAB +<NEUTRAL_CHESS_TURNED_ROOK> \xF0\x9F\xA8\xAC +<NEUTRAL_CHESS_TURNED_BISHOP> \xF0\x9F\xA8\xAD +<NEUTRAL_CHESS_TURNED_KNIGHT> \xF0\x9F\xA8\xAE +<NEUTRAL_CHESS_TURNED_PAWN> \xF0\x9F\xA8\xAF +<WHITE_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_TWENTY-FIVE_DEGREES> \xF0\x9F\xA8\xB0 +<BLACK_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_TWENTY-FIVE_DEGREES> \xF0\x9F\xA8\xB1 +<NEUTRAL_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_TWENTY-FIVE_DEGREES> \xF0\x9F\xA8\xB2 
+<WHITE_CHESS_KING_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB3 +<WHITE_CHESS_QUEEN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB4 +<WHITE_CHESS_ROOK_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB5 +<WHITE_CHESS_BISHOP_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB6 +<WHITE_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB7 +<WHITE_CHESS_PAWN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB8 +<BLACK_CHESS_KING_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xB9 +<BLACK_CHESS_QUEEN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBA +<BLACK_CHESS_ROOK_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBB +<BLACK_CHESS_BISHOP_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBC +<BLACK_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBD +<BLACK_CHESS_PAWN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBE +<NEUTRAL_CHESS_KING_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA8\xBF +<NEUTRAL_CHESS_QUEEN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x80 +<NEUTRAL_CHESS_ROOK_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x81 +<NEUTRAL_CHESS_BISHOP_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x82 +<NEUTRAL_CHESS_KNIGHT_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x83 +<NEUTRAL_CHESS_PAWN_ROTATED_TWO_HUNDRED_SEVENTY_DEGREES> \xF0\x9F\xA9\x84 +<WHITE_CHESS_KNIGHT_ROTATED_THREE_HUNDRED_FIFTEEN_DEGREES> \xF0\x9F\xA9\x85 +<BLACK_CHESS_KNIGHT_ROTATED_THREE_HUNDRED_FIFTEEN_DEGREES> \xF0\x9F\xA9\x86 +<NEUTRAL_CHESS_KNIGHT_ROTATED_THREE_HUNDRED_FIFTEEN_DEGREES> \xF0\x9F\xA9\x87 +<WHITE_CHESS_EQUIHOPPER> \xF0\x9F\xA9\x88 +<BLACK_CHESS_EQUIHOPPER> \xF0\x9F\xA9\x89 +<NEUTRAL_CHESS_EQUIHOPPER> \xF0\x9F\xA9\x8A +<WHITE_CHESS_EQUIHOPPER_ROTATED_NINETY_DEGREES> \xF0\x9F\xA9\x8B +<BLACK_CHESS_EQUIHOPPER_ROTATED_NINETY_DEGREES> \xF0\x9F\xA9\x8C +<NEUTRAL_CHESS_EQUIHOPPER_ROTATED_NINETY_DEGREES> \xF0\x9F\xA9\x8D +<WHITE_CHESS_KNIGHT-QUEEN> \xF0\x9F\xA9\x8E +<WHITE_CHESS_KNIGHT-ROOK> \xF0\x9F\xA9\x8F 
+<WHITE_CHESS_KNIGHT-BISHOP> \xF0\x9F\xA9\x90 +<BLACK_CHESS_KNIGHT-QUEEN> \xF0\x9F\xA9\x91 +<BLACK_CHESS_KNIGHT-ROOK> \xF0\x9F\xA9\x92 +<BLACK_CHESS_KNIGHT-BISHOP> \xF0\x9F\xA9\x93 <XIANGQI_RED_GENERAL> \xF0\x9F\xA9\xA0 <XIANGQI_RED_MANDARIN> \xF0\x9F\xA9\xA1 <XIANGQI_RED_ELEPHANT> \xF0\x9F\xA9\xA2 @@ -82801,6 +83339,22 @@ CHARMAP <XIANGQI_BLACK_CHARIOT> \xF0\x9F\xA9\xAB <XIANGQI_BLACK_CANNON> \xF0\x9F\xA9\xAC <XIANGQI_BLACK_SOLDIER> \xF0\x9F\xA9\xAD +<BALLET_SHOES> \xF0\x9F\xA9\xB0 +<ONE-PIECE_SWIMSUIT> \xF0\x9F\xA9\xB1 +<BRIEFS> \xF0\x9F\xA9\xB2 +<SHORTS> \xF0\x9F\xA9\xB3 +<DROP_OF_BLOOD> \xF0\x9F\xA9\xB8 +<ADHESIVE_BANDAGE> \xF0\x9F\xA9\xB9 +<STETHOSCOPE> \xF0\x9F\xA9\xBA +<YO-YO> \xF0\x9F\xAA\x80 +<KITE> \xF0\x9F\xAA\x81 +<PARACHUTE> \xF0\x9F\xAA\x82 +<RINGED_PLANET> \xF0\x9F\xAA\x90 +<CHAIR> \xF0\x9F\xAA\x91 +<RAZOR> \xF0\x9F\xAA\x92 +<AXE> \xF0\x9F\xAA\x93 +<DIYA_LAMP> \xF0\x9F\xAA\x94 +<BANJO> \xF0\x9F\xAA\x95 <CJK_UNIFIED_IDEOGRAPH-20000> \xF0\xA0\x80\x80 <CJK_UNIFIED_IDEOGRAPH-20001> \xF0\xA0\x80\x81 <CJK_UNIFIED_IDEOGRAPH-20002> \xF0\xA0\x80\x82 diff --git a/tools/tools/locale/patch/patch-UnicodeData.txt b/tools/tools/locale/patch/patch-UnicodeData.txt new file mode 100644 index 000000000000..fe65ebacd16a --- /dev/null +++ b/tools/tools/locale/patch/patch-UnicodeData.txt @@ -0,0 +1,29 @@ +--- UnicodeData.txt.orig 2020-06-29 14:05:49.483379000 +0900 ++++ UnicodeData.txt 2020-06-29 14:12:09.808622000 +0900 +@@ -12138,7 +12138,7 @@ + 33FE;IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE;So;0;L;<compat> 0033 0031 65E5;;;;N;;;;; + 33FF;SQUARE GAL;So;0;ON;<square> 0067 0061 006C;;;;N;;;;; + 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;; +-4DBF;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;; ++4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;; + 4DC0;HEXAGRAM FOR THE CREATIVE HEAVEN;So;0;ON;;;;;N;;;;; + 4DC1;HEXAGRAM FOR THE RECEPTIVE EARTH;So;0;ON;;;;;N;;;;; + 4DC2;HEXAGRAM FOR DIFFICULTY AT THE BEGINNING;So;0;ON;;;;;N;;;;; +@@ -12204,7 
+12204,7 @@ + 4DFE;HEXAGRAM FOR AFTER COMPLETION;So;0;ON;;;;;N;;;;; + 4DFF;HEXAGRAM FOR BEFORE COMPLETION;So;0;ON;;;;;N;;;;; + 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; +-9FFC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; ++9FEF;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; + A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;; + A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;; + A002;YI SYLLABLE I;Lo;0;L;;;;;N;;;;; +@@ -32901,7 +32901,7 @@ + 1FBF8;SEGMENTED DIGIT EIGHT;Nd;0;EN;<font> 0038;8;8;8;N;;;;; + 1FBF9;SEGMENTED DIGIT NINE;Nd;0;EN;<font> 0039;9;9;9;N;;;;; + 20000;<CJK Ideograph Extension B, First>;Lo;0;L;;;;;N;;;;; +-2A6DD;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;; ++2A6D6;<CJK Ideograph Extension B, Last>;Lo;0;L;;;;;N;;;;; + 2A700;<CJK Ideograph Extension C, First>;Lo;0;L;;;;;N;;;;; + 2B734;<CJK Ideograph Extension C, Last>;Lo;0;L;;;;;N;;;;; + 2B740;<CJK Ideograph Extension D, First>;Lo;0;L;;;;;N;;;;; diff --git a/tools/tools/locale/tools/cldr2def.pl b/tools/tools/locale/tools/cldr2def.pl index 3a6f8ac79d18..5f756cc3895a 100755 --- a/tools/tools/locale/tools/cldr2def.pl +++ b/tools/tools/locale/tools/cldr2def.pl @@ -460,6 +460,11 @@ sub transform_ctypes { foreach my $enc (sort keys(%{$languages{$l}{$f}{data}{$c}})) { next if ($enc eq $DEFENCODING); $filename = "$UNIDIR/posix/$file.$DEFENCODING.src"; + if ($file eq 'ja_JP') { + # Override $filename for ja_JP because + # its CTYPE is not compatible with UTF-8. + $filename = "$UNIDIR/posix/$file.eucJP.src"; + } if (! 
-f $filename) { print STDERR "Cannot open $filename\n"; next; diff --git a/tools/tools/locale/tools/convert_map.pl b/tools/tools/locale/tools/convert_map.pl index 88222531d064..8b54ff33381b 100755 --- a/tools/tools/locale/tools/convert_map.pl +++ b/tools/tools/locale/tools/convert_map.pl @@ -87,7 +87,7 @@ sub load_utf8_cm { my $file = shift; - open(UTF8, "$file") || die "open"; + open(UTF8, "$file") || die "$!: open: $file"; while (<UTF8>) { next if (/^#/); @@ -158,7 +158,8 @@ $mf = shift(@ARGV); $codeset = shift(@ARGV); my $max_mb; -load_utf8_cm("etc/final-maps/map.UTF-8"); +my $etcdir = (exists $ENV{'ETCDIR'}) ? $ENV{'ETCDIR'} : "etc"; +load_utf8_cm("${etcdir}/final-maps/map.UTF-8"); load_map($mf); diff --git a/tools/tools/locale/tools/finalize b/tools/tools/locale/tools/finalize index f4dfd7d0892f..88dfcad0cb24 100755 --- a/tools/tools/locale/tools/finalize +++ b/tools/tools/locale/tools/finalize @@ -47,15 +47,21 @@ usage () $1 = "numericdef" -o $1 = "timedef" -o $1 = "ctypedef" ] || usage self=$(realpath $0) -base=$(dirname ${self}) -old=${base}/../${1}.draft -new=${base}/../${1} -TEMP=/tmp/${1}.locales -TEMP2=/tmp/${1}.hashes -TEMP3=/tmp/${1}.symlinks -TEMP4=/tmp/${1}.mapped -FULLMAP=/tmp/utf8-map -FULLEXTRACT=/tmp/extracted-names +base=${BASEDIR:-$(dirname ${self})} +: ${ETCDIR:=${base}/../etc} +: ${TOOLSDIR:=${base}} +: ${OUTBASEDIR:=${base}/../${1}} +: ${OLD_DIR:=${OUTBASEDIR}.draft} +: ${NEW_DIR:=${OUTBASEDIR}} +old=${OLD_DIR} +new=${NEW_DIR} +: ${TMPDIR:=/tmp} +TEMP=${TMPDIR}/${1}.locales +TEMP2=${TMPDIR}/${1}.hashes +TEMP3=${TMPDIR}/${1}.symlinks +TEMP4=${TMPDIR}/${1}.mapped +FULLMAP=${TMPDIR}/utf8-map +FULLEXTRACT=${TMPDIR}/extracted-names AWKCMD="/## PLACEHOLDER/ { \ while ( getline line < \"${TEMP}\" ) {print line} } \ /## SYMPAIRS/ { \ @@ -65,6 +71,7 @@ AWKCMD="/## PLACEHOLDER/ { \ !/## / { print \$0 }" # Rename the sources with 3 components name into the POSIX version of the name using @modifier +mkdir -p $old $new cd $old pwd for i in *_*_*.*.src; 
do @@ -142,13 +149,13 @@ then rm -f ${TEMP2} /usr/bin/sed -E -e 's/[ ]+/ /g' \ ${UNIDIR}/posix/UTF-8.cm \ - > ${base}/../etc/final-maps/map.UTF-8 + > ${ETCDIR}/final-maps/map.UTF-8 /usr/bin/sed -E -e 's/[ ]+/ /g' \ ${UNIDIR}/posix/eucCN.cm \ - > ${base}/../etc/final-maps/map.eucCN + > ${ETCDIR}/final-maps/map.eucCN /usr/bin/sed -E -e 's/[ ]+/ /g' \ ${UNIDIR}/posix/eucCN.cm \ - > ${base}/../etc/final-maps/map.GB2312 + > ${ETCDIR}/final-maps/map.GB2312 # GB18030 and Big5 are pre-generated from CLDR data CHARMAPS="ARMSCII-8 CP1131 CP1251 \ @@ -160,10 +167,11 @@ then for map in ${CHARMAPS} do encoding=${map} - /usr/local/bin/perl ${base}/convert_map.pl \ - ${base}/../etc/charmaps/${map}.TXT ${encoding} \ + env ETCDIR="${ETCDIR}" \ + /usr/local/bin/perl ${TOOLSDIR}/convert_map.pl \ + ${ETCDIR}/charmaps/${map}.TXT ${encoding} \ | /usr/bin/sed -E -e 's/ +/ /g' \ - > ${base}/../etc/final-maps/map.${map} + > ${ETCDIR}/final-maps/map.${map} echo map ${map} converted. done diff --git a/tools/tools/locale/tools/utf8-rollup.pl b/tools/tools/locale/tools/utf8-rollup.pl index da93d2f4398a..b275828d52c9 100755 --- a/tools/tools/locale/tools/utf8-rollup.pl +++ b/tools/tools/locale/tools/utf8-rollup.pl @@ -30,6 +30,7 @@ use strict; use Getopt::Long; +use Encode qw(encode decode); if ($#ARGV != 0) { print "Usage: $0 --unidir=<unidir>\n"; @@ -52,6 +53,23 @@ generate_footer (); ############################ +sub utf8to32 { + my @kl = split /\\x/, $_[0]; + + shift @kl if ($kl[0] eq ''); + my $k = pack('H2' x scalar @kl, @kl); + my $ux = encode('UTF-32BE', decode('UTF-8', $k)); + my $u = uc(unpack('H*', $ux)); + # Remove BOM + $u =~ s/^0000FEFF//; + # Remove heading bytes of 0 + while ($u =~ m/^0/ and length($u) > 4) { + $u =~ s/^0//; + } + + return $u; +} + sub get_utf8map { my $file = shift; @@ -75,9 +93,10 @@ sub get_utf8map { last if ($l eq "END CHARMAP"); $l =~ /^(<[^\s]+>)\s+(.*)/; - my $k = $2; + my $k = utf8to32($2); # UTF-8 char code my $v = $1; - $k =~ s/\\x//g; # UTF-8 char 
code + +# print STDERR "register: $k - $v\n"; $utf8map{$k} = $v; } } @@ -143,7 +162,7 @@ sub parse_unidata { foreach my $l (@lines) { my @d = split(/;/, $l, -1); - my $mb = wctomb($d[0]); + my $mb = $d[0]; my $cat; # XXX There are code points present in UnicodeData.txt @@ -180,9 +199,9 @@ sub parse_unidata { # Check if there's upper/lower mapping if ($d[12] ne "") { - $data{'toupper'}{$mb} = wctomb($d[12]); + $data{'toupper'}{$mb} = $d[12]; } elsif ($d[13] ne "") { - $data{'tolower'}{$mb} = wctomb($d[13]); + $data{'tolower'}{$mb} = $d[13]; } } @@ -193,7 +212,7 @@ sub parse_unidata { foreach my $cat (sort keys (%data)) { print FOUT "$cat\t"; $first = 1; - foreach my $mb (sort keys (%{$data{$cat}})) { + foreach my $mb (sort {hex($a) <=> hex($b)} keys (%{$data{$cat}})) { if ($first == 1) { $first = 0; } elsif ($inrange == 1) { |