diff options
Diffstat (limited to 'lib/Support/UnicodeCaseFold.cpp')
| -rw-r--r-- | lib/Support/UnicodeCaseFold.cpp | 742 | 
1 files changed, 742 insertions, 0 deletions
| diff --git a/lib/Support/UnicodeCaseFold.cpp b/lib/Support/UnicodeCaseFold.cpp new file mode 100644 index 000000000000..b18d49dbafb0 --- /dev/null +++ b/lib/Support/UnicodeCaseFold.cpp @@ -0,0 +1,742 @@ +//===---------- Support/UnicodeCaseFold.cpp -------------------------------===// +// +// This file was generated by utils/unicode-case-fold.py from the Unicode +// case folding database at +//    http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt +// +// To regenerate this file, run: +//   utils/unicode-case-fold.py \ +//     "http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt" \ +//     > lib/Support/UnicodeCaseFold.cpp +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Unicode.h" + +int llvm::sys::unicode::foldCharSimple(int C) { +  if (C < 0x0041) +    return C; +  // 26 characters +  if (C <= 0x005a) +    return C + 32; +  // MICRO SIGN +  if (C == 0x00b5) +    return 0x03bc; +  if (C < 0x00c0) +    return C; +  // 23 characters +  if (C <= 0x00d6) +    return C + 32; +  if (C < 0x00d8) +    return C; +  // 7 characters +  if (C <= 0x00de) +    return C + 32; +  if (C < 0x0100) +    return C; +  // 24 characters +  if (C <= 0x012e) +    return C | 1; +  if (C < 0x0132) +    return C; +  // 3 characters +  if (C <= 0x0136) +    return C | 1; +  if (C < 0x0139) +    return C; +  // 8 characters +  if (C <= 0x0147 && C % 2 == 1) +    return C + 1; +  if (C < 0x014a) +    return C; +  // 23 characters +  if (C <= 0x0176) +    return C | 1; +  // LATIN CAPITAL LETTER Y WITH DIAERESIS +  if (C == 0x0178) +    return 0x00ff; +  if (C < 0x0179) +    return C; +  // 3 characters +  if (C <= 0x017d && C % 2 == 1) +    return C + 1; +  // LATIN SMALL LETTER LONG S +  if (C == 0x017f) +    return 0x0073; +  // LATIN CAPITAL LETTER B WITH HOOK +  if (C == 0x0181) +    return 0x0253; +  if (C < 0x0182) +    return C; +  // 2 characters +  if (C <= 0x0184) +    return C | 1; +  // LATIN CAPITAL LETTER OPEN O +  if (C == 0x0186) +    return 0x0254; +  // LATIN CAPITAL LETTER C WITH HOOK +  if (C == 0x0187) +    return 0x0188; +  if (C < 0x0189) +    return C; +  // 2 characters +  if (C <= 0x018a) +    return C + 205; +  // LATIN CAPITAL LETTER D WITH TOPBAR +  if (C == 0x018b) +    return 0x018c; +  // LATIN CAPITAL LETTER REVERSED E +  if (C == 0x018e) +    return 0x01dd; +  // LATIN CAPITAL LETTER SCHWA +  if (C == 0x018f) +    return 0x0259; +  // LATIN CAPITAL LETTER OPEN E +  if (C == 0x0190) +    return 0x025b; +  // LATIN CAPITAL LETTER F WITH HOOK +  if (C == 0x0191) +    return 0x0192; +  // LATIN CAPITAL LETTER G WITH HOOK +  if (C == 0x0193) +    return 0x0260; +  // LATIN CAPITAL LETTER GAMMA +  if (C == 0x0194) +    return 0x0263; +  // LATIN CAPITAL LETTER IOTA +  if (C == 0x0196) +    return 0x0269; +  // LATIN CAPITAL LETTER I WITH STROKE +  if (C == 0x0197) +    return 0x0268; +  // LATIN CAPITAL LETTER K WITH HOOK +  if (C == 0x0198) +    return 0x0199; +  // LATIN CAPITAL LETTER TURNED M +  if (C == 0x019c) +    return 0x026f; +  // LATIN CAPITAL LETTER N WITH LEFT HOOK +  if (C == 0x019d) +    return 0x0272; +  // LATIN CAPITAL LETTER O WITH MIDDLE TILDE +  if (C == 0x019f) +    return 0x0275; +  if (C < 0x01a0) +    return C; +  // 3 characters +  if (C <= 0x01a4) +    return C | 1; +  // LATIN LETTER YR +  if (C == 0x01a6) +    return 0x0280; +  // LATIN CAPITAL LETTER TONE TWO +  if (C == 0x01a7) +    return 0x01a8; +  // LATIN CAPITAL LETTER ESH +  if (C == 0x01a9) +    return 0x0283; +  // LATIN CAPITAL LETTER T WITH HOOK +  if (C == 0x01ac) +    return 0x01ad; +  // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +  if (C == 0x01ae) +    return 0x0288; +  // LATIN CAPITAL LETTER U WITH HORN +  if (C == 0x01af) +    return 0x01b0; +  if (C < 0x01b1) +    return C; +  // 2 characters +  if (C <= 0x01b2) +    return C + 217; +  if (C < 0x01b3) +    return C; +  // 2 characters +  if (C <= 0x01b5 && C % 2 == 1) +    return C + 1; +  // LATIN CAPITAL LETTER EZH +  if (C == 0x01b7) +    return 0x0292; +  if (C < 0x01b8) +    return C; +  // 2 characters +  if (C <= 0x01bc && C % 4 == 0) +    return C + 1; +  // LATIN CAPITAL LETTER DZ WITH CARON +  if (C == 0x01c4) +    return 0x01c6; +  // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +  if (C == 0x01c5) +    return 0x01c6; +  // LATIN CAPITAL LETTER LJ +  if (C == 0x01c7) +    return 0x01c9; +  // LATIN CAPITAL LETTER L WITH SMALL LETTER J +  if (C == 0x01c8) +    return 0x01c9; +  // LATIN CAPITAL LETTER NJ +  if (C == 0x01ca) +    return 0x01cc; +  if (C < 0x01cb) +    return C; +  // 9 characters +  if (C <= 0x01db && C % 2 == 1) +    return C + 1; +  if (C < 0x01de) +    return C; +  // 9 characters +  if (C <= 0x01ee) +    return C | 1; +  // LATIN CAPITAL LETTER DZ +  if (C == 0x01f1) +    return 0x01f3; +  if (C < 0x01f2) +    return C; +  // 2 characters +  if (C <= 0x01f4) +    return C | 1; +  // LATIN CAPITAL LETTER HWAIR +  if (C == 0x01f6) +    return 0x0195; +  // LATIN CAPITAL LETTER WYNN +  if (C == 0x01f7) +    return 0x01bf; +  if (C < 0x01f8) +    return C; +  // 20 characters +  if (C <= 0x021e) +    return C | 1; +  // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +  if (C == 0x0220) +    return 0x019e; +  if (C < 0x0222) +    return C; +  // 9 characters +  if (C <= 0x0232) +    return C | 1; +  // LATIN CAPITAL LETTER A WITH STROKE +  if (C == 0x023a) +    return 0x2c65; +  // LATIN CAPITAL LETTER C WITH STROKE +  if (C == 0x023b) +    return 0x023c; +  // LATIN CAPITAL LETTER L WITH BAR +  if (C == 0x023d) +    return 0x019a; +  // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +  if (C == 0x023e) +    return 0x2c66; +  // LATIN CAPITAL LETTER GLOTTAL STOP +  if (C == 0x0241) +    return 0x0242; +  // LATIN CAPITAL LETTER B WITH STROKE +  if (C == 0x0243) +    return 0x0180; +  // LATIN CAPITAL LETTER U BAR +  if (C == 0x0244) +    return 0x0289; +  // LATIN CAPITAL LETTER TURNED V +  if (C == 0x0245) +    return 0x028c; +  if (C < 0x0246) +    return C; +  // 5 characters +  if (C <= 0x024e) +    return C | 1; +  // COMBINING GREEK YPOGEGRAMMENI +  if (C == 0x0345) +    return 0x03b9; +  if (C < 0x0370) +    return C; +  // 2 characters +  if (C <= 0x0372) +    return C | 1; +  // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +  if (C == 0x0376) +    return 0x0377; +  // GREEK CAPITAL LETTER YOT +  if (C == 0x037f) +    return 0x03f3; +  // GREEK CAPITAL LETTER ALPHA WITH TONOS +  if (C == 0x0386) +    return 0x03ac; +  if (C < 0x0388) +    return C; +  // 3 characters +  if (C <= 0x038a) +    return C + 37; +  // GREEK CAPITAL LETTER OMICRON WITH TONOS +  if (C == 0x038c) +    return 0x03cc; +  if (C < 0x038e) +    return C; +  // 2 characters +  if (C <= 0x038f) +    return C + 63; +  if (C < 0x0391) +    return C; +  // 17 characters +  if (C <= 0x03a1) +    return C + 32; +  if (C < 0x03a3) +    return C; +  // 9 characters +  if (C <= 0x03ab) +    return C + 32; +  // GREEK SMALL LETTER FINAL SIGMA +  if (C == 0x03c2) +    return 0x03c3; +  // GREEK CAPITAL KAI SYMBOL +  if (C == 0x03cf) +    return 0x03d7; +  // GREEK BETA SYMBOL +  if (C == 0x03d0) +    return 0x03b2; +  // GREEK THETA SYMBOL +  if (C == 0x03d1) +    return 0x03b8; +  // GREEK PHI SYMBOL +  if (C == 0x03d5) +    return 0x03c6; +  // GREEK PI SYMBOL +  if (C == 0x03d6) +    return 0x03c0; +  if (C < 0x03d8) +    return C; +  // 12 characters +  if (C <= 0x03ee) +    return C | 1; +  // GREEK KAPPA SYMBOL +  if (C == 0x03f0) +    return 0x03ba; +  // GREEK RHO SYMBOL +  if (C == 0x03f1) +    return 0x03c1; +  // GREEK CAPITAL THETA SYMBOL +  if (C == 0x03f4) +    return 0x03b8; +  // GREEK LUNATE EPSILON SYMBOL +  if (C == 0x03f5) +    return 0x03b5; +  // GREEK CAPITAL LETTER SHO +  if (C == 0x03f7) +    return 0x03f8; +  // GREEK CAPITAL LUNATE SIGMA SYMBOL +  if (C == 0x03f9) +    return 0x03f2; +  // GREEK CAPITAL LETTER SAN +  if (C == 0x03fa) +    return 0x03fb; +  if (C < 0x03fd) +    return C; +  // 3 characters +  if (C <= 0x03ff) +    return C + -130; +  if (C < 0x0400) +    return C; +  // 16 characters +  if (C <= 0x040f) +    return C + 80; +  if (C < 0x0410) +    return C; +  // 32 characters +  if (C <= 0x042f) +    return C + 32; +  if (C < 0x0460) +    return C; +  // 17 characters +  if (C <= 0x0480) +    return C | 1; +  if (C < 0x048a) +    return C; +  // 27 characters +  if (C <= 0x04be) +    return C | 1; +  // CYRILLIC LETTER PALOCHKA +  if (C == 0x04c0) +    return 0x04cf; +  if (C < 0x04c1) +    return C; +  // 7 characters +  if (C <= 0x04cd && C % 2 == 1) +    return C + 1; +  if (C < 0x04d0) +    return C; +  // 48 characters +  if (C <= 0x052e) +    return C | 1; +  if (C < 0x0531) +    return C; +  // 38 characters +  if (C <= 0x0556) +    return C + 48; +  if (C < 0x10a0) +    return C; +  // 38 characters +  if (C <= 0x10c5) +    return C + 7264; +  if (C < 0x10c7) +    return C; +  // 2 characters +  if (C <= 0x10cd && C % 6 == 5) +    return C + 7264; +  if (C < 0x13f8) +    return C; +  // 6 characters +  if (C <= 0x13fd) +    return C + -8; +  // CYRILLIC SMALL LETTER ROUNDED VE +  if (C == 0x1c80) +    return 0x0432; +  // CYRILLIC SMALL LETTER LONG-LEGGED DE +  if (C == 0x1c81) +    return 0x0434; +  // CYRILLIC SMALL LETTER NARROW O +  if (C == 0x1c82) +    return 0x043e; +  if (C < 0x1c83) +    return C; +  // 2 characters +  if (C <= 0x1c84) +    return C + -6210; +  // CYRILLIC SMALL LETTER THREE-LEGGED TE +  if (C == 0x1c85) +    return 0x0442; +  // CYRILLIC SMALL LETTER TALL HARD SIGN +  if (C == 0x1c86) +    return 0x044a; +  // CYRILLIC SMALL LETTER TALL YAT +  if (C == 0x1c87) +    return 0x0463; +  // CYRILLIC SMALL LETTER UNBLENDED UK +  if (C == 0x1c88) +    return 0xa64b; +  if (C < 0x1e00) +    return C; +  // 75 characters +  if (C <= 0x1e94) +    return C | 1; +  // LATIN SMALL LETTER LONG S WITH DOT ABOVE +  if (C == 0x1e9b) +    return 0x1e61; +  // LATIN CAPITAL LETTER SHARP S +  if (C == 0x1e9e) +    return 0x00df; +  if (C < 0x1ea0) +    return C; +  // 48 characters +  if (C <= 0x1efe) +    return C | 1; +  if (C < 0x1f08) +    return C; +  // 8 characters +  if (C <= 0x1f0f) +    return C + -8; +  if (C < 0x1f18) +    return C; +  // 6 characters +  if (C <= 0x1f1d) +    return C + -8; +  if (C < 0x1f28) +    return C; +  // 8 characters +  if (C <= 0x1f2f) +    return C + -8; +  if (C < 0x1f38) +    return C; +  // 8 characters +  if (C <= 0x1f3f) +    return C + -8; +  if (C < 0x1f48) +    return C; +  // 6 characters +  if (C <= 0x1f4d) +    return C + -8; +  if (C < 0x1f59) +    return C; +  // 4 characters +  if (C <= 0x1f5f && C % 2 == 1) +    return C + -8; +  if (C < 0x1f68) +    return C; +  // 8 characters +  if (C <= 0x1f6f) +    return C + -8; +  if (C < 0x1f88) +    return C; +  // 8 characters +  if (C <= 0x1f8f) +    return C + -8; +  if (C < 0x1f98) +    return C; +  // 8 characters +  if (C <= 0x1f9f) +    return C + -8; +  if (C < 0x1fa8) +    return C; +  // 8 characters +  if (C <= 0x1faf) +    return C + -8; +  if (C < 0x1fb8) +    return C; +  // 2 characters +  if (C <= 0x1fb9) +    return C + -8; +  if (C < 0x1fba) +    return C; +  // 2 characters +  if (C <= 0x1fbb) +    return C + -74; +  // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +  if (C == 0x1fbc) +    return 0x1fb3; +  // GREEK PROSGEGRAMMENI +  if (C == 0x1fbe) +    return 0x03b9; +  if (C < 0x1fc8) +    return C; +  // 4 characters +  if (C <= 0x1fcb) +    return C + -86; +  // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +  if (C == 0x1fcc) +    return 0x1fc3; +  if (C < 0x1fd8) +    return C; +  // 2 characters +  if (C <= 0x1fd9) +    return C + -8; +  if (C < 0x1fda) +    return C; +  // 2 characters +  if (C <= 0x1fdb) +    return C + -100; +  if (C < 0x1fe8) +    return C; +  // 2 characters +  if (C <= 0x1fe9) +    return C + -8; +  if (C < 0x1fea) +    return C; +  // 2 characters +  if (C <= 0x1feb) +    return C + -112; +  // GREEK CAPITAL LETTER RHO WITH DASIA +  if (C == 0x1fec) +    return 0x1fe5; +  if (C < 0x1ff8) +    return C; +  // 2 characters +  if (C <= 0x1ff9) +    return C + -128; +  if (C < 0x1ffa) +    return C; +  // 2 characters +  if (C <= 0x1ffb) +    return C + -126; +  // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +  if (C == 0x1ffc) +    return 0x1ff3; +  // OHM SIGN +  if (C == 0x2126) +    return 0x03c9; +  // KELVIN SIGN +  if (C == 0x212a) +    return 0x006b; +  // ANGSTROM SIGN +  if (C == 0x212b) +    return 0x00e5; +  // TURNED CAPITAL F +  if (C == 0x2132) +    return 0x214e; +  if (C < 0x2160) +    return C; +  // 16 characters +  if (C <= 0x216f) +    return C + 16; +  // ROMAN NUMERAL REVERSED ONE HUNDRED +  if (C == 0x2183) +    return 0x2184; +  if (C < 0x24b6) +    return C; +  // 26 characters +  if (C <= 0x24cf) +    return C + 26; +  if (C < 0x2c00) +    return C; +  // 47 characters +  if (C <= 0x2c2e) +    return C + 48; +  // LATIN CAPITAL LETTER L WITH DOUBLE BAR +  if (C == 0x2c60) +    return 0x2c61; +  // LATIN CAPITAL LETTER L WITH MIDDLE TILDE +  if (C == 0x2c62) +    return 0x026b; +  // LATIN CAPITAL LETTER P WITH STROKE +  if (C == 0x2c63) +    return 0x1d7d; +  // LATIN CAPITAL LETTER R WITH TAIL +  if (C == 0x2c64) +    return 0x027d; +  if (C < 0x2c67) +    return C; +  // 3 characters +  if (C <= 0x2c6b && C % 2 == 1) +    return C + 1; +  // LATIN CAPITAL LETTER ALPHA +  if (C == 0x2c6d) +    return 0x0251; +  // LATIN CAPITAL LETTER M WITH HOOK +  if (C == 0x2c6e) +    return 0x0271; +  // LATIN CAPITAL LETTER TURNED A +  if (C == 0x2c6f) +    return 0x0250; +  // LATIN CAPITAL LETTER TURNED ALPHA +  if (C == 0x2c70) +    return 0x0252; +  if (C < 0x2c72) +    return C; +  // 2 characters +  if (C <= 0x2c75 && C % 3 == 2) +    return C + 1; +  if (C < 0x2c7e) +    return C; +  // 2 characters +  if (C <= 0x2c7f) +    return C + -10815; +  if (C < 0x2c80) +    return C; +  // 50 characters +  if (C <= 0x2ce2) +    return C | 1; +  if (C < 0x2ceb) +    return C; +  // 2 characters +  if (C <= 0x2ced && C % 2 == 1) +    return C + 1; +  if (C < 0x2cf2) +    return C; +  // 2 characters +  if (C <= 0xa640 && C % 31054 == 11506) +    return C + 1; +  if (C < 0xa642) +    return C; +  // 22 characters +  if (C <= 0xa66c) +    return C | 1; +  if (C < 0xa680) +    return C; +  // 14 characters +  if (C <= 0xa69a) +    return C | 1; +  if (C < 0xa722) +    return C; +  // 7 characters +  if (C <= 0xa72e) +    return C | 1; +  if (C < 0xa732) +    return C; +  // 31 characters +  if (C <= 0xa76e) +    return C | 1; +  if (C < 0xa779) +    return C; +  // 2 characters +  if (C <= 0xa77b && C % 2 == 1) +    return C + 1; +  // LATIN CAPITAL LETTER INSULAR G +  if (C == 0xa77d) +    return 0x1d79; +  if (C < 0xa77e) +    return C; +  // 5 characters +  if (C <= 0xa786) +    return C | 1; +  // LATIN CAPITAL LETTER SALTILLO +  if (C == 0xa78b) +    return 0xa78c; +  // LATIN CAPITAL LETTER TURNED H +  if (C == 0xa78d) +    return 0x0265; +  if (C < 0xa790) +    return C; +  // 2 characters +  if (C <= 0xa792) +    return C | 1; +  if (C < 0xa796) +    return C; +  // 10 characters +  if (C <= 0xa7a8) +    return C | 1; +  // LATIN CAPITAL LETTER H WITH HOOK +  if (C == 0xa7aa) +    return 0x0266; +  // LATIN CAPITAL LETTER REVERSED OPEN E +  if (C == 0xa7ab) +    return 0x025c; +  // LATIN CAPITAL LETTER SCRIPT G +  if (C == 0xa7ac) +    return 0x0261; +  // LATIN CAPITAL LETTER L WITH BELT +  if (C == 0xa7ad) +    return 0x026c; +  // LATIN CAPITAL LETTER SMALL CAPITAL I +  if (C == 0xa7ae) +    return 0x026a; +  // LATIN CAPITAL LETTER TURNED K +  if (C == 0xa7b0) +    return 0x029e; +  // LATIN CAPITAL LETTER TURNED T +  if (C == 0xa7b1) +    return 0x0287; +  // LATIN CAPITAL LETTER J WITH CROSSED-TAIL +  if (C == 0xa7b2) +    return 0x029d; +  // LATIN CAPITAL LETTER CHI +  if (C == 0xa7b3) +    return 0xab53; +  if (C < 0xa7b4) +    return C; +  // 2 characters +  if (C <= 0xa7b6) +    return C | 1; +  if (C < 0xab70) +    return C; +  // 80 characters +  if (C <= 0xabbf) +    return C + -38864; +  if (C < 0xff21) +    return C; +  // 26 characters +  if (C <= 0xff3a) +    return C + 32; +  if (C < 0x10400) +    return C; +  // 40 characters +  if (C <= 0x10427) +    return C + 40; +  if (C < 0x104b0) +    return C; +  // 36 characters +  if (C <= 0x104d3) +    return C + 40; +  if (C < 0x10c80) +    return C; +  // 51 characters +  if (C <= 0x10cb2) +    return C + 64; +  if (C < 0x118a0) +    return C; +  // 32 characters +  if (C <= 0x118bf) +    return C + 32; +  if (C < 0x1e900) +    return C; +  // 34 characters +  if (C <= 0x1e921) +    return C + 34; + +  return C; +} | 
