about summary refs log tree commit diff
path: root/llvm/lib/Support/UnicodeNameToCodepoint.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Support/UnicodeNameToCodepoint.cpp')
-rw-r--r-- llvm/lib/Support/UnicodeNameToCodepoint.cpp 47
1 files changed, 23 insertions, 24 deletions
diff --git a/llvm/lib/Support/UnicodeNameToCodepoint.cpp b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
index 1e8aebf1b8eb..accebf1098ab 100644
--- a/llvm/lib/Support/UnicodeNameToCodepoint.cpp
+++ b/llvm/lib/Support/UnicodeNameToCodepoint.cpp
@@ -46,7 +46,7 @@ struct Node {
std::string S;
// Reserve enough space for most unicode code points.
// The chosen value represent the 99th percentile of name size as of
- // Unicode 14.
+ // Unicode 15.0.
S.reserve(46);
const Node *N = this;
while (N) {
@@ -105,7 +105,7 @@ static Node readNode(uint32_t Offset, const Node *Parent = nullptr) {
uint8_t H = UnicodeNameToCodepointIndex[Offset++];
N.HasSibling = H & 0x80;
bool HasChildren = H & 0x40;
- H &= ~0xC0;
+ H &= uint8_t(~0xC0);
if (HasChildren) {
N.ChildrenOffset = (H << 16);
N.ChildrenOffset |=
@@ -251,7 +251,7 @@ constexpr const char *const HangulSyllables[][3] = {
};
// clang-format on
-// Unicode 14.0
+// Unicode 15.0
// 3.12 Conjoining Jamo Behavior Common constants
constexpr const char32_t SBase = 0xAC00;
constexpr const uint32_t LCount = 19;
@@ -285,7 +285,7 @@ static std::size_t findSyllable(StringRef Name, bool Strict,
return size_t(Len);
}
-static llvm::Optional<char32_t>
+static std::optional<char32_t>
nameToHangulCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
Buffer.clear();
// Hangul Syllable Decomposition
@@ -294,7 +294,7 @@ nameToHangulCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
bool DoesStartWith = startsWith(Name, "HANGUL SYLLABLE ", Strict, Consummed,
NameStart, NeedleStart);
if (!DoesStartWith)
- return None;
+ return std::nullopt;
Name = Name.substr(Consummed);
int L = -1, V = -1, T = -1;
Name = Name.substr(findSyllable(Name, Strict, NameStart, L, 0));
@@ -314,7 +314,7 @@ nameToHangulCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
std::uint32_t(T);
}
// Otherwise, it's an illegal syllable name.
- return None;
+ return std::nullopt;
}
struct GeneratedNamesData {
@@ -323,18 +323,17 @@ struct GeneratedNamesData {
uint32_t End;
};
-// Unicode 14.0 Table 4-8. Name Derivation Rule Prefix Strings
-// This needs to be kept in sync with
-// llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp
+// Unicode 15.0 Table 4-8. Name Derivation Rule Prefix Strings
static const GeneratedNamesData GeneratedNamesDataTable[] = {
{"CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4DBF},
- {"CJK UNIFIED IDEOGRAPH-", 0x4E00, 0x9FFC},
- {"CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2A6DD},
- {"CJK UNIFIED IDEOGRAPH-", 0x2A700, 0x2B734},
+ {"CJK UNIFIED IDEOGRAPH-", 0x4E00, 0x9FFF},
+ {"CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2A6DF},
+ {"CJK UNIFIED IDEOGRAPH-", 0x2A700, 0x2B739},
{"CJK UNIFIED IDEOGRAPH-", 0x2B740, 0x2B81D},
{"CJK UNIFIED IDEOGRAPH-", 0x2B820, 0x2CEA1},
{"CJK UNIFIED IDEOGRAPH-", 0x2CEB0, 0x2EBE0},
{"CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134A},
+ {"CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323AF},
{"TANGUT IDEOGRAPH-", 0x17000, 0x187F7},
{"TANGUT IDEOGRAPH-", 0x18D00, 0x18D08},
{"KHITAN SMALL SCRIPT CHARACTER-", 0x18B00, 0x18CD5},
@@ -344,7 +343,7 @@ static const GeneratedNamesData GeneratedNamesDataTable[] = {
{"CJK COMPATIBILITY IDEOGRAPH-", 0x2F800, 0x2FA1D},
};
-static llvm::Optional<char32_t>
+static std::optional<char32_t>
nameToGeneratedCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
for (auto &&Item : GeneratedNamesDataTable) {
Buffer.clear();
@@ -368,15 +367,15 @@ nameToGeneratedCodePoint(StringRef Name, bool Strict, BufferType &Buffer) {
}
return V;
}
- return None;
+ return std::nullopt;
}
-static llvm::Optional<char32_t> nameToCodepoint(StringRef Name, bool Strict,
- BufferType &Buffer) {
+static std::optional<char32_t> nameToCodepoint(StringRef Name, bool Strict,
+ BufferType &Buffer) {
if (Name.empty())
- return None;
+ return std::nullopt;
- llvm::Optional<char32_t> Res = nameToHangulCodePoint(Name, Strict, Buffer);
+ std::optional<char32_t> Res = nameToHangulCodePoint(Name, Strict, Buffer);
if (!Res)
Res = nameToGeneratedCodePoint(Name, Strict, Buffer);
if (Res)
@@ -398,22 +397,22 @@ static llvm::Optional<char32_t> nameToCodepoint(StringRef Name, bool Strict,
}
return Value;
}
- return None;
+ return std::nullopt;
}
-llvm::Optional<char32_t> nameToCodepointStrict(StringRef Name) {
+std::optional<char32_t> nameToCodepointStrict(StringRef Name) {
BufferType Buffer;
auto Opt = nameToCodepoint(Name, true, Buffer);
return Opt;
}
-llvm::Optional<LooseMatchingResult>
+std::optional<LooseMatchingResult>
nameToCodepointLooseMatching(StringRef Name) {
BufferType Buffer;
auto Opt = nameToCodepoint(Name, false, Buffer);
if (!Opt)
- return None;
+ return std::nullopt;
return LooseMatchingResult{*Opt, Buffer};
}
@@ -445,8 +444,8 @@ nearestMatchesForCodepointName(StringRef Pattern, std::size_t MaxMatchesCount) {
return Name;
};
- auto It = std::lower_bound(
- Matches.begin(), Matches.end(), Distance,
+ auto It = llvm::lower_bound(
+ Matches, Distance,
[&](const MatchForCodepointName &a, std::size_t Distance) {
if (Distance == a.Distance)
return a.Name < GetName();