diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp | 182 |
1 files changed, 175 insertions, 7 deletions
diff --git a/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp b/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp index f916300835ce..dcac0bd63859 100644 --- a/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp +++ b/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp @@ -23,7 +23,7 @@ using namespace llvm; -using llvm::itanium_demangle::OutputStream; +using llvm::itanium_demangle::OutputBuffer; using llvm::itanium_demangle::StringView; using llvm::itanium_demangle::SwapAndRestore; @@ -88,7 +88,7 @@ class Demangler { public: // Demangled output. - OutputStream Output; + OutputBuffer Output; Demangler(size_t MaxRecursionLevel = 500); @@ -135,6 +135,7 @@ private: void printDecimalNumber(uint64_t N); void printBasicType(BasicType); void printLifetime(uint64_t Index); + void printIdentifier(Identifier Ident); char look() const; char consume(); @@ -163,7 +164,7 @@ char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N, } Demangler D; - if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) { + if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) { if (Status != nullptr) *Status = demangle_memory_alloc_failure; return nullptr; @@ -283,8 +284,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { switch (consume()) { case 'C': { parseOptionalBase62Number('s'); - Identifier Ident = parseIdentifier(); - print(Ident.Name); + printIdentifier(parseIdentifier()); break; } case 'M': { @@ -333,7 +333,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { print(NS); if (!Ident.empty()) { print(":"); - print(Ident.Name); + printIdentifier(Ident); } print('#'); printDecimalNumber(Disambiguator); @@ -342,7 +342,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { // Implementation internal namespaces. 
if (!Ident.empty()) {
         print("::");
-        print(Ident.Name);
+        printIdentifier(Ident);
       }
     }
     break;
@@ -669,6 +669,8 @@ void Demangler::demangleFnSig() {
       print("C");
     } else {
       Identifier Ident = parseIdentifier();
+      if (Ident.Punycode)
+        Error = true;
       for (char C : Ident.Name) {
         // When mangling ABI string, the "-" is replaced with "_".
         if (C == '_')
@@ -1078,6 +1080,172 @@ void Demangler::printLifetime(uint64_t Index) {
   }
 }

+static inline bool decodePunycodeDigit(char C, size_t &Value) { // Stores the digit value of C in Value; returns false (Value untouched) if C is not a digit.
+  if (isLower(C)) { // Letters map to 0..25 ('a' -> 0).
+    Value = C - 'a';
+    return true;
+  }
+
+  if (isDigit(C)) { // Decimal digits map to 26..35 ('0' -> 26).
+    Value = 26 + (C - '0');
+    return true;
+  }
+
+  return false;
+}
+
+static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { // Drops every '\0' byte between StartIdx and the current position.
+  char *Buffer = Output.getBuffer();
+  char *Start = Buffer + StartIdx;
+  char *End = Buffer + Output.getCurrentPosition();
+  Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); // std::remove compacts the kept bytes; the position is moved to the new end.
+}
+
+// Encodes code point as UTF-8 and stores results in Output. Returns false if
+// CodePoint is not a valid unicode scalar value.
+static inline bool encodeUTF8(size_t CodePoint, char *Output) { // Writes 1 to 4 bytes; bytes past the encoded length are not written.
+  if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) // Surrogate range is rejected.
+    return false;
+
+  if (CodePoint <= 0x7F) { // One-byte form.
+    Output[0] = CodePoint;
+    return true;
+  }
+
+  if (CodePoint <= 0x7FF) { // Two-byte form: 110xxxxx 10xxxxxx.
+    Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
+    Output[1] = 0x80 | (CodePoint & 0x3F);
+    return true;
+  }
+
+  if (CodePoint <= 0xFFFF) { // Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
+    Output[0] = 0xE0 | (CodePoint >> 12);
+    Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
+    Output[2] = 0x80 | (CodePoint & 0x3F);
+    return true;
+  }
+
+  if (CodePoint <= 0x10FFFF) { // Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
+    Output[0] = 0xF0 | (CodePoint >> 18);
+    Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
+    Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
+    Output[3] = 0x80 | (CodePoint & 0x3F);
+    return true;
+  }
+
+  return false; // Above 0x10FFFF.
+}
+
+// Decodes string encoded using punycode and appends results to Output.
+// Returns true if decoding was successful.
+static bool decodePunycode(StringView Input, OutputBuffer &Output) {
+  size_t OutputSize = Output.getCurrentPosition(); // Offset in Output where this identifier's decoded bytes begin.
+  size_t InputIdx = 0;
+
+  // Rust uses an underscore as a delimiter.
+  size_t DelimiterPos = StringView::npos;
+  for (size_t I = 0; I != Input.size(); ++I) // Scans the whole input so that the last '_' wins.
+    if (Input[I] == '_')
+      DelimiterPos = I;
+
+  if (DelimiterPos != StringView::npos) {
+    // Copy basic code points before the last delimiter to the output.
+    for (; InputIdx != DelimiterPos; ++InputIdx) {
+      char C = Input[InputIdx];
+      if (!isValid(C))
+        return false;
+      // Code points are padded with zeros while decoding is in progress.
+      char UTF8[4] = {C}; // Remaining three bytes are zero-initialized.
+      Output += StringView(UTF8, UTF8 + 4); // Always appends a full 4-byte slot.
+    }
+    // Skip over the delimiter.
+    ++InputIdx;
+  }
+
+  size_t Base = 36;
+  size_t Skew = 38;
+  size_t Bias = 72;
+  size_t N = 0x80;
+  size_t TMin = 1;
+  size_t TMax = 26;
+  size_t Damp = 700;
+
+  auto Adapt = [&](size_t Delta, size_t NumPoints) { // Computes the next Bias; captures by reference and mutates Damp.
+    Delta /= Damp;
+    Delta += Delta / NumPoints;
+    Damp = 2; // Only the first call divides by 700; later calls divide by 2.
+
+    size_t K = 0;
+    while (Delta > (Base - TMin) * TMax / 2) {
+      Delta /= Base - TMin;
+      K += Base;
+    }
+    return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
+  };
+
+  // Main decoding loop. Each outer iteration reads one variable-length integer and inserts one code point.
+  for (size_t I = 0; InputIdx != Input.size(); I += 1) {
+    size_t OldI = I;
+    size_t W = 1;
+    size_t Max = std::numeric_limits<size_t>::max();
+    for (size_t K = Base; true; K += Base) { // Exits via break once a digit below the threshold T is read, or via return on error.
+      if (InputIdx == Input.size()) // Input ended in the middle of an integer.
+        return false;
+      char C = Input[InputIdx++];
+      size_t Digit = 0;
+      if (!decodePunycodeDigit(C, Digit))
+        return false;
+
+      if (Digit > (Max - I) / W) // Guards I += Digit * W against size_t overflow.
+        return false;
+      I += Digit * W;
+
+      size_t T; // Threshold for this digit position: K - Bias clamped to [TMin, TMax].
+      if (K <= Bias)
+        T = TMin;
+      else if (K >= Bias + TMax)
+        T = TMax;
+      else
+        T = K - Bias;
+
+      if (Digit < T)
+        break;
+
+      if (W > Max / (Base - T)) // Guards W *= (Base - T) against size_t overflow.
+        return false;
+      W *= (Base - T);
+    }
+    size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; // Code points emitted so far (4 bytes each) plus the one about to be inserted.
+    Bias = Adapt(I - OldI, NumPoints);
+
+    if (I / NumPoints > Max - N) // Guards N += I / NumPoints against size_t overflow.
+      return false;
+    N += I / NumPoints;
+    I = I % NumPoints;
+
+    // Insert N at position I in the output.
+    char UTF8[4] = {}; // Zero-filled so unused trailing bytes stay '\0' padding.
+    if (!encodeUTF8(N, UTF8))
+      return false;
+    Output.insert(OutputSize + I * 4, UTF8, 4); // Positions are in 4-byte slots relative to OutputSize.
+  }
+
+  removeNullBytes(Output, OutputSize); // Strips the '\0' padding from everything appended by this call.
+  return true;
+}
+
+void Demangler::printIdentifier(Identifier Ident) { // Appends Ident to Output, decoding it first when Ident.Punycode is set.
+  if (Error || !Print) // Nothing is emitted once Error is set or while Print is false.
+    return;
+
+  if (Ident.Punycode) {
+    if (!decodePunycode(Ident.Name, Output))
+      Error = true; // A failed decode is recorded in Error.
+  } else {
+    print(Ident.Name);
+  }
+}
+
 char Demangler::look() const {
   if (Error || Position >= Input.size())
     return 0;
|