diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp | 1262 |
1 files changed, 1262 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp b/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp new file mode 100644 index 000000000000..8c01155127d8 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Demangle/RustDemangle.cpp @@ -0,0 +1,1262 @@ +//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a demangler for Rust v0 mangled symbols as specified in +// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html +// +//===----------------------------------------------------------------------===// + +#include "llvm/Demangle/Demangle.h" +#include "llvm/Demangle/StringView.h" +#include "llvm/Demangle/Utility.h" + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <limits> + +using namespace llvm; + +using llvm::itanium_demangle::OutputBuffer; +using llvm::itanium_demangle::ScopedOverride; +using llvm::itanium_demangle::StringView; + +namespace { + +struct Identifier { + StringView Name; + bool Punycode; + + bool empty() const { return Name.empty(); } +}; + +enum class BasicType { + Bool, + Char, + I8, + I16, + I32, + I64, + I128, + ISize, + U8, + U16, + U32, + U64, + U128, + USize, + F32, + F64, + Str, + Placeholder, + Unit, + Variadic, + Never, +}; + +enum class IsInType { + No, + Yes, +}; + +enum class LeaveGenericsOpen { + No, + Yes, +}; + +class Demangler { + // Maximum recursion level. Used to avoid stack overflow. + size_t MaxRecursionLevel; + // Current recursion level. + size_t RecursionLevel; + size_t BoundLifetimes; + // Input string that is being demangled with "_R" prefix removed. + StringView Input; + // Position in the input string. + size_t Position; + // When true, print methods append the output to the stream. + // When false, the output is suppressed. + bool Print; + // True if an error occurred. + bool Error; + +public: + // Demangled output. + OutputBuffer Output; + + Demangler(size_t MaxRecursionLevel = 500); + + bool demangle(StringView MangledName); + +private: + bool demanglePath(IsInType Type, + LeaveGenericsOpen LeaveOpen = LeaveGenericsOpen::No); + void demangleImplPath(IsInType InType); + void demangleGenericArg(); + void demangleType(); + void demangleFnSig(); + void demangleDynBounds(); + void demangleDynTrait(); + void demangleOptionalBinder(); + void demangleConst(); + void demangleConstInt(); + void demangleConstBool(); + void demangleConstChar(); + + template <typename Callable> void demangleBackref(Callable Demangler) { + uint64_t Backref = parseBase62Number(); + if (Error || Backref >= Position) { + Error = true; + return; + } + + if (!Print) + return; + + ScopedOverride<size_t> SavePosition(Position, Position); + Position = Backref; + Demangler(); + } + + Identifier parseIdentifier(); + uint64_t parseOptionalBase62Number(char Tag); + uint64_t parseBase62Number(); + uint64_t parseDecimalNumber(); + uint64_t parseHexNumber(StringView &HexDigits); + + void print(char C); + void print(StringView S); + void printDecimalNumber(uint64_t N); + void printBasicType(BasicType); + void printLifetime(uint64_t Index); + void printIdentifier(Identifier Ident); + + char look() const; + char consume(); + bool consumeIf(char Prefix); + + bool addAssign(uint64_t &A, uint64_t B); + bool mulAssign(uint64_t &A, uint64_t B); +}; + +} // namespace + +char *llvm::rustDemangle(const char *MangledName) { + if (MangledName == nullptr) + return nullptr; + + // Return early if mangled name doesn't look like a Rust symbol. + StringView Mangled(MangledName); + if (!Mangled.startsWith("_R")) + return nullptr; + + Demangler D; + if (!D.demangle(Mangled)) { + std::free(D.Output.getBuffer()); + return nullptr; + } + + D.Output += '\0'; + + return D.Output.getBuffer(); +} + +Demangler::Demangler(size_t MaxRecursionLevel) + : MaxRecursionLevel(MaxRecursionLevel) {} + +static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; } + +static inline bool isHexDigit(const char C) { + return ('0' <= C && C <= '9') || ('a' <= C && C <= 'f'); +} + +static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; } + +static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; } + +/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>. +static inline bool isValid(const char C) { + return isDigit(C) || isLower(C) || isUpper(C) || C == '_'; +} + +// Demangles Rust v0 mangled symbol. Returns true when successful, and false +// otherwise. The demangled symbol is stored in Output field. It is +// responsibility of the caller to free the memory behind the output stream. +// +// <symbol-name> = "_R" <path> [<instantiating-crate>] +bool Demangler::demangle(StringView Mangled) { + Position = 0; + Error = false; + Print = true; + RecursionLevel = 0; + BoundLifetimes = 0; + + if (!Mangled.consumeFront("_R")) { + Error = true; + return false; + } + size_t Dot = Mangled.find('.'); + Input = Mangled.substr(0, Dot); + StringView Suffix = Mangled.dropFront(Dot); + + demanglePath(IsInType::No); + + if (Position != Input.size()) { + ScopedOverride<bool> SavePrint(Print, false); + demanglePath(IsInType::No); + } + + if (Position != Input.size()) + Error = true; + + if (!Suffix.empty()) { + print(" ("); + print(Suffix); + print(")"); + } + + return !Error; +} + +// Demangles a path. InType indicates whether a path is inside a type. When +// LeaveOpen is true, a closing `>` after generic arguments is omitted from the +// output. Return value indicates whether generics arguments have been left +// open. +// +// <path> = "C" <identifier> // crate root +// | "M" <impl-path> <type> // <T> (inherent impl) +// | "X" <impl-path> <type> <path> // <T as Trait> (trait impl) +// | "Y" <type> <path> // <T as Trait> (trait definition) +// | "N" <ns> <path> <identifier> // ...::ident (nested path) +// | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args) +// | <backref> +// <identifier> = [<disambiguator>] <undisambiguated-identifier> +// <ns> = "C" // closure +// | "S" // shim +// | <A-Z> // other special namespaces +// | <a-z> // internal namespaces +bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) { + if (Error || RecursionLevel >= MaxRecursionLevel) { + Error = true; + return false; + } + ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); + + switch (consume()) { + case 'C': { + parseOptionalBase62Number('s'); + printIdentifier(parseIdentifier()); + break; + } + case 'M': { + demangleImplPath(InType); + print("<"); + demangleType(); + print(">"); + break; + } + case 'X': { + demangleImplPath(InType); + print("<"); + demangleType(); + print(" as "); + demanglePath(IsInType::Yes); + print(">"); + break; + } + case 'Y': { + print("<"); + demangleType(); + print(" as "); + demanglePath(IsInType::Yes); + print(">"); + break; + } + case 'N': { + char NS = consume(); + if (!isLower(NS) && !isUpper(NS)) { + Error = true; + break; + } + demanglePath(InType); + + uint64_t Disambiguator = parseOptionalBase62Number('s'); + Identifier Ident = parseIdentifier(); + + if (isUpper(NS)) { + // Special namespaces + print("::{"); + if (NS == 'C') + print("closure"); + else if (NS == 'S') + print("shim"); + else + print(NS); + if (!Ident.empty()) { + print(":"); + printIdentifier(Ident); + } + print('#'); + printDecimalNumber(Disambiguator); + print('}'); + } else { + // Implementation internal namespaces. + if (!Ident.empty()) { + print("::"); + printIdentifier(Ident); + } + } + break; + } + case 'I': { + demanglePath(InType); + // Omit "::" when in a type, where it is optional. + if (InType == IsInType::No) + print("::"); + print("<"); + for (size_t I = 0; !Error && !consumeIf('E'); ++I) { + if (I > 0) + print(", "); + demangleGenericArg(); + } + if (LeaveOpen == LeaveGenericsOpen::Yes) + return true; + else + print(">"); + break; + } + case 'B': { + bool IsOpen = false; + demangleBackref([&] { IsOpen = demanglePath(InType, LeaveOpen); }); + return IsOpen; + } + default: + Error = true; + break; + } + + return false; +} + +// <impl-path> = [<disambiguator>] <path> +// <disambiguator> = "s" <base-62-number> +void Demangler::demangleImplPath(IsInType InType) { + ScopedOverride<bool> SavePrint(Print, false); + parseOptionalBase62Number('s'); + demanglePath(InType); +} + +// <generic-arg> = <lifetime> +// | <type> +// | "K" <const> +// <lifetime> = "L" <base-62-number> +void Demangler::demangleGenericArg() { + if (consumeIf('L')) + printLifetime(parseBase62Number()); + else if (consumeIf('K')) + demangleConst(); + else + demangleType(); +} + +// <basic-type> = "a" // i8 +// | "b" // bool +// | "c" // char +// | "d" // f64 +// | "e" // str +// | "f" // f32 +// | "h" // u8 +// | "i" // isize +// | "j" // usize +// | "l" // i32 +// | "m" // u32 +// | "n" // i128 +// | "o" // u128 +// | "s" // i16 +// | "t" // u16 +// | "u" // () +// | "v" // ... +// | "x" // i64 +// | "y" // u64 +// | "z" // ! +// | "p" // placeholder (e.g. for generic params), shown as _ +static bool parseBasicType(char C, BasicType &Type) { + switch (C) { + case 'a': + Type = BasicType::I8; + return true; + case 'b': + Type = BasicType::Bool; + return true; + case 'c': + Type = BasicType::Char; + return true; + case 'd': + Type = BasicType::F64; + return true; + case 'e': + Type = BasicType::Str; + return true; + case 'f': + Type = BasicType::F32; + return true; + case 'h': + Type = BasicType::U8; + return true; + case 'i': + Type = BasicType::ISize; + return true; + case 'j': + Type = BasicType::USize; + return true; + case 'l': + Type = BasicType::I32; + return true; + case 'm': + Type = BasicType::U32; + return true; + case 'n': + Type = BasicType::I128; + return true; + case 'o': + Type = BasicType::U128; + return true; + case 'p': + Type = BasicType::Placeholder; + return true; + case 's': + Type = BasicType::I16; + return true; + case 't': + Type = BasicType::U16; + return true; + case 'u': + Type = BasicType::Unit; + return true; + case 'v': + Type = BasicType::Variadic; + return true; + case 'x': + Type = BasicType::I64; + return true; + case 'y': + Type = BasicType::U64; + return true; + case 'z': + Type = BasicType::Never; + return true; + default: + return false; + } +} + +void Demangler::printBasicType(BasicType Type) { + switch (Type) { + case BasicType::Bool: + print("bool"); + break; + case BasicType::Char: + print("char"); + break; + case BasicType::I8: + print("i8"); + break; + case BasicType::I16: + print("i16"); + break; + case BasicType::I32: + print("i32"); + break; + case BasicType::I64: + print("i64"); + break; + case BasicType::I128: + print("i128"); + break; + case BasicType::ISize: + print("isize"); + break; + case BasicType::U8: + print("u8"); + break; + case BasicType::U16: + print("u16"); + break; + case BasicType::U32: + print("u32"); + break; + case BasicType::U64: + print("u64"); + break; + case BasicType::U128: + print("u128"); + break; + case BasicType::USize: + print("usize"); + break; + case BasicType::F32: + print("f32"); + break; + case BasicType::F64: + print("f64"); + break; + case BasicType::Str: + print("str"); + break; + case BasicType::Placeholder: + print("_"); + break; + case BasicType::Unit: + print("()"); + break; + case BasicType::Variadic: + print("..."); + break; + case BasicType::Never: + print("!"); + break; + } +} + +// <type> = | <basic-type> +// | <path> // named type +// | "A" <type> <const> // [T; N] +// | "S" <type> // [T] +// | "T" {<type>} "E" // (T1, T2, T3, ...) +// | "R" [<lifetime>] <type> // &T +// | "Q" [<lifetime>] <type> // &mut T +// | "P" <type> // *const T +// | "O" <type> // *mut T +// | "F" <fn-sig> // fn(...) -> ... +// | "D" <dyn-bounds> <lifetime> // dyn Trait<Assoc = X> + Send + 'a +// | <backref> // backref +void Demangler::demangleType() { + if (Error || RecursionLevel >= MaxRecursionLevel) { + Error = true; + return; + } + ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); + + size_t Start = Position; + char C = consume(); + BasicType Type; + if (parseBasicType(C, Type)) + return printBasicType(Type); + + switch (C) { + case 'A': + print("["); + demangleType(); + print("; "); + demangleConst(); + print("]"); + break; + case 'S': + print("["); + demangleType(); + print("]"); + break; + case 'T': { + print("("); + size_t I = 0; + for (; !Error && !consumeIf('E'); ++I) { + if (I > 0) + print(", "); + demangleType(); + } + if (I == 1) + print(","); + print(")"); + break; + } + case 'R': + case 'Q': + print('&'); + if (consumeIf('L')) { + if (auto Lifetime = parseBase62Number()) { + printLifetime(Lifetime); + print(' '); + } + } + if (C == 'Q') + print("mut "); + demangleType(); + break; + case 'P': + print("*const "); + demangleType(); + break; + case 'O': + print("*mut "); + demangleType(); + break; + case 'F': + demangleFnSig(); + break; + case 'D': + demangleDynBounds(); + if (consumeIf('L')) { + if (auto Lifetime = parseBase62Number()) { + print(" + "); + printLifetime(Lifetime); + } + } else { + Error = true; + } + break; + case 'B': + demangleBackref([&] { demangleType(); }); + break; + default: + Position = Start; + demanglePath(IsInType::Yes); + break; + } +} + +// <fn-sig> := [<binder>] ["U"] ["K" <abi>] {<type>} "E" <type> +// <abi> = "C" +// | <undisambiguated-identifier> +void Demangler::demangleFnSig() { + ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); + demangleOptionalBinder(); + + if (consumeIf('U')) + print("unsafe "); + + if (consumeIf('K')) { + print("extern \""); + if (consumeIf('C')) { + print("C"); + } else { + Identifier Ident = parseIdentifier(); + if (Ident.Punycode) + Error = true; + for (char C : Ident.Name) { + // When mangling ABI string, the "-" is replaced with "_". + if (C == '_') + C = '-'; + print(C); + } + } + print("\" "); + } + + print("fn("); + for (size_t I = 0; !Error && !consumeIf('E'); ++I) { + if (I > 0) + print(", "); + demangleType(); + } + print(")"); + + if (consumeIf('u')) { + // Skip the unit type from the output. + } else { + print(" -> "); + demangleType(); + } +} + +// <dyn-bounds> = [<binder>] {<dyn-trait>} "E" +void Demangler::demangleDynBounds() { + ScopedOverride<size_t> SaveBoundLifetimes(BoundLifetimes, BoundLifetimes); + print("dyn "); + demangleOptionalBinder(); + for (size_t I = 0; !Error && !consumeIf('E'); ++I) { + if (I > 0) + print(" + "); + demangleDynTrait(); + } +} + +// <dyn-trait> = <path> {<dyn-trait-assoc-binding>} +// <dyn-trait-assoc-binding> = "p" <undisambiguated-identifier> <type> +void Demangler::demangleDynTrait() { + bool IsOpen = demanglePath(IsInType::Yes, LeaveGenericsOpen::Yes); + while (!Error && consumeIf('p')) { + if (!IsOpen) { + IsOpen = true; + print('<'); + } else { + print(", "); + } + print(parseIdentifier().Name); + print(" = "); + demangleType(); + } + if (IsOpen) + print(">"); +} + +// Demangles optional binder and updates the number of bound lifetimes. +// +// <binder> = "G" <base-62-number> +void Demangler::demangleOptionalBinder() { + uint64_t Binder = parseOptionalBase62Number('G'); + if (Error || Binder == 0) + return; + + // In valid inputs each bound lifetime is referenced later. Referencing a + // lifetime requires at least one byte of input. Reject inputs that are too + // short to reference all bound lifetimes. Otherwise demangling of invalid + // binders could generate excessive amounts of output. + if (Binder >= Input.size() - BoundLifetimes) { + Error = true; + return; + } + + print("for<"); + for (size_t I = 0; I != Binder; ++I) { + BoundLifetimes += 1; + if (I > 0) + print(", "); + printLifetime(1); + } + print("> "); +} + +// <const> = <basic-type> <const-data> +// | "p" // placeholder +// | <backref> +void Demangler::demangleConst() { + if (Error || RecursionLevel >= MaxRecursionLevel) { + Error = true; + return; + } + ScopedOverride<size_t> SaveRecursionLevel(RecursionLevel, RecursionLevel + 1); + + char C = consume(); + BasicType Type; + if (parseBasicType(C, Type)) { + switch (Type) { + case BasicType::I8: + case BasicType::I16: + case BasicType::I32: + case BasicType::I64: + case BasicType::I128: + case BasicType::ISize: + case BasicType::U8: + case BasicType::U16: + case BasicType::U32: + case BasicType::U64: + case BasicType::U128: + case BasicType::USize: + demangleConstInt(); + break; + case BasicType::Bool: + demangleConstBool(); + break; + case BasicType::Char: + demangleConstChar(); + break; + case BasicType::Placeholder: + print('_'); + break; + default: + Error = true; + break; + } + } else if (C == 'B') { + demangleBackref([&] { demangleConst(); }); + } else { + Error = true; + } +} + +// <const-data> = ["n"] <hex-number> +void Demangler::demangleConstInt() { + if (consumeIf('n')) + print('-'); + + StringView HexDigits; + uint64_t Value = parseHexNumber(HexDigits); + if (HexDigits.size() <= 16) { + printDecimalNumber(Value); + } else { + print("0x"); + print(HexDigits); + } +} + +// <const-data> = "0_" // false +// | "1_" // true +void Demangler::demangleConstBool() { + StringView HexDigits; + parseHexNumber(HexDigits); + if (HexDigits == "0") + print("false"); + else if (HexDigits == "1") + print("true"); + else + Error = true; +} + +/// Returns true if CodePoint represents a printable ASCII character. +static bool isAsciiPrintable(uint64_t CodePoint) { + return 0x20 <= CodePoint && CodePoint <= 0x7e; +} + +// <const-data> = <hex-number> +void Demangler::demangleConstChar() { + StringView HexDigits; + uint64_t CodePoint = parseHexNumber(HexDigits); + if (Error || HexDigits.size() > 6) { + Error = true; + return; + } + + print("'"); + switch (CodePoint) { + case '\t': + print(R"(\t)"); + break; + case '\r': + print(R"(\r)"); + break; + case '\n': + print(R"(\n)"); + break; + case '\\': + print(R"(\\)"); + break; + case '"': + print(R"(")"); + break; + case '\'': + print(R"(\')"); + break; + default: + if (isAsciiPrintable(CodePoint)) { + char C = CodePoint; + print(C); + } else { + print(R"(\u{)"); + print(HexDigits); + print('}'); + } + break; + } + print('\''); +} + +// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes> +Identifier Demangler::parseIdentifier() { + bool Punycode = consumeIf('u'); + uint64_t Bytes = parseDecimalNumber(); + + // Underscore resolves the ambiguity when identifier starts with a decimal + // digit or another underscore. + consumeIf('_'); + + if (Error || Bytes > Input.size() - Position) { + Error = true; + return {}; + } + StringView S = Input.substr(Position, Bytes); + Position += Bytes; + + if (!std::all_of(S.begin(), S.end(), isValid)) { + Error = true; + return {}; + } + + return {S, Punycode}; +} + +// Parses optional base 62 number. The presence of a number is determined using +// Tag. Returns 0 when tag is absent and parsed value + 1 otherwise +// +// This function is indended for parsing disambiguators and binders which when +// not present have their value interpreted as 0, and otherwise as decoded +// value + 1. For example for binders, value for "G_" is 1, for "G0_" value is +// 2. When "G" is absent value is 0. +uint64_t Demangler::parseOptionalBase62Number(char Tag) { + if (!consumeIf(Tag)) + return 0; + + uint64_t N = parseBase62Number(); + if (Error || !addAssign(N, 1)) + return 0; + + return N; +} + +// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by +// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1, +// "1_" encodes 2, etc. +// +// <base-62-number> = {<0-9a-zA-Z>} "_" +uint64_t Demangler::parseBase62Number() { + if (consumeIf('_')) + return 0; + + uint64_t Value = 0; + + while (true) { + uint64_t Digit; + char C = consume(); + + if (C == '_') { + break; + } else if (isDigit(C)) { + Digit = C - '0'; + } else if (isLower(C)) { + Digit = 10 + (C - 'a'); + } else if (isUpper(C)) { + Digit = 10 + 26 + (C - 'A'); + } else { + Error = true; + return 0; + } + + if (!mulAssign(Value, 62)) + return 0; + + if (!addAssign(Value, Digit)) + return 0; + } + + if (!addAssign(Value, 1)) + return 0; + + return Value; +} + +// Parses a decimal number that had been encoded without any leading zeros. +// +// <decimal-number> = "0" +// | <1-9> {<0-9>} +uint64_t Demangler::parseDecimalNumber() { + char C = look(); + if (!isDigit(C)) { + Error = true; + return 0; + } + + if (C == '0') { + consume(); + return 0; + } + + uint64_t Value = 0; + + while (isDigit(look())) { + if (!mulAssign(Value, 10)) { + Error = true; + return 0; + } + + uint64_t D = consume() - '0'; + if (!addAssign(Value, D)) + return 0; + } + + return Value; +} + +// Parses a hexadecimal number with <0-9a-f> as a digits. Returns the parsed +// value and stores hex digits in HexDigits. The return value is unspecified if +// HexDigits.size() > 16. +// +// <hex-number> = "0_" +// | <1-9a-f> {<0-9a-f>} "_" +uint64_t Demangler::parseHexNumber(StringView &HexDigits) { + size_t Start = Position; + uint64_t Value = 0; + + if (!isHexDigit(look())) + Error = true; + + if (consumeIf('0')) { + if (!consumeIf('_')) + Error = true; + } else { + while (!Error && !consumeIf('_')) { + char C = consume(); + Value *= 16; + if (isDigit(C)) + Value += C - '0'; + else if ('a' <= C && C <= 'f') + Value += 10 + (C - 'a'); + else + Error = true; + } + } + + if (Error) { + HexDigits = StringView(); + return 0; + } + + size_t End = Position - 1; + assert(Start < End); + HexDigits = Input.substr(Start, End - Start); + return Value; +} + +void Demangler::print(char C) { + if (Error || !Print) + return; + + Output += C; +} + +void Demangler::print(StringView S) { + if (Error || !Print) + return; + + Output += S; +} + +void Demangler::printDecimalNumber(uint64_t N) { + if (Error || !Print) + return; + + Output << N; +} + +// Prints a lifetime. An index 0 always represents an erased lifetime. Indices +// starting from 1, are De Bruijn indices, referring to higher-ranked lifetimes +// bound by one of the enclosing binders. +void Demangler::printLifetime(uint64_t Index) { + if (Index == 0) { + print("'_"); + return; + } + + if (Index - 1 >= BoundLifetimes) { + Error = true; + return; + } + + uint64_t Depth = BoundLifetimes - Index; + print('\''); + if (Depth < 26) { + char C = 'a' + Depth; + print(C); + } else { + print('z'); + printDecimalNumber(Depth - 26 + 1); + } +} + +static inline bool decodePunycodeDigit(char C, size_t &Value) { + if (isLower(C)) { + Value = C - 'a'; + return true; + } + + if (isDigit(C)) { + Value = 26 + (C - '0'); + return true; + } + + return false; +} + +static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) { + char *Buffer = Output.getBuffer(); + char *Start = Buffer + StartIdx; + char *End = Buffer + Output.getCurrentPosition(); + Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer); +} + +// Encodes code point as UTF-8 and stores results in Output. Returns false if +// CodePoint is not a valid unicode scalar value. +static inline bool encodeUTF8(size_t CodePoint, char *Output) { + if (0xD800 <= CodePoint && CodePoint <= 0xDFFF) + return false; + + if (CodePoint <= 0x7F) { + Output[0] = CodePoint; + return true; + } + + if (CodePoint <= 0x7FF) { + Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F); + Output[1] = 0x80 | (CodePoint & 0x3F); + return true; + } + + if (CodePoint <= 0xFFFF) { + Output[0] = 0xE0 | (CodePoint >> 12); + Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F); + Output[2] = 0x80 | (CodePoint & 0x3F); + return true; + } + + if (CodePoint <= 0x10FFFF) { + Output[0] = 0xF0 | (CodePoint >> 18); + Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F); + Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F); + Output[3] = 0x80 | (CodePoint & 0x3F); + return true; + } + + return false; +} + +// Decodes string encoded using punycode and appends results to Output. +// Returns true if decoding was successful. +static bool decodePunycode(StringView Input, OutputBuffer &Output) { + size_t OutputSize = Output.getCurrentPosition(); + size_t InputIdx = 0; + + // Rust uses an underscore as a delimiter. + size_t DelimiterPos = StringView::npos; + for (size_t I = 0; I != Input.size(); ++I) + if (Input[I] == '_') + DelimiterPos = I; + + if (DelimiterPos != StringView::npos) { + // Copy basic code points before the last delimiter to the output. + for (; InputIdx != DelimiterPos; ++InputIdx) { + char C = Input[InputIdx]; + if (!isValid(C)) + return false; + // Code points are padded with zeros while decoding is in progress. + char UTF8[4] = {C}; + Output += StringView(UTF8, UTF8 + 4); + } + // Skip over the delimiter. + ++InputIdx; + } + + size_t Base = 36; + size_t Skew = 38; + size_t Bias = 72; + size_t N = 0x80; + size_t TMin = 1; + size_t TMax = 26; + size_t Damp = 700; + + auto Adapt = [&](size_t Delta, size_t NumPoints) { + Delta /= Damp; + Delta += Delta / NumPoints; + Damp = 2; + + size_t K = 0; + while (Delta > (Base - TMin) * TMax / 2) { + Delta /= Base - TMin; + K += Base; + } + return K + (((Base - TMin + 1) * Delta) / (Delta + Skew)); + }; + + // Main decoding loop. + for (size_t I = 0; InputIdx != Input.size(); I += 1) { + size_t OldI = I; + size_t W = 1; + size_t Max = std::numeric_limits<size_t>::max(); + for (size_t K = Base; true; K += Base) { + if (InputIdx == Input.size()) + return false; + char C = Input[InputIdx++]; + size_t Digit = 0; + if (!decodePunycodeDigit(C, Digit)) + return false; + + if (Digit > (Max - I) / W) + return false; + I += Digit * W; + + size_t T; + if (K <= Bias) + T = TMin; + else if (K >= Bias + TMax) + T = TMax; + else + T = K - Bias; + + if (Digit < T) + break; + + if (W > Max / (Base - T)) + return false; + W *= (Base - T); + } + size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1; + Bias = Adapt(I - OldI, NumPoints); + + if (I / NumPoints > Max - N) + return false; + N += I / NumPoints; + I = I % NumPoints; + + // Insert N at position I in the output. + char UTF8[4] = {}; + if (!encodeUTF8(N, UTF8)) + return false; + Output.insert(OutputSize + I * 4, UTF8, 4); + } + + removeNullBytes(Output, OutputSize); + return true; +} + +void Demangler::printIdentifier(Identifier Ident) { + if (Error || !Print) + return; + + if (Ident.Punycode) { + if (!decodePunycode(Ident.Name, Output)) + Error = true; + } else { + print(Ident.Name); + } +} + +char Demangler::look() const { + if (Error || Position >= Input.size()) + return 0; + + return Input[Position]; +} + +char Demangler::consume() { + if (Error || Position >= Input.size()) { + Error = true; + return 0; + } + + return Input[Position++]; +} + +bool Demangler::consumeIf(char Prefix) { + if (Error || Position >= Input.size() || Input[Position] != Prefix) + return false; + + Position += 1; + return true; +} + +/// Computes A + B. When computation wraps around sets the error and returns +/// false. Otherwise assigns the result to A and returns true. +bool Demangler::addAssign(uint64_t &A, uint64_t B) { + if (A > std::numeric_limits<uint64_t>::max() - B) { + Error = true; + return false; + } + + A += B; + return true; +} + +/// Computes A * B. When computation wraps around sets the error and returns +/// false. Otherwise assigns the result to A and returns true. +bool Demangler::mulAssign(uint64_t &A, uint64_t B) { + if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) { + Error = true; + return false; + } + + A *= B; + return true; +} |