diff options
Diffstat (limited to 'contrib/llvm-project/lldb/source/Plugins/ObjectFile')
22 files changed, 9464 insertions, 0 deletions
diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp new file mode 100644 index 000000000000..d40f87b1a7b4 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.cpp @@ -0,0 +1,586 @@ +//===-- BreakpadRecords.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" +#include "lldb/lldb-defines.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/FormatVariadic.h" +#include <optional> + +using namespace lldb_private; +using namespace lldb_private::breakpad; + +namespace { +enum class Token { + Unknown, + Module, + Info, + CodeID, + File, + Func, + Inline, + InlineOrigin, + Public, + Stack, + CFI, + Init, + Win, +}; +} + +template<typename T> +static T stringTo(llvm::StringRef Str); + +template <> Token stringTo<Token>(llvm::StringRef Str) { + return llvm::StringSwitch<Token>(Str) + .Case("MODULE", Token::Module) + .Case("INFO", Token::Info) + .Case("CODE_ID", Token::CodeID) + .Case("FILE", Token::File) + .Case("FUNC", Token::Func) + .Case("INLINE", Token::Inline) + .Case("INLINE_ORIGIN", Token::InlineOrigin) + .Case("PUBLIC", Token::Public) + .Case("STACK", Token::Stack) + .Case("CFI", Token::CFI) + .Case("INIT", Token::Init) + .Case("WIN", Token::Win) + .Default(Token::Unknown); +} + +template <> +llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) { + using llvm::Triple; + return llvm::StringSwitch<Triple::OSType>(Str) + .Case("Linux", Triple::Linux) + 
.Case("mac", Triple::MacOSX) + .Case("windows", Triple::Win32) + .Default(Triple::UnknownOS); +} + +template <> +llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) { + using llvm::Triple; + return llvm::StringSwitch<Triple::ArchType>(Str) + .Case("arm", Triple::arm) + .Cases("arm64", "arm64e", Triple::aarch64) + .Case("mips", Triple::mips) + .Case("msp430", Triple::msp430) + .Case("ppc", Triple::ppc) + .Case("ppc64", Triple::ppc64) + .Case("s390", Triple::systemz) + .Case("sparc", Triple::sparc) + .Case("sparcv9", Triple::sparcv9) + .Case("x86", Triple::x86) + .Cases("x86_64", "x86_64h", Triple::x86_64) + .Default(Triple::UnknownArch); +} + +template<typename T> +static T consume(llvm::StringRef &Str) { + llvm::StringRef Token; + std::tie(Token, Str) = getToken(Str); + return stringTo<T>(Token); +} + +/// Return the number of hex digits needed to encode a (POD) object of a given +/// type, i.e. two digits per byte of sizeof(T). +template <typename T> static constexpr size_t hex_digits() { + return 2 * sizeof(T); +} + +static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) { + struct data_t { + using uuid_t = uint8_t[16]; + uuid_t uuid; + llvm::support::ubig32_t age; + } data; + static_assert(sizeof(data) == 20); + // The textual module id encoding should be between 33 and 40 characters long, + // depending on the size of the age field, which is of variable length. + // The first three chunks of the id are said to be encoded in big endian. + // NOTE(review): no byte-swap is performed in this function; confirm intent. 
+ if (str.size() <= hex_digits<data_t::uuid_t>() || + str.size() > hex_digits<data_t>()) + return UUID(); + if (!all_of(str, llvm::isHexDigit)) + return UUID(); + + llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>()); + llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>()); + + llvm::copy(fromHex(uuid_str), data.uuid); + uint32_t age; + bool success = to_integer(age_str, age, 16); + assert(success); + UNUSED_IF_ASSERT_DISABLED(success); + data.age = age; + + // On non-windows, the age field should always be zero, so we don't include it, to + // match the native uuid format of these platforms. + return UUID(&data, os == llvm::Triple::Win32 ? sizeof(data) + : sizeof(data.uuid)); // 20 bytes (uuid + age) on Win32, 16 otherwise +} + +std::optional<Record::Kind> Record::classify(llvm::StringRef Line) { // Guess the kind from the leading keyword(s) only. + Token Tok = consume<Token>(Line); + switch (Tok) { + case Token::Module: + return Record::Module; + case Token::Info: + return Record::Info; + case Token::File: + return Record::File; + case Token::Func: + return Record::Func; + case Token::Public: + return Record::Public; + case Token::Stack: // STACK needs a second keyword: CFI or WIN. + Tok = consume<Token>(Line); + switch (Tok) { + case Token::CFI: + return Record::StackCFI; + case Token::Win: + return Record::StackWin; + default: + return std::nullopt; + } + case Token::Inline: + return Record::Inline; + case Token::InlineOrigin: + return Record::InlineOrigin; + case Token::Unknown: + // Optimistically assume that any unrecognised token means this is a line + // record, those don't have a special keyword and start directly with a + // hex number. + return Record::Line; + + case Token::CodeID: + case Token::CFI: + case Token::Init: + case Token::Win: + // These should never appear at the start of a valid record. 
+ return std::nullopt; + } + llvm_unreachable("Fully covered switch above!"); +} + +std::optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) { + // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out + if (consume<Token>(Line) != Token::Module) + return std::nullopt; + + llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line); + if (OS == llvm::Triple::UnknownOS) + return std::nullopt; + + llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line); + if (Arch == llvm::Triple::UnknownArch) + return std::nullopt; + + llvm::StringRef Str; + std::tie(Str, Line) = getToken(Line); + UUID ID = parseModuleId(OS, Str); + if (!ID) + return std::nullopt; + + return ModuleRecord(OS, Arch, std::move(ID)); +} + +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const ModuleRecord &R) { + return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " " + << llvm::Triple::getArchTypeName(R.Arch) << " " + << R.ID.GetAsString(); +} + +std::optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) { + // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe] + if (consume<Token>(Line) != Token::Info) + return std::nullopt; + + if (consume<Token>(Line) != Token::CodeID) + return std::nullopt; + + llvm::StringRef Str; + std::tie(Str, Line) = getToken(Line); + // If we don't have any text following the code ID (e.g. on linux), we should + // use this as the UUID. Otherwise, we should revert back to the module ID. 
+ UUID ID; + if (Line.trim().empty()) { + if (Str.empty() || !ID.SetFromStringRef(Str)) + return std::nullopt; + } + return InfoRecord(std::move(ID)); +} + +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const InfoRecord &R) { + return OS << "INFO CODE_ID " << R.ID.GetAsString(); +} + +template <typename T> +static std::optional<T> parseNumberName(llvm::StringRef Line, Token TokenType) { + // TOKEN number name + if (consume<Token>(Line) != TokenType) + return std::nullopt; + + llvm::StringRef Str; + size_t Number; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, Number)) + return std::nullopt; + + llvm::StringRef Name = Line.trim(); + if (Name.empty()) + return std::nullopt; + + return T(Number, Name); +} + +std::optional<FileRecord> FileRecord::parse(llvm::StringRef Line) { + // FILE number name + return parseNumberName<FileRecord>(Line, Token::File); +} + +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const FileRecord &R) { + return OS << "FILE " << R.Number << " " << R.Name; +} + +std::optional<InlineOriginRecord> +InlineOriginRecord::parse(llvm::StringRef Line) { + // INLINE_ORIGIN number name + return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin); +} + +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const InlineOriginRecord &R) { + return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name; +} + +static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, + lldb::addr_t &Address, lldb::addr_t *Size, + lldb::addr_t &ParamSize, llvm::StringRef &Name) { + // PUBLIC [m] address param_size name + // or + // FUNC [m] address size param_size name + + Token Tok = Size ? 
Token::Func : Token::Public; + + if (consume<Token>(Line) != Tok) + return false; + + llvm::StringRef Str; + std::tie(Str, Line) = getToken(Line); + Multiple = Str == "m"; + + if (Multiple) + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, Address, 16)) + return false; + + if (Tok == Token::Func) { + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, *Size, 16)) + return false; + } + + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, ParamSize, 16)) + return false; + + Name = Line.trim(); + if (Name.empty()) + return false; + + return true; +} + +std::optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) { + bool Multiple; + lldb::addr_t Address, Size, ParamSize; + llvm::StringRef Name; + + if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name)) + return FuncRecord(Multiple, Address, Size, ParamSize, Name); + + return std::nullopt; +} + +bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) { + return L.Multiple == R.Multiple && L.Address == R.Address && + L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name; +} +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const FuncRecord &R) { + return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}", + R.Multiple ? 
"m " : "", R.Address, R.Size, + R.ParamSize, R.Name); +} + +std::optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) { + // INLINE inline_nest_level call_site_line call_site_file_num origin_num + // [address size]+ + if (consume<Token>(Line) != Token::Inline) + return std::nullopt; + + llvm::SmallVector<llvm::StringRef> Tokens; + SplitString(Line, Tokens, " "); + if (Tokens.size() < 6 || Tokens.size() % 2 == 1) + return std::nullopt; + + size_t InlineNestLevel; + uint32_t CallSiteLineNum; + size_t CallSiteFileNum; + size_t OriginNum; + if (!(to_integer(Tokens[0], InlineNestLevel) && + to_integer(Tokens[1], CallSiteLineNum) && + to_integer(Tokens[2], CallSiteFileNum) && + to_integer(Tokens[3], OriginNum))) + return std::nullopt; + + InlineRecord Record = InlineRecord(InlineNestLevel, CallSiteLineNum, + CallSiteFileNum, OriginNum); + for (size_t i = 4; i < Tokens.size(); i += 2) { + lldb::addr_t Address; + if (!to_integer(Tokens[i], Address, 16)) + return std::nullopt; + lldb::addr_t Size; + if (!to_integer(Tokens[i + 1].trim(), Size, 16)) + return std::nullopt; + Record.Ranges.emplace_back(Address, Size); + } + return Record; +} + +bool breakpad::operator==(const InlineRecord &L, const InlineRecord &R) { + return L.InlineNestLevel == R.InlineNestLevel && + L.CallSiteLineNum == R.CallSiteLineNum && + L.CallSiteFileNum == R.CallSiteFileNum && L.OriginNum == R.OriginNum && + L.Ranges == R.Ranges; +} + +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const InlineRecord &R) { + OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel, + R.CallSiteLineNum, R.CallSiteFileNum, R.OriginNum); + for (const auto &range : R.Ranges) { + OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second); + } + return OS; +} + +std::optional<LineRecord> LineRecord::parse(llvm::StringRef Line) { + lldb::addr_t Address; + llvm::StringRef Str; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, Address, 16)) + return std::nullopt; + + 
lldb::addr_t Size; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, Size, 16)) + return std::nullopt; + + uint32_t LineNum; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, LineNum)) + return std::nullopt; + + size_t FileNum; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, FileNum)) + return std::nullopt; + + return LineRecord(Address, Size, LineNum, FileNum); +} + +bool breakpad::operator==(const LineRecord &L, const LineRecord &R) { + return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum && + L.FileNum == R.FileNum; +} +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const LineRecord &R) { + return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size, + R.LineNum, R.FileNum); +} + +std::optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) { + bool Multiple; + lldb::addr_t Address, ParamSize; + llvm::StringRef Name; + + if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name)) + return PublicRecord(Multiple, Address, ParamSize, Name); + + return std::nullopt; +} + +bool breakpad::operator==(const PublicRecord &L, const PublicRecord &R) { + return L.Multiple == R.Multiple && L.Address == R.Address && + L.ParamSize == R.ParamSize && L.Name == R.Name; +} +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const PublicRecord &R) { + return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}", + R.Multiple ? "m " : "", R.Address, R.ParamSize, + R.Name); +} + +std::optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) { + // STACK CFI INIT address size reg1: expr1 reg2: expr2 ... + // or + // STACK CFI address reg1: expr1 reg2: expr2 ... + // No token in exprN ends with a colon. 
+ + if (consume<Token>(Line) != Token::Stack) + return std::nullopt; + if (consume<Token>(Line) != Token::CFI) + return std::nullopt; + + llvm::StringRef Str; + std::tie(Str, Line) = getToken(Line); + + bool IsInitRecord = stringTo<Token>(Str) == Token::Init; + if (IsInitRecord) + std::tie(Str, Line) = getToken(Line); + + lldb::addr_t Address; + if (!to_integer(Str, Address, 16)) + return std::nullopt; + + std::optional<lldb::addr_t> Size; + if (IsInitRecord) { + Size.emplace(); + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, *Size, 16)) + return std::nullopt; + } + + return StackCFIRecord(Address, Size, Line.trim()); +} + +bool breakpad::operator==(const StackCFIRecord &L, const StackCFIRecord &R) { + return L.Address == R.Address && L.Size == R.Size && + L.UnwindRules == R.UnwindRules; +} + +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const StackCFIRecord &R) { // Prints "STACK CFI [INIT ]address [size ]rules". + OS << "STACK CFI "; + if (R.Size) + OS << "INIT "; + OS << llvm::formatv("{0:x-} ", R.Address); + if (R.Size) + OS << llvm::formatv("{0:x-} ", *R.Size); + return OS << R.UnwindRules; // Every field above already ends in a separator space. +} + +std::optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) { + // STACK WIN type rva code_size prologue_size epilogue_size parameter_size + // saved_register_size local_size max_stack_size has_program_string + // program_string_OR_allocates_base_pointer + + if (consume<Token>(Line) != Token::Stack) + return std::nullopt; + if (consume<Token>(Line) != Token::Win) + return std::nullopt; + + llvm::StringRef Str; + uint8_t Type; + std::tie(Str, Line) = getToken(Line); + // Right now we only support the "FrameData" frame type. 
+ if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData) + return std::nullopt; + + lldb::addr_t RVA; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, RVA, 16)) + return std::nullopt; + + lldb::addr_t CodeSize; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, CodeSize, 16)) + return std::nullopt; + + // Skip fields which we aren't using right now. + std::tie(Str, Line) = getToken(Line); // prologue_size + std::tie(Str, Line) = getToken(Line); // epilogue_size + + lldb::addr_t ParameterSize; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, ParameterSize, 16)) + return std::nullopt; + + lldb::addr_t SavedRegisterSize; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, SavedRegisterSize, 16)) + return std::nullopt; + + lldb::addr_t LocalSize; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, LocalSize, 16)) + return std::nullopt; + + std::tie(Str, Line) = getToken(Line); // max_stack_size + + uint8_t HasProgramString; + std::tie(Str, Line) = getToken(Line); + if (!to_integer(Str, HasProgramString)) + return std::nullopt; + // FrameData records should always have a program string. + if (!HasProgramString) + return std::nullopt; + + return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize, + LocalSize, Line.trim()); +} + +bool breakpad::operator==(const StackWinRecord &L, const StackWinRecord &R) { + return L.RVA == R.RVA && L.CodeSize == R.CodeSize && + L.ParameterSize == R.ParameterSize && + L.SavedRegisterSize == R.SavedRegisterSize && + L.LocalSize == R.LocalSize && L.ProgramString == R.ProgramString; +} + +llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS, + const StackWinRecord &R) { + return OS << llvm::formatv( // All sizes are parsed as base 16 in parse(), so print them as hex too. + "STACK WIN 4 {0:x-} {1:x-} ? ? {2:x-} {3:x-} {4:x-} ? 
1 {5}", R.RVA, + R.CodeSize, R.ParameterSize, R.SavedRegisterSize, R.LocalSize, + R.ProgramString); +} + +llvm::StringRef breakpad::toString(Record::Kind K) { + switch (K) { + case Record::Module: + return "MODULE"; + case Record::Info: + return "INFO"; + case Record::File: + return "FILE"; + case Record::Func: + return "FUNC"; + case Record::Inline: + return "INLINE"; + case Record::InlineOrigin: + return "INLINE_ORIGIN"; + case Record::Line: + return "LINE"; + case Record::Public: + return "PUBLIC"; + case Record::StackCFI: + return "STACK CFI"; + case Record::StackWin: + return "STACK WIN"; + } + llvm_unreachable("Unknown record kind!"); +} diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h new file mode 100644 index 000000000000..f10c8c41b793 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h @@ -0,0 +1,235 @@ +//===-- BreakpadRecords.h ------------------------------------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_BREAKPAD_BREAKPADRECORDS_H +#define LLDB_SOURCE_PLUGINS_OBJECTFILE_BREAKPAD_BREAKPADRECORDS_H + +#include "lldb/Utility/UUID.h" +#include "lldb/lldb-types.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FormatProviders.h" +#include "llvm/TargetParser/Triple.h" +#include <optional> + +namespace lldb_private { +namespace breakpad { + +class Record { +public: + enum Kind { + Module, + Info, + File, + Func, + Inline, + InlineOrigin, + Line, + Public, + StackCFI, + StackWin + }; + + /// Attempt to guess the kind of the record present in the argument without + /// doing a full parse. The returned kind will always be correct for valid + /// records, but the full parse can still fail in case of corrupted input. + static std::optional<Kind> classify(llvm::StringRef Line); + +protected: + Record(Kind K) : TheKind(K) {} + + ~Record() = default; + +public: + Kind getKind() const { return TheKind; } // const so it can be queried on const records. + +private: + Kind TheKind; +}; + +llvm::StringRef toString(Record::Kind K); +inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, Record::Kind K) { + OS << toString(K); + return OS; +} + +class ModuleRecord : public Record { +public: + static std::optional<ModuleRecord> parse(llvm::StringRef Line); + ModuleRecord(llvm::Triple::OSType OS, llvm::Triple::ArchType Arch, UUID ID) + : Record(Module), OS(OS), Arch(Arch), ID(std::move(ID)) {} + + llvm::Triple::OSType OS; + llvm::Triple::ArchType Arch; + UUID ID; +}; + +inline bool operator==(const ModuleRecord &L, const ModuleRecord &R) { + return L.OS == R.OS && L.Arch == R.Arch && L.ID == R.ID; +} +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const ModuleRecord &R); + +class InfoRecord : public Record { +public: + static std::optional<InfoRecord> parse(llvm::StringRef Line); + InfoRecord(UUID ID) : Record(Info), ID(std::move(ID)) {} 
+ + UUID ID; +}; + +inline bool operator==(const InfoRecord &L, const InfoRecord &R) { + return L.ID == R.ID; +} +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InfoRecord &R); + +class FileRecord : public Record { +public: + static std::optional<FileRecord> parse(llvm::StringRef Line); + FileRecord(size_t Number, llvm::StringRef Name) + : Record(File), Number(Number), Name(Name) {} + + size_t Number; + llvm::StringRef Name; +}; + +inline bool operator==(const FileRecord &L, const FileRecord &R) { + return L.Number == R.Number && L.Name == R.Name; +} +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FileRecord &R); + +class InlineOriginRecord : public Record { +public: + static std::optional<InlineOriginRecord> parse(llvm::StringRef Line); + InlineOriginRecord(size_t Number, llvm::StringRef Name) + : Record(InlineOrigin), Number(Number), Name(Name) {} + + size_t Number; + llvm::StringRef Name; +}; + +inline bool operator==(const InlineOriginRecord &L, + const InlineOriginRecord &R) { + return L.Number == R.Number && L.Name == R.Name; +} +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const InlineOriginRecord &R); + +class FuncRecord : public Record { +public: + static std::optional<FuncRecord> parse(llvm::StringRef Line); + FuncRecord(bool Multiple, lldb::addr_t Address, lldb::addr_t Size, + lldb::addr_t ParamSize, llvm::StringRef Name) + : Record(Func), Multiple(Multiple), Address(Address), Size(Size), + ParamSize(ParamSize), Name(Name) {} // Tagged Func (not Module) so getKind() reports the real record kind. + + bool Multiple; + lldb::addr_t Address; + lldb::addr_t Size; + lldb::addr_t ParamSize; + llvm::StringRef Name; +}; + +bool operator==(const FuncRecord &L, const FuncRecord &R); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FuncRecord &R); + +class InlineRecord : public Record { +public: + static std::optional<InlineRecord> parse(llvm::StringRef Line); + InlineRecord(size_t InlineNestLevel, uint32_t CallSiteLineNum, + size_t CallSiteFileNum, size_t OriginNum) + : Record(Inline), 
InlineNestLevel(InlineNestLevel), + CallSiteLineNum(CallSiteLineNum), CallSiteFileNum(CallSiteFileNum), + OriginNum(OriginNum) {} + + size_t InlineNestLevel; + uint32_t CallSiteLineNum; + size_t CallSiteFileNum; + size_t OriginNum; + // Address ranges covered by this inline, stored as (address, size) pairs. + std::vector<std::pair<lldb::addr_t, lldb::addr_t>> Ranges; +}; + +bool operator==(const InlineRecord &L, const InlineRecord &R); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const InlineRecord &R); + +class LineRecord : public Record { +public: + static std::optional<LineRecord> parse(llvm::StringRef Line); + LineRecord(lldb::addr_t Address, lldb::addr_t Size, uint32_t LineNum, + size_t FileNum) + : Record(Line), Address(Address), Size(Size), LineNum(LineNum), + FileNum(FileNum) {} + + lldb::addr_t Address; + lldb::addr_t Size; + uint32_t LineNum; + size_t FileNum; +}; + +bool operator==(const LineRecord &L, const LineRecord &R); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LineRecord &R); + +class PublicRecord : public Record { +public: + static std::optional<PublicRecord> parse(llvm::StringRef Line); + PublicRecord(bool Multiple, lldb::addr_t Address, lldb::addr_t ParamSize, + llvm::StringRef Name) + : Record(Public), Multiple(Multiple), Address(Address), + ParamSize(ParamSize), Name(Name) {} // Tagged Public (not Module) so getKind() reports the real record kind. + + bool Multiple; + lldb::addr_t Address; + lldb::addr_t ParamSize; + llvm::StringRef Name; +}; + +bool operator==(const PublicRecord &L, const PublicRecord &R); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const PublicRecord &R); + +class StackCFIRecord : public Record { +public: + static std::optional<StackCFIRecord> parse(llvm::StringRef Line); + StackCFIRecord(lldb::addr_t Address, std::optional<lldb::addr_t> Size, + llvm::StringRef UnwindRules) + : Record(StackCFI), Address(Address), Size(Size), + UnwindRules(UnwindRules) {} + + lldb::addr_t Address; + std::optional<lldb::addr_t> Size; + llvm::StringRef UnwindRules; +}; + +bool operator==(const 
StackCFIRecord &L, const StackCFIRecord &R); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const StackCFIRecord &R); + +class StackWinRecord : public Record { +public: + static std::optional<StackWinRecord> parse(llvm::StringRef Line); + + StackWinRecord(lldb::addr_t RVA, lldb::addr_t CodeSize, + lldb::addr_t ParameterSize, lldb::addr_t SavedRegisterSize, + lldb::addr_t LocalSize, llvm::StringRef ProgramString) + : Record(StackWin), RVA(RVA), CodeSize(CodeSize), + ParameterSize(ParameterSize), SavedRegisterSize(SavedRegisterSize), + LocalSize(LocalSize), ProgramString(ProgramString) {} + + enum class FrameType : uint8_t { FPO = 0, FrameData = 4 }; + lldb::addr_t RVA; + lldb::addr_t CodeSize; + lldb::addr_t ParameterSize; + lldb::addr_t SavedRegisterSize; + lldb::addr_t LocalSize; + llvm::StringRef ProgramString; +}; + +bool operator==(const StackWinRecord &L, const StackWinRecord &R); +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const StackWinRecord &R); + +} // namespace breakpad +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_BREAKPAD_BREAKPADRECORDS_H diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp new file mode 100644 index 000000000000..33673f139b49 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp @@ -0,0 +1,169 @@ +//===-- ObjectFileBreakpad.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h" +#include "Plugins/ObjectFile/Breakpad/BreakpadRecords.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" +#include <optional> + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::breakpad; + +LLDB_PLUGIN_DEFINE(ObjectFileBreakpad) + +namespace { +struct Header { + ArchSpec arch; + UUID uuid; + static std::optional<Header> parse(llvm::StringRef text); +}; +} // namespace + +std::optional<Header> Header::parse(llvm::StringRef text) { + llvm::StringRef line; + std::tie(line, text) = text.split('\n'); + auto Module = ModuleRecord::parse(line); + if (!Module) + return std::nullopt; + + llvm::Triple triple; + triple.setArch(Module->Arch); + triple.setOS(Module->OS); + + std::tie(line, text) = text.split('\n'); + + auto Info = InfoRecord::parse(line); + UUID uuid = Info && Info->ID ? 
Info->ID : Module->ID; + return Header{ArchSpec(triple), std::move(uuid)}; +} + +char ObjectFileBreakpad::ID; + +void ObjectFileBreakpad::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + CreateMemoryInstance, GetModuleSpecifications); +} + +void ObjectFileBreakpad::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +ObjectFile *ObjectFileBreakpad::CreateInstance( + const ModuleSP &module_sp, DataBufferSP data_sp, offset_t data_offset, + const FileSpec *file, offset_t file_offset, offset_t length) { + if (!data_sp) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) + return nullptr; + data_offset = 0; + } + auto text = toStringRef(data_sp->GetData()); + std::optional<Header> header = Header::parse(text); + if (!header) + return nullptr; + + // Update the data to contain the entire file if it doesn't already + if (data_sp->GetByteSize() < length) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) + return nullptr; + data_offset = 0; + } + + return new ObjectFileBreakpad(module_sp, data_sp, data_offset, file, + file_offset, length, std::move(header->arch), + std::move(header->uuid)); +} + +ObjectFile *ObjectFileBreakpad::CreateMemoryInstance( + const ModuleSP &module_sp, WritableDataBufferSP data_sp, + const ProcessSP &process_sp, addr_t header_addr) { + return nullptr; +} + +size_t ObjectFileBreakpad::GetModuleSpecifications( + const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, + offset_t file_offset, offset_t length, ModuleSpecList &specs) { + auto text = toStringRef(data_sp->GetData()); + std::optional<Header> header = Header::parse(text); + if (!header) + return 0; + ModuleSpec spec(file, std::move(header->arch)); + spec.GetUUID() = std::move(header->uuid); + specs.Append(spec); + return 1; +} + +ObjectFileBreakpad::ObjectFileBreakpad(const ModuleSP &module_sp, + DataBufferSP &data_sp, + offset_t data_offset, + const 
FileSpec *file, offset_t offset,
                                       offset_t length, ArchSpec arch,
                                       UUID uuid)
    : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
      m_arch(std::move(arch)), m_uuid(std::move(uuid)) {}

/// Always reports success; no parsing is performed here.
bool ObjectFileBreakpad::ParseHeader() {
  // We already parsed the header during initialization.
  return true;
}

/// Intentionally empty: this object file contributes no symbols itself.
void ObjectFileBreakpad::ParseSymtab(Symtab &symtab) {
  // Nothing to do for breakpad files, all information is parsed as debug info
  // which means "lldb_private::Function" objects are used, or symbols are added
  // by the SymbolFileBreakpad::AddSymbols(...) function in the symbol file.
}

/// Splits the file text into sections, one per run of consecutive lines that
/// classify to the same record kind.
///
/// Each section is named after its record kind (via toString), has type
/// eSectionTypeOther, zero VM address and size, and covers the byte range of
/// the run inside m_data. Every section is added both to m_sections_up and to
/// \p unified_section_list. The function is a no-op once m_sections_up exists.
void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
  if (m_sections_up)
    return;
  m_sections_up = std::make_unique<SectionList>();

  // Kind of the run currently being accumulated; empty until the first line
  // has been classified.
  std::optional<Record::Kind> current_section;
  // Byte offset (relative to the start of m_data) where the current run
  // begins. Only read by maybe_add_section once current_section is set, and
  // it is assigned every time current_section is assigned.
  offset_t section_start;
  llvm::StringRef text = toStringRef(m_data.GetData());
  uint32_t next_section_id = 1;
  // Closes the current run at end_ptr and publishes it as a Section.
  auto maybe_add_section = [&](const uint8_t *end_ptr) {
    if (!current_section)
      return; // We have been called before parsing the first line.

    offset_t end_offset = end_ptr - m_data.GetDataStart();
    auto section_sp = std::make_shared<Section>(
        GetModule(), this, next_section_id++,
        ConstString(toString(*current_section)), eSectionTypeOther,
        /*file_vm_addr*/ 0, /*vm_size*/ 0, section_start,
        end_offset - section_start, /*log2align*/ 0, /*flags*/ 0);
    m_sections_up->AddSection(section_sp);
    unified_section_list.AddSection(section_sp);
  };
  while (!text.empty()) {
    llvm::StringRef line;
    std::tie(line, text) = text.split('\n');

    std::optional<Record::Kind> next_section = Record::classify(line);
    if (next_section == Record::Line || next_section == Record::Inline) {
      // Line/Inline records logically belong to the preceding Func record, so
      // we put them in the same section.
      next_section = Record::Func;
    }
    if (next_section == current_section)
      continue;

    // Changing sections, finish off the previous one, if there was any.
    maybe_add_section(line.bytes_begin());
    // And start a new one.
    current_section = next_section;
    section_start = line.bytes_begin() - m_data.GetDataStart();
  }
  // Finally, add the last section.
  maybe_add_section(m_data.GetDataEnd());
}
diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h new file mode 100644 index 000000000000..074d667c1ca5 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h @@ -0,0 +1,104 @@
//===-- ObjectFileBreakpad.h ---------------------------------- -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_BREAKPAD_OBJECTFILEBREAKPAD_H
#define LLDB_SOURCE_PLUGINS_OBJECTFILE_BREAKPAD_OBJECTFILEBREAKPAD_H

#include "lldb/Symbol/ObjectFile.h"
#include "lldb/Utility/ArchSpec.h"

namespace lldb_private {
namespace breakpad {

/// ObjectFile plugin for Breakpad files ("Breakpad object file reader.").
///
/// Most of the ObjectFile protocol is answered with fixed values here: the
/// file is never executable, never stripped, has no dependent modules, and is
/// typed as eTypeDebugInfo. The architecture and UUID are supplied to the
/// (private) constructor and returned unchanged.
class ObjectFileBreakpad : public ObjectFile {
public:
  // Static Functions
  static void Initialize();
  static void Terminate();

  static llvm::StringRef GetPluginNameStatic() { return "breakpad"; }
  static const char *GetPluginDescriptionStatic() {
    return "Breakpad object file reader.";
  }

  static ObjectFile *
  CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp,
                 lldb::offset_t data_offset, const FileSpec *file,
                 lldb::offset_t file_offset, lldb::offset_t length);

  static ObjectFile *CreateMemoryInstance(const lldb::ModuleSP &module_sp,
                                          lldb::WritableDataBufferSP data_sp,
                                          const lldb::ProcessSP &process_sp,
                                          lldb::addr_t header_addr);

  static size_t GetModuleSpecifications(const FileSpec &file,
                                        lldb::DataBufferSP &data_sp,
                                        lldb::offset_t data_offset,
                                        lldb::offset_t file_offset,
                                        lldb::offset_t length,
                                        ModuleSpecList &specs);

  // PluginInterface protocol
  llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }

  // LLVM RTTI support
  static char ID;
  bool isA(const void *ClassID) const override {
    return ClassID == &ID || ObjectFile::isA(ClassID);
  }
  static bool classof(const ObjectFile *obj) { return obj->isA(&ID); }

  // ObjectFile Protocol.

  bool ParseHeader() override;

  lldb::ByteOrder GetByteOrder() const override {
    return m_arch.GetByteOrder();
  }

  bool IsExecutable() const override { return false; }

  uint32_t GetAddressByteSize() const override {
    return m_arch.GetAddressByteSize();
  }

  // No address classification is available; every address is eInvalid.
  AddressClass GetAddressClass(lldb::addr_t file_addr) override {
    return AddressClass::eInvalid;
  }

  void ParseSymtab(lldb_private::Symtab &symtab) override;

  bool IsStripped() override { return false; }

  void CreateSections(SectionList &unified_section_list) override;

  // Dumping is a no-op for this plugin.
  void Dump(Stream *s) override {}

  ArchSpec GetArchitecture() override { return m_arch; }

  UUID GetUUID() override { return m_uuid; }

  uint32_t GetDependentModules(FileSpecList &files) override { return 0; }

  Type CalculateType() override { return eTypeDebugInfo; }

  Strata CalculateStrata() override { return eStrataUser; }

private:
  ArchSpec m_arch; ///< Architecture passed to the constructor.
  UUID m_uuid;     ///< UUID passed to the constructor.

  // Private: instances are expected to come from the static factory
  // functions above (their definitions are not part of this header).
  ObjectFileBreakpad(const lldb::ModuleSP &module_sp,
                     lldb::DataBufferSP &data_sp, lldb::offset_t data_offset,
                     const FileSpec *file, lldb::offset_t offset,
                     lldb::offset_t length, ArchSpec arch, UUID uuid);
};

} // namespace breakpad
} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_BREAKPAD_OBJECTFILEBREAKPAD_H
diff --git
a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/COFF/ObjectFileCOFF.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/COFF/ObjectFileCOFF.cpp new file mode 100644 index 000000000000..a7ad5d27b237 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/COFF/ObjectFileCOFF.cpp @@ -0,0 +1,311 @@ +//===-- ObjectFileCOFF.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ObjectFileCOFF.h" + +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Utility/LLDBLog.h" + +#include "llvm/Support/Error.h" +#include "llvm/Support/FormatAdapters.h" + +using namespace lldb; +using namespace lldb_private; + +using namespace llvm; +using namespace llvm::object; + +static bool IsCOFFObjectFile(const DataBufferSP &data) { + return identify_magic(toStringRef(data->GetData())) == + file_magic::coff_object; +} + +LLDB_PLUGIN_DEFINE(ObjectFileCOFF) + +char ObjectFileCOFF::ID; + +ObjectFileCOFF::~ObjectFileCOFF() = default; + +void ObjectFileCOFF::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + CreateMemoryInstance, GetModuleSpecifications); +} + +void ObjectFileCOFF::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +lldb_private::ObjectFile * +ObjectFileCOFF::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp, + offset_t data_offset, const FileSpec *file, + offset_t file_offset, offset_t length) { + Log *log = GetLog(LLDBLog::Object); + + if (!data_sp) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) { + LLDB_LOG(log, + "Failed to create ObjectFileCOFF instance: 
cannot read file {0}", + file->GetPath()); + return nullptr; + } + data_offset = 0; + } + + assert(data_sp && "must have mapped file at this point"); + + if (!IsCOFFObjectFile(data_sp)) + return nullptr; + + if (data_sp->GetByteSize() < length) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) { + LLDB_LOG(log, + "Failed to create ObjectFileCOFF instance: cannot read file {0}", + file->GetPath()); + return nullptr; + } + data_offset = 0; + } + + + MemoryBufferRef buffer{toStringRef(data_sp->GetData()), + file->GetFilename().GetStringRef()}; + + Expected<std::unique_ptr<Binary>> binary = createBinary(buffer); + if (!binary) { + LLDB_LOG_ERROR(log, binary.takeError(), + "Failed to create binary for file ({1}): {0}", + file->GetPath()); + return nullptr; + } + + LLDB_LOG(log, "ObjectFileCOFF::ObjectFileCOFF module = {1} ({2}), file = {3}", + module_sp.get(), module_sp->GetSpecificationDescription(), + file->GetPath()); + + return new ObjectFileCOFF(unique_dyn_cast<COFFObjectFile>(std::move(*binary)), + module_sp, data_sp, data_offset, file, file_offset, + length); +} + +lldb_private::ObjectFile *ObjectFileCOFF::CreateMemoryInstance( + const ModuleSP &module_sp, WritableDataBufferSP data_sp, + const ProcessSP &process_sp, addr_t header) { + // FIXME: do we need to worry about construction from a memory region? 
+ return nullptr; +} + +size_t ObjectFileCOFF::GetModuleSpecifications( + const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, + offset_t file_offset, offset_t length, ModuleSpecList &specs) { + if (!IsCOFFObjectFile(data_sp)) + return 0; + + MemoryBufferRef buffer{toStringRef(data_sp->GetData()), + file.GetFilename().GetStringRef()}; + Expected<std::unique_ptr<Binary>> binary = createBinary(buffer); + if (!binary) { + Log *log = GetLog(LLDBLog::Object); + LLDB_LOG_ERROR(log, binary.takeError(), + "Failed to create binary for file ({1}): {0}", + file.GetFilename()); + return 0; + } + + std::unique_ptr<COFFObjectFile> object = + unique_dyn_cast<COFFObjectFile>(std::move(*binary)); + switch (static_cast<COFF::MachineTypes>(object->getMachine())) { + case COFF::IMAGE_FILE_MACHINE_I386: + specs.Append(ModuleSpec(file, ArchSpec("i686-unknown-windows-msvc"))); + return 1; + case COFF::IMAGE_FILE_MACHINE_AMD64: + specs.Append(ModuleSpec(file, ArchSpec("x86_64-unknown-windows-msvc"))); + return 1; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + specs.Append(ModuleSpec(file, ArchSpec("armv7-unknown-windows-msvc"))); + return 1; + case COFF::IMAGE_FILE_MACHINE_ARM64: + specs.Append(ModuleSpec(file, ArchSpec("aarch64-unknown-windows-msvc"))); + return 1; + default: + return 0; + } +} + +void ObjectFileCOFF::Dump(Stream *stream) { + ModuleSP module(GetModule()); + if (!module) + return; + + std::lock_guard<std::recursive_mutex> guard(module->GetMutex()); + + stream->Printf("%p: ", static_cast<void *>(this)); + stream->Indent(); + stream->PutCString("ObjectFileCOFF"); + *stream << ", file = '" << m_file + << "', arch = " << GetArchitecture().GetArchitectureName() << '\n'; + + if (SectionList *sections = GetSectionList()) + sections->Dump(stream->AsRawOstream(), stream->GetIndentLevel(), nullptr, + true, std::numeric_limits<uint32_t>::max()); +} + +uint32_t ObjectFileCOFF::GetAddressByteSize() const { + return const_cast<ObjectFileCOFF 
*>(this)->GetArchitecture().GetAddressByteSize(); +} + +ArchSpec ObjectFileCOFF::GetArchitecture() { + switch (static_cast<COFF::MachineTypes>(m_object->getMachine())) { + case COFF::IMAGE_FILE_MACHINE_I386: + return ArchSpec("i686-unknown-windows-msvc"); + case COFF::IMAGE_FILE_MACHINE_AMD64: + return ArchSpec("x86_64-unknown-windows-msvc"); + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return ArchSpec("armv7-unknown-windows-msvc"); + case COFF::IMAGE_FILE_MACHINE_ARM64: + return ArchSpec("aarch64-unknown-windows-msvc"); + default: + return ArchSpec(); + } +} + +void ObjectFileCOFF::CreateSections(lldb_private::SectionList §ions) { + if (m_sections_up) + return; + + m_sections_up = std::make_unique<SectionList>(); + ModuleSP module(GetModule()); + if (!module) + return; + + std::lock_guard<std::recursive_mutex> guard(module->GetMutex()); + + auto SectionType = [](StringRef Name, + const coff_section *Section) -> lldb::SectionType { + lldb::SectionType type = + StringSwitch<lldb::SectionType>(Name) + // DWARF Debug Sections + .Case(".debug_abbrev", eSectionTypeDWARFDebugAbbrev) + .Case(".debug_info", eSectionTypeDWARFDebugInfo) + .Case(".debug_line", eSectionTypeDWARFDebugLine) + .Case(".debug_pubnames", eSectionTypeDWARFDebugPubNames) + .Case(".debug_pubtypes", eSectionTypeDWARFDebugPubTypes) + .Case(".debug_str", eSectionTypeDWARFDebugStr) + // CodeView Debug Sections: .debug$S, .debug$T + .StartsWith(".debug$", eSectionTypeDebug) + .Case("clangast", eSectionTypeOther) + .Default(eSectionTypeInvalid); + if (type != eSectionTypeInvalid) + return type; + + if (Section->Characteristics & COFF::IMAGE_SCN_CNT_CODE) + return eSectionTypeCode; + if (Section->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + return eSectionTypeData; + if (Section->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + return Section->SizeOfRawData ? 
eSectionTypeData : eSectionTypeZeroFill; + return eSectionTypeOther; + }; + auto Permissions = [](const object::coff_section *Section) -> uint32_t { + uint32_t permissions = 0; + if (Section->Characteristics & COFF::IMAGE_SCN_MEM_EXECUTE) + permissions |= lldb::ePermissionsExecutable; + if (Section->Characteristics & COFF::IMAGE_SCN_MEM_READ) + permissions |= lldb::ePermissionsReadable; + if (Section->Characteristics & COFF::IMAGE_SCN_MEM_WRITE) + permissions |= lldb::ePermissionsWritable; + return permissions; + }; + + for (const auto &SecRef : m_object->sections()) { + const auto COFFSection = m_object->getCOFFSection(SecRef); + + llvm::Expected<StringRef> Name = SecRef.getName(); + StringRef SectionName = Name ? *Name : COFFSection->Name; + if (!Name) + consumeError(Name.takeError()); + + SectionSP section = + std::make_unique<Section>(module, this, + static_cast<user_id_t>(SecRef.getIndex()), + ConstString(SectionName), + SectionType(SectionName, COFFSection), + COFFSection->VirtualAddress, + COFFSection->VirtualSize, + COFFSection->PointerToRawData, + COFFSection->SizeOfRawData, + COFFSection->getAlignment(), + 0); + section->SetPermissions(Permissions(COFFSection)); + + m_sections_up->AddSection(section); + sections.AddSection(section); + } +} + +void ObjectFileCOFF::ParseSymtab(lldb_private::Symtab &symtab) { + Log *log = GetLog(LLDBLog::Object); + + SectionList *sections = GetSectionList(); + symtab.Reserve(symtab.GetNumSymbols() + m_object->getNumberOfSymbols()); + + auto SymbolType = [](const COFFSymbolRef &Symbol) -> lldb::SymbolType { + if (Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) + return eSymbolTypeCode; + if (Symbol.getBaseType() == COFF::IMAGE_SYM_TYPE_NULL && + Symbol.getComplexType() == COFF::IMAGE_SYM_DTYPE_NULL) + return eSymbolTypeData; + return eSymbolTypeInvalid; + }; + + for (const auto &SymRef : m_object->symbols()) { + const auto COFFSymRef = m_object->getCOFFSymbol(SymRef); + + Expected<StringRef> NameOrErr = 
SymRef.getName(); + if (!NameOrErr) { + LLDB_LOG_ERROR(log, NameOrErr.takeError(), + "ObjectFileCOFF: failed to get symbol name: {0}"); + continue; + } + + Symbol symbol; + symbol.GetMangled().SetValue(ConstString(*NameOrErr)); + + int16_t SecIdx = static_cast<int16_t>(COFFSymRef.getSectionNumber()); + if (SecIdx == COFF::IMAGE_SYM_ABSOLUTE) { + symbol.GetAddressRef() = Address{COFFSymRef.getValue()}; + symbol.SetType(eSymbolTypeAbsolute); + } else if (SecIdx >= 1) { + symbol.GetAddressRef() = Address(sections->GetSectionAtIndex(SecIdx - 1), + COFFSymRef.getValue()); + symbol.SetType(SymbolType(COFFSymRef)); + } + + symtab.AddSymbol(symbol); + } + + LLDB_LOG(log, "ObjectFileCOFF::ParseSymtab processed {0} symbols", + m_object->getNumberOfSymbols()); +} + +bool ObjectFileCOFF::ParseHeader() { + ModuleSP module(GetModule()); + if (!module) + return false; + + std::lock_guard<std::recursive_mutex> guard(module->GetMutex()); + + m_data.SetByteOrder(eByteOrderLittle); + m_data.SetAddressByteSize(GetAddressByteSize()); + + return true; +} diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/COFF/ObjectFileCOFF.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/COFF/ObjectFileCOFF.h new file mode 100644 index 000000000000..46c43f93f7ff --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/COFF/ObjectFileCOFF.h @@ -0,0 +1,116 @@ +//===-- ObjectFileCOFF.h -------------------------------------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_COFF_OBJECTFILECOFF_H
#define LLDB_SOURCE_PLUGINS_OBJECTFILE_COFF_OBJECTFILECOFF_H

#include "lldb/Symbol/ObjectFile.h"

#include "llvm/Object/COFF.h"

/// \class ObjectFileCOFF
/// Generic COFF object file reader.
///
/// This class provides a generic COFF reader plugin implementing the ObjectFile
/// protocol.  Assumes that the COFF object format is a Microsoft style COFF
/// rather than the full generality afforded by it.
class ObjectFileCOFF : public lldb_private::ObjectFile {
  /// The parsed LLVM view of the object; moved in by the constructor.
  std::unique_ptr<llvm::object::COFFObjectFile> m_object;
  /// Returned by GetUUID(). Not assigned anywhere in this class definition,
  /// so it stays default-constructed unless set elsewhere.
  lldb_private::UUID m_uuid;

  // Private: instances are created through CreateInstance().
  ObjectFileCOFF(std::unique_ptr<llvm::object::COFFObjectFile> object,
                 const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp,
                 lldb::offset_t data_offset, const lldb_private::FileSpec *file,
                 lldb::offset_t file_offset, lldb::offset_t length)
    : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
      m_object(std::move(object)) {}

public:
  ~ObjectFileCOFF() override;

  static void Initialize();
  static void Terminate();

  static llvm::StringRef GetPluginNameStatic() { return "COFF"; }
  static llvm::StringRef GetPluginDescriptionStatic() {
    return "COFF Object File Reader";
  }

  static lldb_private::ObjectFile *
  CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp,
                 lldb::offset_t data_offset, const lldb_private::FileSpec *file,
                 lldb::offset_t file_offset, lldb::offset_t length);

  static lldb_private::ObjectFile *
  CreateMemoryInstance(const lldb::ModuleSP &module_sp,
                       lldb::WritableDataBufferSP data_sp,
                       const lldb::ProcessSP &process_sp, lldb::addr_t header);

  static size_t GetModuleSpecifications(const lldb_private::FileSpec &file,
                                        lldb::DataBufferSP &data_sp,
                                        lldb::offset_t data_offset,
                                        lldb::offset_t file_offset,
                                        lldb::offset_t length,
                                        lldb_private::ModuleSpecList &specs);

  // LLVM RTTI support
  static char ID;
  bool isA(const void *ClassID) const override {
    return ClassID == &ID || ObjectFile::isA(ClassID);
  }
  static bool classof(const ObjectFile *obj) { return obj->isA(&ID); }

  // PluginInterface protocol
  llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }

  // ObjectFile protocol
  void Dump(lldb_private::Stream *stream) override;

  uint32_t GetAddressByteSize() const override;

  uint32_t GetDependentModules(lldb_private::FileSpecList &specs) override {
    return 0;
  }

  bool IsExecutable() const override {
    // COFF is an object file format only, it cannot host an executable.
    return false;
  }

  lldb_private::ArchSpec GetArchitecture() override;

  void CreateSections(lldb_private::SectionList &) override;

  void ParseSymtab(lldb_private::Symtab &) override;

  bool IsStripped() override {
    // FIXME see if there is a good way to identify a /Z7 v /Zi or /ZI build.
    return false;
  }

  lldb_private::UUID GetUUID() override { return m_uuid; }

  lldb::ByteOrder GetByteOrder() const override {
    // Microsoft always uses little endian.
    return lldb::ByteOrder::eByteOrderLittle;
  }

  bool ParseHeader() override;

  lldb_private::ObjectFile::Type CalculateType() override {
    // COFF is an object file format only, it cannot host an executable.
    return lldb_private::ObjectFile::eTypeObjectFile;
  }

  lldb_private::ObjectFile::Strata CalculateStrata() override {
    // FIXME the object file may correspond to a kernel image.
    return lldb_private::ObjectFile::eStrataUser;
  }
};

#endif
diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp new file mode 100644 index 000000000000..a6e385f70709 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp @@ -0,0 +1,441 @@
//===-- ELFHeader.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <cstring>

#include "lldb/Core/Section.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/Stream.h"

#include "ELFHeader.h"

using namespace elf;
using namespace lldb;
using namespace llvm::ELF;

// Static utility functions.
//
// GetMaxU64 and GetMaxS64 wrap the similarly named methods from DataExtractor
// with error handling code and provide for parsing a sequence of values.
+static bool GetMaxU64(const lldb_private::DataExtractor &data, + lldb::offset_t *offset, uint64_t *value, + uint32_t byte_size) { + const lldb::offset_t saved_offset = *offset; + *value = data.GetMaxU64(offset, byte_size); + return *offset != saved_offset; +} + +static bool GetMaxU64(const lldb_private::DataExtractor &data, + lldb::offset_t *offset, uint64_t *value, + uint32_t byte_size, uint32_t count) { + lldb::offset_t saved_offset = *offset; + + for (uint32_t i = 0; i < count; ++i, ++value) { + if (!GetMaxU64(data, offset, value, byte_size)) { + *offset = saved_offset; + return false; + } + } + return true; +} + +static bool GetMaxS64(const lldb_private::DataExtractor &data, + lldb::offset_t *offset, int64_t *value, + uint32_t byte_size) { + const lldb::offset_t saved_offset = *offset; + *value = data.GetMaxS64(offset, byte_size); + return *offset != saved_offset; +} + +static bool GetMaxS64(const lldb_private::DataExtractor &data, + lldb::offset_t *offset, int64_t *value, + uint32_t byte_size, uint32_t count) { + lldb::offset_t saved_offset = *offset; + + for (uint32_t i = 0; i < count; ++i, ++value) { + if (!GetMaxS64(data, offset, value, byte_size)) { + *offset = saved_offset; + return false; + } + } + return true; +} + +// ELFHeader + +ELFHeader::ELFHeader() { memset(this, 0, sizeof(ELFHeader)); } + +ByteOrder ELFHeader::GetByteOrder() const { + if (e_ident[EI_DATA] == ELFDATA2MSB) + return eByteOrderBig; + if (e_ident[EI_DATA] == ELFDATA2LSB) + return eByteOrderLittle; + return eByteOrderInvalid; +} + +bool ELFHeader::HasHeaderExtension() const { + bool result = false; + + // Check if any of these values looks like sentinel. + result |= e_phnum_hdr == 0xFFFF; // PN_XNUM + result |= e_shnum_hdr == SHN_UNDEF; + result |= e_shstrndx_hdr == SHN_XINDEX; + + // If header extension is present, the section offset cannot be null. + result &= e_shoff != 0; + + // Done. 
+ return result; +} + +void ELFHeader::ParseHeaderExtension(lldb_private::DataExtractor &data) { + // Extract section #0 header. + ELFSectionHeader section_zero; + lldb::offset_t offset = 0; + lldb_private::DataExtractor sh_data(data, e_shoff, e_shentsize); + bool ok = section_zero.Parse(sh_data, &offset); + + // If we succeeded, fix the header. + if (ok) { + if (e_phnum_hdr == 0xFFFF) // PN_XNUM + e_phnum = section_zero.sh_info; + if (e_shnum_hdr == SHN_UNDEF) + e_shnum = section_zero.sh_size; + if (e_shstrndx_hdr == SHN_XINDEX) + e_shstrndx = section_zero.sh_link; + } +} + +bool ELFHeader::Parse(lldb_private::DataExtractor &data, + lldb::offset_t *offset) { + // Read e_ident. This provides byte order and address size info. + if (data.GetU8(offset, &e_ident, EI_NIDENT) == nullptr) + return false; + + const unsigned byte_size = Is32Bit() ? 4 : 8; + data.SetByteOrder(GetByteOrder()); + data.SetAddressByteSize(byte_size); + + // Read e_type and e_machine. + if (data.GetU16(offset, &e_type, 2) == nullptr) + return false; + + // Read e_version. + if (data.GetU32(offset, &e_version, 1) == nullptr) + return false; + + // Read e_entry, e_phoff and e_shoff. + if (!GetMaxU64(data, offset, &e_entry, byte_size, 3)) + return false; + + // Read e_flags. + if (data.GetU32(offset, &e_flags, 1) == nullptr) + return false; + + // Read e_ehsize, e_phentsize, e_phnum, e_shentsize, e_shnum and e_shstrndx. + if (data.GetU16(offset, &e_ehsize, 6) == nullptr) + return false; + + // Initialize e_phnum, e_shnum, and e_shstrndx with the values read from the + // header. + e_phnum = e_phnum_hdr; + e_shnum = e_shnum_hdr; + e_shstrndx = e_shstrndx_hdr; + + // See if we have extended header in section #0. 
+ if (HasHeaderExtension()) + ParseHeaderExtension(data); + + return true; +} + +bool ELFHeader::MagicBytesMatch(const uint8_t *magic) { + return memcmp(magic, ElfMagic, strlen(ElfMagic)) == 0; +} + +unsigned ELFHeader::AddressSizeInBytes(const uint8_t *magic) { + unsigned address_size = 0; + + switch (magic[EI_CLASS]) { + case ELFCLASS32: + address_size = 4; + break; + + case ELFCLASS64: + address_size = 8; + break; + } + return address_size; +} + +unsigned ELFHeader::GetRelocationJumpSlotType() const { + unsigned slot = 0; + + switch (e_machine) { + default: + assert(false && "architecture not supported"); + break; + case EM_PPC: + slot = R_PPC_JMP_SLOT; + break; + case EM_PPC64: + slot = R_PPC64_JMP_SLOT; + break; + case EM_386: + case EM_IAMCU: // FIXME: is this correct? + slot = R_386_JUMP_SLOT; + break; + case EM_X86_64: + slot = R_X86_64_JUMP_SLOT; + break; + case EM_ARM: + slot = R_ARM_JUMP_SLOT; + break; + case EM_HEXAGON: + slot = R_HEX_JMP_SLOT; + break; + case EM_AARCH64: + slot = R_AARCH64_JUMP_SLOT; + break; + case EM_MIPS: + slot = R_MIPS_JUMP_SLOT; + break; + case EM_S390: + slot = R_390_JMP_SLOT; + break; + case EM_RISCV: + slot = R_RISCV_JUMP_SLOT; + break; + case EM_LOONGARCH: + slot = R_LARCH_JUMP_SLOT; + break; + } + + return slot; +} + +// ELFSectionHeader + +ELFSectionHeader::ELFSectionHeader() { + memset(this, 0, sizeof(ELFSectionHeader)); +} + +bool ELFSectionHeader::Parse(const lldb_private::DataExtractor &data, + lldb::offset_t *offset) { + const unsigned byte_size = data.GetAddressByteSize(); + + // Read sh_name and sh_type. + if (data.GetU32(offset, &sh_name, 2) == nullptr) + return false; + + // Read sh_flags. + if (!GetMaxU64(data, offset, &sh_flags, byte_size)) + return false; + + // Read sh_addr, sh_off and sh_size. + if (!GetMaxU64(data, offset, &sh_addr, byte_size, 3)) + return false; + + // Read sh_link and sh_info. + if (data.GetU32(offset, &sh_link, 2) == nullptr) + return false; + + // Read sh_addralign and sh_entsize. 
+ if (!GetMaxU64(data, offset, &sh_addralign, byte_size, 2)) + return false; + + return true; +} + +// ELFSymbol + +ELFSymbol::ELFSymbol() { memset(this, 0, sizeof(ELFSymbol)); } + +#define ENUM_TO_CSTR(e) \ + case e: \ + return #e + +const char *ELFSymbol::bindingToCString(unsigned char binding) { + switch (binding) { + ENUM_TO_CSTR(STB_LOCAL); + ENUM_TO_CSTR(STB_GLOBAL); + ENUM_TO_CSTR(STB_WEAK); + ENUM_TO_CSTR(STB_LOOS); + ENUM_TO_CSTR(STB_HIOS); + ENUM_TO_CSTR(STB_LOPROC); + ENUM_TO_CSTR(STB_HIPROC); + } + return ""; +} + +const char *ELFSymbol::typeToCString(unsigned char type) { + switch (type) { + ENUM_TO_CSTR(STT_NOTYPE); + ENUM_TO_CSTR(STT_OBJECT); + ENUM_TO_CSTR(STT_FUNC); + ENUM_TO_CSTR(STT_SECTION); + ENUM_TO_CSTR(STT_FILE); + ENUM_TO_CSTR(STT_COMMON); + ENUM_TO_CSTR(STT_TLS); + ENUM_TO_CSTR(STT_GNU_IFUNC); + ENUM_TO_CSTR(STT_HIOS); + ENUM_TO_CSTR(STT_LOPROC); + ENUM_TO_CSTR(STT_HIPROC); + } + return ""; +} + +const char *ELFSymbol::sectionIndexToCString( + elf_half shndx, const lldb_private::SectionList *section_list) { + switch (shndx) { + ENUM_TO_CSTR(SHN_UNDEF); + ENUM_TO_CSTR(SHN_LOPROC); + ENUM_TO_CSTR(SHN_HIPROC); + ENUM_TO_CSTR(SHN_LOOS); + ENUM_TO_CSTR(SHN_HIOS); + ENUM_TO_CSTR(SHN_ABS); + ENUM_TO_CSTR(SHN_COMMON); + ENUM_TO_CSTR(SHN_XINDEX); + default: { + const lldb_private::Section *section = + section_list->GetSectionAtIndex(shndx).get(); + if (section) + return section->GetName().AsCString(""); + } break; + } + return ""; +} + +void ELFSymbol::Dump(lldb_private::Stream *s, uint32_t idx, + const lldb_private::DataExtractor *strtab_data, + const lldb_private::SectionList *section_list) { + s->Printf("[%3u] 0x%16.16" PRIx64 " 0x%16.16" PRIx64 + " 0x%8.8x 0x%2.2x (%-10s %-13s) 0x%2.2x 0x%4.4x (%-10s) %s\n", + idx, st_value, st_size, st_name, st_info, + bindingToCString(getBinding()), typeToCString(getType()), st_other, + st_shndx, sectionIndexToCString(st_shndx, section_list), + strtab_data ? 
strtab_data->PeekCStr(st_name) : ""); +} + +bool ELFSymbol::Parse(const lldb_private::DataExtractor &data, + lldb::offset_t *offset) { + const unsigned byte_size = data.GetAddressByteSize(); + const bool parsing_32 = byte_size == 4; + + // Read st_name. + if (data.GetU32(offset, &st_name, 1) == nullptr) + return false; + + if (parsing_32) { + // Read st_value and st_size. + if (!GetMaxU64(data, offset, &st_value, byte_size, 2)) + return false; + + // Read st_info and st_other. + if (data.GetU8(offset, &st_info, 2) == nullptr) + return false; + + // Read st_shndx. + if (data.GetU16(offset, &st_shndx, 1) == nullptr) + return false; + } else { + // Read st_info and st_other. + if (data.GetU8(offset, &st_info, 2) == nullptr) + return false; + + // Read st_shndx. + if (data.GetU16(offset, &st_shndx, 1) == nullptr) + return false; + + // Read st_value and st_size. + if (data.GetU64(offset, &st_value, 2) == nullptr) + return false; + } + return true; +} + +// ELFProgramHeader + +ELFProgramHeader::ELFProgramHeader() { + memset(this, 0, sizeof(ELFProgramHeader)); +} + +bool ELFProgramHeader::Parse(const lldb_private::DataExtractor &data, + lldb::offset_t *offset) { + const uint32_t byte_size = data.GetAddressByteSize(); + const bool parsing_32 = byte_size == 4; + + // Read p_type; + if (data.GetU32(offset, &p_type, 1) == nullptr) + return false; + + if (parsing_32) { + // Read p_offset, p_vaddr, p_paddr, p_filesz and p_memsz. + if (!GetMaxU64(data, offset, &p_offset, byte_size, 5)) + return false; + + // Read p_flags. + if (data.GetU32(offset, &p_flags, 1) == nullptr) + return false; + + // Read p_align. + if (!GetMaxU64(data, offset, &p_align, byte_size)) + return false; + } else { + // Read p_flags. + if (data.GetU32(offset, &p_flags, 1) == nullptr) + return false; + + // Read p_offset, p_vaddr, p_paddr, p_filesz, p_memsz and p_align. 
+ if (!GetMaxU64(data, offset, &p_offset, byte_size, 6)) + return false; + } + + return true; +} + +// ELFDynamic + +ELFDynamic::ELFDynamic() { memset(this, 0, sizeof(ELFDynamic)); } + +bool ELFDynamic::Parse(const lldb_private::DataExtractor &data, + lldb::offset_t *offset) { + const unsigned byte_size = data.GetAddressByteSize(); + return GetMaxS64(data, offset, &d_tag, byte_size, 2); +} + +// ELFRel + +ELFRel::ELFRel() { memset(this, 0, sizeof(ELFRel)); } + +bool ELFRel::Parse(const lldb_private::DataExtractor &data, + lldb::offset_t *offset) { + const unsigned byte_size = data.GetAddressByteSize(); + + // Read r_offset and r_info. + return GetMaxU64(data, offset, &r_offset, byte_size, 2) != false; +} + +// ELFRela + +ELFRela::ELFRela() { memset(this, 0, sizeof(ELFRela)); } + +bool ELFRela::Parse(const lldb_private::DataExtractor &data, + lldb::offset_t *offset) { + const unsigned byte_size = data.GetAddressByteSize(); + + // Read r_offset and r_info. + if (!GetMaxU64(data, offset, &r_offset, byte_size, 2)) + return false; + + // Read r_addend; + if (!GetMaxS64(data, offset, &r_addend, byte_size)) + return false; + + return true; +} diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h new file mode 100644 index 000000000000..963cc850736f --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h @@ -0,0 +1,394 @@ +//===-- ELFHeader.h ------------------------------------------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Generic structures and typedefs for ELF files. 
///
/// This file provides definitions for the various entities comprising an ELF
/// file.  The structures are generic in the sense that they do not correspond
/// to the exact binary layout of an ELF, but can be used to hold the
/// information present in both 32 and 64 bit variants of the format.  Each
/// entity provides a \c Parse method which is capable of transparently
/// reading both 32 and 64 bit instances of the object.
//===----------------------------------------------------------------------===//

#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_ELFHEADER_H
#define LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_ELFHEADER_H

#include "llvm/BinaryFormat/ELF.h"

#include "lldb/lldb-enumerations.h"
#include "lldb/lldb-types.h"

namespace lldb_private {
class DataExtractor;
} // End namespace lldb_private.

namespace elf {

/// \name ELF type definitions.
///
/// Types used to represent the various components of ELF structures.  All
/// types are signed or unsigned integral types wide enough to hold values
/// from both 32 and 64 bit ELF variants.
//@{
typedef uint64_t elf_addr;
typedef uint64_t elf_off;
typedef uint16_t elf_half;
typedef uint32_t elf_word;
typedef int32_t elf_sword;
typedef uint64_t elf_size;
typedef uint64_t elf_xword;
typedef int64_t elf_sxword;
//@}

/// \class ELFHeader
/// Generic representation of an ELF file header.
///
/// This object is used to identify the general attributes of an ELF file and
/// to locate additional sections within the file.
struct ELFHeader {
  unsigned char e_ident[llvm::ELF::EI_NIDENT]; ///< ELF file identification.
  elf_addr e_entry;            ///< Virtual address program entry point.
  elf_off e_phoff;             ///< File offset of program header table.
  elf_off e_shoff;             ///< File offset of section header table.
  elf_word e_flags;            ///< Processor specific flags.
  elf_word e_version;          ///< Version of object file (always 1).
  elf_half e_type;             ///< Object file type.
  elf_half e_machine;          ///< Target architecture.
  elf_half e_ehsize;           ///< Byte size of the ELF header.
  elf_half e_phentsize;        ///< Size of a program header table entry.
  elf_half e_phnum_hdr;        ///< Number of program header entries.
  elf_half e_shentsize;        ///< Size of a section header table entry.
  elf_half e_shnum_hdr;        ///< Number of section header entries.
  elf_half e_shstrndx_hdr;     ///< String table section index.

  // In some cases these numbers do not fit in 16 bits and they are
  // stored outside of the header in section #0. Here are the actual
  // values.
  elf_word e_phnum;            ///< Number of program header entries.
  elf_word e_shnum;            ///< Number of section header entries.
  elf_word e_shstrndx;         ///< String table section index.

  ELFHeader();

  /// Returns true if this is a 32 bit ELF file header.
  ///
  /// \return
  ///    True if this is a 32 bit ELF file header.
  bool Is32Bit() const {
    return e_ident[llvm::ELF::EI_CLASS] == llvm::ELF::ELFCLASS32;
  }

  /// Returns true if this is a 64 bit ELF file header.
  ///
  /// \return
  ///   True if this is a 64 bit ELF file header.
  bool Is64Bit() const {
    return e_ident[llvm::ELF::EI_CLASS] == llvm::ELF::ELFCLASS64;
  }

  /// The byte order of this ELF file header.
  ///
  /// \return
  ///    The byte order of this ELF file as described by the header.
  lldb::ByteOrder GetByteOrder() const;

  /// The jump slot relocation type of this ELF.
  unsigned GetRelocationJumpSlotType() const;

  /// Check if there should be a header extension in section header #0.
  ///
  /// \return
  ///    True if parsing the ELFHeader requires reading header extension
  ///    and false otherwise.
  bool HasHeaderExtension() const;

  /// Parse an ELFHeader entry starting at position \p offset and update the
  /// data extractor with the address size and byte order attributes as
  /// defined by the header.
  ///
  /// \param[in,out] data
  ///    The DataExtractor to read from.  Updated with the address size and
  ///    byte order attributes appropriate to this header.
  ///
  /// \param[in,out] offset
  ///    Pointer to an offset in the data.  On return the offset will be
  ///    advanced by the number of bytes read.
  ///
  /// \return
  ///    True if the ELFHeader was successfully read and false
  ///    otherwise.
  bool Parse(lldb_private::DataExtractor &data, lldb::offset_t *offset);

  /// Examines at most EI_NIDENT bytes starting from the given pointer and
  /// determines if the magic ELF identification exists.
  ///
  /// \return
  ///    True if the given sequence of bytes identifies an ELF file.
  static bool MagicBytesMatch(const uint8_t *magic);

  /// Examines at most EI_NIDENT bytes starting from the given address and
  /// determines the address size of the underlying ELF file.  This function
  /// should only be called on a pointer for which MagicBytesMatch returns
  /// true.
  ///
  /// \return
  ///    The number of bytes forming an address in the ELF file (either 4 or
  ///    8), else zero if the address size could not be determined.
  static unsigned AddressSizeInBytes(const uint8_t *magic);

private:

  /// Parse an ELFHeader header extension entry.  This method is called by
  /// Parse().
  ///
  /// \param[in] data
  ///    The DataExtractor to read from.
  void ParseHeaderExtension(lldb_private::DataExtractor &data);
};

/// \class ELFSectionHeader
/// Generic representation of an ELF section header.
struct ELFSectionHeader {
  elf_word sh_name;       ///< Section name string index.
  elf_word sh_type;       ///< Section type.
  elf_xword sh_flags;     ///< Section attributes.
  elf_addr sh_addr;       ///< Virtual address of the section in memory.
  elf_off sh_offset;      ///< Start of section from beginning of file.
  elf_xword sh_size;      ///< Number of bytes occupied in the file.
  elf_word sh_link;       ///< Index of associated section.
  elf_word sh_info;       ///< Extra section info (overloaded).
  elf_xword sh_addralign; ///< Power of two alignment constraint.
  elf_xword sh_entsize;   ///< Byte size of each section entry.

  ELFSectionHeader();

  /// Parse an ELFSectionHeader entry from the given DataExtractor starting at
  /// position \p offset.
  ///
  /// \param[in] data
  ///    The DataExtractor to read from.  The address size of the extractor
  ///    determines if a 32 or 64 bit object should be read.
  ///
  /// \param[in,out] offset
  ///    Pointer to an offset in the data.  On return the offset will be
  ///    advanced by the number of bytes read.
  ///
  /// \return
  ///    True if the ELFSectionHeader was successfully read and false
  ///    otherwise.
  bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset);
};

/// \class ELFProgramHeader
/// Generic representation of an ELF program header.
struct ELFProgramHeader {
  elf_word p_type;    ///< Type of program segment.
  elf_word p_flags;   ///< Segment attributes.
  elf_off p_offset;   ///< Start of segment from beginning of file.
  elf_addr p_vaddr;   ///< Virtual address of segment in memory.
  elf_addr p_paddr;   ///< Physical address (for non-VM systems).
  elf_xword p_filesz; ///< Byte size of the segment in file.
  elf_xword p_memsz;  ///< Byte size of the segment in memory.
  elf_xword p_align;  ///< Segment alignment constraint.

  ELFProgramHeader();

  /// Parse an ELFProgramHeader entry from the given DataExtractor starting at
  /// position \p offset.  The address size of the DataExtractor determines if
  /// a 32 or 64 bit object is to be parsed.
  ///
  /// \param[in] data
  ///    The DataExtractor to read from.  The address size of the extractor
  ///    determines if a 32 or 64 bit object should be read.
  ///
  /// \param[in,out] offset
  ///    Pointer to an offset in the data.  On return the offset will be
  ///    advanced by the number of bytes read.
  ///
  /// \return
  ///    True if the ELFProgramHeader was successfully read and false
  ///    otherwise.
+ bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset); +}; + +/// \class ELFSymbol +/// Represents a symbol within an ELF symbol table. +struct ELFSymbol { + elf_addr st_value; ///< Absolute or relocatable address. + elf_xword st_size; ///< Size of the symbol or zero. + elf_word st_name; ///< Symbol name string index. + unsigned char st_info; ///< Symbol type and binding attributes. + unsigned char st_other; ///< Reserved for future use. + elf_half st_shndx; ///< Section to which this symbol applies. + + ELFSymbol(); + + /// Returns the binding attribute of the st_info member. + unsigned char getBinding() const { return st_info >> 4; } + + /// Returns the type attribute of the st_info member. + unsigned char getType() const { return st_info & 0x0F; } + + /// Sets the binding and type of the st_info member. + void setBindingAndType(unsigned char binding, unsigned char type) { + st_info = (binding << 4) + (type & 0x0F); + } + + static const char *bindingToCString(unsigned char binding); + + static const char *typeToCString(unsigned char type); + + static const char * + sectionIndexToCString(elf_half shndx, + const lldb_private::SectionList *section_list); + + /// Parse an ELFSymbol entry from the given DataExtractor starting at + /// position \p offset. The address size of the DataExtractor determines if + /// a 32 or 64 bit object is to be parsed. + /// + /// \param[in] data + /// The DataExtractor to read from. The address size of the extractor + /// determines if a 32 or 64 bit object should be read. + /// + /// \param[in,out] offset + /// Pointer to an offset in the data. On return the offset will be + /// advanced by the number of bytes read. + /// + /// \return + /// True if the ELFSymbol was successfully read and false otherwise. 
+ bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset); + + void Dump(lldb_private::Stream *s, uint32_t idx, + const lldb_private::DataExtractor *strtab_data, + const lldb_private::SectionList *section_list); +}; + +/// \class ELFDynamic +/// Represents an entry in an ELF dynamic table. +struct ELFDynamic { + elf_sxword d_tag; ///< Type of dynamic table entry. + union { + elf_xword d_val; ///< Integer value of the table entry. + elf_addr d_ptr; ///< Pointer value of the table entry. + }; + + ELFDynamic(); + + /// Parse an ELFDynamic entry from the given DataExtractor starting at + /// position \p offset. The address size of the DataExtractor determines if + /// a 32 or 64 bit object is to be parsed. + /// + /// \param[in] data + /// The DataExtractor to read from. The address size of the extractor + /// determines if a 32 or 64 bit object should be read. + /// + /// \param[in,out] offset + /// Pointer to an offset in the data. On return the offset will be + /// advanced by the number of bytes read. + /// + /// \return + /// True if the ELFDynamic entry was successfully read and false + /// otherwise. + bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset); +}; + +/// \class ELFRel +/// Represents a relocation entry with an implicit addend. +struct ELFRel { + elf_addr r_offset; ///< Address of reference. + elf_xword r_info; ///< symbol index and type of relocation. + + ELFRel(); + + /// Parse an ELFRel entry from the given DataExtractor starting at position + /// \p offset. The address size of the DataExtractor determines if a 32 or + /// 64 bit object is to be parsed. + /// + /// \param[in] data + /// The DataExtractor to read from. The address size of the extractor + /// determines if a 32 or 64 bit object should be read. + /// + /// \param[in,out] offset + /// Pointer to an offset in the data. On return the offset will be + /// advanced by the number of bytes read. 
+ /// + /// \return + /// True if the ELFRel entry was successfully read and false otherwise. + bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset); + + /// Returns the type when the given entry represents a 32-bit relocation. + static unsigned RelocType32(const ELFRel &rel) { return rel.r_info & 0x0ff; } + + /// Returns the type when the given entry represents a 64-bit relocation. + static unsigned RelocType64(const ELFRel &rel) { + return rel.r_info & 0xffffffff; + } + + /// Returns the symbol index when the given entry represents a 32-bit + /// relocation. + static unsigned RelocSymbol32(const ELFRel &rel) { return rel.r_info >> 8; } + + /// Returns the symbol index when the given entry represents a 64-bit + /// relocation. + static unsigned RelocSymbol64(const ELFRel &rel) { return rel.r_info >> 32; } +}; + +/// \class ELFRela +/// Represents a relocation entry with an explicit addend. +struct ELFRela { + elf_addr r_offset; ///< Address of reference. + elf_xword r_info; ///< Symbol index and type of relocation. + elf_sxword r_addend; ///< Constant part of expression. + + ELFRela(); + + /// Parse an ELFRela entry from the given DataExtractor starting at position + /// \p offset. The address size of the DataExtractor determines if a 32 or + /// 64 bit object is to be parsed. + /// + /// \param[in] data + /// The DataExtractor to read from. The address size of the extractor + /// determines if a 32 or 64 bit object should be read. + /// + /// \param[in,out] offset + /// Pointer to an offset in the data. On return the offset will be + /// advanced by the number of bytes read. + /// + /// \return + /// True if the ELFRela entry was successfully read and false otherwise. + bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset); + + /// Returns the type when the given entry represents a 32-bit relocation. 
+ static unsigned RelocType32(const ELFRela &rela) { + return rela.r_info & 0x0ff; + } + + /// Returns the type when the given entry represents a 64-bit relocation. + static unsigned RelocType64(const ELFRela &rela) { + return rela.r_info & 0xffffffff; + } + + /// Returns the symbol index when the given entry represents a 32-bit + /// relocation. + static unsigned RelocSymbol32(const ELFRela &rela) { + return rela.r_info >> 8; + } + + /// Returns the symbol index when the given entry represents a 64-bit + /// relocation. + static unsigned RelocSymbol64(const ELFRela &rela) { + return rela.r_info >> 32; + } +}; + +} // End namespace elf. + +#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_ELFHEADER_H diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp new file mode 100644 index 000000000000..890db5c27481 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -0,0 +1,3706 @@ +//===-- ObjectFileELF.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ObjectFileELF.h" + +#include <algorithm> +#include <cassert> +#include <optional> +#include <unordered_map> + +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Progress.h" +#include "lldb/Core/Section.h" +#include "lldb/Host/FileSystem.h" +#include "lldb/Host/LZMA.h" +#include "lldb/Symbol/DWARFCallFrameInfo.h" +#include "lldb/Symbol/SymbolContext.h" +#include "lldb/Target/SectionLoadList.h" +#include "lldb/Target/Target.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/FileSpecList.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/RangeMap.h" +#include "lldb/Utility/Status.h" +#include "lldb/Utility/Stream.h" +#include "lldb/Utility/Timer.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/Object/Decompressor.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/CRC.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MipsABIFlags.h" + +#define CASE_AND_STREAM(s, def, width) \ + case def: \ + s->Printf("%-*s", width, #def); \ + break; + +using namespace lldb; +using namespace lldb_private; +using namespace elf; +using namespace llvm::ELF; + +LLDB_PLUGIN_DEFINE(ObjectFileELF) + +// ELF note owner definitions +static const char *const LLDB_NT_OWNER_FREEBSD = "FreeBSD"; +static const char *const LLDB_NT_OWNER_GNU = "GNU"; +static const char *const LLDB_NT_OWNER_NETBSD = "NetBSD"; +static const char *const LLDB_NT_OWNER_NETBSDCORE = "NetBSD-CORE"; +static const char *const LLDB_NT_OWNER_OPENBSD = "OpenBSD"; +static const char 
*const LLDB_NT_OWNER_ANDROID = "Android"; +static const char *const LLDB_NT_OWNER_CORE = "CORE"; +static const char *const LLDB_NT_OWNER_LINUX = "LINUX"; + +// ELF note type definitions +static const elf_word LLDB_NT_FREEBSD_ABI_TAG = 0x01; +static const elf_word LLDB_NT_FREEBSD_ABI_SIZE = 4; + +static const elf_word LLDB_NT_GNU_ABI_TAG = 0x01; +static const elf_word LLDB_NT_GNU_ABI_SIZE = 16; + +static const elf_word LLDB_NT_GNU_BUILD_ID_TAG = 0x03; + +static const elf_word LLDB_NT_NETBSD_IDENT_TAG = 1; +static const elf_word LLDB_NT_NETBSD_IDENT_DESCSZ = 4; +static const elf_word LLDB_NT_NETBSD_IDENT_NAMESZ = 7; +static const elf_word LLDB_NT_NETBSD_PROCINFO = 1; + +// GNU ABI note OS constants +static const elf_word LLDB_NT_GNU_ABI_OS_LINUX = 0x00; +static const elf_word LLDB_NT_GNU_ABI_OS_HURD = 0x01; +static const elf_word LLDB_NT_GNU_ABI_OS_SOLARIS = 0x02; + +namespace { + +//===----------------------------------------------------------------------===// +/// \class ELFRelocation +/// Generic wrapper for ELFRel and ELFRela. +/// +/// This helper class allows us to parse both ELFRel and ELFRela relocation +/// entries in a generic manner. +class ELFRelocation { +public: + /// Constructs an ELFRelocation entry with a personality as given by @p + /// type. + /// + /// \param type Either DT_REL or DT_RELA. Any other value is invalid. 
+ ELFRelocation(unsigned type); + + ~ELFRelocation(); + + bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset); + + static unsigned RelocType32(const ELFRelocation &rel); + + static unsigned RelocType64(const ELFRelocation &rel); + + static unsigned RelocSymbol32(const ELFRelocation &rel); + + static unsigned RelocSymbol64(const ELFRelocation &rel); + + static elf_addr RelocOffset32(const ELFRelocation &rel); + + static elf_addr RelocOffset64(const ELFRelocation &rel); + + static elf_sxword RelocAddend32(const ELFRelocation &rel); + + static elf_sxword RelocAddend64(const ELFRelocation &rel); + + bool IsRela() { return (reloc.is<ELFRela *>()); } + +private: + typedef llvm::PointerUnion<ELFRel *, ELFRela *> RelocUnion; + + RelocUnion reloc; +}; +} // end anonymous namespace + +ELFRelocation::ELFRelocation(unsigned type) { + if (type == DT_REL || type == SHT_REL) + reloc = new ELFRel(); + else if (type == DT_RELA || type == SHT_RELA) + reloc = new ELFRela(); + else { + assert(false && "unexpected relocation type"); + reloc = static_cast<ELFRel *>(nullptr); + } +} + +ELFRelocation::~ELFRelocation() { + if (reloc.is<ELFRel *>()) + delete reloc.get<ELFRel *>(); + else + delete reloc.get<ELFRela *>(); +} + +bool ELFRelocation::Parse(const lldb_private::DataExtractor &data, + lldb::offset_t *offset) { + if (reloc.is<ELFRel *>()) + return reloc.get<ELFRel *>()->Parse(data, offset); + else + return reloc.get<ELFRela *>()->Parse(data, offset); +} + +unsigned ELFRelocation::RelocType32(const ELFRelocation &rel) { + if (rel.reloc.is<ELFRel *>()) + return ELFRel::RelocType32(*rel.reloc.get<ELFRel *>()); + else + return ELFRela::RelocType32(*rel.reloc.get<ELFRela *>()); +} + +unsigned ELFRelocation::RelocType64(const ELFRelocation &rel) { + if (rel.reloc.is<ELFRel *>()) + return ELFRel::RelocType64(*rel.reloc.get<ELFRel *>()); + else + return ELFRela::RelocType64(*rel.reloc.get<ELFRela *>()); +} + +unsigned ELFRelocation::RelocSymbol32(const ELFRelocation 
&rel) { + if (rel.reloc.is<ELFRel *>()) + return ELFRel::RelocSymbol32(*rel.reloc.get<ELFRel *>()); + else + return ELFRela::RelocSymbol32(*rel.reloc.get<ELFRela *>()); +} + +unsigned ELFRelocation::RelocSymbol64(const ELFRelocation &rel) { + if (rel.reloc.is<ELFRel *>()) + return ELFRel::RelocSymbol64(*rel.reloc.get<ELFRel *>()); + else + return ELFRela::RelocSymbol64(*rel.reloc.get<ELFRela *>()); +} + +elf_addr ELFRelocation::RelocOffset32(const ELFRelocation &rel) { + if (rel.reloc.is<ELFRel *>()) + return rel.reloc.get<ELFRel *>()->r_offset; + else + return rel.reloc.get<ELFRela *>()->r_offset; +} + +elf_addr ELFRelocation::RelocOffset64(const ELFRelocation &rel) { + if (rel.reloc.is<ELFRel *>()) + return rel.reloc.get<ELFRel *>()->r_offset; + else + return rel.reloc.get<ELFRela *>()->r_offset; +} + +elf_sxword ELFRelocation::RelocAddend32(const ELFRelocation &rel) { + if (rel.reloc.is<ELFRel *>()) + return 0; + else + return rel.reloc.get<ELFRela *>()->r_addend; +} + +elf_sxword ELFRelocation::RelocAddend64(const ELFRelocation &rel) { + if (rel.reloc.is<ELFRel *>()) + return 0; + else + return rel.reloc.get<ELFRela *>()->r_addend; +} + +static user_id_t SegmentID(size_t PHdrIndex) { + return ~user_id_t(PHdrIndex); +} + +bool ELFNote::Parse(const DataExtractor &data, lldb::offset_t *offset) { + // Read all fields. + if (data.GetU32(offset, &n_namesz, 3) == nullptr) + return false; + + // The name field is required to be nul-terminated, and n_namesz includes the + // terminating nul in observed implementations (contrary to the ELF-64 spec). + // A special case is needed for cores generated by some older Linux versions, + // which write a note named "CORE" without a nul terminator and n_namesz = 4. 
+ if (n_namesz == 4) { + char buf[4]; + if (data.ExtractBytes(*offset, 4, data.GetByteOrder(), buf) != 4) + return false; + if (strncmp(buf, "CORE", 4) == 0) { + n_name = "CORE"; + *offset += 4; + return true; + } + } + + const char *cstr = data.GetCStr(offset, llvm::alignTo(n_namesz, 4)); + if (cstr == nullptr) { + Log *log = GetLog(LLDBLog::Symbols); + LLDB_LOGF(log, "Failed to parse note name lacking nul terminator"); + + return false; + } + n_name = cstr; + return true; +} + +static uint32_t mipsVariantFromElfFlags (const elf::ELFHeader &header) { + const uint32_t mips_arch = header.e_flags & llvm::ELF::EF_MIPS_ARCH; + uint32_t endian = header.e_ident[EI_DATA]; + uint32_t arch_variant = ArchSpec::eMIPSSubType_unknown; + uint32_t fileclass = header.e_ident[EI_CLASS]; + + // If there aren't any elf flags available (e.g core elf file) then return + // default + // 32 or 64 bit arch (without any architecture revision) based on object file's class. + if (header.e_type == ET_CORE) { + switch (fileclass) { + case llvm::ELF::ELFCLASS32: + return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32el + : ArchSpec::eMIPSSubType_mips32; + case llvm::ELF::ELFCLASS64: + return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64el + : ArchSpec::eMIPSSubType_mips64; + default: + return arch_variant; + } + } + + switch (mips_arch) { + case llvm::ELF::EF_MIPS_ARCH_1: + case llvm::ELF::EF_MIPS_ARCH_2: + case llvm::ELF::EF_MIPS_ARCH_32: + return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32el + : ArchSpec::eMIPSSubType_mips32; + case llvm::ELF::EF_MIPS_ARCH_32R2: + return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips32r2el + : ArchSpec::eMIPSSubType_mips32r2; + case llvm::ELF::EF_MIPS_ARCH_32R6: + return (endian == ELFDATA2LSB) ? 
ArchSpec::eMIPSSubType_mips32r6el + : ArchSpec::eMIPSSubType_mips32r6; + case llvm::ELF::EF_MIPS_ARCH_3: + case llvm::ELF::EF_MIPS_ARCH_4: + case llvm::ELF::EF_MIPS_ARCH_5: + case llvm::ELF::EF_MIPS_ARCH_64: + return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64el + : ArchSpec::eMIPSSubType_mips64; + case llvm::ELF::EF_MIPS_ARCH_64R2: + return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64r2el + : ArchSpec::eMIPSSubType_mips64r2; + case llvm::ELF::EF_MIPS_ARCH_64R6: + return (endian == ELFDATA2LSB) ? ArchSpec::eMIPSSubType_mips64r6el + : ArchSpec::eMIPSSubType_mips64r6; + default: + break; + } + + return arch_variant; +} + +static uint32_t riscvVariantFromElfFlags(const elf::ELFHeader &header) { + uint32_t fileclass = header.e_ident[EI_CLASS]; + switch (fileclass) { + case llvm::ELF::ELFCLASS32: + return ArchSpec::eRISCVSubType_riscv32; + case llvm::ELF::ELFCLASS64: + return ArchSpec::eRISCVSubType_riscv64; + default: + return ArchSpec::eRISCVSubType_unknown; + } +} + +static uint32_t ppc64VariantFromElfFlags(const elf::ELFHeader &header) { + uint32_t endian = header.e_ident[EI_DATA]; + if (endian == ELFDATA2LSB) + return ArchSpec::eCore_ppc64le_generic; + else + return ArchSpec::eCore_ppc64_generic; +} + +static uint32_t loongarchVariantFromElfFlags(const elf::ELFHeader &header) { + uint32_t fileclass = header.e_ident[EI_CLASS]; + switch (fileclass) { + case llvm::ELF::ELFCLASS32: + return ArchSpec::eLoongArchSubType_loongarch32; + case llvm::ELF::ELFCLASS64: + return ArchSpec::eLoongArchSubType_loongarch64; + default: + return ArchSpec::eLoongArchSubType_unknown; + } +} + +static uint32_t subTypeFromElfHeader(const elf::ELFHeader &header) { + if (header.e_machine == llvm::ELF::EM_MIPS) + return mipsVariantFromElfFlags(header); + else if (header.e_machine == llvm::ELF::EM_PPC64) + return ppc64VariantFromElfFlags(header); + else if (header.e_machine == llvm::ELF::EM_RISCV) + return riscvVariantFromElfFlags(header); + else if (header.e_machine == 
llvm::ELF::EM_LOONGARCH) + return loongarchVariantFromElfFlags(header); + + return LLDB_INVALID_CPUTYPE; +} + +char ObjectFileELF::ID; + +// Arbitrary constant used as UUID prefix for core files. +const uint32_t ObjectFileELF::g_core_uuid_magic(0xE210C); + +// Static methods. +void ObjectFileELF::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + CreateMemoryInstance, GetModuleSpecifications); +} + +void ObjectFileELF::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +ObjectFile *ObjectFileELF::CreateInstance(const lldb::ModuleSP &module_sp, + DataBufferSP data_sp, + lldb::offset_t data_offset, + const lldb_private::FileSpec *file, + lldb::offset_t file_offset, + lldb::offset_t length) { + bool mapped_writable = false; + if (!data_sp) { + data_sp = MapFileDataWritable(*file, length, file_offset); + if (!data_sp) + return nullptr; + data_offset = 0; + mapped_writable = true; + } + + assert(data_sp); + + if (data_sp->GetByteSize() <= (llvm::ELF::EI_NIDENT + data_offset)) + return nullptr; + + const uint8_t *magic = data_sp->GetBytes() + data_offset; + if (!ELFHeader::MagicBytesMatch(magic)) + return nullptr; + + // Update the data to contain the entire file if it doesn't already + if (data_sp->GetByteSize() < length) { + data_sp = MapFileDataWritable(*file, length, file_offset); + if (!data_sp) + return nullptr; + data_offset = 0; + mapped_writable = true; + magic = data_sp->GetBytes(); + } + + // If we didn't map the data as writable take ownership of the buffer. 
+ if (!mapped_writable) { + data_sp = std::make_shared<DataBufferHeap>(data_sp->GetBytes(), + data_sp->GetByteSize()); + data_offset = 0; + magic = data_sp->GetBytes(); + } + + unsigned address_size = ELFHeader::AddressSizeInBytes(magic); + if (address_size == 4 || address_size == 8) { + std::unique_ptr<ObjectFileELF> objfile_up(new ObjectFileELF( + module_sp, data_sp, data_offset, file, file_offset, length)); + ArchSpec spec = objfile_up->GetArchitecture(); + if (spec && objfile_up->SetModulesArchitecture(spec)) + return objfile_up.release(); + } + + return nullptr; +} + +ObjectFile *ObjectFileELF::CreateMemoryInstance( + const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp, + const lldb::ProcessSP &process_sp, lldb::addr_t header_addr) { + if (data_sp && data_sp->GetByteSize() > (llvm::ELF::EI_NIDENT)) { + const uint8_t *magic = data_sp->GetBytes(); + if (ELFHeader::MagicBytesMatch(magic)) { + unsigned address_size = ELFHeader::AddressSizeInBytes(magic); + if (address_size == 4 || address_size == 8) { + std::unique_ptr<ObjectFileELF> objfile_up( + new ObjectFileELF(module_sp, data_sp, process_sp, header_addr)); + ArchSpec spec = objfile_up->GetArchitecture(); + if (spec && objfile_up->SetModulesArchitecture(spec)) + return objfile_up.release(); + } + } + } + return nullptr; +} + +bool ObjectFileELF::MagicBytesMatch(DataBufferSP &data_sp, + lldb::addr_t data_offset, + lldb::addr_t data_length) { + if (data_sp && + data_sp->GetByteSize() > (llvm::ELF::EI_NIDENT + data_offset)) { + const uint8_t *magic = data_sp->GetBytes() + data_offset; + return ELFHeader::MagicBytesMatch(magic); + } + return false; +} + +static uint32_t calc_crc32(uint32_t init, const DataExtractor &data) { + return llvm::crc32(init, + llvm::ArrayRef(data.GetDataStart(), data.GetByteSize())); +} + +uint32_t ObjectFileELF::CalculateELFNotesSegmentsCRC32( + const ProgramHeaderColl &program_headers, DataExtractor &object_data) { + + uint32_t core_notes_crc = 0; + + for (const 
ELFProgramHeader &H : program_headers) { + if (H.p_type == llvm::ELF::PT_NOTE) { + const elf_off ph_offset = H.p_offset; + const size_t ph_size = H.p_filesz; + + DataExtractor segment_data; + if (segment_data.SetData(object_data, ph_offset, ph_size) != ph_size) { + // The ELF program header contained incorrect data, probably corefile + // is incomplete or corrupted. + break; + } + + core_notes_crc = calc_crc32(core_notes_crc, segment_data); + } + } + + return core_notes_crc; +} + +static const char *OSABIAsCString(unsigned char osabi_byte) { +#define _MAKE_OSABI_CASE(x) \ + case x: \ + return #x + switch (osabi_byte) { + _MAKE_OSABI_CASE(ELFOSABI_NONE); + _MAKE_OSABI_CASE(ELFOSABI_HPUX); + _MAKE_OSABI_CASE(ELFOSABI_NETBSD); + _MAKE_OSABI_CASE(ELFOSABI_GNU); + _MAKE_OSABI_CASE(ELFOSABI_HURD); + _MAKE_OSABI_CASE(ELFOSABI_SOLARIS); + _MAKE_OSABI_CASE(ELFOSABI_AIX); + _MAKE_OSABI_CASE(ELFOSABI_IRIX); + _MAKE_OSABI_CASE(ELFOSABI_FREEBSD); + _MAKE_OSABI_CASE(ELFOSABI_TRU64); + _MAKE_OSABI_CASE(ELFOSABI_MODESTO); + _MAKE_OSABI_CASE(ELFOSABI_OPENBSD); + _MAKE_OSABI_CASE(ELFOSABI_OPENVMS); + _MAKE_OSABI_CASE(ELFOSABI_NSK); + _MAKE_OSABI_CASE(ELFOSABI_AROS); + _MAKE_OSABI_CASE(ELFOSABI_FENIXOS); + _MAKE_OSABI_CASE(ELFOSABI_C6000_ELFABI); + _MAKE_OSABI_CASE(ELFOSABI_C6000_LINUX); + _MAKE_OSABI_CASE(ELFOSABI_ARM); + _MAKE_OSABI_CASE(ELFOSABI_STANDALONE); + default: + return "<unknown-osabi>"; + } +#undef _MAKE_OSABI_CASE +} + +// +// WARNING : This function is being deprecated +// It's functionality has moved to ArchSpec::SetArchitecture This function is +// only being kept to validate the move. 
+// +// TODO : Remove this function +static bool GetOsFromOSABI(unsigned char osabi_byte, + llvm::Triple::OSType &ostype) { + switch (osabi_byte) { + case ELFOSABI_AIX: + ostype = llvm::Triple::OSType::AIX; + break; + case ELFOSABI_FREEBSD: + ostype = llvm::Triple::OSType::FreeBSD; + break; + case ELFOSABI_GNU: + ostype = llvm::Triple::OSType::Linux; + break; + case ELFOSABI_NETBSD: + ostype = llvm::Triple::OSType::NetBSD; + break; + case ELFOSABI_OPENBSD: + ostype = llvm::Triple::OSType::OpenBSD; + break; + case ELFOSABI_SOLARIS: + ostype = llvm::Triple::OSType::Solaris; + break; + default: + ostype = llvm::Triple::OSType::UnknownOS; + } + return ostype != llvm::Triple::OSType::UnknownOS; +} + +size_t ObjectFileELF::GetModuleSpecifications( + const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp, + lldb::offset_t data_offset, lldb::offset_t file_offset, + lldb::offset_t length, lldb_private::ModuleSpecList &specs) { + Log *log = GetLog(LLDBLog::Modules); + + const size_t initial_count = specs.GetSize(); + + if (ObjectFileELF::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) { + DataExtractor data; + data.SetData(data_sp); + elf::ELFHeader header; + lldb::offset_t header_offset = data_offset; + if (header.Parse(data, &header_offset)) { + if (data_sp) { + ModuleSpec spec(file); + // In Android API level 23 and above, bionic dynamic linker is able to + // load .so file directly from zip file. In that case, .so file is + // page aligned and uncompressed, and this module spec should retain the + // .so file offset and file size to pass through the information from + // lldb-server to LLDB. For normal file, file_offset should be 0, + // length should be the size of the file. 
+ spec.SetObjectOffset(file_offset); + spec.SetObjectSize(length); + + const uint32_t sub_type = subTypeFromElfHeader(header); + spec.GetArchitecture().SetArchitecture( + eArchTypeELF, header.e_machine, sub_type, header.e_ident[EI_OSABI]); + + if (spec.GetArchitecture().IsValid()) { + llvm::Triple::OSType ostype; + llvm::Triple::VendorType vendor; + llvm::Triple::OSType spec_ostype = + spec.GetArchitecture().GetTriple().getOS(); + + LLDB_LOGF(log, "ObjectFileELF::%s file '%s' module OSABI: %s", + __FUNCTION__, file.GetPath().c_str(), + OSABIAsCString(header.e_ident[EI_OSABI])); + + // SetArchitecture should have set the vendor to unknown + vendor = spec.GetArchitecture().GetTriple().getVendor(); + assert(vendor == llvm::Triple::UnknownVendor); + UNUSED_IF_ASSERT_DISABLED(vendor); + + // + // Validate it is ok to remove GetOsFromOSABI + GetOsFromOSABI(header.e_ident[EI_OSABI], ostype); + assert(spec_ostype == ostype); + if (spec_ostype != llvm::Triple::OSType::UnknownOS) { + LLDB_LOGF(log, + "ObjectFileELF::%s file '%s' set ELF module OS type " + "from ELF header OSABI.", + __FUNCTION__, file.GetPath().c_str()); + } + + // When ELF file does not contain GNU build ID, the later code will + // calculate CRC32 with this data_sp file_offset and length. It is + // important for Android zip .so file, which is a slice of a file, + // to not access the outside of the file slice range. + if (data_sp->GetByteSize() < length) + data_sp = MapFileData(file, length, file_offset); + if (data_sp) + data.SetData(data_sp); + // In case there is header extension in the section #0, the header we + // parsed above could have sentinel values for e_phnum, e_shnum, and + // e_shstrndx. In this case we need to reparse the header with a + // bigger data source to get the actual values. 
+ if (header.HasHeaderExtension()) { + lldb::offset_t header_offset = data_offset; + header.Parse(data, &header_offset); + } + + uint32_t gnu_debuglink_crc = 0; + std::string gnu_debuglink_file; + SectionHeaderColl section_headers; + lldb_private::UUID &uuid = spec.GetUUID(); + + GetSectionHeaderInfo(section_headers, data, header, uuid, + gnu_debuglink_file, gnu_debuglink_crc, + spec.GetArchitecture()); + + llvm::Triple &spec_triple = spec.GetArchitecture().GetTriple(); + + LLDB_LOGF(log, + "ObjectFileELF::%s file '%s' module set to triple: %s " + "(architecture %s)", + __FUNCTION__, file.GetPath().c_str(), + spec_triple.getTriple().c_str(), + spec.GetArchitecture().GetArchitectureName()); + + if (!uuid.IsValid()) { + uint32_t core_notes_crc = 0; + + if (!gnu_debuglink_crc) { + LLDB_SCOPED_TIMERF( + "Calculating module crc32 %s with size %" PRIu64 " KiB", + file.GetFilename().AsCString(), + (length - file_offset) / 1024); + + // For core files - which usually don't happen to have a + // gnu_debuglink, and are pretty bulky - calculating whole + // contents crc32 would be too much of luxury. Thus we will need + // to fallback to something simpler. + if (header.e_type == llvm::ELF::ET_CORE) { + ProgramHeaderColl program_headers; + GetProgramHeaderInfo(program_headers, data, header); + + core_notes_crc = + CalculateELFNotesSegmentsCRC32(program_headers, data); + } else { + gnu_debuglink_crc = calc_crc32(0, data); + } + } + using u32le = llvm::support::ulittle32_t; + if (gnu_debuglink_crc) { + // Use 4 bytes of crc from the .gnu_debuglink section. + u32le data(gnu_debuglink_crc); + uuid = UUID(&data, sizeof(data)); + } else if (core_notes_crc) { + // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make + // it look different form .gnu_debuglink crc followed by 4 bytes + // of note segments crc. 
+ u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)}; + uuid = UUID(data, sizeof(data)); + } + } + + specs.Append(spec); + } + } + } + } + + return specs.GetSize() - initial_count; +} + +// ObjectFile protocol + +ObjectFileELF::ObjectFileELF(const lldb::ModuleSP &module_sp, + DataBufferSP data_sp, lldb::offset_t data_offset, + const FileSpec *file, lldb::offset_t file_offset, + lldb::offset_t length) + : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset) { + if (file) + m_file = *file; +} + +ObjectFileELF::ObjectFileELF(const lldb::ModuleSP &module_sp, + DataBufferSP header_data_sp, + const lldb::ProcessSP &process_sp, + addr_t header_addr) + : ObjectFile(module_sp, process_sp, header_addr, header_data_sp) {} + +bool ObjectFileELF::IsExecutable() const { + return ((m_header.e_type & ET_EXEC) != 0) || (m_header.e_entry != 0); +} + +bool ObjectFileELF::SetLoadAddress(Target &target, lldb::addr_t value, + bool value_is_offset) { + ModuleSP module_sp = GetModule(); + if (module_sp) { + size_t num_loaded_sections = 0; + SectionList *section_list = GetSectionList(); + if (section_list) { + if (!value_is_offset) { + addr_t base = GetBaseAddress().GetFileAddress(); + if (base == LLDB_INVALID_ADDRESS) + return false; + value -= base; + } + + const size_t num_sections = section_list->GetSize(); + size_t sect_idx = 0; + + for (sect_idx = 0; sect_idx < num_sections; ++sect_idx) { + // Iterate through the object file sections to find all of the sections + // that have SHF_ALLOC in their flag bits. + SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); + + // PT_TLS segments can have the same p_vaddr and p_paddr as other + // PT_LOAD segments so we shouldn't load them. 
If we do load them, then + // the SectionLoadList will incorrectly fill in the instance variable + // SectionLoadList::m_addr_to_sect with the same address as a PT_LOAD + // segment and we won't be able to resolve addresses in the PT_LOAD + // segment whose p_vaddr entry matches that of the PT_TLS. Any variables + // that appear in the PT_TLS segments get resolved by the DWARF + // expressions. If this ever changes we will need to fix all object + // file plug-ins, but until then, we don't want PT_TLS segments to + // remove the entry from SectionLoadList::m_addr_to_sect when we call + // SetSectionLoadAddress() below. + if (section_sp->IsThreadSpecific()) + continue; + if (section_sp->Test(SHF_ALLOC) || + section_sp->GetType() == eSectionTypeContainer) { + lldb::addr_t load_addr = section_sp->GetFileAddress(); + // We don't want to update the load address of a section with type + // eSectionTypeAbsoluteAddress as they already have the absolute load + // address already specified + if (section_sp->GetType() != eSectionTypeAbsoluteAddress) + load_addr += value; + + // On 32-bit systems the load address have to fit into 4 bytes. The + // rest of the bytes are the overflow from the addition. + if (GetAddressByteSize() == 4) + load_addr &= 0xFFFFFFFF; + + if (target.GetSectionLoadList().SetSectionLoadAddress(section_sp, + load_addr)) + ++num_loaded_sections; + } + } + return num_loaded_sections > 0; + } + } + return false; +} + +ByteOrder ObjectFileELF::GetByteOrder() const { + if (m_header.e_ident[EI_DATA] == ELFDATA2MSB) + return eByteOrderBig; + if (m_header.e_ident[EI_DATA] == ELFDATA2LSB) + return eByteOrderLittle; + return eByteOrderInvalid; +} + +uint32_t ObjectFileELF::GetAddressByteSize() const { + return m_data.GetAddressByteSize(); +} + +AddressClass ObjectFileELF::GetAddressClass(addr_t file_addr) { + Symtab *symtab = GetSymtab(); + if (!symtab) + return AddressClass::eUnknown; + + // The address class is determined based on the symtab. 
Ask it from the + // object file what contains the symtab information. + ObjectFile *symtab_objfile = symtab->GetObjectFile(); + if (symtab_objfile != nullptr && symtab_objfile != this) + return symtab_objfile->GetAddressClass(file_addr); + + auto res = ObjectFile::GetAddressClass(file_addr); + if (res != AddressClass::eCode) + return res; + + auto ub = m_address_class_map.upper_bound(file_addr); + if (ub == m_address_class_map.begin()) { + // No entry in the address class map before the address. Return default + // address class for an address in a code section. + return AddressClass::eCode; + } + + // Move iterator to the address class entry preceding address + --ub; + + return ub->second; +} + +size_t ObjectFileELF::SectionIndex(const SectionHeaderCollIter &I) { + return std::distance(m_section_headers.begin(), I); +} + +size_t ObjectFileELF::SectionIndex(const SectionHeaderCollConstIter &I) const { + return std::distance(m_section_headers.begin(), I); +} + +bool ObjectFileELF::ParseHeader() { + lldb::offset_t offset = 0; + return m_header.Parse(m_data, &offset); +} + +UUID ObjectFileELF::GetUUID() { + // Need to parse the section list to get the UUIDs, so make sure that's been + // done. + if (!ParseSectionHeaders() && GetType() != ObjectFile::eTypeCoreFile) + return UUID(); + + if (!m_uuid) { + using u32le = llvm::support::ulittle32_t; + if (GetType() == ObjectFile::eTypeCoreFile) { + uint32_t core_notes_crc = 0; + + if (!ParseProgramHeaders()) + return UUID(); + + core_notes_crc = + CalculateELFNotesSegmentsCRC32(m_program_headers, m_data); + + if (core_notes_crc) { + // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make it + // look different form .gnu_debuglink crc - followed by 4 bytes of note + // segments crc. 
+ u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)}; + m_uuid = UUID(data, sizeof(data)); + } + } else { + if (!m_gnu_debuglink_crc) + m_gnu_debuglink_crc = calc_crc32(0, m_data); + if (m_gnu_debuglink_crc) { + // Use 4 bytes of crc from the .gnu_debuglink section. + u32le data(m_gnu_debuglink_crc); + m_uuid = UUID(&data, sizeof(data)); + } + } + } + + return m_uuid; +} + +std::optional<FileSpec> ObjectFileELF::GetDebugLink() { + if (m_gnu_debuglink_file.empty()) + return std::nullopt; + return FileSpec(m_gnu_debuglink_file); +} + +uint32_t ObjectFileELF::GetDependentModules(FileSpecList &files) { + size_t num_modules = ParseDependentModules(); + uint32_t num_specs = 0; + + for (unsigned i = 0; i < num_modules; ++i) { + if (files.AppendIfUnique(m_filespec_up->GetFileSpecAtIndex(i))) + num_specs++; + } + + return num_specs; +} + +Address ObjectFileELF::GetImageInfoAddress(Target *target) { + if (!ParseDynamicSymbols()) + return Address(); + + SectionList *section_list = GetSectionList(); + if (!section_list) + return Address(); + + // Find the SHT_DYNAMIC (.dynamic) section. + SectionSP dynsym_section_sp( + section_list->FindSectionByType(eSectionTypeELFDynamicLinkInfo, true)); + if (!dynsym_section_sp) + return Address(); + assert(dynsym_section_sp->GetObjectFile() == this); + + user_id_t dynsym_id = dynsym_section_sp->GetID(); + const ELFSectionHeaderInfo *dynsym_hdr = GetSectionHeaderByIndex(dynsym_id); + if (!dynsym_hdr) + return Address(); + + for (size_t i = 0; i < m_dynamic_symbols.size(); ++i) { + ELFDynamic &symbol = m_dynamic_symbols[i]; + + if (symbol.d_tag == DT_DEBUG) { + // Compute the offset as the number of previous entries plus the size of + // d_tag. + addr_t offset = i * dynsym_hdr->sh_entsize + GetAddressByteSize(); + return Address(dynsym_section_sp, offset); + } + // MIPS executables uses DT_MIPS_RLD_MAP_REL to support PIE. DT_MIPS_RLD_MAP + // exists in non-PIE. 
+ else if ((symbol.d_tag == DT_MIPS_RLD_MAP || + symbol.d_tag == DT_MIPS_RLD_MAP_REL) && + target) { + addr_t offset = i * dynsym_hdr->sh_entsize + GetAddressByteSize(); + addr_t dyn_base = dynsym_section_sp->GetLoadBaseAddress(target); + if (dyn_base == LLDB_INVALID_ADDRESS) + return Address(); + + Status error; + if (symbol.d_tag == DT_MIPS_RLD_MAP) { + // DT_MIPS_RLD_MAP tag stores an absolute address of the debug pointer. + Address addr; + if (target->ReadPointerFromMemory(dyn_base + offset, error, addr, true)) + return addr; + } + if (symbol.d_tag == DT_MIPS_RLD_MAP_REL) { + // DT_MIPS_RLD_MAP_REL tag stores the offset to the debug pointer, + // relative to the address of the tag. + uint64_t rel_offset; + rel_offset = target->ReadUnsignedIntegerFromMemory( + dyn_base + offset, GetAddressByteSize(), UINT64_MAX, error, true); + if (error.Success() && rel_offset != UINT64_MAX) { + Address addr; + addr_t debug_ptr_address = + dyn_base + (offset - GetAddressByteSize()) + rel_offset; + addr.SetOffset(debug_ptr_address); + return addr; + } + } + } + } + + return Address(); +} + +lldb_private::Address ObjectFileELF::GetEntryPointAddress() { + if (m_entry_point_address.IsValid()) + return m_entry_point_address; + + if (!ParseHeader() || !IsExecutable()) + return m_entry_point_address; + + SectionList *section_list = GetSectionList(); + addr_t offset = m_header.e_entry; + + if (!section_list) + m_entry_point_address.SetOffset(offset); + else + m_entry_point_address.ResolveAddressUsingFileSections(offset, section_list); + return m_entry_point_address; +} + +Address ObjectFileELF::GetBaseAddress() { + if (GetType() == ObjectFile::eTypeObjectFile) { + for (SectionHeaderCollIter I = std::next(m_section_headers.begin()); + I != m_section_headers.end(); ++I) { + const ELFSectionHeaderInfo &header = *I; + if (header.sh_flags & SHF_ALLOC) + return Address(GetSectionList()->FindSectionByID(SectionIndex(I)), 0); + } + return LLDB_INVALID_ADDRESS; + } + + for (const auto &EnumPHdr 
: llvm::enumerate(ProgramHeaders())) { + const ELFProgramHeader &H = EnumPHdr.value(); + if (H.p_type != PT_LOAD) + continue; + + return Address( + GetSectionList()->FindSectionByID(SegmentID(EnumPHdr.index())), 0); + } + return LLDB_INVALID_ADDRESS; +} + +// ParseDependentModules +size_t ObjectFileELF::ParseDependentModules() { + if (m_filespec_up) + return m_filespec_up->GetSize(); + + m_filespec_up = std::make_unique<FileSpecList>(); + + if (!ParseSectionHeaders()) + return 0; + + SectionList *section_list = GetSectionList(); + if (!section_list) + return 0; + + // Find the SHT_DYNAMIC section. + Section *dynsym = + section_list->FindSectionByType(eSectionTypeELFDynamicLinkInfo, true) + .get(); + if (!dynsym) + return 0; + assert(dynsym->GetObjectFile() == this); + + const ELFSectionHeaderInfo *header = GetSectionHeaderByIndex(dynsym->GetID()); + if (!header) + return 0; + // sh_link: section header index of string table used by entries in the + // section. + Section *dynstr = section_list->FindSectionByID(header->sh_link).get(); + if (!dynstr) + return 0; + + DataExtractor dynsym_data; + DataExtractor dynstr_data; + if (ReadSectionData(dynsym, dynsym_data) && + ReadSectionData(dynstr, dynstr_data)) { + ELFDynamic symbol; + const lldb::offset_t section_size = dynsym_data.GetByteSize(); + lldb::offset_t offset = 0; + + // The only type of entries we are concerned with are tagged DT_NEEDED, + // yielding the name of a required library. 
+ while (offset < section_size) { + if (!symbol.Parse(dynsym_data, &offset)) + break; + + if (symbol.d_tag != DT_NEEDED) + continue; + + uint32_t str_index = static_cast<uint32_t>(symbol.d_val); + const char *lib_name = dynstr_data.PeekCStr(str_index); + FileSpec file_spec(lib_name); + FileSystem::Instance().Resolve(file_spec); + m_filespec_up->Append(file_spec); + } + } + + return m_filespec_up->GetSize(); +} + +// GetProgramHeaderInfo +size_t ObjectFileELF::GetProgramHeaderInfo(ProgramHeaderColl &program_headers, + DataExtractor &object_data, + const ELFHeader &header) { + // We have already parsed the program headers + if (!program_headers.empty()) + return program_headers.size(); + + // If there are no program headers to read we are done. + if (header.e_phnum == 0) + return 0; + + program_headers.resize(header.e_phnum); + if (program_headers.size() != header.e_phnum) + return 0; + + const size_t ph_size = header.e_phnum * header.e_phentsize; + const elf_off ph_offset = header.e_phoff; + DataExtractor data; + if (data.SetData(object_data, ph_offset, ph_size) != ph_size) + return 0; + + uint32_t idx; + lldb::offset_t offset; + for (idx = 0, offset = 0; idx < header.e_phnum; ++idx) { + if (!program_headers[idx].Parse(data, &offset)) + break; + } + + if (idx < program_headers.size()) + program_headers.resize(idx); + + return program_headers.size(); +} + +// ParseProgramHeaders +bool ObjectFileELF::ParseProgramHeaders() { + return GetProgramHeaderInfo(m_program_headers, m_data, m_header) != 0; +} + +lldb_private::Status +ObjectFileELF::RefineModuleDetailsFromNote(lldb_private::DataExtractor &data, + lldb_private::ArchSpec &arch_spec, + lldb_private::UUID &uuid) { + Log *log = GetLog(LLDBLog::Modules); + Status error; + + lldb::offset_t offset = 0; + + while (true) { + // Parse the note header. If this fails, bail out. + const lldb::offset_t note_offset = offset; + ELFNote note = ELFNote(); + if (!note.Parse(data, &offset)) { + // We're done. 
+ return error; + } + + LLDB_LOGF(log, "ObjectFileELF::%s parsing note name='%s', type=%" PRIu32, + __FUNCTION__, note.n_name.c_str(), note.n_type); + + // Process FreeBSD ELF notes. + if ((note.n_name == LLDB_NT_OWNER_FREEBSD) && + (note.n_type == LLDB_NT_FREEBSD_ABI_TAG) && + (note.n_descsz == LLDB_NT_FREEBSD_ABI_SIZE)) { + // Pull out the min version info. + uint32_t version_info; + if (data.GetU32(&offset, &version_info, 1) == nullptr) { + error.SetErrorString("failed to read FreeBSD ABI note payload"); + return error; + } + + // Convert the version info into a major/minor number. + const uint32_t version_major = version_info / 100000; + const uint32_t version_minor = (version_info / 1000) % 100; + + char os_name[32]; + snprintf(os_name, sizeof(os_name), "freebsd%" PRIu32 ".%" PRIu32, + version_major, version_minor); + + // Set the elf OS version to FreeBSD. Also clear the vendor. + arch_spec.GetTriple().setOSName(os_name); + arch_spec.GetTriple().setVendor(llvm::Triple::VendorType::UnknownVendor); + + LLDB_LOGF(log, + "ObjectFileELF::%s detected FreeBSD %" PRIu32 ".%" PRIu32 + ".%" PRIu32, + __FUNCTION__, version_major, version_minor, + static_cast<uint32_t>(version_info % 1000)); + } + // Process GNU ELF notes. + else if (note.n_name == LLDB_NT_OWNER_GNU) { + switch (note.n_type) { + case LLDB_NT_GNU_ABI_TAG: + if (note.n_descsz == LLDB_NT_GNU_ABI_SIZE) { + // Pull out the min OS version supporting the ABI. + uint32_t version_info[4]; + if (data.GetU32(&offset, &version_info[0], note.n_descsz / 4) == + nullptr) { + error.SetErrorString("failed to read GNU ABI note payload"); + return error; + } + + // Set the OS per the OS field. 
+ switch (version_info[0]) { + case LLDB_NT_GNU_ABI_OS_LINUX: + arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux); + arch_spec.GetTriple().setVendor( + llvm::Triple::VendorType::UnknownVendor); + LLDB_LOGF(log, + "ObjectFileELF::%s detected Linux, min version %" PRIu32 + ".%" PRIu32 ".%" PRIu32, + __FUNCTION__, version_info[1], version_info[2], + version_info[3]); + // FIXME we have the minimal version number, we could be propagating + // that. version_info[1] = OS Major, version_info[2] = OS Minor, + // version_info[3] = Revision. + break; + case LLDB_NT_GNU_ABI_OS_HURD: + arch_spec.GetTriple().setOS(llvm::Triple::OSType::UnknownOS); + arch_spec.GetTriple().setVendor( + llvm::Triple::VendorType::UnknownVendor); + LLDB_LOGF(log, + "ObjectFileELF::%s detected Hurd (unsupported), min " + "version %" PRIu32 ".%" PRIu32 ".%" PRIu32, + __FUNCTION__, version_info[1], version_info[2], + version_info[3]); + break; + case LLDB_NT_GNU_ABI_OS_SOLARIS: + arch_spec.GetTriple().setOS(llvm::Triple::OSType::Solaris); + arch_spec.GetTriple().setVendor( + llvm::Triple::VendorType::UnknownVendor); + LLDB_LOGF(log, + "ObjectFileELF::%s detected Solaris, min version %" PRIu32 + ".%" PRIu32 ".%" PRIu32, + __FUNCTION__, version_info[1], version_info[2], + version_info[3]); + break; + default: + LLDB_LOGF(log, + "ObjectFileELF::%s unrecognized OS in note, id %" PRIu32 + ", min version %" PRIu32 ".%" PRIu32 ".%" PRIu32, + __FUNCTION__, version_info[0], version_info[1], + version_info[2], version_info[3]); + break; + } + } + break; + + case LLDB_NT_GNU_BUILD_ID_TAG: + // Only bother processing this if we don't already have the uuid set. + if (!uuid.IsValid()) { + // 16 bytes is UUID|MD5, 20 bytes is SHA1. Other linkers may produce a + // build-id of a different length. Accept it as long as it's at least + // 4 bytes as it will be better than our own crc32. 
+ if (note.n_descsz >= 4) { + if (const uint8_t *buf = data.PeekData(offset, note.n_descsz)) { + // Save the build id as the UUID for the module. + uuid = UUID(buf, note.n_descsz); + } else { + error.SetErrorString("failed to read GNU_BUILD_ID note payload"); + return error; + } + } + } + break; + } + if (arch_spec.IsMIPS() && + arch_spec.GetTriple().getOS() == llvm::Triple::OSType::UnknownOS) + // The note.n_name == LLDB_NT_OWNER_GNU is valid for Linux platform + arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux); + } + // Process NetBSD ELF executables and shared libraries + else if ((note.n_name == LLDB_NT_OWNER_NETBSD) && + (note.n_type == LLDB_NT_NETBSD_IDENT_TAG) && + (note.n_descsz == LLDB_NT_NETBSD_IDENT_DESCSZ) && + (note.n_namesz == LLDB_NT_NETBSD_IDENT_NAMESZ)) { + // Pull out the version info. + uint32_t version_info; + if (data.GetU32(&offset, &version_info, 1) == nullptr) { + error.SetErrorString("failed to read NetBSD ABI note payload"); + return error; + } + // Convert the version info into a major/minor/patch number. + // #define __NetBSD_Version__ MMmmrrpp00 + // + // M = major version + // m = minor version; a minor number of 99 indicates current. + // r = 0 (since NetBSD 3.0 not used) + // p = patchlevel + const uint32_t version_major = version_info / 100000000; + const uint32_t version_minor = (version_info % 100000000) / 1000000; + const uint32_t version_patch = (version_info % 10000) / 100; + // Set the elf OS version to NetBSD. Also clear the vendor. + arch_spec.GetTriple().setOSName( + llvm::formatv("netbsd{0}.{1}.{2}", version_major, version_minor, + version_patch).str()); + arch_spec.GetTriple().setVendor(llvm::Triple::VendorType::UnknownVendor); + } + // Process NetBSD ELF core(5) notes + else if ((note.n_name == LLDB_NT_OWNER_NETBSDCORE) && + (note.n_type == LLDB_NT_NETBSD_PROCINFO)) { + // Set the elf OS version to NetBSD. Also clear the vendor. 
+ arch_spec.GetTriple().setOS(llvm::Triple::OSType::NetBSD); + arch_spec.GetTriple().setVendor(llvm::Triple::VendorType::UnknownVendor); + } + // Process OpenBSD ELF notes. + else if (note.n_name == LLDB_NT_OWNER_OPENBSD) { + // Set the elf OS version to OpenBSD. Also clear the vendor. + arch_spec.GetTriple().setOS(llvm::Triple::OSType::OpenBSD); + arch_spec.GetTriple().setVendor(llvm::Triple::VendorType::UnknownVendor); + } else if (note.n_name == LLDB_NT_OWNER_ANDROID) { + arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux); + arch_spec.GetTriple().setEnvironment( + llvm::Triple::EnvironmentType::Android); + } else if (note.n_name == LLDB_NT_OWNER_LINUX) { + // This is sometimes found in core files and usually contains extended + // register info + arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux); + } else if (note.n_name == LLDB_NT_OWNER_CORE) { + // Parse the NT_FILE to look for stuff in paths to shared libraries + // The contents look like this in a 64 bit ELF core file: + // + // count = 0x000000000000000a (10) + // page_size = 0x0000000000001000 (4096) + // Index start end file_ofs path + // ===== ------------------ ------------------ ------------------ ------------------------------------- + // [ 0] 0x0000000000401000 0x0000000000000000 /tmp/a.out + // [ 1] 0x0000000000600000 0x0000000000601000 0x0000000000000000 /tmp/a.out + // [ 2] 0x0000000000601000 0x0000000000602000 0x0000000000000001 /tmp/a.out + // [ 3] 0x00007fa79c9ed000 0x00007fa79cba8000 0x0000000000000000 /lib/x86_64-linux-gnu/libc-2.19.so + // [ 4] 0x00007fa79cba8000 0x00007fa79cda7000 0x00000000000001bb /lib/x86_64-linux-gnu/libc-2.19.so + // [ 5] 0x00007fa79cda7000 0x00007fa79cdab000 0x00000000000001ba /lib/x86_64-linux-gnu/libc-2.19.so + // [ 6] 0x00007fa79cdab000 0x00007fa79cdad000 0x00000000000001be /lib/x86_64-linux-gnu/libc-2.19.so + // [ 7] 0x00007fa79cdb2000 0x00007fa79cdd5000 0x0000000000000000 /lib/x86_64-linux-gnu/ld-2.19.so + // [ 8] 0x00007fa79cfd4000 
0x00007fa79cfd5000 0x0000000000000022 /lib/x86_64-linux-gnu/ld-2.19.so + // [ 9] 0x00007fa79cfd5000 0x00007fa79cfd6000 0x0000000000000023 /lib/x86_64-linux-gnu/ld-2.19.so + // + // In the 32 bit ELFs the count, page_size, start, end, file_ofs are + // uint32_t. + // + // For reference: see readelf source code (in binutils). + if (note.n_type == NT_FILE) { + uint64_t count = data.GetAddress(&offset); + const char *cstr; + data.GetAddress(&offset); // Skip page size + offset += count * 3 * + data.GetAddressByteSize(); // Skip all start/end/file_ofs + for (size_t i = 0; i < count; ++i) { + cstr = data.GetCStr(&offset); + if (cstr == nullptr) { + error.SetErrorStringWithFormat("ObjectFileELF::%s trying to read " + "at an offset after the end " + "(GetCStr returned nullptr)", + __FUNCTION__); + return error; + } + llvm::StringRef path(cstr); + if (path.contains("/lib/x86_64-linux-gnu") || path.contains("/lib/i386-linux-gnu")) { + arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux); + break; + } + } + if (arch_spec.IsMIPS() && + arch_spec.GetTriple().getOS() == llvm::Triple::OSType::UnknownOS) + // In case of MIPSR6, the LLDB_NT_OWNER_GNU note is missing for some + // cases (e.g. 
compile with -nostdlib) Hence set OS to Linux + arch_spec.GetTriple().setOS(llvm::Triple::OSType::Linux); + } + } + + // Calculate the offset of the next note just in case "offset" has been + // used to poke at the contents of the note data + offset = note_offset + note.GetByteSize(); + } + + return error; +} + +void ObjectFileELF::ParseARMAttributes(DataExtractor &data, uint64_t length, + ArchSpec &arch_spec) { + lldb::offset_t Offset = 0; + + uint8_t FormatVersion = data.GetU8(&Offset); + if (FormatVersion != llvm::ELFAttrs::Format_Version) + return; + + Offset = Offset + sizeof(uint32_t); // Section Length + llvm::StringRef VendorName = data.GetCStr(&Offset); + + if (VendorName != "aeabi") + return; + + if (arch_spec.GetTriple().getEnvironment() == + llvm::Triple::UnknownEnvironment) + arch_spec.GetTriple().setEnvironment(llvm::Triple::EABI); + + while (Offset < length) { + uint8_t Tag = data.GetU8(&Offset); + uint32_t Size = data.GetU32(&Offset); + + if (Tag != llvm::ARMBuildAttrs::File || Size == 0) + continue; + + while (Offset < length) { + uint64_t Tag = data.GetULEB128(&Offset); + switch (Tag) { + default: + if (Tag < 32) + data.GetULEB128(&Offset); + else if (Tag % 2 == 0) + data.GetULEB128(&Offset); + else + data.GetCStr(&Offset); + + break; + + case llvm::ARMBuildAttrs::CPU_raw_name: + case llvm::ARMBuildAttrs::CPU_name: + data.GetCStr(&Offset); + + break; + + case llvm::ARMBuildAttrs::ABI_VFP_args: { + uint64_t VFPArgs = data.GetULEB128(&Offset); + + if (VFPArgs == llvm::ARMBuildAttrs::BaseAAPCS) { + if (arch_spec.GetTriple().getEnvironment() == + llvm::Triple::UnknownEnvironment || + arch_spec.GetTriple().getEnvironment() == llvm::Triple::EABIHF) + arch_spec.GetTriple().setEnvironment(llvm::Triple::EABI); + + arch_spec.SetFlags(ArchSpec::eARM_abi_soft_float); + } else if (VFPArgs == llvm::ARMBuildAttrs::HardFPAAPCS) { + if (arch_spec.GetTriple().getEnvironment() == + llvm::Triple::UnknownEnvironment || + arch_spec.GetTriple().getEnvironment() == 
llvm::Triple::EABI) + arch_spec.GetTriple().setEnvironment(llvm::Triple::EABIHF); + + arch_spec.SetFlags(ArchSpec::eARM_abi_hard_float); + } + + break; + } + } + } + } +} + +// GetSectionHeaderInfo +size_t ObjectFileELF::GetSectionHeaderInfo(SectionHeaderColl §ion_headers, + DataExtractor &object_data, + const elf::ELFHeader &header, + lldb_private::UUID &uuid, + std::string &gnu_debuglink_file, + uint32_t &gnu_debuglink_crc, + ArchSpec &arch_spec) { + // Don't reparse the section headers if we already did that. + if (!section_headers.empty()) + return section_headers.size(); + + // Only initialize the arch_spec to okay defaults if they're not already set. + // We'll refine this with note data as we parse the notes. + if (arch_spec.GetTriple().getOS() == llvm::Triple::OSType::UnknownOS) { + llvm::Triple::OSType ostype; + llvm::Triple::OSType spec_ostype; + const uint32_t sub_type = subTypeFromElfHeader(header); + arch_spec.SetArchitecture(eArchTypeELF, header.e_machine, sub_type, + header.e_ident[EI_OSABI]); + + // Validate if it is ok to remove GetOsFromOSABI. Note, that now the OS is + // determined based on EI_OSABI flag and the info extracted from ELF notes + // (see RefineModuleDetailsFromNote). However in some cases that still + // might be not enough: for example a shared library might not have any + // notes at all and have EI_OSABI flag set to System V, as result the OS + // will be set to UnknownOS. 
+ GetOsFromOSABI(header.e_ident[EI_OSABI], ostype); + spec_ostype = arch_spec.GetTriple().getOS(); + assert(spec_ostype == ostype); + UNUSED_IF_ASSERT_DISABLED(spec_ostype); + } + + if (arch_spec.GetMachine() == llvm::Triple::mips || + arch_spec.GetMachine() == llvm::Triple::mipsel || + arch_spec.GetMachine() == llvm::Triple::mips64 || + arch_spec.GetMachine() == llvm::Triple::mips64el) { + switch (header.e_flags & llvm::ELF::EF_MIPS_ARCH_ASE) { + case llvm::ELF::EF_MIPS_MICROMIPS: + arch_spec.SetFlags(ArchSpec::eMIPSAse_micromips); + break; + case llvm::ELF::EF_MIPS_ARCH_ASE_M16: + arch_spec.SetFlags(ArchSpec::eMIPSAse_mips16); + break; + case llvm::ELF::EF_MIPS_ARCH_ASE_MDMX: + arch_spec.SetFlags(ArchSpec::eMIPSAse_mdmx); + break; + default: + break; + } + } + + if (arch_spec.GetMachine() == llvm::Triple::arm || + arch_spec.GetMachine() == llvm::Triple::thumb) { + if (header.e_flags & llvm::ELF::EF_ARM_SOFT_FLOAT) + arch_spec.SetFlags(ArchSpec::eARM_abi_soft_float); + else if (header.e_flags & llvm::ELF::EF_ARM_VFP_FLOAT) + arch_spec.SetFlags(ArchSpec::eARM_abi_hard_float); + } + + if (arch_spec.GetMachine() == llvm::Triple::riscv32 || + arch_spec.GetMachine() == llvm::Triple::riscv64) { + uint32_t flags = arch_spec.GetFlags(); + + if (header.e_flags & llvm::ELF::EF_RISCV_RVC) + flags |= ArchSpec::eRISCV_rvc; + if (header.e_flags & llvm::ELF::EF_RISCV_RVE) + flags |= ArchSpec::eRISCV_rve; + + if ((header.e_flags & llvm::ELF::EF_RISCV_FLOAT_ABI_SINGLE) == + llvm::ELF::EF_RISCV_FLOAT_ABI_SINGLE) + flags |= ArchSpec::eRISCV_float_abi_single; + else if ((header.e_flags & llvm::ELF::EF_RISCV_FLOAT_ABI_DOUBLE) == + llvm::ELF::EF_RISCV_FLOAT_ABI_DOUBLE) + flags |= ArchSpec::eRISCV_float_abi_double; + else if ((header.e_flags & llvm::ELF::EF_RISCV_FLOAT_ABI_QUAD) == + llvm::ELF::EF_RISCV_FLOAT_ABI_QUAD) + flags |= ArchSpec::eRISCV_float_abi_quad; + + arch_spec.SetFlags(flags); + } + + // If there are no section headers we are done. 
+ if (header.e_shnum == 0) + return 0; + + Log *log = GetLog(LLDBLog::Modules); + + section_headers.resize(header.e_shnum); + if (section_headers.size() != header.e_shnum) + return 0; + + const size_t sh_size = header.e_shnum * header.e_shentsize; + const elf_off sh_offset = header.e_shoff; + DataExtractor sh_data; + if (sh_data.SetData(object_data, sh_offset, sh_size) != sh_size) + return 0; + + uint32_t idx; + lldb::offset_t offset; + for (idx = 0, offset = 0; idx < header.e_shnum; ++idx) { + if (!section_headers[idx].Parse(sh_data, &offset)) + break; + } + if (idx < section_headers.size()) + section_headers.resize(idx); + + const unsigned strtab_idx = header.e_shstrndx; + if (strtab_idx && strtab_idx < section_headers.size()) { + const ELFSectionHeaderInfo &sheader = section_headers[strtab_idx]; + const size_t byte_size = sheader.sh_size; + const Elf64_Off offset = sheader.sh_offset; + lldb_private::DataExtractor shstr_data; + + if (shstr_data.SetData(object_data, offset, byte_size) == byte_size) { + for (SectionHeaderCollIter I = section_headers.begin(); + I != section_headers.end(); ++I) { + static ConstString g_sect_name_gnu_debuglink(".gnu_debuglink"); + const ELFSectionHeaderInfo &sheader = *I; + const uint64_t section_size = + sheader.sh_type == SHT_NOBITS ? 
0 : sheader.sh_size; + ConstString name(shstr_data.PeekCStr(I->sh_name)); + + I->section_name = name; + + if (arch_spec.IsMIPS()) { + uint32_t arch_flags = arch_spec.GetFlags(); + DataExtractor data; + if (sheader.sh_type == SHT_MIPS_ABIFLAGS) { + + if (section_size && (data.SetData(object_data, sheader.sh_offset, + section_size) == section_size)) { + // MIPS ASE Mask is at offset 12 in MIPS.abiflags section + lldb::offset_t offset = 12; // MIPS ABI Flags Version: 0 + arch_flags |= data.GetU32(&offset); + + // The floating point ABI is at offset 7 + offset = 7; + switch (data.GetU8(&offset)) { + case llvm::Mips::Val_GNU_MIPS_ABI_FP_ANY: + arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_ANY; + break; + case llvm::Mips::Val_GNU_MIPS_ABI_FP_DOUBLE: + arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_DOUBLE; + break; + case llvm::Mips::Val_GNU_MIPS_ABI_FP_SINGLE: + arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_SINGLE; + break; + case llvm::Mips::Val_GNU_MIPS_ABI_FP_SOFT: + arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_SOFT; + break; + case llvm::Mips::Val_GNU_MIPS_ABI_FP_OLD_64: + arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_OLD_64; + break; + case llvm::Mips::Val_GNU_MIPS_ABI_FP_XX: + arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_XX; + break; + case llvm::Mips::Val_GNU_MIPS_ABI_FP_64: + arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_64; + break; + case llvm::Mips::Val_GNU_MIPS_ABI_FP_64A: + arch_flags |= lldb_private::ArchSpec::eMIPS_ABI_FP_64A; + break; + } + } + } + // Settings appropriate ArchSpec ABI Flags + switch (header.e_flags & llvm::ELF::EF_MIPS_ABI) { + case llvm::ELF::EF_MIPS_ABI_O32: + arch_flags |= lldb_private::ArchSpec::eMIPSABI_O32; + break; + case EF_MIPS_ABI_O64: + arch_flags |= lldb_private::ArchSpec::eMIPSABI_O64; + break; + case EF_MIPS_ABI_EABI32: + arch_flags |= lldb_private::ArchSpec::eMIPSABI_EABI32; + break; + case EF_MIPS_ABI_EABI64: + arch_flags |= lldb_private::ArchSpec::eMIPSABI_EABI64; + break; + default: + 
// ABI Mask doesn't cover N32 and N64 ABI. + if (header.e_ident[EI_CLASS] == llvm::ELF::ELFCLASS64) + arch_flags |= lldb_private::ArchSpec::eMIPSABI_N64; + else if (header.e_flags & llvm::ELF::EF_MIPS_ABI2) + arch_flags |= lldb_private::ArchSpec::eMIPSABI_N32; + break; + } + arch_spec.SetFlags(arch_flags); + } + + if (arch_spec.GetMachine() == llvm::Triple::arm || + arch_spec.GetMachine() == llvm::Triple::thumb) { + DataExtractor data; + + if (sheader.sh_type == SHT_ARM_ATTRIBUTES && section_size != 0 && + data.SetData(object_data, sheader.sh_offset, section_size) == section_size) + ParseARMAttributes(data, section_size, arch_spec); + } + + if (name == g_sect_name_gnu_debuglink) { + DataExtractor data; + if (section_size && (data.SetData(object_data, sheader.sh_offset, + section_size) == section_size)) { + lldb::offset_t gnu_debuglink_offset = 0; + gnu_debuglink_file = data.GetCStr(&gnu_debuglink_offset); + gnu_debuglink_offset = llvm::alignTo(gnu_debuglink_offset, 4); + data.GetU32(&gnu_debuglink_offset, &gnu_debuglink_crc, 1); + } + } + + // Process ELF note section entries. + bool is_note_header = (sheader.sh_type == SHT_NOTE); + + // The section header ".note.android.ident" is stored as a + // PROGBITS type header but it is actually a note header. + static ConstString g_sect_name_android_ident(".note.android.ident"); + if (!is_note_header && name == g_sect_name_android_ident) + is_note_header = true; + + if (is_note_header) { + // Allow notes to refine module info. + DataExtractor data; + if (section_size && (data.SetData(object_data, sheader.sh_offset, + section_size) == section_size)) { + Status error = RefineModuleDetailsFromNote(data, arch_spec, uuid); + if (error.Fail()) { + LLDB_LOGF(log, "ObjectFileELF::%s ELF note processing failed: %s", + __FUNCTION__, error.AsCString()); + } + } + } + } + + // Make any unknown triple components to be unspecified unknowns. 
+ if (arch_spec.GetTriple().getVendor() == llvm::Triple::UnknownVendor) + arch_spec.GetTriple().setVendorName(llvm::StringRef()); + if (arch_spec.GetTriple().getOS() == llvm::Triple::UnknownOS) + arch_spec.GetTriple().setOSName(llvm::StringRef()); + + return section_headers.size(); + } + } + + section_headers.clear(); + return 0; +} + +llvm::StringRef +ObjectFileELF::StripLinkerSymbolAnnotations(llvm::StringRef symbol_name) const { + size_t pos = symbol_name.find('@'); + return symbol_name.substr(0, pos); +} + +// ParseSectionHeaders +size_t ObjectFileELF::ParseSectionHeaders() { + return GetSectionHeaderInfo(m_section_headers, m_data, m_header, m_uuid, + m_gnu_debuglink_file, m_gnu_debuglink_crc, + m_arch_spec); +} + +const ObjectFileELF::ELFSectionHeaderInfo * +ObjectFileELF::GetSectionHeaderByIndex(lldb::user_id_t id) { + if (!ParseSectionHeaders()) + return nullptr; + + if (id < m_section_headers.size()) + return &m_section_headers[id]; + + return nullptr; +} + +// Returns the index of the first section header, searching from index 1, whose +// name equals `name`. Returns 0 when `name` is null or empty, when +// ParseSectionHeaders() returns 0, or when no header matches. +lldb::user_id_t ObjectFileELF::GetSectionIndexByName(const char *name) { + if (!name || !name[0] || !ParseSectionHeaders()) + return 0; + // Build the ConstString once rather than on every loop iteration. + const ConstString section_name(name); + for (size_t i = 1; i < m_section_headers.size(); ++i) + if (m_section_headers[i].section_name == section_name) + return i; + return 0; +} + +static SectionType GetSectionTypeFromName(llvm::StringRef Name) { + if (Name.consume_front(".debug_")) { + return llvm::StringSwitch<SectionType>(Name) + .Case("abbrev", eSectionTypeDWARFDebugAbbrev) + .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo) + .Case("addr", eSectionTypeDWARFDebugAddr) + .Case("aranges", eSectionTypeDWARFDebugAranges) + .Case("cu_index", eSectionTypeDWARFDebugCuIndex) + .Case("frame", eSectionTypeDWARFDebugFrame) + .Case("info", eSectionTypeDWARFDebugInfo) + .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo) + .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine) + .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr) + .Case("loc", eSectionTypeDWARFDebugLoc) + .Case("loc.dwo", 
eSectionTypeDWARFDebugLocDwo) + .Case("loclists", eSectionTypeDWARFDebugLocLists) + .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo) + .Case("macinfo", eSectionTypeDWARFDebugMacInfo) + .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro) + .Case("names", eSectionTypeDWARFDebugNames) + .Case("pubnames", eSectionTypeDWARFDebugPubNames) + .Case("pubtypes", eSectionTypeDWARFDebugPubTypes) + .Case("ranges", eSectionTypeDWARFDebugRanges) + .Case("rnglists", eSectionTypeDWARFDebugRngLists) + .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo) + .Case("str", eSectionTypeDWARFDebugStr) + .Case("str.dwo", eSectionTypeDWARFDebugStrDwo) + .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets) + .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo) + .Case("tu_index", eSectionTypeDWARFDebugTuIndex) + .Case("types", eSectionTypeDWARFDebugTypes) + .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo) + .Default(eSectionTypeOther); + } + return llvm::StringSwitch<SectionType>(Name) + .Case(".ARM.exidx", eSectionTypeARMexidx) + .Case(".ARM.extab", eSectionTypeARMextab) + .Case(".ctf", eSectionTypeDebug) + .Cases(".data", ".tdata", eSectionTypeData) + .Case(".eh_frame", eSectionTypeEHFrame) + .Case(".gnu_debugaltlink", eSectionTypeDWARFGNUDebugAltLink) + .Case(".gosymtab", eSectionTypeGoSymtab) + .Case(".text", eSectionTypeCode) + .Case(".swift_ast", eSectionTypeSwiftModules) + .Default(eSectionTypeOther); +} + +SectionType ObjectFileELF::GetSectionType(const ELFSectionHeaderInfo &H) const { + switch (H.sh_type) { + case SHT_PROGBITS: + if (H.sh_flags & SHF_EXECINSTR) + return eSectionTypeCode; + break; + case SHT_NOBITS: + if (H.sh_flags & SHF_ALLOC) + return eSectionTypeZeroFill; + break; + case SHT_SYMTAB: + return eSectionTypeELFSymbolTable; + case SHT_DYNSYM: + return eSectionTypeELFDynamicSymbols; + case SHT_RELA: + case SHT_REL: + return eSectionTypeELFRelocationEntries; + case SHT_DYNAMIC: + return eSectionTypeELFDynamicLinkInfo; + } + return 
GetSectionTypeFromName(H.section_name.GetStringRef()); +} + +static uint32_t GetTargetByteSize(SectionType Type, const ArchSpec &arch) { + switch (Type) { + case eSectionTypeData: + case eSectionTypeZeroFill: + return arch.GetDataByteSize(); + case eSectionTypeCode: + return arch.GetCodeByteSize(); + default: + return 1; + } +} + +static Permissions GetPermissions(const ELFSectionHeader &H) { + Permissions Perm = Permissions(0); + if (H.sh_flags & SHF_ALLOC) + Perm |= ePermissionsReadable; + if (H.sh_flags & SHF_WRITE) + Perm |= ePermissionsWritable; + if (H.sh_flags & SHF_EXECINSTR) + Perm |= ePermissionsExecutable; + return Perm; +} + +static Permissions GetPermissions(const ELFProgramHeader &H) { + Permissions Perm = Permissions(0); + if (H.p_flags & PF_R) + Perm |= ePermissionsReadable; + if (H.p_flags & PF_W) + Perm |= ePermissionsWritable; + if (H.p_flags & PF_X) + Perm |= ePermissionsExecutable; + return Perm; +} + +namespace { + +using VMRange = lldb_private::Range<addr_t, addr_t>; + +struct SectionAddressInfo { + SectionSP Segment; + VMRange Range; +}; + +// (Unlinked) ELF object files usually have 0 for every section address, meaning +// we need to compute synthetic addresses in order for "file addresses" from +// different sections to not overlap. This class handles that logic. +class VMAddressProvider { + using VMMap = llvm::IntervalMap<addr_t, SectionSP, 4, + llvm::IntervalMapHalfOpenInfo<addr_t>>; + + ObjectFile::Type ObjectType; + addr_t NextVMAddress = 0; + VMMap::Allocator Alloc; + VMMap Segments{Alloc}; + VMMap Sections{Alloc}; + lldb_private::Log *Log = GetLog(LLDBLog::Modules); + size_t SegmentCount = 0; + std::string SegmentName; + + VMRange GetVMRange(const ELFSectionHeader &H) { + addr_t Address = H.sh_addr; + addr_t Size = H.sh_flags & SHF_ALLOC ? 
H.sh_size : 0; + + // When this is a debug file for relocatable file, the address is all zero + // and thus needs to use accumulate method + if ((ObjectType == ObjectFile::Type::eTypeObjectFile || + (ObjectType == ObjectFile::Type::eTypeDebugInfo && H.sh_addr == 0)) && + Segments.empty() && (H.sh_flags & SHF_ALLOC)) { + NextVMAddress = + llvm::alignTo(NextVMAddress, std::max<addr_t>(H.sh_addralign, 1)); + Address = NextVMAddress; + NextVMAddress += Size; + } + return VMRange(Address, Size); + } + +public: + VMAddressProvider(ObjectFile::Type Type, llvm::StringRef SegmentName) + : ObjectType(Type), SegmentName(std::string(SegmentName)) {} + + std::string GetNextSegmentName() const { + return llvm::formatv("{0}[{1}]", SegmentName, SegmentCount).str(); + } + + std::optional<VMRange> GetAddressInfo(const ELFProgramHeader &H) { + if (H.p_memsz == 0) { + LLDB_LOG(Log, "Ignoring zero-sized {0} segment. Corrupt object file?", + SegmentName); + return std::nullopt; + } + + if (Segments.overlaps(H.p_vaddr, H.p_vaddr + H.p_memsz)) { + LLDB_LOG(Log, "Ignoring overlapping {0} segment. Corrupt object file?", + SegmentName); + return std::nullopt; + } + return VMRange(H.p_vaddr, H.p_memsz); + } + + std::optional<SectionAddressInfo> GetAddressInfo(const ELFSectionHeader &H) { + VMRange Range = GetVMRange(H); + SectionSP Segment; + auto It = Segments.find(Range.GetRangeBase()); + if ((H.sh_flags & SHF_ALLOC) && It.valid()) { + addr_t MaxSize; + if (It.start() <= Range.GetRangeBase()) { + MaxSize = It.stop() - Range.GetRangeBase(); + Segment = *It; + } else + MaxSize = It.start() - Range.GetRangeBase(); + if (Range.GetByteSize() > MaxSize) { + LLDB_LOG(Log, "Shortening section crossing segment boundaries. " + "Corrupt object file?"); + Range.SetByteSize(MaxSize); + } + } + if (Range.GetByteSize() > 0 && + Sections.overlaps(Range.GetRangeBase(), Range.GetRangeEnd())) { + LLDB_LOG(Log, "Ignoring overlapping section. 
Corrupt object file?"); + return std::nullopt; + } + if (Segment) + Range.Slide(-Segment->GetFileAddress()); + return SectionAddressInfo{Segment, Range}; + } + + void AddSegment(const VMRange &Range, SectionSP Seg) { + Segments.insert(Range.GetRangeBase(), Range.GetRangeEnd(), std::move(Seg)); + ++SegmentCount; + } + + void AddSection(SectionAddressInfo Info, SectionSP Sect) { + if (Info.Range.GetByteSize() == 0) + return; + if (Info.Segment) + Info.Range.Slide(Info.Segment->GetFileAddress()); + Sections.insert(Info.Range.GetRangeBase(), Info.Range.GetRangeEnd(), + std::move(Sect)); + } +}; +} + +// We have to do this because ELF doesn't have section IDs, and also +// doesn't require section names to be unique. (We use the section index +// for section IDs, but that isn't guaranteed to be the same in separate +// debug images.) +// +// Searches section_list, descending into the children of each section that +// does not match, for a section whose name, file address, byte size, +// permissions, thread-specific flag and log2 alignment all equal those of +// `section`. Returns an empty SectionSP when nothing matches. +static SectionSP FindMatchingSection(const SectionList &section_list, + SectionSP section) { + SectionSP sect_sp; + + addr_t vm_addr = section->GetFileAddress(); + ConstString name = section->GetName(); + offset_t byte_size = section->GetByteSize(); + bool thread_specific = section->IsThreadSpecific(); + uint32_t permissions = section->GetPermissions(); + uint32_t alignment = section->GetLog2Align(); + + // Iterate by reference: copying each SectionSP is not needed just to compare. + for (const auto &sect : section_list) { + if (sect->GetName() == name && + sect->IsThreadSpecific() == thread_specific && + sect->GetPermissions() == permissions && + sect->GetByteSize() == byte_size && sect->GetFileAddress() == vm_addr && + sect->GetLog2Align() == alignment) { + sect_sp = sect; + break; + } else { + sect_sp = FindMatchingSection(sect->GetChildren(), section); + if (sect_sp) + break; + } + } + + return sect_sp; +} + +void ObjectFileELF::CreateSections(SectionList &unified_section_list) { + if (m_sections_up) + return; + + m_sections_up = std::make_unique<SectionList>(); + VMAddressProvider regular_provider(GetType(), "PT_LOAD"); + VMAddressProvider tls_provider(GetType(), "PT_TLS"); + + for (const auto &EnumPHdr : 
llvm::enumerate(ProgramHeaders())) { + const ELFProgramHeader &PHdr = EnumPHdr.value(); + if (PHdr.p_type != PT_LOAD && PHdr.p_type != PT_TLS) + continue; + + VMAddressProvider &provider = + PHdr.p_type == PT_TLS ? tls_provider : regular_provider; + auto InfoOr = provider.GetAddressInfo(PHdr); + if (!InfoOr) + continue; + + uint32_t Log2Align = llvm::Log2_64(std::max<elf_xword>(PHdr.p_align, 1)); + SectionSP Segment = std::make_shared<Section>( + GetModule(), this, SegmentID(EnumPHdr.index()), + ConstString(provider.GetNextSegmentName()), eSectionTypeContainer, + InfoOr->GetRangeBase(), InfoOr->GetByteSize(), PHdr.p_offset, + PHdr.p_filesz, Log2Align, /*flags*/ 0); + Segment->SetPermissions(GetPermissions(PHdr)); + Segment->SetIsThreadSpecific(PHdr.p_type == PT_TLS); + m_sections_up->AddSection(Segment); + + provider.AddSegment(*InfoOr, std::move(Segment)); + } + + ParseSectionHeaders(); + if (m_section_headers.empty()) + return; + + for (SectionHeaderCollIter I = std::next(m_section_headers.begin()); + I != m_section_headers.end(); ++I) { + const ELFSectionHeaderInfo &header = *I; + + ConstString &name = I->section_name; + const uint64_t file_size = + header.sh_type == SHT_NOBITS ? 0 : header.sh_size; + + VMAddressProvider &provider = + header.sh_flags & SHF_TLS ? tls_provider : regular_provider; + auto InfoOr = provider.GetAddressInfo(header); + if (!InfoOr) + continue; + + SectionType sect_type = GetSectionType(header); + + const uint32_t target_bytes_size = + GetTargetByteSize(sect_type, m_arch_spec); + + elf::elf_xword log2align = + (header.sh_addralign == 0) ? 0 : llvm::Log2_64(header.sh_addralign); + + SectionSP section_sp(new Section( + InfoOr->Segment, GetModule(), // Module to which this section belongs. + this, // ObjectFile to which this section belongs and should + // read section data from. + SectionIndex(I), // Section ID. + name, // Section name. + sect_type, // Section type. + InfoOr->Range.GetRangeBase(), // VM address. 
+ InfoOr->Range.GetByteSize(), // VM size in bytes of this section. + header.sh_offset, // Offset of this section in the file. + file_size, // Size of the section as found in the file. + log2align, // Alignment of the section + header.sh_flags, // Flags for this section. + target_bytes_size)); // Number of host bytes per target byte + + section_sp->SetPermissions(GetPermissions(header)); + section_sp->SetIsThreadSpecific(header.sh_flags & SHF_TLS); + (InfoOr->Segment ? InfoOr->Segment->GetChildren() : *m_sections_up) + .AddSection(section_sp); + provider.AddSection(std::move(*InfoOr), std::move(section_sp)); + } + + // For eTypeDebugInfo files, the Symbol Vendor will take care of updating the + // unified section list. + if (GetType() != eTypeDebugInfo) + unified_section_list = *m_sections_up; + + // If there's a .gnu_debugdata section, we'll try to read the .symtab that's + // embedded in there and replace the one in the original object file (if any). + // If there's none in the original object file, we add it to the unified + // section list. 
+ if (auto gdd_obj_file = GetGnuDebugDataObjectFile()) { + if (auto gdd_objfile_section_list = gdd_obj_file->GetSectionList()) { + if (SectionSP symtab_section_sp = + gdd_objfile_section_list->FindSectionByType( + eSectionTypeELFSymbolTable, true)) { + SectionSP module_section_sp = unified_section_list.FindSectionByType( + eSectionTypeELFSymbolTable, true); + if (module_section_sp) + unified_section_list.ReplaceSection(module_section_sp->GetID(), + symtab_section_sp); + else + unified_section_list.AddSection(symtab_section_sp); + } + } + } +} + +std::shared_ptr<ObjectFileELF> ObjectFileELF::GetGnuDebugDataObjectFile() { + if (m_gnu_debug_data_object_file != nullptr) + return m_gnu_debug_data_object_file; + + SectionSP section = + GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata")); + if (!section) + return nullptr; + + if (!lldb_private::lzma::isAvailable()) { + GetModule()->ReportWarning( + "No LZMA support found for reading .gnu_debugdata section"); + return nullptr; + } + + // Uncompress the data + DataExtractor data; + section->GetSectionData(data); + llvm::SmallVector<uint8_t, 0> uncompressedData; + auto err = lldb_private::lzma::uncompress(data.GetData(), uncompressedData); + if (err) { + GetModule()->ReportWarning( + "An error occurred while decompression the section {0}: {1}", + section->GetName().AsCString(), llvm::toString(std::move(err)).c_str()); + return nullptr; + } + + // Construct ObjectFileELF object from decompressed buffer + DataBufferSP gdd_data_buf( + new DataBufferHeap(uncompressedData.data(), uncompressedData.size())); + auto fspec = GetFileSpec().CopyByAppendingPathComponent( + llvm::StringRef("gnu_debugdata")); + m_gnu_debug_data_object_file.reset(new ObjectFileELF( + GetModule(), gdd_data_buf, 0, &fspec, 0, gdd_data_buf->GetByteSize())); + + // This line is essential; otherwise a breakpoint can be set but not hit. 
+ m_gnu_debug_data_object_file->SetType(ObjectFile::eTypeDebugInfo); + + ArchSpec spec = m_gnu_debug_data_object_file->GetArchitecture(); + if (spec && m_gnu_debug_data_object_file->SetModulesArchitecture(spec)) + return m_gnu_debug_data_object_file; + + return nullptr; +} + +// Find the arm/aarch64 mapping symbol character in the given symbol name. +// Mapping symbols have the form of "$<char>[.<any>]*". Additionally we +// recognize cases when the mapping symbol prefixed by an arbitrary string +// because if a symbol prefix added to each symbol in the object file with +// objcopy then the mapping symbols are also prefixed. +static char FindArmAarch64MappingSymbol(const char *symbol_name) { + if (!symbol_name) + return '\0'; + + const char *dollar_pos = ::strchr(symbol_name, '$'); + if (!dollar_pos || dollar_pos[1] == '\0') + return '\0'; + + if (dollar_pos[2] == '\0' || dollar_pos[2] == '.') + return dollar_pos[1]; + return '\0'; +} + +#define STO_MIPS_ISA (3 << 6) +#define STO_MICROMIPS (2 << 6) +#define IS_MICROMIPS(ST_OTHER) (((ST_OTHER)&STO_MIPS_ISA) == STO_MICROMIPS) + +// private +std::pair<unsigned, ObjectFileELF::FileAddressToAddressClassMap> +ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id, + SectionList *section_list, const size_t num_symbols, + const DataExtractor &symtab_data, + const DataExtractor &strtab_data) { + ELFSymbol symbol; + lldb::offset_t offset = 0; + // The changes these symbols would make to the class map. We will also update + // m_address_class_map but need to tell the caller what changed because the + // caller may be another object file. 
+ FileAddressToAddressClassMap address_class_map; + + static ConstString text_section_name(".text"); + static ConstString init_section_name(".init"); + static ConstString fini_section_name(".fini"); + static ConstString ctors_section_name(".ctors"); + static ConstString dtors_section_name(".dtors"); + + static ConstString data_section_name(".data"); + static ConstString rodata_section_name(".rodata"); + static ConstString rodata1_section_name(".rodata1"); + static ConstString data2_section_name(".data1"); + static ConstString bss_section_name(".bss"); + static ConstString opd_section_name(".opd"); // For ppc64 + + // On Android the oatdata and the oatexec symbols in the oat and odex files + // covers the full .text section what causes issues with displaying unusable + // symbol name to the user and very slow unwinding speed because the + // instruction emulation based unwind plans try to emulate all instructions + // in these symbols. Don't add these symbols to the symbol list as they have + // no use for the debugger and they are causing a lot of trouble. Filtering + // can't be restricted to Android because this special object file don't + // contain the note section specifying the environment to Android but the + // custom extension and file name makes it highly unlikely that this will + // collide with anything else. + llvm::StringRef file_extension = m_file.GetFileNameExtension(); + bool skip_oatdata_oatexec = + file_extension == ".oat" || file_extension == ".odex"; + + ArchSpec arch = GetArchitecture(); + ModuleSP module_sp(GetModule()); + SectionList *module_section_list = + module_sp ? module_sp->GetSectionList() : nullptr; + + // We might have debug information in a separate object, in which case + // we need to map the sections from that object to the sections in the + // main object during symbol lookup. If we had to compare the sections + // for every single symbol, that would be expensive, so this map is + // used to accelerate the process. 
+ std::unordered_map<lldb::SectionSP, lldb::SectionSP> section_map; + + unsigned i; + for (i = 0; i < num_symbols; ++i) { + if (!symbol.Parse(symtab_data, &offset)) + break; + + const char *symbol_name = strtab_data.PeekCStr(symbol.st_name); + if (!symbol_name) + symbol_name = ""; + + // No need to add non-section symbols that have no names + if (symbol.getType() != STT_SECTION && + (symbol_name == nullptr || symbol_name[0] == '\0')) + continue; + + // Skipping oatdata and oatexec sections if it is requested. See details + // above the definition of skip_oatdata_oatexec for the reasons. + if (skip_oatdata_oatexec && (::strcmp(symbol_name, "oatdata") == 0 || + ::strcmp(symbol_name, "oatexec") == 0)) + continue; + + SectionSP symbol_section_sp; + SymbolType symbol_type = eSymbolTypeInvalid; + Elf64_Half shndx = symbol.st_shndx; + + switch (shndx) { + case SHN_ABS: + symbol_type = eSymbolTypeAbsolute; + break; + case SHN_UNDEF: + symbol_type = eSymbolTypeUndefined; + break; + default: + symbol_section_sp = section_list->FindSectionByID(shndx); + break; + } + + // If a symbol is undefined do not process it further even if it has a STT + // type + if (symbol_type != eSymbolTypeUndefined) { + switch (symbol.getType()) { + default: + case STT_NOTYPE: + // The symbol's type is not specified. + break; + + case STT_OBJECT: + // The symbol is associated with a data object, such as a variable, an + // array, etc. + symbol_type = eSymbolTypeData; + break; + + case STT_FUNC: + // The symbol is associated with a function or other executable code. + symbol_type = eSymbolTypeCode; + break; + + case STT_SECTION: + // The symbol is associated with a section. Symbol table entries of + // this type exist primarily for relocation and normally have STB_LOCAL + // binding. + break; + + case STT_FILE: + // Conventionally, the symbol's name gives the name of the source file + // associated with the object file. 
A file symbol has STB_LOCAL + // binding, its section index is SHN_ABS, and it precedes the other + // STB_LOCAL symbols for the file, if it is present. + symbol_type = eSymbolTypeSourceFile; + break; + + case STT_GNU_IFUNC: + // The symbol is associated with an indirect function. The actual + // function will be resolved if it is referenced. + symbol_type = eSymbolTypeResolver; + break; + } + } + + if (symbol_type == eSymbolTypeInvalid && symbol.getType() != STT_SECTION) { + if (symbol_section_sp) { + ConstString sect_name = symbol_section_sp->GetName(); + if (sect_name == text_section_name || sect_name == init_section_name || + sect_name == fini_section_name || sect_name == ctors_section_name || + sect_name == dtors_section_name) { + symbol_type = eSymbolTypeCode; + } else if (sect_name == data_section_name || + sect_name == data2_section_name || + sect_name == rodata_section_name || + sect_name == rodata1_section_name || + sect_name == bss_section_name) { + symbol_type = eSymbolTypeData; + } + } + } + + int64_t symbol_value_offset = 0; + uint32_t additional_flags = 0; + + if (arch.IsValid()) { + if (arch.GetMachine() == llvm::Triple::arm) { + if (symbol.getBinding() == STB_LOCAL) { + char mapping_symbol = FindArmAarch64MappingSymbol(symbol_name); + if (symbol_type == eSymbolTypeCode) { + switch (mapping_symbol) { + case 'a': + // $a[.<any>]* - marks an ARM instruction sequence + address_class_map[symbol.st_value] = AddressClass::eCode; + break; + case 'b': + case 't': + // $b[.<any>]* - marks a THUMB BL instruction sequence + // $t[.<any>]* - marks a THUMB instruction sequence + address_class_map[symbol.st_value] = + AddressClass::eCodeAlternateISA; + break; + case 'd': + // $d[.<any>]* - marks a data item sequence (e.g. 
lit pool) + address_class_map[symbol.st_value] = AddressClass::eData; + break; + } + } + if (mapping_symbol) + continue; + } + } else if (arch.GetMachine() == llvm::Triple::aarch64) { + if (symbol.getBinding() == STB_LOCAL) { + char mapping_symbol = FindArmAarch64MappingSymbol(symbol_name); + if (symbol_type == eSymbolTypeCode) { + switch (mapping_symbol) { + case 'x': + // $x[.<any>]* - marks an A64 instruction sequence + address_class_map[symbol.st_value] = AddressClass::eCode; + break; + case 'd': + // $d[.<any>]* - marks a data item sequence (e.g. lit pool) + address_class_map[symbol.st_value] = AddressClass::eData; + break; + } + } + if (mapping_symbol) + continue; + } + } + + if (arch.GetMachine() == llvm::Triple::arm) { + if (symbol_type == eSymbolTypeCode) { + if (symbol.st_value & 1) { + // Subtracting 1 from the address effectively unsets the low order + // bit, which results in the address actually pointing to the + // beginning of the symbol. This delta will be used below in + // conjunction with symbol.st_value to produce the final + // symbol_value that we store in the symtab. + symbol_value_offset = -1; + address_class_map[symbol.st_value ^ 1] = + AddressClass::eCodeAlternateISA; + } else { + // This address is ARM + address_class_map[symbol.st_value] = AddressClass::eCode; + } + } + } + + /* + * MIPS: + * The bit #0 of an address is used for ISA mode (1 for microMIPS, 0 for + * MIPS). + * This allows processor to switch between microMIPS and MIPS without any + * need + * for special mode-control register. However, apart from .debug_line, + * none of + * the ELF/DWARF sections set the ISA bit (for symbol or section). Use + * st_other + * flag to check whether the symbol is microMIPS and then set the address + * class + * accordingly. 
+ */ + if (arch.IsMIPS()) { + if (IS_MICROMIPS(symbol.st_other)) + address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA; + else if ((symbol.st_value & 1) && (symbol_type == eSymbolTypeCode)) { + symbol.st_value = symbol.st_value & (~1ull); + address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA; + } else { + if (symbol_type == eSymbolTypeCode) + address_class_map[symbol.st_value] = AddressClass::eCode; + else if (symbol_type == eSymbolTypeData) + address_class_map[symbol.st_value] = AddressClass::eData; + else + address_class_map[symbol.st_value] = AddressClass::eUnknown; + } + } + } + + // symbol_value_offset may contain 0 for ARM symbols or -1 for THUMB + // symbols. See above for more details. + uint64_t symbol_value = symbol.st_value + symbol_value_offset; + + if (symbol_section_sp && + CalculateType() != ObjectFile::Type::eTypeObjectFile) + symbol_value -= symbol_section_sp->GetFileAddress(); + + if (symbol_section_sp && module_section_list && + module_section_list != section_list) { + auto section_it = section_map.find(symbol_section_sp); + if (section_it == section_map.end()) { + section_it = section_map + .emplace(symbol_section_sp, + FindMatchingSection(*module_section_list, + symbol_section_sp)) + .first; + } + if (section_it->second) + symbol_section_sp = section_it->second; + } + + bool is_global = symbol.getBinding() == STB_GLOBAL; + uint32_t flags = symbol.st_other << 8 | symbol.st_info | additional_flags; + llvm::StringRef symbol_ref(symbol_name); + + // Symbol names may contain @VERSION suffixes. Find those and strip them + // temporarily. + size_t version_pos = symbol_ref.find('@'); + bool has_suffix = version_pos != llvm::StringRef::npos; + llvm::StringRef symbol_bare = symbol_ref.substr(0, version_pos); + Mangled mangled(symbol_bare); + + // Now append the suffix back to mangled and unmangled names. Only do it if + // the demangling was successful (string is not empty). 
+ if (has_suffix) { + llvm::StringRef suffix = symbol_ref.substr(version_pos); + + llvm::StringRef mangled_name = mangled.GetMangledName().GetStringRef(); + if (!mangled_name.empty()) + mangled.SetMangledName(ConstString((mangled_name + suffix).str())); + + ConstString demangled = mangled.GetDemangledName(); + llvm::StringRef demangled_name = demangled.GetStringRef(); + if (!demangled_name.empty()) + mangled.SetDemangledName(ConstString((demangled_name + suffix).str())); + } + + // In ELF all symbol should have a valid size but it is not true for some + // function symbols coming from hand written assembly. As none of the + // function symbol should have 0 size we try to calculate the size for + // these symbols in the symtab with saying that their original size is not + // valid. + bool symbol_size_valid = + symbol.st_size != 0 || symbol.getType() != STT_FUNC; + + bool is_trampoline = false; + if (arch.IsValid() && (arch.GetMachine() == llvm::Triple::aarch64)) { + // On AArch64, trampolines are registered as code. + // If we detect a trampoline (which starts with __AArch64ADRPThunk_ or + // __AArch64AbsLongThunk_) we register the symbol as a trampoline. This + // way we will be able to detect the trampoline when we step in a function + // and step through the trampoline. + if (symbol_type == eSymbolTypeCode) { + llvm::StringRef trampoline_name = mangled.GetName().GetStringRef(); + if (trampoline_name.starts_with("__AArch64ADRPThunk_") || + trampoline_name.starts_with("__AArch64AbsLongThunk_")) { + symbol_type = eSymbolTypeTrampoline; + is_trampoline = true; + } + } + } + + Symbol dc_symbol( + i + start_id, // ID is the original symbol table index. + mangled, + symbol_type, // Type of this symbol + is_global, // Is this globally visible? + false, // Is this symbol debug info? + is_trampoline, // Is this symbol a trampoline? + false, // Is this symbol artificial? + AddressRange(symbol_section_sp, // Section in which this symbol is + // defined or null. 
+ symbol_value, // Offset in section or symbol value. + symbol.st_size), // Size in bytes of this symbol. + symbol_size_valid, // Symbol size is valid + has_suffix, // Contains linker annotations? + flags); // Symbol flags. + if (symbol.getBinding() == STB_WEAK) + dc_symbol.SetIsWeak(true); + symtab->AddSymbol(dc_symbol); + } + + // Copy the new entries into m_address_class_map. merge() would splice the + // nodes out of address_class_map, leaving the map returned to the caller + // without them; insert() keeps existing keys just as merge() did. + m_address_class_map.insert(address_class_map.begin(), + address_class_map.end()); + return {i, address_class_map}; +} + +// Parses the symbol table section symtab into symbol_table, numbering symbols +// from start_id. Returns the number of symbol table entries processed and the +// address class map entries that were produced while parsing them. +std::pair<unsigned, ObjectFileELF::FileAddressToAddressClassMap> +ObjectFileELF::ParseSymbolTable(Symtab *symbol_table, user_id_t start_id, + lldb_private::Section *symtab) { + if (symtab->GetObjectFile() != this) { + // If the symbol table section is owned by a different object file, have it + // do the parsing. + ObjectFileELF *obj_file_elf = + static_cast<ObjectFileELF *>(symtab->GetObjectFile()); + auto [num_symbols, address_class_map] = + obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab); + + // The other object file returned the changes it made to its address + // class map, make the same changes to ours. Copy rather than merge so the + // map handed back to our own caller still contains them. + m_address_class_map.insert(address_class_map.begin(), + address_class_map.end()); + + return {num_symbols, address_class_map}; + } + + // Get section list for this object file. + SectionList *section_list = m_sections_up.get(); + if (!section_list) + return {}; + + user_id_t symtab_id = symtab->GetID(); + const ELFSectionHeaderInfo *symtab_hdr = GetSectionHeaderByIndex(symtab_id); + // GetSectionHeaderByIndex returns nullptr for an unknown index, and the + // symbol count computed below divides by sh_entsize. + if (!symtab_hdr || symtab_hdr->sh_entsize == 0) + return {}; + assert(symtab_hdr->sh_type == SHT_SYMTAB || + symtab_hdr->sh_type == SHT_DYNSYM); + + // sh_link: section header index of associated string table. 
+ user_id_t strtab_id = symtab_hdr->sh_link; + Section *strtab = section_list->FindSectionByID(strtab_id).get(); + + if (symtab && strtab) { + assert(symtab->GetObjectFile() == this); + assert(strtab->GetObjectFile() == this); + + DataExtractor symtab_data; + DataExtractor strtab_data; + if (ReadSectionData(symtab, symtab_data) && + ReadSectionData(strtab, strtab_data)) { + size_t num_symbols = symtab_data.GetByteSize() / symtab_hdr->sh_entsize; + + return ParseSymbols(symbol_table, start_id, section_list, num_symbols, + symtab_data, strtab_data); + } + } + + return {0, {}}; +} + +size_t ObjectFileELF::ParseDynamicSymbols() { + if (m_dynamic_symbols.size()) + return m_dynamic_symbols.size(); + + SectionList *section_list = GetSectionList(); + if (!section_list) + return 0; + + // Find the SHT_DYNAMIC section. + Section *dynsym = + section_list->FindSectionByType(eSectionTypeELFDynamicLinkInfo, true) + .get(); + if (!dynsym) + return 0; + assert(dynsym->GetObjectFile() == this); + + ELFDynamic symbol; + DataExtractor dynsym_data; + if (ReadSectionData(dynsym, dynsym_data)) { + const lldb::offset_t section_size = dynsym_data.GetByteSize(); + lldb::offset_t cursor = 0; + + while (cursor < section_size) { + if (!symbol.Parse(dynsym_data, &cursor)) + break; + + m_dynamic_symbols.push_back(symbol); + } + } + + return m_dynamic_symbols.size(); +} + +const ELFDynamic *ObjectFileELF::FindDynamicSymbol(unsigned tag) { + if (!ParseDynamicSymbols()) + return nullptr; + + DynamicSymbolCollIter I = m_dynamic_symbols.begin(); + DynamicSymbolCollIter E = m_dynamic_symbols.end(); + for (; I != E; ++I) { + ELFDynamic *symbol = &*I; + + if (symbol->d_tag == tag) + return symbol; + } + + return nullptr; +} + +unsigned ObjectFileELF::PLTRelocationType() { + // DT_PLTREL + // This member specifies the type of relocation entry to which the + // procedure linkage table refers. The d_val member holds DT_REL or + // DT_RELA, as appropriate. 
All relocations in a procedure linkage table + // must use the same relocation. + const ELFDynamic *symbol = FindDynamicSymbol(DT_PLTREL); + + if (symbol) + return symbol->d_val; + + return 0; +} + +// Returns the size of the normal plt entries and the offset of the first +// normal plt entry. The 0th entry in the plt table is usually a resolution +// entry which have different size in some architectures then the rest of the +// plt entries. +static std::pair<uint64_t, uint64_t> +GetPltEntrySizeAndOffset(const ELFSectionHeader *rel_hdr, + const ELFSectionHeader *plt_hdr) { + const elf_xword num_relocations = rel_hdr->sh_size / rel_hdr->sh_entsize; + + // Clang 3.3 sets entsize to 4 for 32-bit binaries, but the plt entries are + // 16 bytes. So round the entsize up by the alignment if addralign is set. + elf_xword plt_entsize = + plt_hdr->sh_addralign + ? llvm::alignTo(plt_hdr->sh_entsize, plt_hdr->sh_addralign) + : plt_hdr->sh_entsize; + + // Some linkers e.g ld for arm, fill plt_hdr->sh_entsize field incorrectly. + // PLT entries relocation code in general requires multiple instruction and + // should be greater than 4 bytes in most cases. Try to guess correct size + // just in case. + if (plt_entsize <= 4) { + // The linker haven't set the plt_hdr->sh_entsize field. Try to guess the + // size of the plt entries based on the number of entries and the size of + // the plt section with the assumption that the size of the 0th entry is at + // least as big as the size of the normal entries and it isn't much bigger + // then that. 
+ if (plt_hdr->sh_addralign) + plt_entsize = plt_hdr->sh_size / plt_hdr->sh_addralign / + (num_relocations + 1) * plt_hdr->sh_addralign; + else + plt_entsize = plt_hdr->sh_size / (num_relocations + 1); + } + + elf_xword plt_offset = plt_hdr->sh_size - num_relocations * plt_entsize; + + return std::make_pair(plt_entsize, plt_offset); +} + +static unsigned ParsePLTRelocations( + Symtab *symbol_table, user_id_t start_id, unsigned rel_type, + const ELFHeader *hdr, const ELFSectionHeader *rel_hdr, + const ELFSectionHeader *plt_hdr, const ELFSectionHeader *sym_hdr, + const lldb::SectionSP &plt_section_sp, DataExtractor &rel_data, + DataExtractor &symtab_data, DataExtractor &strtab_data) { + ELFRelocation rel(rel_type); + ELFSymbol symbol; + lldb::offset_t offset = 0; + + uint64_t plt_offset, plt_entsize; + std::tie(plt_entsize, plt_offset) = + GetPltEntrySizeAndOffset(rel_hdr, plt_hdr); + const elf_xword num_relocations = rel_hdr->sh_size / rel_hdr->sh_entsize; + + typedef unsigned (*reloc_info_fn)(const ELFRelocation &rel); + reloc_info_fn reloc_type; + reloc_info_fn reloc_symbol; + + if (hdr->Is32Bit()) { + reloc_type = ELFRelocation::RelocType32; + reloc_symbol = ELFRelocation::RelocSymbol32; + } else { + reloc_type = ELFRelocation::RelocType64; + reloc_symbol = ELFRelocation::RelocSymbol64; + } + + unsigned slot_type = hdr->GetRelocationJumpSlotType(); + unsigned i; + for (i = 0; i < num_relocations; ++i) { + if (!rel.Parse(rel_data, &offset)) + break; + + if (reloc_type(rel) != slot_type) + continue; + + lldb::offset_t symbol_offset = reloc_symbol(rel) * sym_hdr->sh_entsize; + if (!symbol.Parse(symtab_data, &symbol_offset)) + break; + + const char *symbol_name = strtab_data.PeekCStr(symbol.st_name); + uint64_t plt_index = plt_offset + i * plt_entsize; + + Symbol jump_symbol( + i + start_id, // Symbol table index + symbol_name, // symbol name. + eSymbolTypeTrampoline, // Type of this symbol + false, // Is this globally visible? + false, // Is this symbol debug info? 
+ true, // Is this symbol a trampoline? + true, // Is this symbol artificial? + plt_section_sp, // Section in which this symbol is defined or null. + plt_index, // Offset in section or symbol value. + plt_entsize, // Size in bytes of this symbol. + true, // Size is valid + false, // Contains linker annotations? + 0); // Symbol flags. + + symbol_table->AddSymbol(jump_symbol); + } + + return i; +} + +unsigned +ObjectFileELF::ParseTrampolineSymbols(Symtab *symbol_table, user_id_t start_id, + const ELFSectionHeaderInfo *rel_hdr, + user_id_t rel_id) { + assert(rel_hdr->sh_type == SHT_RELA || rel_hdr->sh_type == SHT_REL); + + // The link field points to the associated symbol table. + user_id_t symtab_id = rel_hdr->sh_link; + + // If the link field doesn't point to the appropriate symbol name table then + // try to find it by name as some compiler don't fill in the link fields. + if (!symtab_id) + symtab_id = GetSectionIndexByName(".dynsym"); + + // Get PLT section. We cannot use rel_hdr->sh_info, since current linkers + // point that to the .got.plt or .got section instead of .plt. + user_id_t plt_id = GetSectionIndexByName(".plt"); + + if (!symtab_id || !plt_id) + return 0; + + const ELFSectionHeaderInfo *plt_hdr = GetSectionHeaderByIndex(plt_id); + if (!plt_hdr) + return 0; + + const ELFSectionHeaderInfo *sym_hdr = GetSectionHeaderByIndex(symtab_id); + if (!sym_hdr) + return 0; + + SectionList *section_list = m_sections_up.get(); + if (!section_list) + return 0; + + Section *rel_section = section_list->FindSectionByID(rel_id).get(); + if (!rel_section) + return 0; + + SectionSP plt_section_sp(section_list->FindSectionByID(plt_id)); + if (!plt_section_sp) + return 0; + + Section *symtab = section_list->FindSectionByID(symtab_id).get(); + if (!symtab) + return 0; + + // sh_link points to associated string table. 
+ Section *strtab = section_list->FindSectionByID(sym_hdr->sh_link).get(); + if (!strtab) + return 0; + + DataExtractor rel_data; + if (!ReadSectionData(rel_section, rel_data)) + return 0; + + DataExtractor symtab_data; + if (!ReadSectionData(symtab, symtab_data)) + return 0; + + DataExtractor strtab_data; + if (!ReadSectionData(strtab, strtab_data)) + return 0; + + unsigned rel_type = PLTRelocationType(); + if (!rel_type) + return 0; + + return ParsePLTRelocations(symbol_table, start_id, rel_type, &m_header, + rel_hdr, plt_hdr, sym_hdr, plt_section_sp, + rel_data, symtab_data, strtab_data); +} + +static void ApplyELF64ABS64Relocation(Symtab *symtab, ELFRelocation &rel, + DataExtractor &debug_data, + Section *rel_section) { + Symbol *symbol = symtab->FindSymbolByID(ELFRelocation::RelocSymbol64(rel)); + if (symbol) { + addr_t value = symbol->GetAddressRef().GetFileAddress(); + DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer(); + // ObjectFileELF creates a WritableDataBuffer in CreateInstance. 
+ WritableDataBuffer *data_buffer = + llvm::cast<WritableDataBuffer>(data_buffer_sp.get()); + uint64_t *dst = reinterpret_cast<uint64_t *>( + data_buffer->GetBytes() + rel_section->GetFileOffset() + + ELFRelocation::RelocOffset64(rel)); + uint64_t val_offset = value + ELFRelocation::RelocAddend64(rel); + memcpy(dst, &val_offset, sizeof(uint64_t)); + } +} + +static void ApplyELF64ABS32Relocation(Symtab *symtab, ELFRelocation &rel, + DataExtractor &debug_data, + Section *rel_section, bool is_signed) { + Symbol *symbol = symtab->FindSymbolByID(ELFRelocation::RelocSymbol64(rel)); + if (symbol) { + addr_t value = symbol->GetAddressRef().GetFileAddress(); + value += ELFRelocation::RelocAddend32(rel); + if ((!is_signed && (value > UINT32_MAX)) || + (is_signed && + ((int64_t)value > INT32_MAX || (int64_t)value < INT32_MIN))) { + Log *log = GetLog(LLDBLog::Modules); + LLDB_LOGF(log, "Failed to apply debug info relocations"); + return; + } + uint32_t truncated_addr = (value & 0xFFFFFFFF); + DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer(); + // ObjectFileELF creates a WritableDataBuffer in CreateInstance. 
+ WritableDataBuffer *data_buffer = + llvm::cast<WritableDataBuffer>(data_buffer_sp.get()); + uint32_t *dst = reinterpret_cast<uint32_t *>( + data_buffer->GetBytes() + rel_section->GetFileOffset() + + ELFRelocation::RelocOffset32(rel)); + memcpy(dst, &truncated_addr, sizeof(uint32_t)); + } +} + +static void ApplyELF32ABS32RelRelocation(Symtab *symtab, ELFRelocation &rel, + DataExtractor &debug_data, + Section *rel_section) { + Log *log = GetLog(LLDBLog::Modules); + Symbol *symbol = symtab->FindSymbolByID(ELFRelocation::RelocSymbol32(rel)); + if (symbol) { + addr_t value = symbol->GetAddressRef().GetFileAddress(); + if (value == LLDB_INVALID_ADDRESS) { + const char *name = symbol->GetName().GetCString(); + LLDB_LOGF(log, "Debug info symbol invalid: %s", name); + return; + } + assert(llvm::isUInt<32>(value) && "Valid addresses are 32-bit"); + DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer(); + // ObjectFileELF creates a WritableDataBuffer in CreateInstance. + WritableDataBuffer *data_buffer = + llvm::cast<WritableDataBuffer>(data_buffer_sp.get()); + uint8_t *dst = data_buffer->GetBytes() + rel_section->GetFileOffset() + + ELFRelocation::RelocOffset32(rel); + // Implicit addend is stored inline as a signed value. + int32_t addend; + memcpy(&addend, dst, sizeof(int32_t)); + // The sum must be positive. This extra check prevents UB from overflow in + // the actual range check below. 
+ if (addend < 0 && static_cast<uint32_t>(-addend) > value) { + LLDB_LOGF(log, "Debug info relocation overflow: 0x%" PRIx64, + static_cast<int64_t>(value) + addend); + return; + } + if (!llvm::isUInt<32>(value + addend)) { + LLDB_LOGF(log, "Debug info relocation out of range: 0x%" PRIx64, value); + return; + } + uint32_t addr = value + addend; + memcpy(dst, &addr, sizeof(uint32_t)); + } +} + +unsigned ObjectFileELF::ApplyRelocations( + Symtab *symtab, const ELFHeader *hdr, const ELFSectionHeader *rel_hdr, + const ELFSectionHeader *symtab_hdr, const ELFSectionHeader *debug_hdr, + DataExtractor &rel_data, DataExtractor &symtab_data, + DataExtractor &debug_data, Section *rel_section) { + ELFRelocation rel(rel_hdr->sh_type); + lldb::addr_t offset = 0; + const unsigned num_relocations = rel_hdr->sh_size / rel_hdr->sh_entsize; + typedef unsigned (*reloc_info_fn)(const ELFRelocation &rel); + reloc_info_fn reloc_type; + reloc_info_fn reloc_symbol; + + if (hdr->Is32Bit()) { + reloc_type = ELFRelocation::RelocType32; + reloc_symbol = ELFRelocation::RelocSymbol32; + } else { + reloc_type = ELFRelocation::RelocType64; + reloc_symbol = ELFRelocation::RelocSymbol64; + } + + for (unsigned i = 0; i < num_relocations; ++i) { + if (!rel.Parse(rel_data, &offset)) { + GetModule()->ReportError(".rel{0}[{1:d}] failed to parse relocation", + rel_section->GetName().AsCString(), i); + break; + } + Symbol *symbol = nullptr; + + if (hdr->Is32Bit()) { + switch (hdr->e_machine) { + case llvm::ELF::EM_ARM: + switch (reloc_type(rel)) { + case R_ARM_ABS32: + ApplyELF32ABS32RelRelocation(symtab, rel, debug_data, rel_section); + break; + case R_ARM_REL32: + GetModule()->ReportError("unsupported AArch32 relocation:" + " .rel{0}[{1}], type {2}", + rel_section->GetName().AsCString(), i, + reloc_type(rel)); + break; + default: + assert(false && "unexpected relocation type"); + } + break; + case llvm::ELF::EM_386: + switch (reloc_type(rel)) { + case R_386_32: + symbol = 
symtab->FindSymbolByID(reloc_symbol(rel)); + if (symbol) { + addr_t f_offset = + rel_section->GetFileOffset() + ELFRelocation::RelocOffset32(rel); + DataBufferSP &data_buffer_sp = debug_data.GetSharedDataBuffer(); + // ObjectFileELF creates a WritableDataBuffer in CreateInstance. + WritableDataBuffer *data_buffer = + llvm::cast<WritableDataBuffer>(data_buffer_sp.get()); + uint32_t *dst = reinterpret_cast<uint32_t *>( + data_buffer->GetBytes() + f_offset); + + addr_t value = symbol->GetAddressRef().GetFileAddress(); + if (rel.IsRela()) { + value += ELFRelocation::RelocAddend32(rel); + } else { + value += *dst; + } + *dst = value; + } else { + GetModule()->ReportError(".rel{0}[{1}] unknown symbol id: {2:d}", + rel_section->GetName().AsCString(), i, + reloc_symbol(rel)); + } + break; + case R_386_NONE: + case R_386_PC32: + GetModule()->ReportError("unsupported i386 relocation:" + " .rel{0}[{1}], type {2}", + rel_section->GetName().AsCString(), i, + reloc_type(rel)); + break; + default: + assert(false && "unexpected relocation type"); + break; + } + break; + default: + GetModule()->ReportError("unsupported 32-bit ELF machine arch: {0}", hdr->e_machine); + break; + } + } else { + switch (hdr->e_machine) { + case llvm::ELF::EM_AARCH64: + switch (reloc_type(rel)) { + case R_AARCH64_ABS64: + ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section); + break; + case R_AARCH64_ABS32: + ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true); + break; + default: + assert(false && "unexpected relocation type"); + } + break; + case llvm::ELF::EM_LOONGARCH: + switch (reloc_type(rel)) { + case R_LARCH_64: + ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section); + break; + case R_LARCH_32: + ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true); + break; + default: + assert(false && "unexpected relocation type"); + } + break; + case llvm::ELF::EM_X86_64: + switch (reloc_type(rel)) { + case R_X86_64_64: + 
ApplyELF64ABS64Relocation(symtab, rel, debug_data, rel_section); + break; + case R_X86_64_32: + ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, + false); + break; + case R_X86_64_32S: + ApplyELF64ABS32Relocation(symtab, rel, debug_data, rel_section, true); + break; + case R_X86_64_PC32: + default: + assert(false && "unexpected relocation type"); + } + break; + default: + GetModule()->ReportError("unsupported 64-bit ELF machine arch: {0}", hdr->e_machine); + break; + } + } + } + + return 0; +} + +unsigned ObjectFileELF::RelocateDebugSections(const ELFSectionHeader *rel_hdr, + user_id_t rel_id, + lldb_private::Symtab *thetab) { + assert(rel_hdr->sh_type == SHT_RELA || rel_hdr->sh_type == SHT_REL); + + // Parse in the section list if needed. + SectionList *section_list = GetSectionList(); + if (!section_list) + return 0; + + user_id_t symtab_id = rel_hdr->sh_link; + user_id_t debug_id = rel_hdr->sh_info; + + const ELFSectionHeader *symtab_hdr = GetSectionHeaderByIndex(symtab_id); + if (!symtab_hdr) + return 0; + + const ELFSectionHeader *debug_hdr = GetSectionHeaderByIndex(debug_id); + if (!debug_hdr) + return 0; + + Section *rel = section_list->FindSectionByID(rel_id).get(); + if (!rel) + return 0; + + Section *symtab = section_list->FindSectionByID(symtab_id).get(); + if (!symtab) + return 0; + + Section *debug = section_list->FindSectionByID(debug_id).get(); + if (!debug) + return 0; + + DataExtractor rel_data; + DataExtractor symtab_data; + DataExtractor debug_data; + + if (GetData(rel->GetFileOffset(), rel->GetFileSize(), rel_data) && + GetData(symtab->GetFileOffset(), symtab->GetFileSize(), symtab_data) && + GetData(debug->GetFileOffset(), debug->GetFileSize(), debug_data)) { + ApplyRelocations(thetab, &m_header, rel_hdr, symtab_hdr, debug_hdr, + rel_data, symtab_data, debug_data, debug); + } + + return 0; +} + +void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) { + ModuleSP module_sp(GetModule()); + if (!module_sp) + return; + + Progress 
progress("Parsing symbol table", + m_file.GetFilename().AsCString("<Unknown>")); + ElapsedTime elapsed(module_sp->GetSymtabParseTime()); + + // We always want to use the main object file so we (hopefully) only have one + // cached copy of our symtab, dynamic sections, etc. + ObjectFile *module_obj_file = module_sp->GetObjectFile(); + if (module_obj_file && module_obj_file != this) + return module_obj_file->ParseSymtab(lldb_symtab); + + SectionList *section_list = module_sp->GetSectionList(); + if (!section_list) + return; + + uint64_t symbol_id = 0; + + // Sharable objects and dynamic executables usually have 2 distinct symbol + // tables, one named ".symtab", and the other ".dynsym". The dynsym is a + // smaller version of the symtab that only contains global symbols. The + // information found in the dynsym is therefore also found in the symtab, + // while the reverse is not necessarily true. + Section *symtab = + section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get(); + if (symtab) { + auto [num_symbols, address_class_map] = + ParseSymbolTable(&lldb_symtab, symbol_id, symtab); + m_address_class_map.merge(address_class_map); + symbol_id += num_symbols; + } + + // The symtab section is non-allocable and can be stripped, while the + // .dynsym section which should always be always be there. To support the + // minidebuginfo case we parse .dynsym when there's a .gnu_debuginfo + // section, nomatter if .symtab was already parsed or not. This is because + // minidebuginfo normally removes the .symtab symbols which have their + // matching .dynsym counterparts. 
+ if (!symtab || + GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) { + Section *dynsym = + section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true) + .get(); + if (dynsym) { + auto [num_symbols, address_class_map] = + ParseSymbolTable(&lldb_symtab, symbol_id, dynsym); + symbol_id += num_symbols; + m_address_class_map.merge(address_class_map); + } + } + + // DT_JMPREL + // If present, this entry's d_ptr member holds the address of + // relocation + // entries associated solely with the procedure linkage table. + // Separating + // these relocation entries lets the dynamic linker ignore them during + // process initialization, if lazy binding is enabled. If this entry is + // present, the related entries of types DT_PLTRELSZ and DT_PLTREL must + // also be present. + const ELFDynamic *symbol = FindDynamicSymbol(DT_JMPREL); + if (symbol) { + // Synthesize trampoline symbols to help navigate the PLT. + addr_t addr = symbol->d_ptr; + Section *reloc_section = + section_list->FindSectionContainingFileAddress(addr).get(); + if (reloc_section) { + user_id_t reloc_id = reloc_section->GetID(); + const ELFSectionHeaderInfo *reloc_header = + GetSectionHeaderByIndex(reloc_id); + if (reloc_header) + ParseTrampolineSymbols(&lldb_symtab, symbol_id, reloc_header, reloc_id); + } + } + + if (DWARFCallFrameInfo *eh_frame = + GetModule()->GetUnwindTable().GetEHFrameInfo()) { + ParseUnwindSymbols(&lldb_symtab, eh_frame); + } + + // In the event that there's no symbol entry for the entry point we'll + // artificially create one. We delegate to the symtab object the figuring + // out of the proper size, this will usually make it span til the next + // symbol it finds in the section. This means that if there are missing + // symbols the entry point might span beyond its function definition. + // We're fine with this as it doesn't make it worse than not having a + // symbol entry at all. 
+ if (CalculateType() == eTypeExecutable) { + ArchSpec arch = GetArchitecture(); + auto entry_point_addr = GetEntryPointAddress(); + bool is_valid_entry_point = + entry_point_addr.IsValid() && entry_point_addr.IsSectionOffset(); + addr_t entry_point_file_addr = entry_point_addr.GetFileAddress(); + if (is_valid_entry_point && !lldb_symtab.FindSymbolContainingFileAddress( + entry_point_file_addr)) { + uint64_t symbol_id = lldb_symtab.GetNumSymbols(); + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + SectionSP section_sp = entry_point_addr.GetSection(); + Symbol symbol( + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/0, + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); + // When the entry point is arm thumb we need to explicitly set its + // class address to reflect that. This is important because expression + // evaluation relies on correctly setting a breakpoint at this + // address. + if (arch.GetMachine() == llvm::Triple::arm && + (entry_point_file_addr & 1)) { + symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1); + m_address_class_map[entry_point_file_addr ^ 1] = + AddressClass::eCodeAlternateISA; + } else { + m_address_class_map[entry_point_file_addr] = AddressClass::eCode; + } + lldb_symtab.AddSymbol(symbol); + } + } +} + +void ObjectFileELF::RelocateSection(lldb_private::Section *section) +{ + static const char *debug_prefix = ".debug"; + + // Set relocated bit so we stop getting called, regardless of whether we + // actually relocate. 
+ section->SetIsRelocated(true); + + // We only relocate in ELF relocatable files + if (CalculateType() != eTypeObjectFile) + return; + + const char *section_name = section->GetName().GetCString(); + // Can't relocate that which can't be named + if (section_name == nullptr) + return; + + // We don't relocate non-debug sections at the moment + if (strncmp(section_name, debug_prefix, strlen(debug_prefix))) + return; + + // Relocation section names to look for + std::string needle = std::string(".rel") + section_name; + std::string needlea = std::string(".rela") + section_name; + + for (SectionHeaderCollIter I = m_section_headers.begin(); + I != m_section_headers.end(); ++I) { + if (I->sh_type == SHT_RELA || I->sh_type == SHT_REL) { + const char *hay_name = I->section_name.GetCString(); + if (hay_name == nullptr) + continue; + if (needle == hay_name || needlea == hay_name) { + const ELFSectionHeader &reloc_header = *I; + user_id_t reloc_id = SectionIndex(I); + RelocateDebugSections(&reloc_header, reloc_id, GetSymtab()); + break; + } + } + } +} + +void ObjectFileELF::ParseUnwindSymbols(Symtab *symbol_table, + DWARFCallFrameInfo *eh_frame) { + SectionList *section_list = GetSectionList(); + if (!section_list) + return; + + // First we save the new symbols into a separate list and add them to the + // symbol table after we collected all symbols we want to add. This is + // neccessary because adding a new symbol invalidates the internal index of + // the symtab what causing the next lookup to be slow because it have to + // recalculate the index first. + std::vector<Symbol> new_symbols; + + size_t num_symbols = symbol_table->GetNumSymbols(); + uint64_t last_symbol_id = + num_symbols ? 
symbol_table->SymbolAtIndex(num_symbols - 1)->GetID() : 0; + eh_frame->ForEachFDEEntries([&](lldb::addr_t file_addr, uint32_t size, + dw_offset_t) { + Symbol *symbol = symbol_table->FindSymbolAtFileAddress(file_addr); + if (symbol) { + if (!symbol->GetByteSizeIsValid()) { + symbol->SetByteSize(size); + symbol->SetSizeIsSynthesized(true); + } + } else { + SectionSP section_sp = + section_list->FindSectionContainingFileAddress(file_addr); + if (section_sp) { + addr_t offset = file_addr - section_sp->GetFileAddress(); + uint64_t symbol_id = ++last_symbol_id; + // Don't set the name for any synthetic symbols, the Symbol + // object will generate one if needed when the name is accessed + // via accessors. + Symbol eh_symbol( + /*symID=*/symbol_id, + /*name=*/llvm::StringRef(), // Name will be auto generated. + /*type=*/eSymbolTypeCode, + /*external=*/true, + /*is_debug=*/false, + /*is_trampoline=*/false, + /*is_artificial=*/true, + /*section_sp=*/section_sp, + /*offset=*/offset, + /*size=*/0, // FDE can span multiple symbols so don't use its size. + /*size_is_valid=*/false, + /*contains_linker_annotations=*/false, + /*flags=*/0); + new_symbols.push_back(eh_symbol); + } + } + return true; + }); + + for (const Symbol &s : new_symbols) + symbol_table->AddSymbol(s); +} + +bool ObjectFileELF::IsStripped() { + // TODO: determine this for ELF + return false; +} + +//===----------------------------------------------------------------------===// +// Dump +// +// Dump the specifics of the runtime file container (such as any headers +// segments, sections, etc). 
+void ObjectFileELF::Dump(Stream *s) { + ModuleSP module_sp(GetModule()); + if (!module_sp) { + return; + } + + std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); + s->Printf("%p: ", static_cast<void *>(this)); + s->Indent(); + s->PutCString("ObjectFileELF"); + + ArchSpec header_arch = GetArchitecture(); + + *s << ", file = '" << m_file + << "', arch = " << header_arch.GetArchitectureName() << "\n"; + + DumpELFHeader(s, m_header); + s->EOL(); + DumpELFProgramHeaders(s); + s->EOL(); + DumpELFSectionHeaders(s); + s->EOL(); + SectionList *section_list = GetSectionList(); + if (section_list) + section_list->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, + UINT32_MAX); + Symtab *symtab = GetSymtab(); + if (symtab) + symtab->Dump(s, nullptr, eSortOrderNone); + s->EOL(); + DumpDependentModules(s); + s->EOL(); +} + +// DumpELFHeader +// +// Dump the ELF header to the specified output stream +void ObjectFileELF::DumpELFHeader(Stream *s, const ELFHeader &header) { + s->PutCString("ELF Header\n"); + s->Printf("e_ident[EI_MAG0 ] = 0x%2.2x\n", header.e_ident[EI_MAG0]); + s->Printf("e_ident[EI_MAG1 ] = 0x%2.2x '%c'\n", header.e_ident[EI_MAG1], + header.e_ident[EI_MAG1]); + s->Printf("e_ident[EI_MAG2 ] = 0x%2.2x '%c'\n", header.e_ident[EI_MAG2], + header.e_ident[EI_MAG2]); + s->Printf("e_ident[EI_MAG3 ] = 0x%2.2x '%c'\n", header.e_ident[EI_MAG3], + header.e_ident[EI_MAG3]); + + s->Printf("e_ident[EI_CLASS ] = 0x%2.2x\n", header.e_ident[EI_CLASS]); + s->Printf("e_ident[EI_DATA ] = 0x%2.2x ", header.e_ident[EI_DATA]); + DumpELFHeader_e_ident_EI_DATA(s, header.e_ident[EI_DATA]); + s->Printf("\ne_ident[EI_VERSION] = 0x%2.2x\n", header.e_ident[EI_VERSION]); + s->Printf("e_ident[EI_PAD ] = 0x%2.2x\n", header.e_ident[EI_PAD]); + + s->Printf("e_type = 0x%4.4x ", header.e_type); + DumpELFHeader_e_type(s, header.e_type); + s->Printf("\ne_machine = 0x%4.4x\n", header.e_machine); + s->Printf("e_version = 0x%8.8x\n", header.e_version); + s->Printf("e_entry = 
0x%8.8" PRIx64 "\n", header.e_entry); + s->Printf("e_phoff = 0x%8.8" PRIx64 "\n", header.e_phoff); + s->Printf("e_shoff = 0x%8.8" PRIx64 "\n", header.e_shoff); + s->Printf("e_flags = 0x%8.8x\n", header.e_flags); + s->Printf("e_ehsize = 0x%4.4x\n", header.e_ehsize); + s->Printf("e_phentsize = 0x%4.4x\n", header.e_phentsize); + s->Printf("e_phnum = 0x%8.8x\n", header.e_phnum); + s->Printf("e_shentsize = 0x%4.4x\n", header.e_shentsize); + s->Printf("e_shnum = 0x%8.8x\n", header.e_shnum); + s->Printf("e_shstrndx = 0x%8.8x\n", header.e_shstrndx); +} + +// DumpELFHeader_e_type +// +// Dump an token value for the ELF header member e_type +void ObjectFileELF::DumpELFHeader_e_type(Stream *s, elf_half e_type) { + switch (e_type) { + case ET_NONE: + *s << "ET_NONE"; + break; + case ET_REL: + *s << "ET_REL"; + break; + case ET_EXEC: + *s << "ET_EXEC"; + break; + case ET_DYN: + *s << "ET_DYN"; + break; + case ET_CORE: + *s << "ET_CORE"; + break; + default: + break; + } +} + +// DumpELFHeader_e_ident_EI_DATA +// +// Dump an token value for the ELF header member e_ident[EI_DATA] +void ObjectFileELF::DumpELFHeader_e_ident_EI_DATA(Stream *s, + unsigned char ei_data) { + switch (ei_data) { + case ELFDATANONE: + *s << "ELFDATANONE"; + break; + case ELFDATA2LSB: + *s << "ELFDATA2LSB - Little Endian"; + break; + case ELFDATA2MSB: + *s << "ELFDATA2MSB - Big Endian"; + break; + default: + break; + } +} + +// DumpELFProgramHeader +// +// Dump a single ELF program header to the specified output stream +void ObjectFileELF::DumpELFProgramHeader(Stream *s, + const ELFProgramHeader &ph) { + DumpELFProgramHeader_p_type(s, ph.p_type); + s->Printf(" %8.8" PRIx64 " %8.8" PRIx64 " %8.8" PRIx64, ph.p_offset, + ph.p_vaddr, ph.p_paddr); + s->Printf(" %8.8" PRIx64 " %8.8" PRIx64 " %8.8x (", ph.p_filesz, ph.p_memsz, + ph.p_flags); + + DumpELFProgramHeader_p_flags(s, ph.p_flags); + s->Printf(") %8.8" PRIx64, ph.p_align); +} + +// DumpELFProgramHeader_p_type +// +// Dump an token value for the ELF program 
header member p_type which describes +// the type of the program header +void ObjectFileELF::DumpELFProgramHeader_p_type(Stream *s, elf_word p_type) { + const int kStrWidth = 15; + switch (p_type) { + CASE_AND_STREAM(s, PT_NULL, kStrWidth); + CASE_AND_STREAM(s, PT_LOAD, kStrWidth); + CASE_AND_STREAM(s, PT_DYNAMIC, kStrWidth); + CASE_AND_STREAM(s, PT_INTERP, kStrWidth); + CASE_AND_STREAM(s, PT_NOTE, kStrWidth); + CASE_AND_STREAM(s, PT_SHLIB, kStrWidth); + CASE_AND_STREAM(s, PT_PHDR, kStrWidth); + CASE_AND_STREAM(s, PT_TLS, kStrWidth); + CASE_AND_STREAM(s, PT_GNU_EH_FRAME, kStrWidth); + default: + s->Printf("0x%8.8x%*s", p_type, kStrWidth - 10, ""); + break; + } +} + +// DumpELFProgramHeader_p_flags +// +// Dump an token value for the ELF program header member p_flags +void ObjectFileELF::DumpELFProgramHeader_p_flags(Stream *s, elf_word p_flags) { + *s << ((p_flags & PF_X) ? "PF_X" : " ") + << (((p_flags & PF_X) && (p_flags & PF_W)) ? '+' : ' ') + << ((p_flags & PF_W) ? "PF_W" : " ") + << (((p_flags & PF_W) && (p_flags & PF_R)) ? '+' : ' ') + << ((p_flags & PF_R) ? 
"PF_R" : " "); +} + +// DumpELFProgramHeaders +// +// Dump all of the ELF program header to the specified output stream +void ObjectFileELF::DumpELFProgramHeaders(Stream *s) { + if (!ParseProgramHeaders()) + return; + + s->PutCString("Program Headers\n"); + s->PutCString("IDX p_type p_offset p_vaddr p_paddr " + "p_filesz p_memsz p_flags p_align\n"); + s->PutCString("==== --------------- -------- -------- -------- " + "-------- -------- ------------------------- --------\n"); + + for (const auto &H : llvm::enumerate(m_program_headers)) { + s->Format("[{0,2}] ", H.index()); + ObjectFileELF::DumpELFProgramHeader(s, H.value()); + s->EOL(); + } +} + +// DumpELFSectionHeader +// +// Dump a single ELF section header to the specified output stream +void ObjectFileELF::DumpELFSectionHeader(Stream *s, + const ELFSectionHeaderInfo &sh) { + s->Printf("%8.8x ", sh.sh_name); + DumpELFSectionHeader_sh_type(s, sh.sh_type); + s->Printf(" %8.8" PRIx64 " (", sh.sh_flags); + DumpELFSectionHeader_sh_flags(s, sh.sh_flags); + s->Printf(") %8.8" PRIx64 " %8.8" PRIx64 " %8.8" PRIx64, sh.sh_addr, + sh.sh_offset, sh.sh_size); + s->Printf(" %8.8x %8.8x", sh.sh_link, sh.sh_info); + s->Printf(" %8.8" PRIx64 " %8.8" PRIx64, sh.sh_addralign, sh.sh_entsize); +} + +// DumpELFSectionHeader_sh_type +// +// Dump an token value for the ELF section header member sh_type which +// describes the type of the section +void ObjectFileELF::DumpELFSectionHeader_sh_type(Stream *s, elf_word sh_type) { + const int kStrWidth = 12; + switch (sh_type) { + CASE_AND_STREAM(s, SHT_NULL, kStrWidth); + CASE_AND_STREAM(s, SHT_PROGBITS, kStrWidth); + CASE_AND_STREAM(s, SHT_SYMTAB, kStrWidth); + CASE_AND_STREAM(s, SHT_STRTAB, kStrWidth); + CASE_AND_STREAM(s, SHT_RELA, kStrWidth); + CASE_AND_STREAM(s, SHT_HASH, kStrWidth); + CASE_AND_STREAM(s, SHT_DYNAMIC, kStrWidth); + CASE_AND_STREAM(s, SHT_NOTE, kStrWidth); + CASE_AND_STREAM(s, SHT_NOBITS, kStrWidth); + CASE_AND_STREAM(s, SHT_REL, kStrWidth); + CASE_AND_STREAM(s, 
SHT_SHLIB, kStrWidth); + CASE_AND_STREAM(s, SHT_DYNSYM, kStrWidth); + CASE_AND_STREAM(s, SHT_LOPROC, kStrWidth); + CASE_AND_STREAM(s, SHT_HIPROC, kStrWidth); + CASE_AND_STREAM(s, SHT_LOUSER, kStrWidth); + CASE_AND_STREAM(s, SHT_HIUSER, kStrWidth); + default: + s->Printf("0x%8.8x%*s", sh_type, kStrWidth - 10, ""); + break; + } +} + +// DumpELFSectionHeader_sh_flags +// +// Dump an token value for the ELF section header member sh_flags +void ObjectFileELF::DumpELFSectionHeader_sh_flags(Stream *s, + elf_xword sh_flags) { + *s << ((sh_flags & SHF_WRITE) ? "WRITE" : " ") + << (((sh_flags & SHF_WRITE) && (sh_flags & SHF_ALLOC)) ? '+' : ' ') + << ((sh_flags & SHF_ALLOC) ? "ALLOC" : " ") + << (((sh_flags & SHF_ALLOC) && (sh_flags & SHF_EXECINSTR)) ? '+' : ' ') + << ((sh_flags & SHF_EXECINSTR) ? "EXECINSTR" : " "); +} + +// DumpELFSectionHeaders +// +// Dump all of the ELF section header to the specified output stream +void ObjectFileELF::DumpELFSectionHeaders(Stream *s) { + if (!ParseSectionHeaders()) + return; + + s->PutCString("Section Headers\n"); + s->PutCString("IDX name type flags " + "addr offset size link info addralgn " + "entsize Name\n"); + s->PutCString("==== -------- ------------ -------------------------------- " + "-------- -------- -------- -------- -------- -------- " + "-------- ====================\n"); + + uint32_t idx = 0; + for (SectionHeaderCollConstIter I = m_section_headers.begin(); + I != m_section_headers.end(); ++I, ++idx) { + s->Printf("[%2u] ", idx); + ObjectFileELF::DumpELFSectionHeader(s, *I); + const char *section_name = I->section_name.AsCString(""); + if (section_name) + *s << ' ' << section_name << "\n"; + } +} + +void ObjectFileELF::DumpDependentModules(lldb_private::Stream *s) { + size_t num_modules = ParseDependentModules(); + + if (num_modules > 0) { + s->PutCString("Dependent Modules:\n"); + for (unsigned i = 0; i < num_modules; ++i) { + const FileSpec &spec = m_filespec_up->GetFileSpecAtIndex(i); + s->Printf(" %s\n", 
spec.GetFilename().GetCString()); + } + } +} + +ArchSpec ObjectFileELF::GetArchitecture() { + if (!ParseHeader()) + return ArchSpec(); + + if (m_section_headers.empty()) { + // Allow elf notes to be parsed which may affect the detected architecture. + ParseSectionHeaders(); + } + + if (CalculateType() == eTypeCoreFile && + !m_arch_spec.TripleOSWasSpecified()) { + // Core files don't have section headers yet they have PT_NOTE program + // headers that might shed more light on the architecture + for (const elf::ELFProgramHeader &H : ProgramHeaders()) { + if (H.p_type != PT_NOTE || H.p_offset == 0 || H.p_filesz == 0) + continue; + DataExtractor data; + if (data.SetData(m_data, H.p_offset, H.p_filesz) == H.p_filesz) { + UUID uuid; + RefineModuleDetailsFromNote(data, m_arch_spec, uuid); + } + } + } + return m_arch_spec; +} + +ObjectFile::Type ObjectFileELF::CalculateType() { + switch (m_header.e_type) { + case llvm::ELF::ET_NONE: + // 0 - No file type + return eTypeUnknown; + + case llvm::ELF::ET_REL: + // 1 - Relocatable file + return eTypeObjectFile; + + case llvm::ELF::ET_EXEC: + // 2 - Executable file + return eTypeExecutable; + + case llvm::ELF::ET_DYN: + // 3 - Shared object file + return eTypeSharedLibrary; + + case ET_CORE: + // 4 - Core file + return eTypeCoreFile; + + default: + break; + } + return eTypeUnknown; +} + +ObjectFile::Strata ObjectFileELF::CalculateStrata() { + switch (m_header.e_type) { + case llvm::ELF::ET_NONE: + // 0 - No file type + return eStrataUnknown; + + case llvm::ELF::ET_REL: + // 1 - Relocatable file + return eStrataUnknown; + + case llvm::ELF::ET_EXEC: + // 2 - Executable file + { + SectionList *section_list = GetSectionList(); + if (section_list) { + static ConstString loader_section_name(".interp"); + SectionSP loader_section = + section_list->FindSectionByName(loader_section_name); + if (loader_section) { + char buffer[256]; + size_t read_size = + ReadSectionData(loader_section.get(), 0, buffer, sizeof(buffer)); + + // We compare 
the content of .interp section + // It will contains \0 when counting read_size, so the size needs to + // decrease by one + llvm::StringRef loader_name(buffer, read_size - 1); + llvm::StringRef freebsd_kernel_loader_name("/red/herring"); + if (loader_name == freebsd_kernel_loader_name) + return eStrataKernel; + } + } + return eStrataUser; + } + + case llvm::ELF::ET_DYN: + // 3 - Shared object file + // TODO: is there any way to detect that an shared library is a kernel + // related executable by inspecting the program headers, section headers, + // symbols, or any other flag bits??? + return eStrataUnknown; + + case ET_CORE: + // 4 - Core file + // TODO: is there any way to detect that an core file is a kernel + // related executable by inspecting the program headers, section headers, + // symbols, or any other flag bits??? + return eStrataUnknown; + + default: + break; + } + return eStrataUnknown; +} + +size_t ObjectFileELF::ReadSectionData(Section *section, + lldb::offset_t section_offset, void *dst, + size_t dst_len) { + // If some other objectfile owns this data, pass this to them. + if (section->GetObjectFile() != this) + return section->GetObjectFile()->ReadSectionData(section, section_offset, + dst, dst_len); + + if (!section->Test(SHF_COMPRESSED)) + return ObjectFile::ReadSectionData(section, section_offset, dst, dst_len); + + // For compressed sections we need to read to full data to be able to + // decompress. + DataExtractor data; + ReadSectionData(section, data); + return data.CopyData(section_offset, dst_len, dst); +} + +size_t ObjectFileELF::ReadSectionData(Section *section, + DataExtractor §ion_data) { + // If some other objectfile owns this data, pass this to them. 
+ if (section->GetObjectFile() != this) + return section->GetObjectFile()->ReadSectionData(section, section_data); + + size_t result = ObjectFile::ReadSectionData(section, section_data); + if (result == 0 || !(section->Get() & llvm::ELF::SHF_COMPRESSED)) + return result; + + auto Decompressor = llvm::object::Decompressor::create( + section->GetName().GetStringRef(), + {reinterpret_cast<const char *>(section_data.GetDataStart()), + size_t(section_data.GetByteSize())}, + GetByteOrder() == eByteOrderLittle, GetAddressByteSize() == 8); + if (!Decompressor) { + GetModule()->ReportWarning( + "Unable to initialize decompressor for section '{0}': {1}", + section->GetName().GetCString(), + llvm::toString(Decompressor.takeError()).c_str()); + section_data.Clear(); + return 0; + } + + auto buffer_sp = + std::make_shared<DataBufferHeap>(Decompressor->getDecompressedSize(), 0); + if (auto error = Decompressor->decompress( + {buffer_sp->GetBytes(), size_t(buffer_sp->GetByteSize())})) { + GetModule()->ReportWarning("Decompression of section '{0}' failed: {1}", + section->GetName().GetCString(), + llvm::toString(std::move(error)).c_str()); + section_data.Clear(); + return 0; + } + + section_data.SetData(buffer_sp); + return buffer_sp->GetByteSize(); +} + +llvm::ArrayRef<ELFProgramHeader> ObjectFileELF::ProgramHeaders() { + ParseProgramHeaders(); + return m_program_headers; +} + +DataExtractor ObjectFileELF::GetSegmentData(const ELFProgramHeader &H) { + return DataExtractor(m_data, H.p_offset, H.p_filesz); +} + +bool ObjectFileELF::AnySegmentHasPhysicalAddress() { + for (const ELFProgramHeader &H : ProgramHeaders()) { + if (H.p_paddr != 0) + return true; + } + return false; +} + +std::vector<ObjectFile::LoadableData> +ObjectFileELF::GetLoadableData(Target &target) { + // Create a list of loadable data from loadable segments, using physical + // addresses if they aren't all null + std::vector<LoadableData> loadables; + bool should_use_paddr = AnySegmentHasPhysicalAddress(); + for 
(const ELFProgramHeader &H : ProgramHeaders()) { + LoadableData loadable; + if (H.p_type != llvm::ELF::PT_LOAD) + continue; + loadable.Dest = should_use_paddr ? H.p_paddr : H.p_vaddr; + if (loadable.Dest == LLDB_INVALID_ADDRESS) + continue; + if (H.p_filesz == 0) + continue; + auto segment_data = GetSegmentData(H); + loadable.Contents = llvm::ArrayRef<uint8_t>(segment_data.GetDataStart(), + segment_data.GetByteSize()); + loadables.push_back(loadable); + } + return loadables; +} + +lldb::WritableDataBufferSP +ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size, + uint64_t Offset) { + return FileSystem::Instance().CreateWritableDataBuffer(file.GetPath(), Size, + Offset); +} diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h new file mode 100644 index 000000000000..844e981b1d89 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h @@ -0,0 +1,407 @@ +//===-- ObjectFileELF.h --------------------------------------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H +#define LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H + +#include <cstdint> + +#include <optional> +#include <vector> + +#include "lldb/Symbol/ObjectFile.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/FileSpec.h" +#include "lldb/Utility/UUID.h" +#include "lldb/lldb-private.h" + +#include "ELFHeader.h" + +struct ELFNote { + elf::elf_word n_namesz = 0; + elf::elf_word n_descsz = 0; + elf::elf_word n_type = 0; + + std::string n_name; + + ELFNote() = default; + + /// Parse an ELFNote entry from the given DataExtractor starting at position + /// \p offset. + /// + /// \param[in] data + /// The DataExtractor to read from. + /// + /// \param[in,out] offset + /// Pointer to an offset in the data. On return the offset will be + /// advanced by the number of bytes read. + /// + /// \return + /// True if the ELFRel entry was successfully read and false otherwise. + bool Parse(const lldb_private::DataExtractor &data, lldb::offset_t *offset); + + size_t GetByteSize() const { + return 12 + llvm::alignTo(n_namesz, 4) + llvm::alignTo(n_descsz, 4); + } +}; + +/// \class ObjectFileELF +/// Generic ELF object file reader. +/// +/// This class provides a generic ELF (32/64 bit) reader plugin implementing +/// the ObjectFile protocol. 
+class ObjectFileELF : public lldb_private::ObjectFile { +public: + // Static Functions + static void Initialize(); + + static void Terminate(); + + static llvm::StringRef GetPluginNameStatic() { return "elf"; } + + static llvm::StringRef GetPluginDescriptionStatic() { + return "ELF object file reader."; + } + + static lldb_private::ObjectFile * + CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp, + lldb::offset_t data_offset, const lldb_private::FileSpec *file, + lldb::offset_t file_offset, lldb::offset_t length); + + static lldb_private::ObjectFile *CreateMemoryInstance( + const lldb::ModuleSP &module_sp, lldb::WritableDataBufferSP data_sp, + const lldb::ProcessSP &process_sp, lldb::addr_t header_addr); + + static size_t GetModuleSpecifications(const lldb_private::FileSpec &file, + lldb::DataBufferSP &data_sp, + lldb::offset_t data_offset, + lldb::offset_t file_offset, + lldb::offset_t length, + lldb_private::ModuleSpecList &specs); + + static bool MagicBytesMatch(lldb::DataBufferSP &data_sp, lldb::addr_t offset, + lldb::addr_t length); + + // PluginInterface protocol + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + // LLVM RTTI support + static char ID; + bool isA(const void *ClassID) const override { + return ClassID == &ID || ObjectFile::isA(ClassID); + } + static bool classof(const ObjectFile *obj) { return obj->isA(&ID); } + + // ObjectFile Protocol. 
+ bool ParseHeader() override; + + bool SetLoadAddress(lldb_private::Target &target, lldb::addr_t value, + bool value_is_offset) override; + + lldb::ByteOrder GetByteOrder() const override; + + bool IsExecutable() const override; + + uint32_t GetAddressByteSize() const override; + + lldb_private::AddressClass GetAddressClass(lldb::addr_t file_addr) override; + + void ParseSymtab(lldb_private::Symtab &symtab) override; + + bool IsStripped() override; + + void CreateSections(lldb_private::SectionList &unified_section_list) override; + + void Dump(lldb_private::Stream *s) override; + + lldb_private::ArchSpec GetArchitecture() override; + + lldb_private::UUID GetUUID() override; + + /// Return the contents of the .gnu_debuglink section, if the object file + /// contains it. + std::optional<lldb_private::FileSpec> GetDebugLink(); + + uint32_t GetDependentModules(lldb_private::FileSpecList &files) override; + + lldb_private::Address + GetImageInfoAddress(lldb_private::Target *target) override; + + lldb_private::Address GetEntryPointAddress() override; + + lldb_private::Address GetBaseAddress() override; + + ObjectFile::Type CalculateType() override; + + ObjectFile::Strata CalculateStrata() override; + + size_t ReadSectionData(lldb_private::Section *section, + lldb::offset_t section_offset, void *dst, + size_t dst_len) override; + + size_t ReadSectionData(lldb_private::Section *section, + lldb_private::DataExtractor §ion_data) override; + + llvm::ArrayRef<elf::ELFProgramHeader> ProgramHeaders(); + lldb_private::DataExtractor GetSegmentData(const elf::ELFProgramHeader &H); + + llvm::StringRef + StripLinkerSymbolAnnotations(llvm::StringRef symbol_name) const override; + + void RelocateSection(lldb_private::Section *section) override; + +protected: + + std::vector<LoadableData> + GetLoadableData(lldb_private::Target &target) override; + + static lldb::WritableDataBufferSP + MapFileDataWritable(const lldb_private::FileSpec &file, uint64_t Size, + uint64_t Offset); + +private: 
+ ObjectFileELF(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp, + lldb::offset_t data_offset, const lldb_private::FileSpec *file, + lldb::offset_t offset, lldb::offset_t length); + + ObjectFileELF(const lldb::ModuleSP &module_sp, + lldb::DataBufferSP header_data_sp, + const lldb::ProcessSP &process_sp, lldb::addr_t header_addr); + + typedef std::vector<elf::ELFProgramHeader> ProgramHeaderColl; + + struct ELFSectionHeaderInfo : public elf::ELFSectionHeader { + lldb_private::ConstString section_name; + }; + + typedef std::vector<ELFSectionHeaderInfo> SectionHeaderColl; + typedef SectionHeaderColl::iterator SectionHeaderCollIter; + typedef SectionHeaderColl::const_iterator SectionHeaderCollConstIter; + + typedef std::vector<elf::ELFDynamic> DynamicSymbolColl; + typedef DynamicSymbolColl::iterator DynamicSymbolCollIter; + typedef DynamicSymbolColl::const_iterator DynamicSymbolCollConstIter; + + /// An ordered map of file address to address class. Used on architectures + /// like Arm where there is an alternative ISA mode like Thumb. The container + /// is ordered so that it can be binary searched. + typedef std::map<lldb::addr_t, lldb_private::AddressClass> + FileAddressToAddressClassMap; + + /// Version of this reader common to all plugins based on this class. + static const uint32_t m_plugin_version = 1; + static const uint32_t g_core_uuid_magic; + + /// ELF file header. + elf::ELFHeader m_header; + + /// ELF build ID. + lldb_private::UUID m_uuid; + + /// ELF .gnu_debuglink file and crc data if available. + std::string m_gnu_debuglink_file; + uint32_t m_gnu_debuglink_crc = 0; + + /// Collection of program headers. + ProgramHeaderColl m_program_headers; + + /// Collection of section headers. + SectionHeaderColl m_section_headers; + + /// Collection of symbols from the dynamic table. 
+ DynamicSymbolColl m_dynamic_symbols; + + /// Object file parsed from .gnu_debugdata section (\sa + /// GetGnuDebugDataObjectFile()) + std::shared_ptr<ObjectFileELF> m_gnu_debug_data_object_file; + + /// List of file specifications corresponding to the modules (shared + /// libraries) on which this object file depends. + mutable std::unique_ptr<lldb_private::FileSpecList> m_filespec_up; + + /// Cached value of the entry point for this module. + lldb_private::Address m_entry_point_address; + + /// The architecture detected from parsing elf file contents. + lldb_private::ArchSpec m_arch_spec; + + /// The address class for each symbol in the elf file + FileAddressToAddressClassMap m_address_class_map; + + /// Returns the index of the given section header. + size_t SectionIndex(const SectionHeaderCollIter &I); + + /// Returns the index of the given section header. + size_t SectionIndex(const SectionHeaderCollConstIter &I) const; + + // Parses the ELF program headers. + static size_t GetProgramHeaderInfo(ProgramHeaderColl &program_headers, + lldb_private::DataExtractor &object_data, + const elf::ELFHeader &header); + + // Finds PT_NOTE segments and calculates their crc sum. + static uint32_t + CalculateELFNotesSegmentsCRC32(const ProgramHeaderColl &program_headers, + lldb_private::DataExtractor &data); + + /// Parses all section headers present in this object file and populates + /// m_program_headers. This method will compute the header list only once. + /// Returns true iff the headers have been successfully parsed. + bool ParseProgramHeaders(); + + /// Parses all section headers present in this object file and populates + /// m_section_headers. This method will compute the header list only once. + /// Returns the number of headers parsed. 
+ size_t ParseSectionHeaders(); + + lldb::SectionType GetSectionType(const ELFSectionHeaderInfo &H) const; + + static void ParseARMAttributes(lldb_private::DataExtractor &data, + uint64_t length, + lldb_private::ArchSpec &arch_spec); + + /// Parses the elf section headers and returns the uuid, debug link name, + /// crc, archspec. + static size_t GetSectionHeaderInfo(SectionHeaderColl §ion_headers, + lldb_private::DataExtractor &object_data, + const elf::ELFHeader &header, + lldb_private::UUID &uuid, + std::string &gnu_debuglink_file, + uint32_t &gnu_debuglink_crc, + lldb_private::ArchSpec &arch_spec); + + /// Scans the dynamic section and locates all dependent modules (shared + /// libraries) populating m_filespec_up. This method will compute the + /// dependent module list only once. Returns the number of dependent + /// modules parsed. + size_t ParseDependentModules(); + + /// Parses the dynamic symbol table and populates m_dynamic_symbols. The + /// vector retains the order as found in the object file. Returns the + /// number of dynamic symbols parsed. + size_t ParseDynamicSymbols(); + + /// Populates the symbol table with all non-dynamic linker symbols. This + /// method will parse the symbols only once. Returns the number of symbols + /// parsed and a map of address types (used by targets like Arm that have + /// an alternative ISA mode like Thumb). + std::pair<unsigned, FileAddressToAddressClassMap> + ParseSymbolTable(lldb_private::Symtab *symbol_table, lldb::user_id_t start_id, + lldb_private::Section *symtab); + + /// Helper routine for ParseSymbolTable(). 
+ std::pair<unsigned, FileAddressToAddressClassMap> + ParseSymbols(lldb_private::Symtab *symbol_table, lldb::user_id_t start_id, + lldb_private::SectionList *section_list, + const size_t num_symbols, + const lldb_private::DataExtractor &symtab_data, + const lldb_private::DataExtractor &strtab_data); + + /// Scans the relocation entries and adds a set of artificial symbols to the + /// given symbol table for each PLT slot. Returns the number of symbols + /// added. + unsigned ParseTrampolineSymbols(lldb_private::Symtab *symbol_table, + lldb::user_id_t start_id, + const ELFSectionHeaderInfo *rela_hdr, + lldb::user_id_t section_id); + + void ParseUnwindSymbols(lldb_private::Symtab *symbol_table, + lldb_private::DWARFCallFrameInfo *eh_frame); + + /// Relocates debug sections + unsigned RelocateDebugSections(const elf::ELFSectionHeader *rel_hdr, + lldb::user_id_t rel_id, + lldb_private::Symtab *thetab); + + unsigned ApplyRelocations(lldb_private::Symtab *symtab, + const elf::ELFHeader *hdr, + const elf::ELFSectionHeader *rel_hdr, + const elf::ELFSectionHeader *symtab_hdr, + const elf::ELFSectionHeader *debug_hdr, + lldb_private::DataExtractor &rel_data, + lldb_private::DataExtractor &symtab_data, + lldb_private::DataExtractor &debug_data, + lldb_private::Section *rel_section); + + /// Loads the section name string table into m_shstr_data. Returns the + /// number of bytes constituting the table. + size_t GetSectionHeaderStringTable(); + + /// Utility method for looking up a section given its name. Returns the + /// index of the corresponding section or zero if no section with the given + /// name can be found (note that section indices are always 1 based, and so + /// section index 0 is never valid). + lldb::user_id_t GetSectionIndexByName(const char *name); + + /// Returns the section header with the given id or NULL. 
+ const ELFSectionHeaderInfo *GetSectionHeaderByIndex(lldb::user_id_t id); + + /// \name ELF header dump routines + //@{ + static void DumpELFHeader(lldb_private::Stream *s, + const elf::ELFHeader &header); + + static void DumpELFHeader_e_ident_EI_DATA(lldb_private::Stream *s, + unsigned char ei_data); + + static void DumpELFHeader_e_type(lldb_private::Stream *s, + elf::elf_half e_type); + //@} + + /// \name ELF program header dump routines + //@{ + void DumpELFProgramHeaders(lldb_private::Stream *s); + + static void DumpELFProgramHeader(lldb_private::Stream *s, + const elf::ELFProgramHeader &ph); + + static void DumpELFProgramHeader_p_type(lldb_private::Stream *s, + elf::elf_word p_type); + + static void DumpELFProgramHeader_p_flags(lldb_private::Stream *s, + elf::elf_word p_flags); + //@} + + /// \name ELF section header dump routines + //@{ + void DumpELFSectionHeaders(lldb_private::Stream *s); + + static void DumpELFSectionHeader(lldb_private::Stream *s, + const ELFSectionHeaderInfo &sh); + + static void DumpELFSectionHeader_sh_type(lldb_private::Stream *s, + elf::elf_word sh_type); + + static void DumpELFSectionHeader_sh_flags(lldb_private::Stream *s, + elf::elf_xword sh_flags); + //@} + + /// ELF dependent module dump routine. + void DumpDependentModules(lldb_private::Stream *s); + + const elf::ELFDynamic *FindDynamicSymbol(unsigned tag); + + unsigned PLTRelocationType(); + + static lldb_private::Status + RefineModuleDetailsFromNote(lldb_private::DataExtractor &data, + lldb_private::ArchSpec &arch_spec, + lldb_private::UUID &uuid); + + bool AnySegmentHasPhysicalAddress(); + + /// Takes the .gnu_debugdata and returns the decompressed object file that is + /// stored within that section. + /// + /// \returns either the decompressed object file stored within the + /// .gnu_debugdata section or \c nullptr if an error occured or if there's no + /// section with that name. 
+ std::shared_ptr<ObjectFileELF> GetGnuDebugDataObjectFile(); +}; + +#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp new file mode 100644 index 000000000000..ffbd87714242 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp @@ -0,0 +1,219 @@ +//===-- ObjectFileJSON.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Plugins/ObjectFile/JSON/ObjectFileJSON.h" +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" +#include "lldb/Symbol/Symbol.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "llvm/ADT/DenseSet.h" +#include <optional> + +using namespace llvm; +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE(ObjectFileJSON) + +char ObjectFileJSON::ID; + +void ObjectFileJSON::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + CreateMemoryInstance, GetModuleSpecifications); +} + +void ObjectFileJSON::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +ObjectFile * +ObjectFileJSON::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp, + offset_t data_offset, const FileSpec *file, + offset_t file_offset, offset_t length) { + if (!data_sp) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) + return nullptr; + data_offset = 0; + } + + if (!MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) + 
return nullptr; + + // Update the data to contain the entire file if it doesn't already. + if (data_sp->GetByteSize() < length) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) + return nullptr; + data_offset = 0; + } + + Log *log = GetLog(LLDBLog::Symbols); + + auto text = + llvm::StringRef(reinterpret_cast<const char *>(data_sp->GetBytes())); + + Expected<json::Value> json = json::parse(text); + if (!json) { + LLDB_LOG_ERROR(log, json.takeError(), + "failed to parse JSON object file: {0}"); + return nullptr; + } + + json::Path::Root root; + Header header; + if (!fromJSON(*json, header, root)) { + LLDB_LOG_ERROR(log, root.getError(), + "failed to parse JSON object file header: {0}"); + return nullptr; + } + + ArchSpec arch(header.triple); + UUID uuid; + uuid.SetFromStringRef(header.uuid); + Type type = header.type.value_or(eTypeDebugInfo); + + Body body; + if (!fromJSON(*json, body, root)) { + LLDB_LOG_ERROR(log, root.getError(), + "failed to parse JSON object file body: {0}"); + return nullptr; + } + + return new ObjectFileJSON(module_sp, data_sp, data_offset, file, file_offset, + length, std::move(arch), std::move(uuid), type, + std::move(body.symbols), std::move(body.sections)); +} + +ObjectFile *ObjectFileJSON::CreateMemoryInstance(const ModuleSP &module_sp, + WritableDataBufferSP data_sp, + const ProcessSP &process_sp, + addr_t header_addr) { + return nullptr; +} + +size_t ObjectFileJSON::GetModuleSpecifications( + const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, + offset_t file_offset, offset_t length, ModuleSpecList &specs) { + if (!MagicBytesMatch(data_sp, data_offset, data_sp->GetByteSize())) + return 0; + + // Update the data to contain the entire file if it doesn't already. 
+ if (data_sp->GetByteSize() < length) { + data_sp = MapFileData(file, length, file_offset); + if (!data_sp) + return 0; + data_offset = 0; + } + + Log *log = GetLog(LLDBLog::Symbols); + + auto text = + llvm::StringRef(reinterpret_cast<const char *>(data_sp->GetBytes())); + + Expected<json::Value> json = json::parse(text); + if (!json) { + LLDB_LOG_ERROR(log, json.takeError(), + "failed to parse JSON object file: {0}"); + return 0; + } + + json::Path::Root root; + Header header; + if (!fromJSON(*json, header, root)) { + LLDB_LOG_ERROR(log, root.getError(), + "failed to parse JSON object file header: {0}"); + return 0; + } + + ArchSpec arch(header.triple); + UUID uuid; + uuid.SetFromStringRef(header.uuid); + + ModuleSpec spec(file, std::move(arch)); + spec.GetUUID() = std::move(uuid); + specs.Append(spec); + return 1; +} + +ObjectFileJSON::ObjectFileJSON(const ModuleSP &module_sp, DataBufferSP &data_sp, + offset_t data_offset, const FileSpec *file, + offset_t offset, offset_t length, ArchSpec arch, + UUID uuid, Type type, + std::vector<JSONSymbol> symbols, + std::vector<JSONSection> sections) + : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), + m_arch(std::move(arch)), m_uuid(std::move(uuid)), m_type(type), + m_symbols(std::move(symbols)), m_sections(std::move(sections)) {} + +bool ObjectFileJSON::ParseHeader() { + // We already parsed the header during initialization. 
+ return true; +} + +void ObjectFileJSON::ParseSymtab(Symtab &symtab) { + Log *log = GetLog(LLDBLog::Symbols); + SectionList *section_list = GetModule()->GetSectionList(); + for (JSONSymbol json_symbol : m_symbols) { + llvm::Expected<Symbol> symbol = Symbol::FromJSON(json_symbol, section_list); + if (!symbol) { + LLDB_LOG_ERROR(log, symbol.takeError(), "invalid symbol: {0}"); + continue; + } + symtab.AddSymbol(*symbol); + } + symtab.Finalize(); +} + +void ObjectFileJSON::CreateSections(SectionList &unified_section_list) { + if (m_sections_up) + return; + m_sections_up = std::make_unique<SectionList>(); + + lldb::user_id_t id = 1; + for (const auto §ion : m_sections) { + auto section_sp = std::make_shared<Section>( + GetModule(), this, id++, ConstString(section.name), + section.type.value_or(eSectionTypeCode), 0, section.size.value_or(0), 0, + section.size.value_or(0), /*log2align*/ 0, /*flags*/ 0); + m_sections_up->AddSection(section_sp); + unified_section_list.AddSection(section_sp); + } +} + +bool ObjectFileJSON::MagicBytesMatch(DataBufferSP data_sp, + lldb::addr_t data_offset, + lldb::addr_t data_length) { + DataExtractor data; + data.SetData(data_sp, data_offset, data_length); + lldb::offset_t offset = 0; + uint32_t magic = data.GetU8(&offset); + return magic == '{'; +} + +namespace lldb_private { + +bool fromJSON(const json::Value &value, ObjectFileJSON::Header &header, + json::Path path) { + json::ObjectMapper o(value, path); + return o && o.map("triple", header.triple) && o.map("uuid", header.uuid) && + o.map("type", header.type); +} + +bool fromJSON(const json::Value &value, ObjectFileJSON::Body &body, + json::Path path) { + json::ObjectMapper o(value, path); + return o && o.mapOptional("symbols", body.symbols) && + o.mapOptional("sections", body.sections); +} + +} // namespace lldb_private diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h new 
file mode 100644 index 000000000000..b72565f46886 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h @@ -0,0 +1,125 @@ +//===-- ObjectFileJSON.h -------------------------------------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_JSON_OBJECTFILEJSON_H +#define LLDB_SOURCE_PLUGINS_OBJECTFILE_JSON_OBJECTFILEJSON_H + +#include "lldb/Symbol/ObjectFile.h" +#include "lldb/Utility/ArchSpec.h" +#include "llvm/Support/JSON.h" + +namespace lldb_private { + +class ObjectFileJSON : public ObjectFile { +public: + static void Initialize(); + static void Terminate(); + + static llvm::StringRef GetPluginNameStatic() { return "JSON"; } + + static const char *GetPluginDescriptionStatic() { + return "JSON object file reader."; + } + + static ObjectFile * + CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp, + lldb::offset_t data_offset, const FileSpec *file, + lldb::offset_t file_offset, lldb::offset_t length); + + static ObjectFile *CreateMemoryInstance(const lldb::ModuleSP &module_sp, + lldb::WritableDataBufferSP data_sp, + const lldb::ProcessSP &process_sp, + lldb::addr_t header_addr); + + static size_t GetModuleSpecifications(const FileSpec &file, + lldb::DataBufferSP &data_sp, + lldb::offset_t data_offset, + lldb::offset_t file_offset, + lldb::offset_t length, + ModuleSpecList &specs); + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + // LLVM RTTI support + static char ID; + bool isA(const void *ClassID) const override { + return ClassID == &ID || ObjectFile::isA(ClassID); + } + static bool classof(const ObjectFile *obj) { return obj->isA(&ID); } + + bool ParseHeader() override; 
+ + lldb::ByteOrder GetByteOrder() const override { + return m_arch.GetByteOrder(); + } + + bool IsExecutable() const override { return false; } + + uint32_t GetAddressByteSize() const override { + return m_arch.GetAddressByteSize(); + } + + AddressClass GetAddressClass(lldb::addr_t file_addr) override { + return AddressClass::eInvalid; + } + + void ParseSymtab(lldb_private::Symtab &symtab) override; + + bool IsStripped() override { return false; } + + void CreateSections(SectionList &unified_section_list) override; + + void Dump(Stream *s) override {} + + ArchSpec GetArchitecture() override { return m_arch; } + + UUID GetUUID() override { return m_uuid; } + + uint32_t GetDependentModules(FileSpecList &files) override { return 0; } + + Type CalculateType() override { return m_type; } + + Strata CalculateStrata() override { return eStrataUser; } + + static bool MagicBytesMatch(lldb::DataBufferSP data_sp, lldb::addr_t offset, + lldb::addr_t length); + + struct Header { + std::string triple; + std::string uuid; + std::optional<ObjectFile::Type> type; + }; + + struct Body { + std::vector<JSONSection> sections; + std::vector<JSONSymbol> symbols; + }; + +private: + ArchSpec m_arch; + UUID m_uuid; + ObjectFile::Type m_type; + std::optional<uint64_t> m_size; + std::vector<JSONSymbol> m_symbols; + std::vector<JSONSection> m_sections; + + ObjectFileJSON(const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp, + lldb::offset_t data_offset, const FileSpec *file, + lldb::offset_t offset, lldb::offset_t length, ArchSpec arch, + UUID uuid, Type type, std::vector<JSONSymbol> symbols, + std::vector<JSONSection> sections); +}; + +bool fromJSON(const llvm::json::Value &value, ObjectFileJSON::Header &header, + llvm::json::Path path); + +bool fromJSON(const llvm::json::Value &value, ObjectFileJSON::Body &body, + llvm::json::Path path); + +} // namespace lldb_private +#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_JSON_OBJECTFILEJSON_H diff --git 
a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp new file mode 100644 index 000000000000..de212c6b20da --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp @@ -0,0 +1,1207 @@ +//===-- MinidumpFileBuilder.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MinidumpFileBuilder.h" + +#include "Plugins/Process/minidump/RegisterContextMinidump_ARM64.h" +#include "Plugins/Process/minidump/RegisterContextMinidump_x86_64.h" + +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleList.h" +#include "lldb/Core/Section.h" +#include "lldb/Target/ABI.h" +#include "lldb/Target/MemoryRegionInfo.h" +#include "lldb/Target/Process.h" +#include "lldb/Target/RegisterContext.h" +#include "lldb/Target/StopInfo.h" +#include "lldb/Target/ThreadList.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/DataExtractor.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" +#include "lldb/Utility/RangeMap.h" +#include "lldb/Utility/RegisterValue.h" + +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Minidump.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/TargetParser/Triple.h" + +#include "Plugins/Process/minidump/MinidumpTypes.h" +#include "lldb/lldb-enumerations.h" +#include "lldb/lldb-forward.h" +#include "lldb/lldb-types.h" + +#include <algorithm> +#include <cinttypes> +#include <climits> +#include <cstddef> +#include <cstdint> +#include <functional> +#include <iostream> +#include <set> 
+#include <utility> +#include <vector> + +using namespace lldb; +using namespace lldb_private; +using namespace llvm::minidump; + +Status MinidumpFileBuilder::AddHeaderAndCalculateDirectories() { + // First set the offset on the file, and on the bytes saved + m_saved_data_size = HEADER_SIZE; + // We know we will have at least Misc, SystemInfo, Modules, and ThreadList + // (corresponding memory list for stacks) And an additional memory list for + // non-stacks. + lldb_private::Target &target = m_process_sp->GetTarget(); + m_expected_directories = 6; + // Check if OS is linux and reserve directory space for all linux specific + // breakpad extension directories. + if (target.GetArchitecture().GetTriple().getOS() == + llvm::Triple::OSType::Linux) + m_expected_directories += 9; + + // Go through all of the threads and check for exceptions. + lldb_private::ThreadList thread_list = m_process_sp->GetThreadList(); + const uint32_t num_threads = thread_list.GetSize(); + for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { + ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx)); + StopInfoSP stop_info_sp = thread_sp->GetStopInfo(); + if (stop_info_sp) { + const StopReason &stop_reason = stop_info_sp->GetStopReason(); + if (stop_reason == StopReason::eStopReasonException || + stop_reason == StopReason::eStopReasonSignal) + m_expected_directories++; + } + } + + m_saved_data_size += + m_expected_directories * sizeof(llvm::minidump::Directory); + Status error; + offset_t new_offset = m_core_file->SeekFromStart(m_saved_data_size); + if (new_offset != m_saved_data_size) + error.SetErrorStringWithFormat("Failed to fill in header and directory " + "sections. Written / Expected (%" PRIx64 + " / %" PRIx64 ")", + new_offset, m_saved_data_size); + + return error; +} + +Status MinidumpFileBuilder::AddDirectory(StreamType type, + uint64_t stream_size) { + // We explicitly cast type, an 32b enum, to uint32_t to avoid warnings. 
+ Status error; + if (GetCurrentDataEndOffset() > UINT32_MAX) { + error.SetErrorStringWithFormat("Unable to add directory for stream type " + "%x, offset is greater then 32 bit limit.", + (uint32_t)type); + return error; + } + + if (m_directories.size() + 1 > m_expected_directories) { + error.SetErrorStringWithFormat( + "Unable to add directory for stream type %x, exceeded expected number " + "of directories %zu.", + (uint32_t)type, m_expected_directories); + return error; + } + + LocationDescriptor loc; + loc.DataSize = static_cast<llvm::support::ulittle32_t>(stream_size); + // Stream will begin at the current end of data section + loc.RVA = static_cast<llvm::support::ulittle32_t>(GetCurrentDataEndOffset()); + + Directory dir; + dir.Type = static_cast<llvm::support::little_t<StreamType>>(type); + dir.Location = loc; + + m_directories.push_back(dir); + return error; +} + +Status MinidumpFileBuilder::AddSystemInfo() { + Status error; + const llvm::Triple &target_triple = + m_process_sp->GetTarget().GetArchitecture().GetTriple(); + error = + AddDirectory(StreamType::SystemInfo, sizeof(llvm::minidump::SystemInfo)); + if (error.Fail()) + return error; + + llvm::minidump::ProcessorArchitecture arch; + switch (target_triple.getArch()) { + case llvm::Triple::ArchType::x86_64: + arch = ProcessorArchitecture::AMD64; + break; + case llvm::Triple::ArchType::x86: + arch = ProcessorArchitecture::X86; + break; + case llvm::Triple::ArchType::arm: + arch = ProcessorArchitecture::ARM; + break; + case llvm::Triple::ArchType::aarch64: + arch = ProcessorArchitecture::ARM64; + break; + case llvm::Triple::ArchType::mips64: + case llvm::Triple::ArchType::mips64el: + case llvm::Triple::ArchType::mips: + case llvm::Triple::ArchType::mipsel: + arch = ProcessorArchitecture::MIPS; + break; + case llvm::Triple::ArchType::ppc64: + case llvm::Triple::ArchType::ppc: + case llvm::Triple::ArchType::ppc64le: + arch = ProcessorArchitecture::PPC; + break; + default: + 
error.SetErrorStringWithFormat("Architecture %s not supported.", + target_triple.getArchName().str().c_str()); + return error; + }; + + llvm::support::little_t<OSPlatform> platform_id; + switch (target_triple.getOS()) { + case llvm::Triple::OSType::Linux: + if (target_triple.getEnvironment() == + llvm::Triple::EnvironmentType::Android) + platform_id = OSPlatform::Android; + else + platform_id = OSPlatform::Linux; + break; + case llvm::Triple::OSType::Win32: + platform_id = OSPlatform::Win32NT; + break; + case llvm::Triple::OSType::MacOSX: + platform_id = OSPlatform::MacOSX; + break; + case llvm::Triple::OSType::IOS: + platform_id = OSPlatform::IOS; + break; + default: + error.SetErrorStringWithFormat("OS %s not supported.", + target_triple.getOSName().str().c_str()); + return error; + }; + + llvm::minidump::SystemInfo sys_info; + sys_info.ProcessorArch = + static_cast<llvm::support::little_t<ProcessorArchitecture>>(arch); + // Global offset to beginning of a csd_string in a data section + sys_info.CSDVersionRVA = static_cast<llvm::support::ulittle32_t>( + GetCurrentDataEndOffset() + sizeof(llvm::minidump::SystemInfo)); + sys_info.PlatformId = platform_id; + m_data.AppendData(&sys_info, sizeof(llvm::minidump::SystemInfo)); + + std::string csd_string; + + error = WriteString(csd_string, &m_data); + if (error.Fail()) { + error.SetErrorString("Unable to convert the csd string to UTF16."); + return error; + } + + return error; +} + +Status WriteString(const std::string &to_write, + lldb_private::DataBufferHeap *buffer) { + Status error; + // let the StringRef eat also null termination char + llvm::StringRef to_write_ref(to_write.c_str(), to_write.size() + 1); + llvm::SmallVector<llvm::UTF16, 128> to_write_utf16; + + bool converted = convertUTF8ToUTF16String(to_write_ref, to_write_utf16); + if (!converted) { + error.SetErrorStringWithFormat( + "Unable to convert the string to UTF16. 
Failed to convert %s", + to_write.c_str()); + return error; + } + + // size of the UTF16 string should be written without the null termination + // character that is stored in 2 bytes + llvm::support::ulittle32_t to_write_size(to_write_utf16.size_in_bytes() - 2); + + buffer->AppendData(&to_write_size, sizeof(llvm::support::ulittle32_t)); + buffer->AppendData(to_write_utf16.data(), to_write_utf16.size_in_bytes()); + + return error; +} + +llvm::Expected<uint64_t> getModuleFileSize(Target &target, + const ModuleSP &mod) { + // JIT module has the same vm and file size. + uint64_t SizeOfImage = 0; + if (mod->GetObjectFile()->CalculateType() == ObjectFile::Type::eTypeJIT) { + for (const auto §ion : *mod->GetObjectFile()->GetSectionList()) { + SizeOfImage += section->GetByteSize(); + } + return SizeOfImage; + } + SectionSP sect_sp = mod->GetObjectFile()->GetBaseAddress().GetSection(); + + if (!sect_sp) { + return llvm::createStringError(std::errc::operation_not_supported, + "Couldn't obtain the section information."); + } + lldb::addr_t sect_addr = sect_sp->GetLoadBaseAddress(&target); + // Use memory size since zero fill sections, like ".bss", will be smaller on + // disk. + lldb::addr_t sect_size = sect_sp->GetByteSize(); + // This will usually be zero, but make sure to calculate the BaseOfImage + // offset. 
+ const lldb::addr_t base_sect_offset = + mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target) - + sect_addr; + SizeOfImage = sect_size - base_sect_offset; + lldb::addr_t next_sect_addr = sect_addr + sect_size; + Address sect_so_addr; + target.ResolveLoadAddress(next_sect_addr, sect_so_addr); + lldb::SectionSP next_sect_sp = sect_so_addr.GetSection(); + while (next_sect_sp && + next_sect_sp->GetLoadBaseAddress(&target) == next_sect_addr) { + sect_size = sect_sp->GetByteSize(); + SizeOfImage += sect_size; + next_sect_addr += sect_size; + target.ResolveLoadAddress(next_sect_addr, sect_so_addr); + next_sect_sp = sect_so_addr.GetSection(); + } + + return SizeOfImage; +} + +// ModuleList stream consists of a number of modules, followed by an array +// of llvm::minidump::Module's structures. Every structure informs about a +// single module. Additional data of variable length, such as module's names, +// are stored just after the ModuleList stream. The llvm::minidump::Module +// structures point to this helper data by global offset. +Status MinidumpFileBuilder::AddModuleList() { + constexpr size_t minidump_module_size = sizeof(llvm::minidump::Module); + Status error; + + lldb_private::Target &target = m_process_sp->GetTarget(); + const ModuleList &modules = target.GetImages(); + llvm::support::ulittle32_t modules_count = + static_cast<llvm::support::ulittle32_t>(modules.GetSize()); + + // This helps us with getting the correct global offset in minidump + // file later, when we will be setting up offsets from the + // the llvm::minidump::Module's structures into helper data + size_t size_before = GetCurrentDataEndOffset(); + + // This is the size of the main part of the ModuleList stream. + // It consists of a module number and corresponding number of + // structs describing individual modules + size_t module_stream_size = + sizeof(llvm::support::ulittle32_t) + modules_count * minidump_module_size; + + // Adding directory describing this stream. 
+ error = AddDirectory(StreamType::ModuleList, module_stream_size); + if (error.Fail()) + return error; + + m_data.AppendData(&modules_count, sizeof(llvm::support::ulittle32_t)); + + // Temporary storage for the helper data (of variable length) + // as these cannot be dumped to m_data before dumping entire + // array of module structures. + DataBufferHeap helper_data; + + for (size_t i = 0; i < modules_count; ++i) { + ModuleSP mod = modules.GetModuleAtIndex(i); + std::string module_name = mod->GetSpecificationDescription(); + auto maybe_mod_size = getModuleFileSize(target, mod); + if (!maybe_mod_size) { + llvm::Error mod_size_err = maybe_mod_size.takeError(); + llvm::handleAllErrors(std::move(mod_size_err), + [&](const llvm::ErrorInfoBase &E) { + error.SetErrorStringWithFormat( + "Unable to get the size of module %s: %s.", + module_name.c_str(), E.message().c_str()); + }); + return error; + } + + uint64_t mod_size = std::move(*maybe_mod_size); + + llvm::support::ulittle32_t signature = + static_cast<llvm::support::ulittle32_t>( + static_cast<uint32_t>(minidump::CvSignature::ElfBuildId)); + auto uuid = mod->GetUUID().GetBytes(); + + VSFixedFileInfo info; + info.Signature = static_cast<llvm::support::ulittle32_t>(0u); + info.StructVersion = static_cast<llvm::support::ulittle32_t>(0u); + info.FileVersionHigh = static_cast<llvm::support::ulittle32_t>(0u); + info.FileVersionLow = static_cast<llvm::support::ulittle32_t>(0u); + info.ProductVersionHigh = static_cast<llvm::support::ulittle32_t>(0u); + info.ProductVersionLow = static_cast<llvm::support::ulittle32_t>(0u); + info.FileFlagsMask = static_cast<llvm::support::ulittle32_t>(0u); + info.FileFlags = static_cast<llvm::support::ulittle32_t>(0u); + info.FileOS = static_cast<llvm::support::ulittle32_t>(0u); + info.FileType = static_cast<llvm::support::ulittle32_t>(0u); + info.FileSubtype = static_cast<llvm::support::ulittle32_t>(0u); + info.FileDateHigh = static_cast<llvm::support::ulittle32_t>(0u); + info.FileDateLow = 
static_cast<llvm::support::ulittle32_t>(0u); + + LocationDescriptor ld; + ld.DataSize = static_cast<llvm::support::ulittle32_t>(0u); + ld.RVA = static_cast<llvm::support::ulittle32_t>(0u); + + // Setting up LocationDescriptor for uuid string. The global offset into + // minidump file is calculated. + LocationDescriptor ld_cv; + ld_cv.DataSize = static_cast<llvm::support::ulittle32_t>( + sizeof(llvm::support::ulittle32_t) + uuid.size()); + ld_cv.RVA = static_cast<llvm::support::ulittle32_t>( + size_before + module_stream_size + helper_data.GetByteSize()); + + helper_data.AppendData(&signature, sizeof(llvm::support::ulittle32_t)); + helper_data.AppendData(uuid.begin(), uuid.size()); + + llvm::minidump::Module m; + m.BaseOfImage = static_cast<llvm::support::ulittle64_t>( + mod->GetObjectFile()->GetBaseAddress().GetLoadAddress(&target)); + m.SizeOfImage = static_cast<llvm::support::ulittle32_t>(mod_size); + m.Checksum = static_cast<llvm::support::ulittle32_t>(0); + m.TimeDateStamp = + static_cast<llvm::support::ulittle32_t>(std::time(nullptr)); + m.ModuleNameRVA = static_cast<llvm::support::ulittle32_t>( + size_before + module_stream_size + helper_data.GetByteSize()); + m.VersionInfo = info; + m.CvRecord = ld_cv; + m.MiscRecord = ld; + + error = WriteString(module_name, &helper_data); + + if (error.Fail()) + return error; + + m_data.AppendData(&m, sizeof(llvm::minidump::Module)); + } + + m_data.AppendData(helper_data.GetBytes(), helper_data.GetByteSize()); + return error; +} + +uint16_t read_register_u16_raw(RegisterContext *reg_ctx, + llvm::StringRef reg_name) { + const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(reg_name); + if (!reg_info) + return 0; + lldb_private::RegisterValue reg_value; + bool success = reg_ctx->ReadRegister(reg_info, reg_value); + if (!success) + return 0; + return reg_value.GetAsUInt16(); +} + +uint32_t read_register_u32_raw(RegisterContext *reg_ctx, + llvm::StringRef reg_name) { + const RegisterInfo *reg_info = 
reg_ctx->GetRegisterInfoByName(reg_name); + if (!reg_info) + return 0; + lldb_private::RegisterValue reg_value; + bool success = reg_ctx->ReadRegister(reg_info, reg_value); + if (!success) + return 0; + return reg_value.GetAsUInt32(); +} + +uint64_t read_register_u64_raw(RegisterContext *reg_ctx, + llvm::StringRef reg_name) { + const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(reg_name); + if (!reg_info) + return 0; + lldb_private::RegisterValue reg_value; + bool success = reg_ctx->ReadRegister(reg_info, reg_value); + if (!success) + return 0; + return reg_value.GetAsUInt64(); +} + +llvm::support::ulittle16_t read_register_u16(RegisterContext *reg_ctx, + llvm::StringRef reg_name) { + return static_cast<llvm::support::ulittle16_t>( + read_register_u16_raw(reg_ctx, reg_name)); +} + +llvm::support::ulittle32_t read_register_u32(RegisterContext *reg_ctx, + llvm::StringRef reg_name) { + return static_cast<llvm::support::ulittle32_t>( + read_register_u32_raw(reg_ctx, reg_name)); +} + +llvm::support::ulittle64_t read_register_u64(RegisterContext *reg_ctx, + llvm::StringRef reg_name) { + return static_cast<llvm::support::ulittle64_t>( + read_register_u64_raw(reg_ctx, reg_name)); +} + +void read_register_u128(RegisterContext *reg_ctx, llvm::StringRef reg_name, + uint8_t *dst) { + const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(reg_name); + if (reg_info) { + lldb_private::RegisterValue reg_value; + if (reg_ctx->ReadRegister(reg_info, reg_value)) { + Status error; + uint32_t bytes_copied = reg_value.GetAsMemoryData( + *reg_info, dst, 16, lldb::ByteOrder::eByteOrderLittle, error); + if (bytes_copied == 16) + return; + } + } + // If anything goes wrong, then zero out the register value. 
+ memset(dst, 0, 16); +} + +lldb_private::minidump::MinidumpContext_x86_64 +GetThreadContext_x86_64(RegisterContext *reg_ctx) { + lldb_private::minidump::MinidumpContext_x86_64 thread_context = {}; + thread_context.p1_home = {}; + thread_context.context_flags = static_cast<uint32_t>( + lldb_private::minidump::MinidumpContext_x86_64_Flags::x86_64_Flag | + lldb_private::minidump::MinidumpContext_x86_64_Flags::Control | + lldb_private::minidump::MinidumpContext_x86_64_Flags::Segments | + lldb_private::minidump::MinidumpContext_x86_64_Flags::Integer); + thread_context.rax = read_register_u64(reg_ctx, "rax"); + thread_context.rbx = read_register_u64(reg_ctx, "rbx"); + thread_context.rcx = read_register_u64(reg_ctx, "rcx"); + thread_context.rdx = read_register_u64(reg_ctx, "rdx"); + thread_context.rdi = read_register_u64(reg_ctx, "rdi"); + thread_context.rsi = read_register_u64(reg_ctx, "rsi"); + thread_context.rbp = read_register_u64(reg_ctx, "rbp"); + thread_context.rsp = read_register_u64(reg_ctx, "rsp"); + thread_context.r8 = read_register_u64(reg_ctx, "r8"); + thread_context.r9 = read_register_u64(reg_ctx, "r9"); + thread_context.r10 = read_register_u64(reg_ctx, "r10"); + thread_context.r11 = read_register_u64(reg_ctx, "r11"); + thread_context.r12 = read_register_u64(reg_ctx, "r12"); + thread_context.r13 = read_register_u64(reg_ctx, "r13"); + thread_context.r14 = read_register_u64(reg_ctx, "r14"); + thread_context.r15 = read_register_u64(reg_ctx, "r15"); + thread_context.rip = read_register_u64(reg_ctx, "rip"); + thread_context.eflags = read_register_u32(reg_ctx, "rflags"); + thread_context.cs = read_register_u16(reg_ctx, "cs"); + thread_context.fs = read_register_u16(reg_ctx, "fs"); + thread_context.gs = read_register_u16(reg_ctx, "gs"); + thread_context.ss = read_register_u16(reg_ctx, "ss"); + thread_context.ds = read_register_u16(reg_ctx, "ds"); + return thread_context; +} + +minidump::RegisterContextMinidump_ARM64::Context +GetThreadContext_ARM64(RegisterContext 
*reg_ctx) { + minidump::RegisterContextMinidump_ARM64::Context thread_context = {}; + thread_context.context_flags = static_cast<uint32_t>( + minidump::RegisterContextMinidump_ARM64::Flags::ARM64_Flag | + minidump::RegisterContextMinidump_ARM64::Flags::Integer | + minidump::RegisterContextMinidump_ARM64::Flags::FloatingPoint); + char reg_name[16]; + for (uint32_t i = 0; i < 31; ++i) { + snprintf(reg_name, sizeof(reg_name), "x%u", i); + thread_context.x[i] = read_register_u64(reg_ctx, reg_name); + } + // Work around a bug in debugserver where "sp" on arm64 doesn't have the alt + // name set to "x31" + thread_context.x[31] = read_register_u64(reg_ctx, "sp"); + thread_context.pc = read_register_u64(reg_ctx, "pc"); + thread_context.cpsr = read_register_u32(reg_ctx, "cpsr"); + thread_context.fpsr = read_register_u32(reg_ctx, "fpsr"); + thread_context.fpcr = read_register_u32(reg_ctx, "fpcr"); + for (uint32_t i = 0; i < 32; ++i) { + snprintf(reg_name, sizeof(reg_name), "v%u", i); + read_register_u128(reg_ctx, reg_name, &thread_context.v[i * 16]); + } + return thread_context; +} + +class ArchThreadContexts { + llvm::Triple::ArchType m_arch; + union { + lldb_private::minidump::MinidumpContext_x86_64 x86_64; + lldb_private::minidump::RegisterContextMinidump_ARM64::Context arm64; + }; + +public: + ArchThreadContexts(llvm::Triple::ArchType arch) : m_arch(arch) {} + + bool prepareRegisterContext(RegisterContext *reg_ctx) { + switch (m_arch) { + case llvm::Triple::ArchType::x86_64: + x86_64 = GetThreadContext_x86_64(reg_ctx); + return true; + case llvm::Triple::ArchType::aarch64: + arm64 = GetThreadContext_ARM64(reg_ctx); + return true; + default: + break; + } + return false; + } + + const void *data() const { return &x86_64; } + + size_t size() const { + switch (m_arch) { + case llvm::Triple::ArchType::x86_64: + return sizeof(x86_64); + case llvm::Triple::ArchType::aarch64: + return sizeof(arm64); + default: + break; + } + return 0; + } +}; + +Status 
MinidumpFileBuilder::FixThreadStacks() { + Status error; + // If we have anything in the heap flush it. + FlushBufferToDisk(); + m_core_file->SeekFromStart(m_thread_list_start); + for (auto &pair : m_thread_by_range_end) { + // The thread objects will get a new memory descriptor added + // When we are emitting the memory list and then we write it here + const llvm::minidump::Thread &thread = pair.second; + size_t bytes_to_write = sizeof(llvm::minidump::Thread); + size_t bytes_written = bytes_to_write; + error = m_core_file->Write(&thread, bytes_written); + if (error.Fail() || bytes_to_write != bytes_written) { + error.SetErrorStringWithFormat( + "Wrote incorrect number of bytes to minidump file. (written %zd/%zd)", + bytes_written, bytes_to_write); + return error; + } + } + + return error; +} + +Status MinidumpFileBuilder::AddThreadList() { + constexpr size_t minidump_thread_size = sizeof(llvm::minidump::Thread); + lldb_private::ThreadList thread_list = m_process_sp->GetThreadList(); + + // size of the entire thread stream consists of: + // number of threads and threads array + size_t thread_stream_size = sizeof(llvm::support::ulittle32_t) + + thread_list.GetSize() * minidump_thread_size; + // save for the ability to set up RVA + size_t size_before = GetCurrentDataEndOffset(); + Status error; + error = AddDirectory(StreamType::ThreadList, thread_stream_size); + if (error.Fail()) + return error; + + llvm::support::ulittle32_t thread_count = + static_cast<llvm::support::ulittle32_t>(thread_list.GetSize()); + m_data.AppendData(&thread_count, sizeof(llvm::support::ulittle32_t)); + + // Take the offset after the thread count. 
+ m_thread_list_start = GetCurrentDataEndOffset(); + DataBufferHeap helper_data; + + const uint32_t num_threads = thread_list.GetSize(); + Log *log = GetLog(LLDBLog::Object); + for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { + ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx)); + RegisterContextSP reg_ctx_sp(thread_sp->GetRegisterContext()); + + if (!reg_ctx_sp) { + error.SetErrorString("Unable to get the register context."); + return error; + } + RegisterContext *reg_ctx = reg_ctx_sp.get(); + Target &target = m_process_sp->GetTarget(); + const ArchSpec &arch = target.GetArchitecture(); + ArchThreadContexts thread_context(arch.GetMachine()); + if (!thread_context.prepareRegisterContext(reg_ctx)) { + error.SetErrorStringWithFormat( + "architecture %s not supported.", + arch.GetTriple().getArchName().str().c_str()); + return error; + } + + uint64_t sp = reg_ctx->GetSP(); + MemoryRegionInfo sp_region; + m_process_sp->GetMemoryRegionInfo(sp, sp_region); + + // Emit a blank descriptor + MemoryDescriptor stack; + LocationDescriptor empty_label; + empty_label.DataSize = 0; + empty_label.RVA = 0; + stack.Memory = empty_label; + stack.StartOfMemoryRange = 0; + LocationDescriptor thread_context_memory_locator; + thread_context_memory_locator.DataSize = + static_cast<llvm::support::ulittle32_t>(thread_context.size()); + thread_context_memory_locator.RVA = static_cast<llvm::support::ulittle32_t>( + size_before + thread_stream_size + helper_data.GetByteSize()); + // Cache thie thread context memory so we can reuse for exceptions. 
+ m_tid_to_reg_ctx[thread_sp->GetID()] = thread_context_memory_locator; + + LLDB_LOGF(log, "AddThreadList for thread %d: thread_context %zu bytes", + thread_idx, thread_context.size()); + helper_data.AppendData(thread_context.data(), thread_context.size()); + + llvm::minidump::Thread t; + t.ThreadId = static_cast<llvm::support::ulittle32_t>(thread_sp->GetID()); + t.SuspendCount = static_cast<llvm::support::ulittle32_t>( + (thread_sp->GetState() == StateType::eStateSuspended) ? 1 : 0); + t.PriorityClass = static_cast<llvm::support::ulittle32_t>(0); + t.Priority = static_cast<llvm::support::ulittle32_t>(0); + t.EnvironmentBlock = static_cast<llvm::support::ulittle64_t>(0); + t.Stack = stack, t.Context = thread_context_memory_locator; + + // We save off the stack object so we can circle back and clean it up. + m_thread_by_range_end[sp_region.GetRange().GetRangeEnd()] = t; + m_data.AppendData(&t, sizeof(llvm::minidump::Thread)); + } + + LLDB_LOGF(log, "AddThreadList(): total helper_data %" PRIx64 " bytes", + helper_data.GetByteSize()); + m_data.AppendData(helper_data.GetBytes(), helper_data.GetByteSize()); + return Status(); +} + +Status MinidumpFileBuilder::AddExceptions() { + lldb_private::ThreadList thread_list = m_process_sp->GetThreadList(); + Status error; + const uint32_t num_threads = thread_list.GetSize(); + for (uint32_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) { + ThreadSP thread_sp(thread_list.GetThreadAtIndex(thread_idx)); + StopInfoSP stop_info_sp = thread_sp->GetStopInfo(); + bool add_exception = false; + if (stop_info_sp) { + switch (stop_info_sp->GetStopReason()) { + case eStopReasonSignal: + case eStopReasonException: + add_exception = true; + break; + default: + break; + } + } + if (add_exception) { + constexpr size_t minidump_exception_size = + sizeof(llvm::minidump::ExceptionStream); + error = AddDirectory(StreamType::Exception, minidump_exception_size); + if (error.Fail()) + return error; + + StopInfoSP stop_info_sp = 
thread_sp->GetStopInfo(); + RegisterContextSP reg_ctx_sp(thread_sp->GetRegisterContext()); + Exception exp_record = {}; + exp_record.ExceptionCode = + static_cast<llvm::support::ulittle32_t>(stop_info_sp->GetValue()); + exp_record.ExceptionFlags = static_cast<llvm::support::ulittle32_t>(0); + exp_record.ExceptionRecord = static_cast<llvm::support::ulittle64_t>(0); + exp_record.ExceptionAddress = reg_ctx_sp->GetPC(); + exp_record.NumberParameters = static_cast<llvm::support::ulittle32_t>(0); + exp_record.UnusedAlignment = static_cast<llvm::support::ulittle32_t>(0); + // exp_record.ExceptionInformation; + + ExceptionStream exp_stream; + exp_stream.ThreadId = + static_cast<llvm::support::ulittle32_t>(thread_sp->GetID()); + exp_stream.UnusedAlignment = static_cast<llvm::support::ulittle32_t>(0); + exp_stream.ExceptionRecord = exp_record; + auto Iter = m_tid_to_reg_ctx.find(thread_sp->GetID()); + if (Iter != m_tid_to_reg_ctx.end()) { + exp_stream.ThreadContext = Iter->second; + } else { + exp_stream.ThreadContext.DataSize = 0; + exp_stream.ThreadContext.RVA = 0; + } + m_data.AppendData(&exp_stream, minidump_exception_size); + } + } + + return error; +} + +lldb_private::Status MinidumpFileBuilder::AddMiscInfo() { + Status error; + error = AddDirectory(StreamType::MiscInfo, + sizeof(lldb_private::minidump::MinidumpMiscInfo)); + if (error.Fail()) + return error; + + lldb_private::minidump::MinidumpMiscInfo misc_info; + misc_info.size = static_cast<llvm::support::ulittle32_t>( + sizeof(lldb_private::minidump::MinidumpMiscInfo)); + // Default set flags1 to 0, in case that we will not be able to + // get any information + misc_info.flags1 = static_cast<llvm::support::ulittle32_t>(0); + + lldb_private::ProcessInstanceInfo process_info; + m_process_sp->GetProcessInfo(process_info); + if (process_info.ProcessIDIsValid()) { + // Set flags1 to reflect that PID is filled in + misc_info.flags1 = + static_cast<llvm::support::ulittle32_t>(static_cast<uint32_t>( + 
lldb_private::minidump::MinidumpMiscInfoFlags::ProcessID)); + misc_info.process_id = + static_cast<llvm::support::ulittle32_t>(process_info.GetProcessID()); + } + + m_data.AppendData(&misc_info, + sizeof(lldb_private::minidump::MinidumpMiscInfo)); + return error; +} + +std::unique_ptr<llvm::MemoryBuffer> +getFileStreamHelper(const std::string &path) { + auto maybe_stream = llvm::MemoryBuffer::getFileAsStream(path); + if (!maybe_stream) + return nullptr; + return std::move(maybe_stream.get()); +} + +Status MinidumpFileBuilder::AddLinuxFileStreams() { + Status error; + // No-op if we are not on linux. + if (m_process_sp->GetTarget().GetArchitecture().GetTriple().getOS() != + llvm::Triple::Linux) + return error; + + std::vector<std::pair<StreamType, std::string>> files_with_stream_types = { + {StreamType::LinuxCPUInfo, "/proc/cpuinfo"}, + {StreamType::LinuxLSBRelease, "/etc/lsb-release"}, + }; + + lldb_private::ProcessInstanceInfo process_info; + m_process_sp->GetProcessInfo(process_info); + if (process_info.ProcessIDIsValid()) { + lldb::pid_t pid = process_info.GetProcessID(); + std::string pid_str = std::to_string(pid); + files_with_stream_types.push_back( + {StreamType::LinuxProcStatus, "/proc/" + pid_str + "/status"}); + files_with_stream_types.push_back( + {StreamType::LinuxCMDLine, "/proc/" + pid_str + "/cmdline"}); + files_with_stream_types.push_back( + {StreamType::LinuxEnviron, "/proc/" + pid_str + "/environ"}); + files_with_stream_types.push_back( + {StreamType::LinuxAuxv, "/proc/" + pid_str + "/auxv"}); + files_with_stream_types.push_back( + {StreamType::LinuxMaps, "/proc/" + pid_str + "/maps"}); + files_with_stream_types.push_back( + {StreamType::LinuxProcStat, "/proc/" + pid_str + "/stat"}); + files_with_stream_types.push_back( + {StreamType::LinuxProcFD, "/proc/" + pid_str + "/fd"}); + } + + for (const auto &entry : files_with_stream_types) { + StreamType stream = entry.first; + std::string path = entry.second; + auto memory_buffer = 
getFileStreamHelper(path); + + if (memory_buffer) { + size_t size = memory_buffer->getBufferSize(); + if (size == 0) + continue; + error = AddDirectory(stream, size); + if (error.Fail()) + return error; + m_data.AppendData(memory_buffer->getBufferStart(), size); + } + } + + return error; +} + +Status MinidumpFileBuilder::AddMemoryList(SaveCoreStyle core_style) { + Status error; + + // We first save the thread stacks to ensure they fit in the first UINT32_MAX + // bytes of the core file. Thread structures in minidump files can only use + // 32 bit memory descriptiors, so we emit them first to ensure the memory is + // in accessible with a 32 bit offset. + Process::CoreFileMemoryRanges ranges_32; + Process::CoreFileMemoryRanges ranges_64; + error = m_process_sp->CalculateCoreFileSaveRanges( + SaveCoreStyle::eSaveCoreStackOnly, ranges_32); + if (error.Fail()) + return error; + + // Calculate totalsize including the current offset. + uint64_t total_size = GetCurrentDataEndOffset(); + total_size += ranges_32.size() * sizeof(llvm::minidump::MemoryDescriptor); + std::unordered_set<addr_t> stack_start_addresses; + for (const auto &core_range : ranges_32) { + stack_start_addresses.insert(core_range.range.start()); + total_size += core_range.range.size(); + } + + if (total_size >= UINT32_MAX) { + error.SetErrorStringWithFormat("Unable to write minidump. Stack memory " + "exceeds 32b limit. (Num Stacks %zu)", + ranges_32.size()); + return error; + } + + Process::CoreFileMemoryRanges all_core_memory_ranges; + if (core_style != SaveCoreStyle::eSaveCoreStackOnly) { + error = m_process_sp->CalculateCoreFileSaveRanges(core_style, + all_core_memory_ranges); + if (error.Fail()) + return error; + } + + // After saving the stacks, we start packing as much as we can into 32b. + // We apply a generous padding here so that the Directory, MemoryList and + // Memory64List sections all begin in 32b addressable space. + // Then anything overflow extends into 64b addressable space. 
+ // All core memeroy ranges will either container nothing on stacks only + // or all the memory ranges including stacks + if (!all_core_memory_ranges.empty()) + total_size += + 256 + (all_core_memory_ranges.size() - stack_start_addresses.size()) * + sizeof(llvm::minidump::MemoryDescriptor_64); + + for (const auto &core_range : all_core_memory_ranges) { + const addr_t range_size = core_range.range.size(); + if (stack_start_addresses.count(core_range.range.start()) > 0) + // Don't double save stacks. + continue; + + if (total_size + range_size < UINT32_MAX) { + ranges_32.push_back(core_range); + total_size += range_size; + } else { + ranges_64.push_back(core_range); + } + } + + error = AddMemoryList_32(ranges_32); + if (error.Fail()) + return error; + + // Add the remaining memory as a 64b range. + if (!ranges_64.empty()) { + error = AddMemoryList_64(ranges_64); + if (error.Fail()) + return error; + } + + return FixThreadStacks(); +} + +Status MinidumpFileBuilder::DumpHeader() const { + // write header + llvm::minidump::Header header; + header.Signature = static_cast<llvm::support::ulittle32_t>( + llvm::minidump::Header::MagicSignature); + header.Version = static_cast<llvm::support::ulittle32_t>( + llvm::minidump::Header::MagicVersion); + header.NumberOfStreams = + static_cast<llvm::support::ulittle32_t>(m_directories.size()); + // We write the directories right after the header. 
+ header.StreamDirectoryRVA = + static_cast<llvm::support::ulittle32_t>(HEADER_SIZE); + header.Checksum = static_cast<llvm::support::ulittle32_t>( + 0u), // not used in most of the writers + header.TimeDateStamp = + static_cast<llvm::support::ulittle32_t>(std::time(nullptr)); + header.Flags = + static_cast<llvm::support::ulittle64_t>(0u); // minidump normal flag + + Status error; + size_t bytes_written; + + m_core_file->SeekFromStart(0); + bytes_written = HEADER_SIZE; + error = m_core_file->Write(&header, bytes_written); + if (error.Fail() || bytes_written != HEADER_SIZE) { + if (bytes_written != HEADER_SIZE) + error.SetErrorStringWithFormat( + "Unable to write the minidump header (written %zd/%zd)", + bytes_written, HEADER_SIZE); + return error; + } + return error; +} + +offset_t MinidumpFileBuilder::GetCurrentDataEndOffset() const { + return m_data.GetByteSize() + m_saved_data_size; +} + +Status MinidumpFileBuilder::DumpDirectories() const { + Status error; + size_t bytes_written; + m_core_file->SeekFromStart(HEADER_SIZE); + for (const Directory &dir : m_directories) { + bytes_written = DIRECTORY_SIZE; + error = m_core_file->Write(&dir, bytes_written); + if (error.Fail() || bytes_written != DIRECTORY_SIZE) { + if (bytes_written != DIRECTORY_SIZE) + error.SetErrorStringWithFormat( + "unable to write the directory (written %zd/%zd)", bytes_written, + DIRECTORY_SIZE); + return error; + } + } + + return error; +} + +static uint64_t +GetLargestRangeSize(const Process::CoreFileMemoryRanges &ranges) { + uint64_t max_size = 0; + for (const auto &core_range : ranges) + max_size = std::max(max_size, core_range.range.size()); + return max_size; +} + +Status +MinidumpFileBuilder::AddMemoryList_32(Process::CoreFileMemoryRanges &ranges) { + std::vector<MemoryDescriptor> descriptors; + Status error; + if (ranges.size() == 0) + return error; + + Log *log = GetLog(LLDBLog::Object); + size_t region_index = 0; + auto data_up = + 
std::make_unique<DataBufferHeap>(GetLargestRangeSize(ranges), 0); + for (const auto &core_range : ranges) { + // Take the offset before we write. + const offset_t offset_for_data = GetCurrentDataEndOffset(); + const addr_t addr = core_range.range.start(); + const addr_t size = core_range.range.size(); + const addr_t end = core_range.range.end(); + + LLDB_LOGF(log, + "AddMemoryList %zu/%zu reading memory for region " + "(%" PRIx64 " bytes) [%" PRIx64 ", %" PRIx64 ")", + region_index, ranges.size(), size, addr, addr + size); + ++region_index; + + const size_t bytes_read = + m_process_sp->ReadMemory(addr, data_up->GetBytes(), size, error); + if (error.Fail() || bytes_read == 0) { + LLDB_LOGF(log, "Failed to read memory region. Bytes read: %zu, error: %s", + bytes_read, error.AsCString()); + // Just skip sections with errors or zero bytes in 32b mode + continue; + } else if (bytes_read != size) { + LLDB_LOGF( + log, "Memory region at: %" PRIx64 " failed to read %" PRIx64 " bytes", + addr, size); + } + + MemoryDescriptor descriptor; + descriptor.StartOfMemoryRange = + static_cast<llvm::support::ulittle64_t>(addr); + descriptor.Memory.DataSize = + static_cast<llvm::support::ulittle32_t>(bytes_read); + descriptor.Memory.RVA = + static_cast<llvm::support::ulittle32_t>(offset_for_data); + descriptors.push_back(descriptor); + if (m_thread_by_range_end.count(end) > 0) + m_thread_by_range_end[end].Stack = descriptor; + + // Add the data to the buffer, flush as needed. 
+ error = AddData(data_up->GetBytes(), bytes_read); + if (error.Fail()) + return error; + } + + // Add a directory that references this list + // With a size of the number of ranges as a 32 bit num + // And then the size of all the ranges + error = AddDirectory(StreamType::MemoryList, + sizeof(llvm::support::ulittle32_t) + + descriptors.size() * + sizeof(llvm::minidump::MemoryDescriptor)); + if (error.Fail()) + return error; + + llvm::support::ulittle32_t memory_ranges_num = + static_cast<llvm::support::ulittle32_t>(descriptors.size()); + m_data.AppendData(&memory_ranges_num, sizeof(llvm::support::ulittle32_t)); + // For 32b we can get away with writing off the descriptors after the data. + // This means no cleanup loop needed. + m_data.AppendData(descriptors.data(), + descriptors.size() * sizeof(MemoryDescriptor)); + + return error; +} + +Status +MinidumpFileBuilder::AddMemoryList_64(Process::CoreFileMemoryRanges &ranges) { + Status error; + if (ranges.empty()) + return error; + + error = AddDirectory(StreamType::Memory64List, + (sizeof(llvm::support::ulittle64_t) * 2) + + ranges.size() * + sizeof(llvm::minidump::MemoryDescriptor_64)); + if (error.Fail()) + return error; + + llvm::support::ulittle64_t memory_ranges_num = + static_cast<llvm::support::ulittle64_t>(ranges.size()); + m_data.AppendData(&memory_ranges_num, sizeof(llvm::support::ulittle64_t)); + // Capture the starting offset for all the descriptors so we can clean them up + // if needed. + offset_t starting_offset = + GetCurrentDataEndOffset() + sizeof(llvm::support::ulittle64_t); + // The base_rva needs to start after the directories, which is right after + // this 8 byte variable. 
+ offset_t base_rva = + starting_offset + + (ranges.size() * sizeof(llvm::minidump::MemoryDescriptor_64)); + llvm::support::ulittle64_t memory_ranges_base_rva = + static_cast<llvm::support::ulittle64_t>(base_rva); + m_data.AppendData(&memory_ranges_base_rva, + sizeof(llvm::support::ulittle64_t)); + + bool cleanup_required = false; + std::vector<MemoryDescriptor_64> descriptors; + // Enumerate the ranges and create the memory descriptors so we can append + // them first + for (const auto core_range : ranges) { + // Add the space required to store the memory descriptor + MemoryDescriptor_64 memory_desc; + memory_desc.StartOfMemoryRange = + static_cast<llvm::support::ulittle64_t>(core_range.range.start()); + memory_desc.DataSize = + static_cast<llvm::support::ulittle64_t>(core_range.range.size()); + descriptors.push_back(memory_desc); + // Now write this memory descriptor to the buffer. + m_data.AppendData(&memory_desc, sizeof(MemoryDescriptor_64)); + } + + Log *log = GetLog(LLDBLog::Object); + size_t region_index = 0; + auto data_up = + std::make_unique<DataBufferHeap>(GetLargestRangeSize(ranges), 0); + for (const auto &core_range : ranges) { + const addr_t addr = core_range.range.start(); + const addr_t size = core_range.range.size(); + + LLDB_LOGF(log, + "AddMemoryList_64 %zu/%zu reading memory for region " + "(%" PRIx64 "bytes) " + "[%" PRIx64 ", %" PRIx64 ")", + region_index, ranges.size(), size, addr, addr + size); + ++region_index; + + const size_t bytes_read = + m_process_sp->ReadMemory(addr, data_up->GetBytes(), size, error); + if (error.Fail()) { + LLDB_LOGF(log, "Failed to read memory region. 
Bytes read: %zu, error: %s", + bytes_read, error.AsCString()); + error.Clear(); + cleanup_required = true; + descriptors[region_index].DataSize = 0; + } + if (bytes_read != size) { + LLDB_LOGF( + log, "Memory region at: %" PRIx64 " failed to read %" PRIx64 " bytes", + addr, size); + cleanup_required = true; + descriptors[region_index].DataSize = bytes_read; + } + + // Add the data to the buffer, flush as needed. + error = AddData(data_up->GetBytes(), bytes_read); + if (error.Fail()) + return error; + } + + // Early return if there is no cleanup needed. + if (!cleanup_required) { + return error; + } else { + // Flush to disk so we can make the fixes in place. + FlushBufferToDisk(); + // Fixup the descriptors that were not read correctly. + m_core_file->SeekFromStart(starting_offset); + size_t bytes_written = sizeof(MemoryDescriptor_64) * descriptors.size(); + error = m_core_file->Write(descriptors.data(), bytes_written); + if (error.Fail() || + bytes_written != sizeof(MemoryDescriptor_64) * descriptors.size()) { + error.SetErrorStringWithFormat( + "unable to write the memory descriptors (written %zd/%zd)", + bytes_written, sizeof(MemoryDescriptor_64) * descriptors.size()); + } + + return error; + } +} + +Status MinidumpFileBuilder::AddData(const void *data, uint64_t size) { + // This should also get chunked, because worst case we copy over a big + // object / memory range, say 5gb. In that case, we'd have to allocate 10gb: + // 5gb for the buffer we're copying from, and then 5gb for the buffer we're + // copying to. Which will be short lived and immediately go to disk; the goal + // here is to limit the number of bytes we need to host in memory at any given + // time. + m_data.AppendData(data, size); + if (m_data.GetByteSize() > MAX_WRITE_CHUNK_SIZE) + return FlushBufferToDisk(); + + return Status(); +} + +Status MinidumpFileBuilder::FlushBufferToDisk() { + Status error; + // Set the stream to its end. 
+ m_core_file->SeekFromStart(m_saved_data_size); + addr_t starting_size = m_data.GetByteSize(); + addr_t remaining_bytes = starting_size; + offset_t offset = 0; + + while (remaining_bytes > 0) { + size_t bytes_written = remaining_bytes; + // We don't care how many bytes we wrote unless we got an error + // so just decrement the remaining bytes. + error = m_core_file->Write(m_data.GetBytes() + offset, bytes_written); + if (error.Fail()) { + error.SetErrorStringWithFormat( + "Wrote incorrect number of bytes to minidump file. (written %" PRIx64 + "/%" PRIx64 ")", + starting_size - remaining_bytes, starting_size); + return error; + } + + offset += bytes_written; + remaining_bytes -= bytes_written; + } + + m_saved_data_size += starting_size; + m_data.Clear(); + return error; +} + +Status MinidumpFileBuilder::DumpFile() { + Status error; + // If anything is left unsaved, dump it. + error = FlushBufferToDisk(); + if (error.Fail()) + return error; + + // Overwrite the header which we filled in earlier. + error = DumpHeader(); + if (error.Fail()) + return error; + + // Overwrite the space saved for directories + error = DumpDirectories(); + if (error.Fail()) + return error; + + return error; +} diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h new file mode 100644 index 000000000000..20564e0661f2 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h @@ -0,0 +1,169 @@ +//===-- MinidumpFileBuilder.h ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Structure holding data necessary for minidump file creation. +/// +/// The class MinidumpFileBuilder is used to hold the data that will eventually +/// be dumped to the file. +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_MINIDUMPFILEBUILDER_H +#define LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_MINIDUMPFILEBUILDER_H + +#include <cstddef> +#include <cstdint> +#include <map> +#include <unordered_map> +#include <utility> +#include <variant> + +#include "lldb/Target/Process.h" +#include "lldb/Target/Target.h" +#include "lldb/Utility/DataBufferHeap.h" +#include "lldb/Utility/Status.h" +#include "lldb/lldb-forward.h" +#include "lldb/lldb-types.h" + +#include "llvm/BinaryFormat/Minidump.h" +#include "llvm/Object/Minidump.h" + +// Write std::string to minidump in the UTF16 format(with null termination char) +// with the size(without null termination char) preceding the UTF16 string. +// Empty strings are also printed with zero length and just null termination +// char. +lldb_private::Status WriteString(const std::string &to_write, + lldb_private::DataBufferHeap *buffer); + +/// \class MinidumpFileBuilder +/// Minidump writer for Linux +/// +/// This class provides a Minidump writer that is able to +/// snapshot the current process state. +/// +/// Minidumps are a Microsoft format for dumping process state. +/// This class constructs the minidump on disk starting with the +/// Headers and Directories, which are written at the top of the file, +/// with the amount of bytes being precalculated before any writing takes place. +/// Then the smaller data sections are written: +/// SystemInfo, ModuleList, Misc Info. 
+/// Then Threads are emitted; threads are the first section that needs to be +/// 'fixed up'. This happens later when we emit the memory stream: we identify if +/// that stream is the expected stack, and if so we update the stack with the +/// current RVA. Lastly the Memory lists are added. For Memory List, this will +/// contain everything that can fit within 4.2gb. MemoryList has its +/// descriptors written at the end so it cannot be allowed to overflow. +/// +/// Memory64List is a special case where it has to begin before 4.2gb but can +/// expand forever. The difference in Memory64List is there are no RVAs and all +/// the addresses are figured out by starting at the base RVA, and adding the +/// antecedent memory sections. +/// +/// Because Memory64List can be arbitrarily large, this class has to write +/// chunks to disk; this means we have to precalculate the descriptors and write +/// them first, and if we encounter any error, or are unable to read the same +/// number of bytes we have to go back and update them on disk. +/// +/// And as the last step, after all the directories have been added, we go back +/// to the top of the file to fill in the header and the directory sections +/// that we preallocated. +class MinidumpFileBuilder { +public: + MinidumpFileBuilder(lldb::FileUP &&core_file, + const lldb::ProcessSP &process_sp) + : m_process_sp(process_sp), m_core_file(std::move(core_file)){}; + + MinidumpFileBuilder(const MinidumpFileBuilder &) = delete; + MinidumpFileBuilder &operator=(const MinidumpFileBuilder &) = delete; + + MinidumpFileBuilder(MinidumpFileBuilder &&other) = default; + MinidumpFileBuilder &operator=(MinidumpFileBuilder &&other) = default; + + ~MinidumpFileBuilder() = default; + + // This method only calculates the amount of bytes the header and directories + // will take up. It does not write the directories or headers. This function + // must be called with a followup to fill in the data. 
+ lldb_private::Status AddHeaderAndCalculateDirectories(); + // Add SystemInfo stream, used for storing the most basic information + // about the system, platform etc... + lldb_private::Status AddSystemInfo(); + // Add ModuleList stream, containing information about all loaded modules + // at the time of saving minidump. + lldb_private::Status AddModuleList(); + // Add ThreadList stream, containing information about all threads running + // at the moment of core saving. Contains information about thread + // contexts. + lldb_private::Status AddThreadList(); + // Add Exception streams for any threads that stopped with exceptions. + lldb_private::Status AddExceptions(); + // Add MemoryList stream, containing dumps of important memory segments + lldb_private::Status AddMemoryList(lldb::SaveCoreStyle core_style); + // Add MiscInfo stream, mainly providing ProcessId + lldb_private::Status AddMiscInfo(); + // Add informative files about a Linux process + lldb_private::Status AddLinuxFileStreams(); + + // Run cleanup and write all remaining bytes to file + lldb_private::Status DumpFile(); + +private: + // Add data to the end of the buffer, if the buffer exceeds the flush level, + // trigger a flush. + lldb_private::Status AddData(const void *data, uint64_t size); + // Add MemoryList stream, containing dumps of important memory segments + lldb_private::Status + AddMemoryList_64(lldb_private::Process::CoreFileMemoryRanges &ranges); + lldb_private::Status + AddMemoryList_32(lldb_private::Process::CoreFileMemoryRanges &ranges); + // Update the thread list on disk with the newly emitted stack RVAs. + lldb_private::Status FixThreadStacks(); + lldb_private::Status FlushBufferToDisk(); + + lldb_private::Status DumpHeader() const; + lldb_private::Status DumpDirectories() const; + // Add directory of StreamType pointing to the current end of the prepared + // file with the specified size. 
+ lldb_private::Status AddDirectory(llvm::minidump::StreamType type, + uint64_t stream_size); + lldb::offset_t GetCurrentDataEndOffset() const; + // Stores directories to fill in later + std::vector<llvm::minidump::Directory> m_directories; + // When we write off the threads for the first time, we need to clean them up + // and give them the correct RVA once we write the stack memory list. + // We save by the end because we only take from the stack pointer up, + // so the saved off range base can differ from the memory region the stack + // pointer is in. + std::unordered_map<lldb::addr_t, llvm::minidump::Thread> + m_thread_by_range_end; + // Main data buffer consisting of data without the minidump header and + // directories + lldb_private::DataBufferHeap m_data; + lldb::ProcessSP m_process_sp; + + size_t m_expected_directories = 0; + uint64_t m_saved_data_size = 0; + lldb::offset_t m_thread_list_start = 0; + // We set the max write amount to 128 mb, this is arbitrary + // but we want to try to keep the size of m_data small + // and we will only exceed a 128 mb buffer if we get a memory region + // that is larger than 128 mb. + static constexpr size_t MAX_WRITE_CHUNK_SIZE = (1024 * 1024 * 128); + + static constexpr size_t HEADER_SIZE = sizeof(llvm::minidump::Header); + static constexpr size_t DIRECTORY_SIZE = sizeof(llvm::minidump::Directory); + + // More than one place can mention the register thread context locations, + // so when we emit the thread contents, remember where it is so we don't have + // to duplicate it in the exception data. 
+ std::unordered_map<lldb::tid_t, llvm::minidump::LocationDescriptor> + m_tid_to_reg_ctx; + lldb::FileUP m_core_file; +}; + +#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_MINIDUMPFILEBUILDER_H diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp new file mode 100644 index 000000000000..faa144bfb5f6 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp @@ -0,0 +1,137 @@ +//===-- ObjectFileMinidump.cpp --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ObjectFileMinidump.h" + +#include "MinidumpFileBuilder.h" + +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" +#include "lldb/Target/Process.h" +#include "lldb/Utility/LLDBLog.h" +#include "lldb/Utility/Log.h" + +#include "llvm/Support/FileSystem.h" + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE(ObjectFileMinidump) + +void ObjectFileMinidump::Initialize() { + PluginManager::RegisterPlugin( + GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance, + CreateMemoryInstance, GetModuleSpecifications, SaveCore); +} + +void ObjectFileMinidump::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +ObjectFile *ObjectFileMinidump::CreateInstance( + const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp, + lldb::offset_t data_offset, const lldb_private::FileSpec *file, + lldb::offset_t offset, lldb::offset_t length) { + return nullptr; +} + +ObjectFile *ObjectFileMinidump::CreateMemoryInstance( + const lldb::ModuleSP &module_sp, 
WritableDataBufferSP data_sp, + const ProcessSP &process_sp, lldb::addr_t header_addr) { + return nullptr; +} + +size_t ObjectFileMinidump::GetModuleSpecifications( + const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp, + lldb::offset_t data_offset, lldb::offset_t file_offset, + lldb::offset_t length, lldb_private::ModuleSpecList &specs) { + specs.Clear(); + return 0; +} + +bool ObjectFileMinidump::SaveCore(const lldb::ProcessSP &process_sp, + const lldb_private::SaveCoreOptions &options, + lldb_private::Status &error) { + // Output file and process_sp are both checked in PluginManager::SaveCore. + assert(options.GetOutputFile().has_value()); + assert(process_sp); + + // Minidump defaults to stacks only. + SaveCoreStyle core_style = options.GetStyle(); + if (core_style == SaveCoreStyle::eSaveCoreUnspecified) + core_style = SaveCoreStyle::eSaveCoreStackOnly; + + llvm::Expected<lldb::FileUP> maybe_core_file = FileSystem::Instance().Open( + options.GetOutputFile().value(), + File::eOpenOptionWriteOnly | File::eOpenOptionCanCreate); + if (!maybe_core_file) { + error = maybe_core_file.takeError(); + return false; + } + MinidumpFileBuilder builder(std::move(maybe_core_file.get()), process_sp); + + Log *log = GetLog(LLDBLog::Object); + error = builder.AddHeaderAndCalculateDirectories(); + if (error.Fail()) { + LLDB_LOGF(log, "AddHeaderAndCalculateDirectories failed: %s", + error.AsCString()); + return false; + }; + error = builder.AddSystemInfo(); + if (error.Fail()) { + LLDB_LOGF(log, "AddSystemInfo failed: %s", error.AsCString()); + return false; + } + + error = builder.AddModuleList(); + if (error.Fail()) { + LLDB_LOGF(log, "AddModuleList failed: %s", error.AsCString()); + return false; + } + error = builder.AddMiscInfo(); + if (error.Fail()) { + LLDB_LOGF(log, "AddMiscInfo failed: %s", error.AsCString()); + return false; + } + + error = builder.AddThreadList(); + if (error.Fail()) { + LLDB_LOGF(log, "AddThreadList failed: %s", error.AsCString()); + return 
false; + } + + error = builder.AddLinuxFileStreams(); + if (error.Fail()) { + LLDB_LOGF(log, "AddLinuxFileStreams failed: %s", error.AsCString()); + return false; + } + + // Add any exceptions but only if there are any in any threads. + error = builder.AddExceptions(); + if (error.Fail()) { + LLDB_LOGF(log, "AddExceptions failed: %s", error.AsCString()); + return false; + } + + // Note: add memory HAS to be the last thing we do. It can overflow into 64b + // land and many RVA's only support 32b + error = builder.AddMemoryList(core_style); + if (error.Fail()) { + LLDB_LOGF(log, "AddMemoryList failed: %s", error.AsCString()); + return false; + } + + error = builder.DumpFile(); + if (error.Fail()) { + LLDB_LOGF(log, "DumpFile failed: %s", error.AsCString()); + return false; + } + + return true; +} diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h new file mode 100644 index 000000000000..0cd31a0e482d --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h @@ -0,0 +1,65 @@ +//===-- ObjectFileMinidump.h ---------------------------------- -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Placeholder plugin for the save core functionality. +/// +/// ObjectFileMinidump is created only to be able to save minidump core files +/// from existing processes with the ObjectFileMinidump::SaveCore function. +/// Minidump files are not ObjectFile objects, but they are core files and +/// currently LLDB's ObjectFile plug-ins handle emitting core files. 
If the +/// core file saving ever moves into a new plug-in type within LLDB, this code +/// should move as well, but for now this is the best place architecturally. +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_OBJECTFILEMINIDUMP_H +#define LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_OBJECTFILEMINIDUMP_H + +#include "lldb/Symbol/ObjectFile.h" +#include "lldb/Utility/ArchSpec.h" + +class ObjectFileMinidump : public lldb_private::PluginInterface { +public: + // Static Functions + static void Initialize(); + static void Terminate(); + + static llvm::StringRef GetPluginNameStatic() { return "minidump"; } + static const char *GetPluginDescriptionStatic() { + return "Minidump object file."; + } + + // PluginInterface protocol + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + static lldb_private::ObjectFile * + CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp, + lldb::offset_t data_offset, const lldb_private::FileSpec *file, + lldb::offset_t offset, lldb::offset_t length); + + static lldb_private::ObjectFile *CreateMemoryInstance( + const lldb::ModuleSP &module_sp, lldb::WritableDataBufferSP data_sp, + const lldb::ProcessSP &process_sp, lldb::addr_t header_addr); + + static size_t GetModuleSpecifications(const lldb_private::FileSpec &file, + lldb::DataBufferSP &data_sp, + lldb::offset_t data_offset, + lldb::offset_t file_offset, + lldb::offset_t length, + lldb_private::ModuleSpecList &specs); + + // Saves dump in Minidump file format + static bool SaveCore(const lldb::ProcessSP &process_sp, + const lldb_private::SaveCoreOptions &options, + lldb_private::Status &error); + +private: + ObjectFileMinidump() = default; +}; + +#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_MINIDUMP_OBJECTFILEMINIDUMP_H diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp 
b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp new file mode 100644 index 000000000000..f0832dbf0734 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp @@ -0,0 +1,195 @@ +//===-- ObjectFilePDB.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ObjectFilePDB.h" +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" +#include "lldb/Utility/StreamString.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/PDB.h" +#include "llvm/Support/BinaryByteStream.h" + +using namespace lldb; +using namespace lldb_private; +using namespace llvm::pdb; +using namespace llvm::codeview; + +LLDB_PLUGIN_DEFINE(ObjectFilePDB) + +static UUID GetPDBUUID(InfoStream &IS, DbiStream &DS) { + UUID::CvRecordPdb70 debug_info; + memcpy(&debug_info.Uuid, IS.getGuid().Guid, sizeof(debug_info.Uuid)); + debug_info.Age = DS.getAge(); + return UUID(debug_info); +} + +char ObjectFilePDB::ID; + +void ObjectFilePDB::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + CreateMemoryInstance, GetModuleSpecifications); +} + +void ObjectFilePDB::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +ArchSpec ObjectFilePDB::GetArchitecture() { + auto dbi_stream = m_file_up->getPDBDbiStream(); + if (!dbi_stream) { + 
llvm::consumeError(dbi_stream.takeError()); + return ArchSpec(); + } + + PDB_Machine machine = dbi_stream->getMachineType(); + switch (machine) { + default: + break; + case PDB_Machine::Amd64: + case PDB_Machine::x86: + case PDB_Machine::PowerPC: + case PDB_Machine::PowerPCFP: + case PDB_Machine::Arm: + case PDB_Machine::ArmNT: + case PDB_Machine::Thumb: + case PDB_Machine::Arm64: + ArchSpec arch; + arch.SetArchitecture(eArchTypeCOFF, static_cast<int>(machine), + LLDB_INVALID_CPUTYPE); + return arch; + } + return ArchSpec(); +} + +bool ObjectFilePDB::initPDBFile() { + m_file_up = loadPDBFile(m_file.GetPath(), m_allocator); + if (!m_file_up) + return false; + auto info_stream = m_file_up->getPDBInfoStream(); + if (!info_stream) { + llvm::consumeError(info_stream.takeError()); + return false; + } + auto dbi_stream = m_file_up->getPDBDbiStream(); + if (!dbi_stream) { + llvm::consumeError(dbi_stream.takeError()); + return false; + } + m_uuid = GetPDBUUID(*info_stream, *dbi_stream); + return true; +} + +ObjectFile * +ObjectFilePDB::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp, + offset_t data_offset, const FileSpec *file, + offset_t file_offset, offset_t length) { + auto objfile_up = std::make_unique<ObjectFilePDB>( + module_sp, data_sp, data_offset, file, file_offset, length); + if (!objfile_up->initPDBFile()) + return nullptr; + return objfile_up.release(); +} + +ObjectFile *ObjectFilePDB::CreateMemoryInstance(const ModuleSP &module_sp, + WritableDataBufferSP data_sp, + const ProcessSP &process_sp, + addr_t header_addr) { + return nullptr; +} + +size_t ObjectFilePDB::GetModuleSpecifications( + const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, + offset_t file_offset, offset_t length, ModuleSpecList &specs) { + const size_t initial_count = specs.GetSize(); + ModuleSpec module_spec(file); + llvm::BumpPtrAllocator allocator; + std::unique_ptr<PDBFile> pdb_file = loadPDBFile(file.GetPath(), allocator); + if (!pdb_file) + return 
initial_count; + + auto info_stream = pdb_file->getPDBInfoStream(); + if (!info_stream) { + llvm::consumeError(info_stream.takeError()); + return initial_count; + } + auto dbi_stream = pdb_file->getPDBDbiStream(); + if (!dbi_stream) { + llvm::consumeError(dbi_stream.takeError()); + return initial_count; + } + + lldb_private::UUID &uuid = module_spec.GetUUID(); + uuid = GetPDBUUID(*info_stream, *dbi_stream); + + ArchSpec &module_arch = module_spec.GetArchitecture(); + switch (dbi_stream->getMachineType()) { + case PDB_Machine::Amd64: + module_arch.SetTriple("x86_64-pc-windows"); + specs.Append(module_spec); + break; + case PDB_Machine::x86: + module_arch.SetTriple("i386-pc-windows"); + specs.Append(module_spec); + break; + case PDB_Machine::ArmNT: + module_arch.SetTriple("armv7-pc-windows"); + specs.Append(module_spec); + break; + case PDB_Machine::Arm64: + module_arch.SetTriple("aarch64-pc-windows"); + specs.Append(module_spec); + break; + default: + break; + } + + return specs.GetSize() - initial_count; +} + +ObjectFilePDB::ObjectFilePDB(const ModuleSP &module_sp, DataBufferSP &data_sp, + offset_t data_offset, const FileSpec *file, + offset_t offset, offset_t length) + : ObjectFile(module_sp, file, offset, length, data_sp, data_offset) {} + +std::unique_ptr<PDBFile> +ObjectFilePDB::loadPDBFile(std::string PdbPath, + llvm::BumpPtrAllocator &Allocator) { + llvm::file_magic magic; + auto ec = llvm::identify_magic(PdbPath, magic); + if (ec || magic != llvm::file_magic::pdb) + return nullptr; + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ErrorOrBuffer = + llvm::MemoryBuffer::getFile(PdbPath, /*IsText=*/false, + /*RequiresNullTerminator=*/false); + if (!ErrorOrBuffer) + return nullptr; + std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(*ErrorOrBuffer); + + llvm::StringRef Path = Buffer->getBufferIdentifier(); + auto Stream = std::make_unique<llvm::MemoryBufferByteStream>( + std::move(Buffer), llvm::endianness::little); + + auto File = 
std::make_unique<PDBFile>(Path, std::move(Stream), Allocator);
  if (auto EC = File->parseFileHeaders()) {
    llvm::consumeError(std::move(EC));
    return nullptr;
  }
  if (auto EC = File->parseStreamData()) {
    llvm::consumeError(std::move(EC));
    return nullptr;
  }

  return File;
}
diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h new file mode 100644 index 000000000000..c06e72650e01 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h @@ -0,0 +1,105 @@
//===-- ObjectFilePDB.h --------------------------------------- -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_PDB_OBJECTFILEPDB_H
#define LLDB_SOURCE_PLUGINS_OBJECTFILE_PDB_OBJECTFILEPDB_H

#include "lldb/Symbol/ObjectFile.h"
#include "lldb/Utility/ArchSpec.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"

namespace lldb_private {

/// ObjectFile plugin for PDB files (plugin name "pdb").
///
/// Most of the ObjectFile protocol is answered with fixed values here: the
/// object reports no sections, no symbols and no dependent modules, and its
/// type is eTypeDebugInfo. The parsed PDB is reachable through GetPDBFile().
class ObjectFilePDB : public ObjectFile {
public:
  // Static Functions
  static void Initialize();
  static void Terminate();

  static llvm::StringRef GetPluginNameStatic() { return "pdb"; }
  static const char *GetPluginDescriptionStatic() {
    return "PDB object file reader.";
  }

  /// Opens and parses the PDB at \p PdbPath, using \p Allocator for the
  /// PDBFile. Returns nullptr on failure (for example when parsing the file
  /// headers or the stream data reports an error).
  static std::unique_ptr<llvm::pdb::PDBFile>
  loadPDBFile(std::string PdbPath, llvm::BumpPtrAllocator &Allocator);

  static ObjectFile *
  CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp,
                 lldb::offset_t data_offset, const FileSpec *file,
                 lldb::offset_t file_offset, lldb::offset_t length);

  static ObjectFile *CreateMemoryInstance(const lldb::ModuleSP &module_sp,
                                          lldb::WritableDataBufferSP data_sp,
                                          const lldb::ProcessSP &process_sp,
                                          lldb::addr_t header_addr);

  static size_t GetModuleSpecifications(const FileSpec &file,
                                        lldb::DataBufferSP &data_sp,
                                        lldb::offset_t data_offset,
                                        lldb::offset_t file_offset,
                                        lldb::offset_t length,
                                        ModuleSpecList &specs);

  // PluginInterface protocol
  llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }

  // LLVM RTTI support
  // The address of ID (not its value) identifies this class in isA().
  static char ID;
  bool isA(const void *ClassID) const override {
    return ClassID == &ID || ObjectFile::isA(ClassID);
  }
  static bool classof(const ObjectFile *obj) { return obj->isA(&ID); }

  // ObjectFile Protocol.
  // Always reports 8-byte addresses, independent of GetArchitecture().
  uint32_t GetAddressByteSize() const override { return 8; }

  // Always reports little-endian.
  lldb::ByteOrder GetByteOrder() const override {
    return lldb::eByteOrderLittle;
  }

  // Nothing to do; unconditionally reports success.
  bool ParseHeader() override { return true; }

  bool IsExecutable() const override { return false; }

  // Intentionally empty: this object file contributes no symbols.
  void ParseSymtab(lldb_private::Symtab &symtab) override {}

  bool IsStripped() override { return false; }

  // No section in PDB file.
  void CreateSections(SectionList &unified_section_list) override {}

  // Intentionally empty: nothing is written to the stream.
  void Dump(Stream *s) override {}

  ArchSpec GetArchitecture() override;

  UUID GetUUID() override { return m_uuid; }

  // Reports no dependent modules and leaves \p files untouched.
  uint32_t GetDependentModules(FileSpecList &files) override { return 0; }

  Type CalculateType() override { return eTypeDebugInfo; }

  Strata CalculateStrata() override { return eStrataUser; }

  // Dereferences m_file_up without a null check.
  // NOTE(review): presumably only valid once initPDBFile() has succeeded --
  // confirm against the .cpp.
  llvm::pdb::PDBFile &GetPDBFile() { return *m_file_up; }

  ObjectFilePDB(const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp,
                lldb::offset_t data_offset, const FileSpec *file,
                lldb::offset_t offset, lldb::offset_t length);

private:
  UUID m_uuid;                        // Value returned by GetUUID().
  llvm::BumpPtrAllocator m_allocator; // Allocator owned by this object file.
  std::unique_ptr<llvm::pdb::PDBFile> m_file_up; // Backing store of GetPDBFile().

  bool initPDBFile();
};

} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_PDB_OBJECTFILEPDB_H
diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Placeholder/ObjectFilePlaceholder.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Placeholder/ObjectFilePlaceholder.cpp new file mode 100644 index 000000000000..ec1f3f61892d --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Placeholder/ObjectFilePlaceholder.cpp @@ -0,0 +1,70 @@
//===-- ObjectFilePlaceholder.cpp----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ObjectFilePlaceholder.h" + +#include "lldb/Core/Module.h" +#include "lldb/Core/ModuleSpec.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" +#include "lldb/Target/SectionLoadList.h" +#include "lldb/Target/Target.h" + +#include <memory> + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE(ObjectFilePlaceholder) + +ObjectFilePlaceholder::ObjectFilePlaceholder( + const lldb::ModuleSP &module_sp, + const lldb_private::ModuleSpec &module_spec, lldb::addr_t base, + lldb::addr_t size) + : ObjectFile(module_sp, &module_spec.GetFileSpec(), /*file_offset*/ 0, + /*length*/ 0, /*data_sp*/ nullptr, /*data_offset*/ 0), + m_arch(module_spec.GetArchitecture()), m_uuid(module_spec.GetUUID()), + m_base(base), m_size(size) { + m_symtab_up = std::make_unique<lldb_private::Symtab>(this); +} + +void ObjectFilePlaceholder::CreateSections( + lldb_private::SectionList &unified_section_list) { + m_sections_up = std::make_unique<lldb_private::SectionList>(); + auto section_sp = std::make_shared<lldb_private::Section>( + GetModule(), this, /*sect_id*/ 0, + lldb_private::ConstString(".module_image"), eSectionTypeOther, m_base, + m_size, /*file_offset*/ 0, /*file_size*/ 0, + /*log2align*/ 0, /*flags*/ 0); + section_sp->SetPermissions(ePermissionsReadable | ePermissionsExecutable); + m_sections_up->AddSection(section_sp); + unified_section_list.AddSection(std::move(section_sp)); +} + +lldb_private::Address ObjectFilePlaceholder::GetBaseAddress() { + return lldb_private::Address(m_sections_up->GetSectionAtIndex(0), 0); +} + +bool ObjectFilePlaceholder::SetLoadAddress(Target &target, addr_t value, + bool value_is_offset) { + assert(!value_is_offset); + assert(value == m_base); + + // Create sections if they haven't been created already. 
+ GetModule()->GetSectionList(); + assert(m_sections_up->GetNumSections(0) == 1); + + target.GetSectionLoadList().SetSectionLoadAddress( + m_sections_up->GetSectionAtIndex(0), m_base); + return true; +} + +void ObjectFilePlaceholder::Dump(lldb_private::Stream *s) { + s->Format("Placeholder object file for {0} loaded at [{1:x}-{2:x})\n", + GetFileSpec(), m_base, m_base + m_size); +} diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Placeholder/ObjectFilePlaceholder.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Placeholder/ObjectFilePlaceholder.h new file mode 100644 index 000000000000..8798bcc5259e --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/Placeholder/ObjectFilePlaceholder.h @@ -0,0 +1,74 @@ +//===-- ObjectFilePlaceholder.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_PLACEHOLDER_OBJECTFILEPLACEHOLDER_H +#define LLDB_SOURCE_PLUGINS_OBJECTFILE_PLACEHOLDER_OBJECTFILEPLACEHOLDER_H + +#include "lldb/Symbol/ObjectFile.h" + +#include "lldb/Target/Target.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/FileSpec.h" +#include "lldb/Utility/UUID.h" +#include "lldb/lldb-private.h" + +/// A minimal ObjectFile implementation providing a dummy object file for the +/// cases when the real module binary is not available. This allows the module +/// to show up in "image list" and symbols to be added to it. 
class ObjectFilePlaceholder : public lldb_private::ObjectFile {
public:
  // Static Functions
  // Both are no-ops.
  static void Initialize() {}

  static void Terminate() {}

  static llvm::StringRef GetPluginNameStatic() { return "placeholder"; }

  /// \param module_sp   Module this object file belongs to.
  /// \param module_spec Supplies the file spec, architecture and UUID.
  /// \param base        Value later returned by GetBaseImageAddress().
  /// \param size        Stored alongside \p base as the image size.
  ObjectFilePlaceholder(const lldb::ModuleSP &module_sp,
                        const lldb_private::ModuleSpec &module_spec,
                        lldb::addr_t base, lldb::addr_t size);

  llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
  // Nothing to parse; unconditionally reports success.
  bool ParseHeader() override { return true; }
  Type CalculateType() override { return eTypeUnknown; }
  Strata CalculateStrata() override { return eStrataUnknown; }
  // Reports no dependent modules and leaves \p file_list untouched.
  uint32_t GetDependentModules(lldb_private::FileSpecList &file_list) override {
    return 0;
  }
  bool IsExecutable() const override { return false; }
  lldb_private::ArchSpec GetArchitecture() override { return m_arch; }
  lldb_private::UUID GetUUID() override { return m_uuid; }
  // Intentionally empty: nothing is added to \p symtab here.
  void ParseSymtab(lldb_private::Symtab &symtab) override {}
  bool IsStripped() override { return true; }
  // Byte order and address size are delegated to the stored architecture.
  lldb::ByteOrder GetByteOrder() const override {
    return m_arch.GetByteOrder();
  }

  uint32_t GetAddressByteSize() const override {
    return m_arch.GetAddressByteSize();
  }

  lldb_private::Address GetBaseAddress() override;

  void CreateSections(lldb_private::SectionList &unified_section_list) override;

  bool SetLoadAddress(lldb_private::Target &target, lldb::addr_t value,
                      bool value_is_offset) override;

  void Dump(lldb_private::Stream *s) override;

  /// Returns the base address passed to the constructor.
  lldb::addr_t GetBaseImageAddress() const { return m_base; }

private:
  lldb_private::ArchSpec m_arch; // Value returned by GetArchitecture().
  lldb_private::UUID m_uuid;     // Value returned by GetUUID().
  lldb::addr_t m_base;           // Value returned by GetBaseImageAddress().
  lldb::addr_t m_size;           // Image size given at construction.
};

#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_PLACEHOLDER_OBJECTFILEPLACEHOLDER_H
diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp new file mode 100644 index 000000000000..06eb6ff9cafb --- /dev/null +++
b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -0,0 +1,478 @@
//===-- ObjectFileWasm.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ObjectFileWasm.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/ModuleSpec.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Core/Section.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/SectionLoadList.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/DataBufferHeap.h"
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/Log.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Format.h"
#include <optional>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::wasm;

LLDB_PLUGIN_DEFINE(ObjectFileWasm)

// Size in bytes of the fixed module header: the magic bytes immediately
// followed by the version field. Section decoding starts at this offset.
static const uint32_t kWasmHeaderSize =
    sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);

/// Checks whether the data buffer starts with a valid Wasm module header.
+static bool ValidateModuleHeader(const DataBufferSP &data_sp) { + if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize) + return false; + + if (llvm::identify_magic(toStringRef(data_sp->GetData())) != + llvm::file_magic::wasm_object) + return false; + + const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic); + + uint32_t version = llvm::support::endian::read32le(Ptr); + return version == llvm::wasm::WasmVersion; +} + +static std::optional<ConstString> +GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { + // A Wasm string is encoded as a vector of UTF-8 codes. + // Vectors are encoded with their u32 length followed by the element + // sequence. + uint64_t len = data.getULEB128(c); + if (!c) { + consumeError(c.takeError()); + return std::nullopt; + } + + if (len >= (uint64_t(1) << 32)) { + return std::nullopt; + } + + llvm::SmallVector<uint8_t, 32> str_storage; + data.getU8(c, str_storage, len); + if (!c) { + consumeError(c.takeError()); + return std::nullopt; + } + + llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage)); + return ConstString(str); +} + +char ObjectFileWasm::ID; + +void ObjectFileWasm::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), + GetPluginDescriptionStatic(), CreateInstance, + CreateMemoryInstance, GetModuleSpecifications); +} + +void ObjectFileWasm::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +ObjectFile * +ObjectFileWasm::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp, + offset_t data_offset, const FileSpec *file, + offset_t file_offset, offset_t length) { + Log *log = GetLog(LLDBLog::Object); + + if (!data_sp) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) { + LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s", + file->GetPath().c_str()); + return nullptr; + } + data_offset = 0; + } + + assert(data_sp); + if (!ValidateModuleHeader(data_sp)) { + LLDB_LOGF(log, + "Failed to create 
ObjectFileWasm instance: invalid Wasm header"); + return nullptr; + } + + // Update the data to contain the entire file if it doesn't contain it + // already. + if (data_sp->GetByteSize() < length) { + data_sp = MapFileData(*file, length, file_offset); + if (!data_sp) { + LLDB_LOGF(log, + "Failed to create ObjectFileWasm instance: cannot read file %s", + file->GetPath().c_str()); + return nullptr; + } + data_offset = 0; + } + + std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm( + module_sp, data_sp, data_offset, file, file_offset, length)); + ArchSpec spec = objfile_up->GetArchitecture(); + if (spec && objfile_up->SetModulesArchitecture(spec)) { + LLDB_LOGF(log, + "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s", + static_cast<void *>(objfile_up.get()), + static_cast<void *>(objfile_up->GetModule().get()), + objfile_up->GetModule()->GetSpecificationDescription().c_str(), + file ? file->GetPath().c_str() : "<NULL>"); + return objfile_up.release(); + } + + LLDB_LOGF(log, "Failed to create ObjectFileWasm instance"); + return nullptr; +} + +ObjectFile *ObjectFileWasm::CreateMemoryInstance(const ModuleSP &module_sp, + WritableDataBufferSP data_sp, + const ProcessSP &process_sp, + addr_t header_addr) { + if (!ValidateModuleHeader(data_sp)) + return nullptr; + + std::unique_ptr<ObjectFileWasm> objfile_up( + new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr)); + ArchSpec spec = objfile_up->GetArchitecture(); + if (spec && objfile_up->SetModulesArchitecture(spec)) + return objfile_up.release(); + return nullptr; +} + +bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { + // Buffer sufficient to read a section header and find the pointer to the next + // section. 
+ const uint32_t kBufferSize = 1024; + DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize); + + llvm::DataExtractor data = section_header_data.GetAsLLVM(); + llvm::DataExtractor::Cursor c(0); + + // Each section consists of: + // - a one-byte section id, + // - the u32 size of the contents, in bytes, + // - the actual contents. + uint8_t section_id = data.getU8(c); + uint64_t payload_len = data.getULEB128(c); + if (!c) + return !llvm::errorToBool(c.takeError()); + + if (payload_len >= (uint64_t(1) << 32)) + return false; + + if (section_id == llvm::wasm::WASM_SEC_CUSTOM) { + // Custom sections have the id 0. Their contents consist of a name + // identifying the custom section, followed by an uninterpreted sequence + // of bytes. + lldb::offset_t prev_offset = c.tell(); + std::optional<ConstString> sect_name = GetWasmString(data, c); + if (!sect_name) + return false; + + if (payload_len < c.tell() - prev_offset) + return false; + + uint32_t section_length = payload_len - (c.tell() - prev_offset); + m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length, + section_id, *sect_name}); + *offset_ptr += (c.tell() + section_length); + } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) { + m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), + static_cast<uint32_t>(payload_len), + section_id, ConstString()}); + *offset_ptr += (c.tell() + payload_len); + } else { + // Invalid section id. 
+ return false; + } + return true; +} + +bool ObjectFileWasm::DecodeSections() { + lldb::offset_t offset = kWasmHeaderSize; + if (IsInMemory()) { + offset += m_memory_addr; + } + + while (DecodeNextSection(&offset)) + ; + return true; +} + +size_t ObjectFileWasm::GetModuleSpecifications( + const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset, + offset_t file_offset, offset_t length, ModuleSpecList &specs) { + if (!ValidateModuleHeader(data_sp)) { + return 0; + } + + ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm")); + specs.Append(spec); + return 1; +} + +ObjectFileWasm::ObjectFileWasm(const ModuleSP &module_sp, DataBufferSP data_sp, + offset_t data_offset, const FileSpec *file, + offset_t offset, offset_t length) + : ObjectFile(module_sp, file, offset, length, data_sp, data_offset), + m_arch("wasm32-unknown-unknown-wasm") { + m_data.SetAddressByteSize(4); +} + +ObjectFileWasm::ObjectFileWasm(const lldb::ModuleSP &module_sp, + lldb::WritableDataBufferSP header_data_sp, + const lldb::ProcessSP &process_sp, + lldb::addr_t header_addr) + : ObjectFile(module_sp, process_sp, header_addr, header_data_sp), + m_arch("wasm32-unknown-unknown-wasm") {} + +bool ObjectFileWasm::ParseHeader() { + // We already parsed the header during initialization. 
+ return true; +} + +void ObjectFileWasm::ParseSymtab(Symtab &symtab) {} + +static SectionType GetSectionTypeFromName(llvm::StringRef Name) { + if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) { + return llvm::StringSwitch<SectionType>(Name) + .Case("abbrev", eSectionTypeDWARFDebugAbbrev) + .Case("abbrev.dwo", eSectionTypeDWARFDebugAbbrevDwo) + .Case("addr", eSectionTypeDWARFDebugAddr) + .Case("aranges", eSectionTypeDWARFDebugAranges) + .Case("cu_index", eSectionTypeDWARFDebugCuIndex) + .Case("frame", eSectionTypeDWARFDebugFrame) + .Case("info", eSectionTypeDWARFDebugInfo) + .Case("info.dwo", eSectionTypeDWARFDebugInfoDwo) + .Cases("line", "line.dwo", eSectionTypeDWARFDebugLine) + .Cases("line_str", "line_str.dwo", eSectionTypeDWARFDebugLineStr) + .Case("loc", eSectionTypeDWARFDebugLoc) + .Case("loc.dwo", eSectionTypeDWARFDebugLocDwo) + .Case("loclists", eSectionTypeDWARFDebugLocLists) + .Case("loclists.dwo", eSectionTypeDWARFDebugLocListsDwo) + .Case("macinfo", eSectionTypeDWARFDebugMacInfo) + .Cases("macro", "macro.dwo", eSectionTypeDWARFDebugMacro) + .Case("names", eSectionTypeDWARFDebugNames) + .Case("pubnames", eSectionTypeDWARFDebugPubNames) + .Case("pubtypes", eSectionTypeDWARFDebugPubTypes) + .Case("ranges", eSectionTypeDWARFDebugRanges) + .Case("rnglists", eSectionTypeDWARFDebugRngLists) + .Case("rnglists.dwo", eSectionTypeDWARFDebugRngListsDwo) + .Case("str", eSectionTypeDWARFDebugStr) + .Case("str.dwo", eSectionTypeDWARFDebugStrDwo) + .Case("str_offsets", eSectionTypeDWARFDebugStrOffsets) + .Case("str_offsets.dwo", eSectionTypeDWARFDebugStrOffsetsDwo) + .Case("tu_index", eSectionTypeDWARFDebugTuIndex) + .Case("types", eSectionTypeDWARFDebugTypes) + .Case("types.dwo", eSectionTypeDWARFDebugTypesDwo) + .Default(eSectionTypeOther); + } + return eSectionTypeOther; +} + +void ObjectFileWasm::CreateSections(SectionList &unified_section_list) { + if (m_sections_up) + return; + + m_sections_up = std::make_unique<SectionList>(); + + if 
(m_sect_infos.empty()) { + DecodeSections(); + } + + for (const section_info §_info : m_sect_infos) { + SectionType section_type = eSectionTypeOther; + ConstString section_name; + offset_t file_offset = sect_info.offset & 0xffffffff; + addr_t vm_addr = file_offset; + size_t vm_size = sect_info.size; + + if (llvm::wasm::WASM_SEC_CODE == sect_info.id) { + section_type = eSectionTypeCode; + section_name = ConstString("code"); + + // A code address in DWARF for WebAssembly is the offset of an + // instruction relative within the Code section of the WebAssembly file. + // For this reason Section::GetFileAddress() must return zero for the + // Code section. + vm_addr = 0; + } else { + section_type = GetSectionTypeFromName(sect_info.name.GetStringRef()); + if (section_type == eSectionTypeOther) + continue; + section_name = sect_info.name; + if (!IsInMemory()) { + vm_size = 0; + vm_addr = 0; + } + } + + SectionSP section_sp( + new Section(GetModule(), // Module to which this section belongs. + this, // ObjectFile to which this section belongs and + // should read section data from. + section_type, // Section ID. + section_name, // Section name. + section_type, // Section type. + vm_addr, // VM address. + vm_size, // VM size in bytes of this section. + file_offset, // Offset of this section in the file. + sect_info.size, // Size of the section as found in the file. + 0, // Alignment of the section + 0, // Flags for this section. + 1)); // Number of host bytes per target byte + m_sections_up->AddSection(section_sp); + unified_section_list.AddSection(section_sp); + } +} + +bool ObjectFileWasm::SetLoadAddress(Target &target, lldb::addr_t load_address, + bool value_is_offset) { + /// In WebAssembly, linear memory is disjointed from code space. The VM can + /// load multiple instances of a module, which logically share the same code. 
+ /// We represent a wasm32 code address with 64-bits, like: + /// 63 32 31 0 + /// +---------------+---------------+ + /// + module_id | offset | + /// +---------------+---------------+ + /// where the lower 32 bits represent a module offset (relative to the module + /// start not to the beginning of the code section) and the higher 32 bits + /// uniquely identify the module in the WebAssembly VM. + /// In other words, we assume that each WebAssembly module is loaded by the + /// engine at a 64-bit address that starts at the boundary of 4GB pages, like + /// 0x0000000400000000 for module_id == 4. + /// These 64-bit addresses will be used to request code ranges for a specific + /// module from the WebAssembly engine. + + assert(m_memory_addr == LLDB_INVALID_ADDRESS || + m_memory_addr == load_address); + + ModuleSP module_sp = GetModule(); + if (!module_sp) + return false; + + DecodeSections(); + + size_t num_loaded_sections = 0; + SectionList *section_list = GetSectionList(); + if (!section_list) + return false; + + const size_t num_sections = section_list->GetSize(); + for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { + SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); + if (target.SetSectionLoadAddress( + section_sp, load_address | section_sp->GetFileOffset())) { + ++num_loaded_sections; + } + } + + return num_loaded_sections > 0; +} + +DataExtractor ObjectFileWasm::ReadImageData(offset_t offset, uint32_t size) { + DataExtractor data; + if (m_file) { + if (offset < GetByteSize()) { + size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset); + auto buffer_sp = MapFileData(m_file, size, offset); + return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize()); + } + } else { + ProcessSP process_sp(m_process_wp.lock()); + if (process_sp) { + auto data_up = std::make_unique<DataBufferHeap>(size, 0); + Status readmem_error; + size_t bytes_read = process_sp->ReadMemory( + offset, data_up->GetBytes(), 
data_up->GetByteSize(), readmem_error); + if (bytes_read > 0) { + DataBufferSP buffer_sp(data_up.release()); + data.SetData(buffer_sp, 0, buffer_sp->GetByteSize()); + } + } + } + + data.SetByteOrder(GetByteOrder()); + return data; +} + +std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() { + static ConstString g_sect_name_external_debug_info("external_debug_info"); + + for (const section_info §_info : m_sect_infos) { + if (g_sect_name_external_debug_info == sect_info.name) { + const uint32_t kBufferSize = 1024; + DataExtractor section_header_data = + ReadImageData(sect_info.offset, kBufferSize); + llvm::DataExtractor data = section_header_data.GetAsLLVM(); + llvm::DataExtractor::Cursor c(0); + std::optional<ConstString> symbols_url = GetWasmString(data, c); + if (symbols_url) + return FileSpec(symbols_url->GetStringRef()); + } + } + return std::nullopt; +} + +void ObjectFileWasm::Dump(Stream *s) { + ModuleSP module_sp(GetModule()); + if (!module_sp) + return; + + std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex()); + + llvm::raw_ostream &ostream = s->AsRawOstream(); + ostream << static_cast<void *>(this) << ": "; + s->Indent(); + ostream << "ObjectFileWasm, file = '"; + m_file.Dump(ostream); + ostream << "', arch = "; + ostream << GetArchitecture().GetArchitectureName() << "\n"; + + SectionList *sections = GetSectionList(); + if (sections) { + sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true, + UINT32_MAX); + } + ostream << "\n"; + DumpSectionHeaders(ostream); + ostream << "\n"; +} + +void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream, + const section_info_t &sh) { + ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " " + << llvm::format_hex(sh.offset, 10) << " " + << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6) + << "\n"; +} + +void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) { + ostream << "Section Headers\n"; + ostream << "IDX name addr size 
id\n"; + ostream << "==== ---------------- ---------- ---------- ------\n"; + + uint32_t idx = 0; + for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end(); + ++pos, ++idx) { + ostream << "[" << llvm::format_decimal(idx, 2) << "] "; + ObjectFileWasm::DumpSectionHeader(ostream, *pos); + } +} diff --git a/contrib/llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h new file mode 100644 index 000000000000..531b5f0437a4 --- /dev/null +++ b/contrib/llvm-project/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h @@ -0,0 +1,151 @@ +//===-- ObjectFileWasm.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_WASM_OBJECTFILEWASM_H +#define LLDB_SOURCE_PLUGINS_OBJECTFILE_WASM_OBJECTFILEWASM_H + +#include "lldb/Symbol/ObjectFile.h" +#include "lldb/Utility/ArchSpec.h" +#include <optional> + +namespace lldb_private { +namespace wasm { + +/// Generic Wasm object file reader. +/// +/// This class provides a generic wasm32 reader plugin implementing the +/// ObjectFile protocol. 
class ObjectFileWasm : public ObjectFile {
public:
  static void Initialize();
  static void Terminate();

  static llvm::StringRef GetPluginNameStatic() { return "wasm"; }
  static const char *GetPluginDescriptionStatic() {
    return "WebAssembly object file reader.";
  }

  /// Factory for a file-backed instance; declared here, defined in the .cpp.
  static ObjectFile *
  CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp,
                 lldb::offset_t data_offset, const FileSpec *file,
                 lldb::offset_t file_offset, lldb::offset_t length);

  /// Factory for an instance backed by process memory at \p header_addr.
  static ObjectFile *CreateMemoryInstance(const lldb::ModuleSP &module_sp,
                                          lldb::WritableDataBufferSP data_sp,
                                          const lldb::ProcessSP &process_sp,
                                          lldb::addr_t header_addr);

  static size_t GetModuleSpecifications(const FileSpec &file,
                                        lldb::DataBufferSP &data_sp,
                                        lldb::offset_t data_offset,
                                        lldb::offset_t file_offset,
                                        lldb::offset_t length,
                                        ModuleSpecList &specs);

  /// PluginInterface protocol.
  /// \{
  llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
  /// \}

  /// LLVM RTTI support
  /// \{
  // The address of ID (not its value) identifies this class in isA().
  static char ID;
  bool isA(const void *ClassID) const override {
    return ClassID == &ID || ObjectFile::isA(ClassID);
  }
  static bool classof(const ObjectFile *obj) { return obj->isA(&ID); }
  /// \}

  /// ObjectFile Protocol.
  /// \{
  bool ParseHeader() override;

  // Byte order and address size are delegated to the stored architecture.
  lldb::ByteOrder GetByteOrder() const override {
    return m_arch.GetByteOrder();
  }

  bool IsExecutable() const override { return false; }

  uint32_t GetAddressByteSize() const override {
    return m_arch.GetAddressByteSize();
  }

  // Every address is reported as AddressClass::eInvalid.
  AddressClass GetAddressClass(lldb::addr_t file_addr) override {
    return AddressClass::eInvalid;
  }

  void ParseSymtab(lldb_private::Symtab &symtab) override;

  // A module counts as stripped exactly when it names an external debug-info
  // file (see GetExternalDebugInfoFileSpec()).
  bool IsStripped() override { return !!GetExternalDebugInfoFileSpec(); }

  void CreateSections(SectionList &unified_section_list) override;

  void Dump(Stream *s) override;

  ArchSpec GetArchitecture() override { return m_arch; }

  UUID GetUUID() override { return m_uuid; }

  // Reports no dependent modules and leaves \p files untouched.
  uint32_t GetDependentModules(FileSpecList &files) override { return 0; }

  Type CalculateType() override { return eTypeSharedLibrary; }

  Strata CalculateStrata() override { return eStrataUser; }

  bool SetLoadAddress(lldb_private::Target &target, lldb::addr_t value,
                      bool value_is_offset) override;

  // In-memory modules start at m_memory_addr; file-backed ones at address 0.
  lldb_private::Address GetBaseAddress() override {
    return IsInMemory() ? Address(m_memory_addr) : Address(0);
  }
  /// \}

  /// A Wasm module that has external DWARF debug information should contain a
  /// custom section named "external_debug_info", whose payload is an UTF-8
  /// encoded string that points to a Wasm module that contains the debug
  /// information for this module.
  std::optional<FileSpec> GetExternalDebugInfoFileSpec();

private:
  // Construction goes through CreateInstance() / CreateMemoryInstance().
  ObjectFileWasm(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp,
                 lldb::offset_t data_offset, const FileSpec *file,
                 lldb::offset_t offset, lldb::offset_t length);
  ObjectFileWasm(const lldb::ModuleSP &module_sp,
                 lldb::WritableDataBufferSP header_data_sp,
                 const lldb::ProcessSP &process_sp, lldb::addr_t header_addr);

  /// Wasm section decoding routines.
  /// \{
  bool DecodeNextSection(lldb::offset_t *offset_ptr);
  bool DecodeSections();
  /// \}

  /// Read a range of bytes from the Wasm module.
  DataExtractor ReadImageData(lldb::offset_t offset, uint32_t size);

  /// One decoded section header.
  typedef struct section_info {
    lldb::offset_t offset; // Where the section contents start.
    uint32_t size;         // Size of the section contents, in bytes.
    uint32_t id;           // Wasm section id.
    ConstString name;      // Custom-section name; left empty otherwise.
  } section_info_t;

  /// Wasm section header dump routines.
  /// \{
  void DumpSectionHeader(llvm::raw_ostream &ostream, const section_info_t &sh);
  void DumpSectionHeaders(llvm::raw_ostream &ostream);
  /// \}

  std::vector<section_info_t> m_sect_infos; // Filled by DecodeNextSection().
  ArchSpec m_arch;                          // Value returned by GetArchitecture().
  UUID m_uuid;                              // Value returned by GetUUID().
};

} // namespace wasm
} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_WASM_OBJECTFILEWASM_H |