diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-14 18:58:48 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-08 19:03:59 +0000 |
commit | 753f127f3ace09432b2baeffd71a308760641a62 (patch) | |
tree | 97694ab339c0ca6145ebb429c7505019565b9a60 /contrib/llvm-project/lldb/source/Core/Disassembler.cpp | |
parent | 81ad626541db97eb356e2c1d4a20eb2a26a766ab (diff) | |
parent | 1f917f69ff07f09b6dbb670971f57f8efe718b84 (diff) |
Diffstat (limited to 'contrib/llvm-project/lldb/source/Core/Disassembler.cpp')
-rw-r--r-- | contrib/llvm-project/lldb/source/Core/Disassembler.cpp | 376 |
1 files changed, 371 insertions, 5 deletions
diff --git a/contrib/llvm-project/lldb/source/Core/Disassembler.cpp b/contrib/llvm-project/lldb/source/Core/Disassembler.cpp index 00d92053bc4f..7a9e214748a7 100644 --- a/contrib/llvm-project/lldb/source/Core/Disassembler.cpp +++ b/contrib/llvm-project/lldb/source/Core/Disassembler.cpp @@ -527,8 +527,11 @@ void Disassembler::PrintInstructions(Debugger &debugger, const ArchSpec &arch, } const bool show_bytes = (options & eOptionShowBytes) != 0; - inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc, - &prev_sc, nullptr, address_text_size); + const bool show_control_flow_kind = + (options & eOptionShowControlFlowKind) != 0; + inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, + show_control_flow_kind, &exe_ctx, &sc, &prev_sc, nullptr, + address_text_size); strm.EOL(); } else { break; @@ -568,6 +571,334 @@ Instruction::Instruction(const Address &address, AddressClass addr_class) Instruction::~Instruction() = default; +namespace x86 { + +/// These are the three values deciding instruction control flow kind. +/// InstructionLengthDecode function decodes an instruction and get this struct. +/// +/// primary_opcode +/// Primary opcode of the instruction. +/// For one-byte opcode instruction, it's the first byte after prefix. +/// For two- and three-byte opcodes, it's the second byte. +/// +/// opcode_len +/// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3. +/// +/// modrm +/// ModR/M byte of the instruction. +/// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0] +/// may contain a register or specify an addressing mode, depending on MOD. +struct InstructionOpcodeAndModrm { + uint8_t primary_opcode; + uint8_t opcode_len; + uint8_t modrm; +}; + +/// Determine the InstructionControlFlowKind based on opcode and modrm bytes. +/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and +/// instruction set. +/// +/// \param[in] opcode_and_modrm +/// Contains primary_opcode byte, its length, and ModR/M byte. +/// Refer to the struct InstructionOpcodeAndModrm for details. +/// +/// \return +/// The control flow kind of the instruction or +/// eInstructionControlFlowKindOther if the instruction doesn't affect +/// the control flow of the program. +lldb::InstructionControlFlowKind +MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { + uint8_t opcode = opcode_and_modrm.primary_opcode; + uint8_t opcode_len = opcode_and_modrm.opcode_len; + uint8_t modrm = opcode_and_modrm.modrm; + + if (opcode_len > 2) + return lldb::eInstructionControlFlowKindOther; + + if (opcode >= 0x70 && opcode <= 0x7F) { + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindCondJump; + else + return lldb::eInstructionControlFlowKindOther; + } + + if (opcode >= 0x80 && opcode <= 0x8F) { + if (opcode_len == 2) + return lldb::eInstructionControlFlowKindCondJump; + else + return lldb::eInstructionControlFlowKindOther; + } + + switch (opcode) { + case 0x9A: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarCall; + break; + case 0xFF: + if (opcode_len == 1) { + uint8_t modrm_reg = (modrm >> 3) & 7; + if (modrm_reg == 2) + return lldb::eInstructionControlFlowKindCall; + else if (modrm_reg == 3) + return lldb::eInstructionControlFlowKindFarCall; + else if (modrm_reg == 4) + return lldb::eInstructionControlFlowKindJump; + else if (modrm_reg == 5) + return lldb::eInstructionControlFlowKindFarJump; + } + break; + case 0xE8: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindCall; + break; + case 0xCD: + case 0xCC: + case 0xCE: + case 0xF1: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarCall; + break; + case 0xCF: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarReturn; + break; + case 0xE9: + case 0xEB: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindJump; + break; + case 0xEA: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarJump; + break; + case 0xE3: + case 0xE0: + case 0xE1: + case 0xE2: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindCondJump; + break; + case 0xC3: + case 0xC2: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindReturn; + break; + case 0xCB: + case 0xCA: + if (opcode_len == 1) + return lldb::eInstructionControlFlowKindFarReturn; + break; + case 0x05: + case 0x34: + if (opcode_len == 2) + return lldb::eInstructionControlFlowKindFarCall; + break; + case 0x35: + case 0x07: + if (opcode_len == 2) + return lldb::eInstructionControlFlowKindFarReturn; + break; + case 0x01: + if (opcode_len == 2) { + switch (modrm) { + case 0xc1: + return lldb::eInstructionControlFlowKindFarCall; + case 0xc2: + case 0xc3: + return lldb::eInstructionControlFlowKindFarReturn; + default: + break; + } + } + break; + default: + break; + } + + return lldb::eInstructionControlFlowKindOther; +} + +/// Decode an instruction into opcode, modrm and opcode_len. +/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. +/// Opcodes in x86 are generally the first byte of instruction, though two-byte +/// instructions and prefixes exist. ModR/M is the byte following the opcode +/// and adds additional information for how the instruction is executed. +/// +/// \param[in] inst_bytes +/// Raw bytes of the instruction +/// +/// +/// \param[in] bytes_len +/// The length of the inst_bytes array. +/// +/// \param[in] is_exec_mode_64b +/// If true, the execution mode is 64 bit. +/// +/// \return +/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding +/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition +/// for more details. +/// Otherwise if the given instruction is invalid, returns None. +llvm::Optional<InstructionOpcodeAndModrm> +InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, + bool is_exec_mode_64b) { + int op_idx = 0; + bool prefix_done = false; + InstructionOpcodeAndModrm ret = {0, 0, 0}; + + // In most cases, the primary_opcode is the first byte of the instruction + // but some instructions have a prefix to be skipped for these calculations. + // The following mapping is inspired from libipt's instruction decoding logic + // in `src/pt_ild.c` + while (!prefix_done) { + if (op_idx >= bytes_len) + return llvm::None; + + ret.primary_opcode = inst_bytes[op_idx]; + switch (ret.primary_opcode) { + // prefix_ignore + case 0x26: + case 0x2e: + case 0x36: + case 0x3e: + case 0x64: + case 0x65: + // prefix_osz, prefix_asz + case 0x66: + case 0x67: + // prefix_lock, prefix_f2, prefix_f3 + case 0xf0: + case 0xf2: + case 0xf3: + op_idx++; + break; + + // prefix_rex + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4a: + case 0x4b: + case 0x4c: + case 0x4d: + case 0x4e: + case 0x4f: + if (is_exec_mode_64b) + op_idx++; + else + prefix_done = true; + break; + + // prefix_vex_c4, c5 + case 0xc5: + if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { + prefix_done = true; + break; + } + + ret.opcode_len = 2; + ret.primary_opcode = inst_bytes[op_idx + 2]; + ret.modrm = inst_bytes[op_idx + 3]; + return ret; + + case 0xc4: + if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { + prefix_done = true; + break; + } + ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; + ret.primary_opcode = inst_bytes[op_idx + 3]; + ret.modrm = inst_bytes[op_idx + 4]; + return ret; + + // prefix_evex + case 0x62: + if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { + prefix_done = true; + break; + } + ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; + ret.primary_opcode = inst_bytes[op_idx + 4]; + ret.modrm = inst_bytes[op_idx + 5]; + return ret; + + default: + prefix_done = true; + break; + } + } // prefix done + + ret.primary_opcode = inst_bytes[op_idx]; + ret.modrm = inst_bytes[op_idx + 1]; + ret.opcode_len = 1; + + // If the first opcode is 0F, it's two- or three- byte opcodes. + if (ret.primary_opcode == 0x0F) { + ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte + + if (ret.primary_opcode == 0x38) { + ret.opcode_len = 3; + ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte + ret.modrm = inst_bytes[op_idx + 1]; + } else if (ret.primary_opcode == 0x3A) { + ret.opcode_len = 3; + ret.primary_opcode = inst_bytes[++op_idx]; + ret.modrm = inst_bytes[op_idx + 1]; + } else if ((ret.primary_opcode & 0xf8) == 0x38) { + ret.opcode_len = 0; + ret.primary_opcode = inst_bytes[++op_idx]; + ret.modrm = inst_bytes[op_idx + 1]; + } else if (ret.primary_opcode == 0x0F) { + ret.opcode_len = 3; + // opcode is 0x0F, no needs to update + ret.modrm = inst_bytes[op_idx + 1]; + } else { + ret.opcode_len = 2; + ret.modrm = inst_bytes[op_idx + 1]; + } + } + + return ret; +} + +lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, + Opcode m_opcode) { + llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None; + + if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { + // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes + return lldb::eInstructionControlFlowKindUnknown; + } + + // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. + // These are the three values deciding instruction control flow kind. + ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(), + m_opcode.GetByteSize(), is_exec_mode_64b); + if (!ret) + return lldb::eInstructionControlFlowKindUnknown; + else + return MapOpcodeIntoControlFlowKind(ret.value()); +} + +} // namespace x86 + +lldb::InstructionControlFlowKind +Instruction::GetControlFlowKind(const ArchSpec &arch) { + if (arch.GetTriple().getArch() == llvm::Triple::x86) + return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode); + else if (arch.GetTriple().getArch() == llvm::Triple::x86_64) + return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode); + else + return eInstructionControlFlowKindUnknown; // not implemented +} + AddressClass Instruction::GetAddressClass() { if (m_address_class == AddressClass::eInvalid) m_address_class = m_address.GetAddressClass(); @@ -576,6 +907,7 @@ AddressClass Instruction::GetAddressClass() { void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, bool show_address, bool show_bytes, + bool show_control_flow_kind, const ExecutionContext *exe_ctx, const SymbolContext *sym_ctx, const SymbolContext *prev_sym_ctx, @@ -613,6 +945,38 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, } } + if (show_control_flow_kind) { + switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) { + case eInstructionControlFlowKindUnknown: + ss.Printf("%-12s", "unknown"); + break; + case eInstructionControlFlowKindOther: + ss.Printf("%-12s", "other"); + break; + case eInstructionControlFlowKindCall: + ss.Printf("%-12s", "call"); + break; + case eInstructionControlFlowKindReturn: + ss.Printf("%-12s", "return"); + break; + case eInstructionControlFlowKindJump: + ss.Printf("%-12s", "jump"); + break; + case eInstructionControlFlowKindCondJump: + ss.Printf("%-12s", "cond jump"); + break; + case eInstructionControlFlowKindFarCall: + ss.Printf("%-12s", "far call"); + break; + case eInstructionControlFlowKindFarReturn: + ss.Printf("%-12s", "far return"); + break; + case eInstructionControlFlowKindFarJump: + ss.Printf("%-12s", "far jump"); + break; + } + } + const size_t opcode_pos = ss.GetSizeOfLastLine(); // The default opcode size of 7 characters is plenty for most architectures @@ -957,6 +1321,7 @@ InstructionSP InstructionList::GetInstructionAtAddress(const Address &address) { } void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, + bool show_control_flow_kind, const ExecutionContext *exe_ctx) { const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize(); collection::const_iterator pos, begin, end; @@ -975,8 +1340,9 @@ void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, pos != end; ++pos) { if (pos != begin) s->EOL(); - (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx, - nullptr, nullptr, disassembly_format, 0); + (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, + show_control_flow_kind, exe_ctx, nullptr, nullptr, + disassembly_format, 0); } } @@ -994,7 +1360,7 @@ InstructionList::GetIndexOfNextBranchInstruction(uint32_t start, size_t num_instructions = m_instructions.size(); uint32_t next_branch = UINT32_MAX; - + if (found_calls) *found_calls = false; for (size_t i = start; i < num_instructions; i++) { |