diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-07-27 20:11:54 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-08 19:04:48 +0000 |
commit | 972a253a57b6f144b0e4a3e2080a2a0076ec55a0 (patch) | |
tree | a8aeeb0997a0a52500f1fa0644244206cf71df94 /contrib/llvm-project/lldb/source/Core/Disassembler.cpp | |
parent | fcaf7f8644a9988098ac6be2165bce3ea4786e91 (diff) | |
parent | 08e8dd7b9db7bb4a9de26d44c1cbfd24e869c014 (diff) |
Diffstat (limited to 'contrib/llvm-project/lldb/source/Core/Disassembler.cpp')
-rw-r--r-- | contrib/llvm-project/lldb/source/Core/Disassembler.cpp | 385 |
1 files changed, 28 insertions, 357 deletions
diff --git a/contrib/llvm-project/lldb/source/Core/Disassembler.cpp b/contrib/llvm-project/lldb/source/Core/Disassembler.cpp index 7a9e214748a7..4c57be44dc9c 100644 --- a/contrib/llvm-project/lldb/source/Core/Disassembler.cpp +++ b/contrib/llvm-project/lldb/source/Core/Disassembler.cpp @@ -571,340 +571,36 @@ Instruction::Instruction(const Address &address, AddressClass addr_class) Instruction::~Instruction() = default; -namespace x86 { - -/// These are the three values deciding instruction control flow kind. -/// InstructionLengthDecode function decodes an instruction and get this struct. -/// -/// primary_opcode -/// Primary opcode of the instruction. -/// For one-byte opcode instruction, it's the first byte after prefix. -/// For two- and three-byte opcodes, it's the second byte. -/// -/// opcode_len -/// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3. -/// -/// modrm -/// ModR/M byte of the instruction. -/// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0] -/// may contain a register or specify an addressing mode, depending on MOD. -struct InstructionOpcodeAndModrm { - uint8_t primary_opcode; - uint8_t opcode_len; - uint8_t modrm; -}; - -/// Determine the InstructionControlFlowKind based on opcode and modrm bytes. -/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and -/// instruction set. -/// -/// \param[in] opcode_and_modrm -/// Contains primary_opcode byte, its length, and ModR/M byte. -/// Refer to the struct InstructionOpcodeAndModrm for details. -/// -/// \return -/// The control flow kind of the instruction or -/// eInstructionControlFlowKindOther if the instruction doesn't affect -/// the control flow of the program. -lldb::InstructionControlFlowKind -MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) { - uint8_t opcode = opcode_and_modrm.primary_opcode; - uint8_t opcode_len = opcode_and_modrm.opcode_len; - uint8_t modrm = opcode_and_modrm.modrm; - - if (opcode_len > 2) - return lldb::eInstructionControlFlowKindOther; - - if (opcode >= 0x70 && opcode <= 0x7F) { - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindCondJump; - else - return lldb::eInstructionControlFlowKindOther; - } - - if (opcode >= 0x80 && opcode <= 0x8F) { - if (opcode_len == 2) - return lldb::eInstructionControlFlowKindCondJump; - else - return lldb::eInstructionControlFlowKindOther; - } - - switch (opcode) { - case 0x9A: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindFarCall; - break; - case 0xFF: - if (opcode_len == 1) { - uint8_t modrm_reg = (modrm >> 3) & 7; - if (modrm_reg == 2) - return lldb::eInstructionControlFlowKindCall; - else if (modrm_reg == 3) - return lldb::eInstructionControlFlowKindFarCall; - else if (modrm_reg == 4) - return lldb::eInstructionControlFlowKindJump; - else if (modrm_reg == 5) - return lldb::eInstructionControlFlowKindFarJump; - } - break; - case 0xE8: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindCall; - break; - case 0xCD: - case 0xCC: - case 0xCE: - case 0xF1: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindFarCall; - break; - case 0xCF: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindFarReturn; - break; - case 0xE9: - case 0xEB: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindJump; - break; - case 0xEA: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindFarJump; - break; - case 0xE3: - case 0xE0: - case 0xE1: - case 0xE2: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindCondJump; - break; - case 0xC3: - case 0xC2: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindReturn; - break; - case 0xCB: - case 0xCA: - if (opcode_len == 1) - return lldb::eInstructionControlFlowKindFarReturn; - break; - case 0x05: - case 0x34: - if (opcode_len == 2) - return lldb::eInstructionControlFlowKindFarCall; - break; - case 0x35: - case 0x07: - if (opcode_len == 2) - return lldb::eInstructionControlFlowKindFarReturn; - break; - case 0x01: - if (opcode_len == 2) { - switch (modrm) { - case 0xc1: - return lldb::eInstructionControlFlowKindFarCall; - case 0xc2: - case 0xc3: - return lldb::eInstructionControlFlowKindFarReturn; - default: - break; - } - } - break; - default: - break; - } - - return lldb::eInstructionControlFlowKindOther; -} - -/// Decode an instruction into opcode, modrm and opcode_len. -/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout. -/// Opcodes in x86 are generally the first byte of instruction, though two-byte -/// instructions and prefixes exist. ModR/M is the byte following the opcode -/// and adds additional information for how the instruction is executed. -/// -/// \param[in] inst_bytes -/// Raw bytes of the instruction -/// -/// -/// \param[in] bytes_len -/// The length of the inst_bytes array. -/// -/// \param[in] is_exec_mode_64b -/// If true, the execution mode is 64 bit. -/// -/// \return -/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding -/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition -/// for more details. -/// Otherwise if the given instruction is invalid, returns None. -llvm::Optional<InstructionOpcodeAndModrm> -InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, - bool is_exec_mode_64b) { - int op_idx = 0; - bool prefix_done = false; - InstructionOpcodeAndModrm ret = {0, 0, 0}; - - // In most cases, the primary_opcode is the first byte of the instruction - // but some instructions have a prefix to be skipped for these calculations. - // The following mapping is inspired from libipt's instruction decoding logic - // in `src/pt_ild.c` - while (!prefix_done) { - if (op_idx >= bytes_len) - return llvm::None; - - ret.primary_opcode = inst_bytes[op_idx]; - switch (ret.primary_opcode) { - // prefix_ignore - case 0x26: - case 0x2e: - case 0x36: - case 0x3e: - case 0x64: - case 0x65: - // prefix_osz, prefix_asz - case 0x66: - case 0x67: - // prefix_lock, prefix_f2, prefix_f3 - case 0xf0: - case 0xf2: - case 0xf3: - op_idx++; - break; - - // prefix_rex - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4a: - case 0x4b: - case 0x4c: - case 0x4d: - case 0x4e: - case 0x4f: - if (is_exec_mode_64b) - op_idx++; - else - prefix_done = true; - break; - - // prefix_vex_c4, c5 - case 0xc5: - if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { - prefix_done = true; - break; - } - - ret.opcode_len = 2; - ret.primary_opcode = inst_bytes[op_idx + 2]; - ret.modrm = inst_bytes[op_idx + 3]; - return ret; - - case 0xc4: - if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { - prefix_done = true; - break; - } - ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f; - ret.primary_opcode = inst_bytes[op_idx + 3]; - ret.modrm = inst_bytes[op_idx + 4]; - return ret; - - // prefix_evex - case 0x62: - if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) { - prefix_done = true; - break; - } - ret.opcode_len = inst_bytes[op_idx + 1] & 0x03; - ret.primary_opcode = inst_bytes[op_idx + 4]; - ret.modrm = inst_bytes[op_idx + 5]; - return ret; - - default: - prefix_done = true; - break; - } - } // prefix done - - ret.primary_opcode = inst_bytes[op_idx]; - ret.modrm = inst_bytes[op_idx + 1]; - ret.opcode_len = 1; - - // If the first opcode is 0F, it's two- or three- byte opcodes. - if (ret.primary_opcode == 0x0F) { - ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte - - if (ret.primary_opcode == 0x38) { - ret.opcode_len = 3; - ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte - ret.modrm = inst_bytes[op_idx + 1]; - } else if (ret.primary_opcode == 0x3A) { - ret.opcode_len = 3; - ret.primary_opcode = inst_bytes[++op_idx]; - ret.modrm = inst_bytes[op_idx + 1]; - } else if ((ret.primary_opcode & 0xf8) == 0x38) { - ret.opcode_len = 0; - ret.primary_opcode = inst_bytes[++op_idx]; - ret.modrm = inst_bytes[op_idx + 1]; - } else if (ret.primary_opcode == 0x0F) { - ret.opcode_len = 3; - // opcode is 0x0F, no needs to update - ret.modrm = inst_bytes[op_idx + 1]; - } else { - ret.opcode_len = 2; - ret.modrm = inst_bytes[op_idx + 1]; - } - } - - return ret; -} - -lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, - Opcode m_opcode) { - llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None; - - if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) { - // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes - return lldb::eInstructionControlFlowKindUnknown; - } - - // Opcode bytes will be decoded into primary_opcode, modrm and opcode length. - // These are the three values deciding instruction control flow kind. - ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(), - m_opcode.GetByteSize(), is_exec_mode_64b); - if (!ret) - return lldb::eInstructionControlFlowKindUnknown; - else - return MapOpcodeIntoControlFlowKind(ret.value()); -} - -} // namespace x86 - -lldb::InstructionControlFlowKind -Instruction::GetControlFlowKind(const ArchSpec &arch) { - if (arch.GetTriple().getArch() == llvm::Triple::x86) - return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode); - else if (arch.GetTriple().getArch() == llvm::Triple::x86_64) - return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode); - else - return eInstructionControlFlowKindUnknown; // not implemented -} - AddressClass Instruction::GetAddressClass() { if (m_address_class == AddressClass::eInvalid) m_address_class = m_address.GetAddressClass(); return m_address_class; } +const char *Instruction::GetNameForInstructionControlFlowKind( + lldb::InstructionControlFlowKind instruction_control_flow_kind) { + switch (instruction_control_flow_kind) { + case eInstructionControlFlowKindUnknown: + return "unknown"; + case eInstructionControlFlowKindOther: + return "other"; + case eInstructionControlFlowKindCall: + return "call"; + case eInstructionControlFlowKindReturn: + return "return"; + case eInstructionControlFlowKindJump: + return "jump"; + case eInstructionControlFlowKindCondJump: + return "cond jump"; + case eInstructionControlFlowKindFarCall: + return "far call"; + case eInstructionControlFlowKindFarReturn: + return "far return"; + case eInstructionControlFlowKindFarJump: + return "far jump"; + } +} + void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, bool show_address, bool show_bytes, bool show_control_flow_kind, @@ -946,35 +642,10 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size, } if (show_control_flow_kind) { - switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) { - case eInstructionControlFlowKindUnknown: - ss.Printf("%-12s", "unknown"); - break; - case eInstructionControlFlowKindOther: - ss.Printf("%-12s", "other"); - break; - case eInstructionControlFlowKindCall: - ss.Printf("%-12s", "call"); - break; - case eInstructionControlFlowKindReturn: - ss.Printf("%-12s", "return"); - break; - case eInstructionControlFlowKindJump: - ss.Printf("%-12s", "jump"); - break; - case eInstructionControlFlowKindCondJump: - ss.Printf("%-12s", "cond jump"); - break; - case eInstructionControlFlowKindFarCall: - ss.Printf("%-12s", "far call"); - break; - case eInstructionControlFlowKindFarReturn: - ss.Printf("%-12s", "far return"); - break; - case eInstructionControlFlowKindFarJump: - ss.Printf("%-12s", "far jump"); - break; - } + lldb::InstructionControlFlowKind instruction_control_flow_kind = + GetControlFlowKind(exe_ctx); + ss.Printf("%-12s", GetNameForInstructionControlFlowKind( + instruction_control_flow_kind)); } const size_t opcode_pos = ss.GetSizeOfLastLine(); |