diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:11:55 +0000 |
commit | 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch) | |
tree | 1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | |
parent | 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff) | |
parent | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 154 |
1 files changed, 110 insertions, 44 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 49651da63ecf..59e2008f5632 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -150,12 +150,18 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext, dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; + case MAP4: + dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; case MAP5: dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; case MAP6: dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; + case MAP7: + dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; } switch (dec->modrm_type) { @@ -194,7 +200,7 @@ template <typename T> static bool consume(InternalInstruction *insn, T &ptr) { uint64_t offset = insn->readerCursor - insn->startLocation; if (offset + sizeof(T) > r.size()) return true; - ptr = support::endian::read<T>(&r[offset], support::little); + ptr = support::endian::read<T>(&r[offset], llvm::endianness::little); insn->readerCursor += sizeof(T); return false; } @@ -203,6 +209,10 @@ static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f; } +static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) { + return insn->mode == MODE_64BIT && prefix == 0xd5; +} + // Consumes all of an instruction's prefix bytes, and marks the // instruction as having them. Also sets the instruction's default operand, // address, and other relevant data sizes to report operands correctly. @@ -334,8 +344,7 @@ static int readPrefixes(struct InternalInstruction *insn) { return -1; } - if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && - ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) { + if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) { insn->vectorExtensionType = TYPE_EVEX; } else { --insn->readerCursor; // unconsume byte1 @@ -354,13 +363,19 @@ static int readPrefixes(struct InternalInstruction *insn) { return -1; } - // We simulate the REX prefix for simplicity's sake if (insn->mode == MODE_64BIT) { + // We simulate the REX prefix for simplicity's sake insn->rexPrefix = 0x40 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); + + // We simulate the REX2 prefix for simplicity's sake + insn->rex2ExtensionPrefix[1] = + (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) | + (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) | + (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4); } LLVM_DEBUG( @@ -471,6 +486,23 @@ static int readPrefixes(struct InternalInstruction *insn) { insn->vectorExtensionPrefix[1], insn->vectorExtensionPrefix[2])); } + } else if (isREX2(insn, byte)) { + uint8_t byte1; + if (peek(insn, byte1)) { + LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2"); + return -1; + } + insn->rex2ExtensionPrefix[0] = byte; + consume(insn, insn->rex2ExtensionPrefix[1]); + + // We simulate the REX prefix for simplicity's sake + insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) | + (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) | + (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) | + (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0); + LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx", + insn->rex2ExtensionPrefix[0], + insn->rex2ExtensionPrefix[1])); } else if (isREX(insn, byte)) { if (peek(insn, nextByte)) return -1; @@ -529,7 +561,8 @@ static int readSIB(struct InternalInstruction *insn) { if (consume(insn, insn->sib)) return -1; - index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); + index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) | + (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4); if (index == 0x4) { insn->sibIndex = SIB_INDEX_NONE; @@ -539,7 +572,8 @@ static int readSIB(struct InternalInstruction *insn) { insn->sibScale = 1 << scaleFromSIB(insn->sib); - base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); + base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) | + (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4); switch (base) { case 0x5: @@ -601,7 +635,7 @@ static int readDisplacement(struct InternalInstruction *insn) { // Consumes all addressing information (ModR/M byte, SIB byte, and displacement. static int readModRM(struct InternalInstruction *insn) { - uint8_t mod, rm, reg, evexrm; + uint8_t mod, rm, reg; LLVM_DEBUG(dbgs() << "readModRM()"); if (insn->consumedModRM) @@ -633,14 +667,13 @@ static int readModRM(struct InternalInstruction *insn) { break; } - reg |= rFromREX(insn->rexPrefix) << 3; - rm |= bFromREX(insn->rexPrefix) << 3; + reg |= (rFromREX(insn->rexPrefix) << 3) | + (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4); + rm |= (bFromREX(insn->rexPrefix) << 3) | + (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4); - evexrm = 0; - if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) { + if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; - evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; - } insn->reg = (Reg)(insn->regBase + reg); @@ -728,7 +761,7 @@ static int readModRM(struct InternalInstruction *insn) { break; case 0x3: insn->eaDisplacement = EA_DISP_NONE; - insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm); + insn->eaBase = (EABase)(insn->eaRegBase + rm); break; } break; @@ -738,7 +771,7 @@ static int readModRM(struct InternalInstruction *insn) { return 0; } -#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \ +#define GENERIC_FIXUP_FUNC(name, base, prefix) \ static uint16_t name(struct InternalInstruction *insn, OperandType type, \ uint8_t index, uint8_t *valid) { \ *valid = 1; \ @@ -750,28 +783,15 @@ static int readModRM(struct InternalInstruction *insn) { case TYPE_Rv: \ return base + index; \ case TYPE_R8: \ - index &= mask; \ - if (index > 0xf) \ - *valid = 0; \ - if (insn->rexPrefix && index >= 4 && index <= 7) { \ + if (insn->rexPrefix && index >= 4 && index <= 7) \ return prefix##_SPL + (index - 4); \ - } else { \ + else \ return prefix##_AL + index; \ - } \ case TYPE_R16: \ - index &= mask; \ - if (index > 0xf) \ - *valid = 0; \ return prefix##_AX + index; \ case TYPE_R32: \ - index &= mask; \ - if (index > 0xf) \ - *valid = 0; \ return prefix##_EAX + index; \ case TYPE_R64: \ - index &= mask; \ - if (index > 0xf) \ - *valid = 0; \ return prefix##_RAX + index; \ case TYPE_ZMM: \ return prefix##_ZMM0 + index; \ @@ -821,8 +841,8 @@ static int readModRM(struct InternalInstruction *insn) { // @param valid - The address of a uint8_t. The target is set to 1 if the // field is valid for the register class; 0 if not. // @return - The proper value. -GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f) -GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf) +GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) +GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) // Consult an operand specifier to determine which of the fixup*Value functions // to use in correcting readModRM()'ss interpretation. @@ -852,8 +872,31 @@ static int fixupReg(struct InternalInstruction *insn, if (!valid) return -1; break; - case ENCODING_SIB: CASE_ENCODING_RM: + if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && + modFromModRM(insn->modRM) == 3) { + // EVEX_X can extend the register id to 32 for a non-GPR register that is + // encoded in RM. + // mode : MODE_64_BIT + // Only 8 vector registers are available in 32 bit mode + // mod : 3 + // RM encodes a register + switch (op->type) { + case TYPE_Rv: + case TYPE_R8: + case TYPE_R16: + case TYPE_R32: + case TYPE_R64: + break; + default: + insn->eaBase = + (EABase)(insn->eaBase + + (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4)); + break; + } + } + [[fallthrough]]; + case ENCODING_SIB: if (insn->eaBase >= insn->eaRegBase) { insn->eaBase = (EABase)fixupRMValue( insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid); @@ -889,6 +932,9 @@ static bool readOpcode(struct InternalInstruction *insn) { case VEX_LOB_0F3A: insn->opcodeType = THREEBYTE_3A; return consume(insn, insn->opcode); + case VEX_LOB_MAP4: + insn->opcodeType = MAP4; + return consume(insn, insn->opcode); case VEX_LOB_MAP5: insn->opcodeType = MAP5; return consume(insn, insn->opcode); @@ -918,6 +964,9 @@ static bool readOpcode(struct InternalInstruction *insn) { case VEX_LOB_MAP6: insn->opcodeType = MAP6; return consume(insn, insn->opcode); + case VEX_LOB_MAP7: + insn->opcodeType = MAP7; + return consume(insn, insn->opcode); } } else if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->opcodeType = TWOBYTE; @@ -939,6 +988,10 @@ static bool readOpcode(struct InternalInstruction *insn) { insn->opcodeType = XOPA_MAP; return consume(insn, insn->opcode); } + } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) { + // m bit indicates opcode map 1 + insn->opcodeType = TWOBYTE; + return consume(insn, insn->opcode); } if (consume(insn, current)) @@ -1053,12 +1106,18 @@ static int getInstructionIDWithAttrMask(uint16_t *instructionID, case THREEDNOW_MAP: decision = &THREEDNOW_MAP_SYM; break; + case MAP4: + decision = &MAP4_SYM; + break; case MAP5: decision = &MAP5_SYM; break; case MAP6: decision = &MAP6_SYM; break; + case MAP7: + decision = &MAP7_SYM; + break; } if (decision->opcodeDecisions[insnCtx] @@ -1207,6 +1266,11 @@ static int getInstructionID(struct InternalInstruction *insn, attrMask &= ~ATTR_ADSIZE; } + // Absolute jump and pushp/popp need special handling + if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE && + (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50)) + attrMask |= ATTR_REX2; + if (insn->mode == MODE_16BIT) { // JCXZ/JECXZ need special handling for 16-bit mode because the meaning // of the AdSize prefix is inverted w.r.t. 32-bit mode. @@ -1266,7 +1330,8 @@ static int getInstructionID(struct InternalInstruction *insn, // any position. if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) || (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) || - (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) { + (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) || + (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) { // Make sure we observed the prefixes in any position. if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; @@ -1379,10 +1444,16 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { if (size == 0) size = insn->registerSize; + auto setOpcodeRegister = [&](unsigned base) { + insn->opcodeRegister = + (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) | + (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) | + (insn->opcode & 7))); + }; + switch (size) { case 1: - insn->opcodeRegister = (Reg)( - MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); + setOpcodeRegister(MODRM_REG_AL); if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && insn->opcodeRegister < MODRM_REG_AL + 0x8) { insn->opcodeRegister = @@ -1391,18 +1462,13 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { break; case 2: - insn->opcodeRegister = (Reg)( - MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); + setOpcodeRegister(MODRM_REG_AX); break; case 4: - insn->opcodeRegister = - (Reg)(MODRM_REG_EAX + - ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); + setOpcodeRegister(MODRM_REG_EAX); break; case 8: - insn->opcodeRegister = - (Reg)(MODRM_REG_RAX + - ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); + setOpcodeRegister(MODRM_REG_RAX); break; } |