aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-12-18 20:30:12 +0000
committerDimitry Andric <dim@FreeBSD.org>2024-04-06 20:11:55 +0000
commit5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch)
tree1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
parent3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff)
parent312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp154
1 files changed, 110 insertions, 44 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 49651da63ecf..59e2008f5632 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -150,12 +150,18 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext,
dec =
&THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case MAP4:
+ dec = &MAP4_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
case MAP5:
dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
case MAP6:
dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case MAP7:
+ dec = &MAP7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
@@ -194,7 +200,7 @@ template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
uint64_t offset = insn->readerCursor - insn->startLocation;
if (offset + sizeof(T) > r.size())
return true;
- ptr = support::endian::read<T>(&r[offset], support::little);
+ ptr = support::endian::read<T>(&r[offset], llvm::endianness::little);
insn->readerCursor += sizeof(T);
return false;
}
@@ -203,6 +209,10 @@ static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
}
+static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) { // Reports whether `prefix` is the first byte (0xd5) of a REX2 prefix.
+ return insn->mode == MODE_64BIT && prefix == 0xd5; // Only recognized when insn->mode is MODE_64BIT.
+}
+
// Consumes all of an instruction's prefix bytes, and marks the
// instruction as having them. Also sets the instruction's default operand,
// address, and other relevant data sizes to report operands correctly.
@@ -334,8 +344,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
return -1;
}
- if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
- ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
+ if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
insn->vectorExtensionType = TYPE_EVEX;
} else {
--insn->readerCursor; // unconsume byte1
@@ -354,13 +363,19 @@ static int readPrefixes(struct InternalInstruction *insn) {
return -1;
}
- // We simulate the REX prefix for simplicity's sake
if (insn->mode == MODE_64BIT) {
+ // We simulate the REX prefix for simplicity's sake
insn->rexPrefix = 0x40 |
(wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
(rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
(xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
(bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
+
+ // We simulate the REX2 prefix for simplicity's sake
+ insn->rex2ExtensionPrefix[1] =
+ (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |
+ (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |
+ (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);
}
LLVM_DEBUG(
@@ -471,6 +486,23 @@ static int readPrefixes(struct InternalInstruction *insn) {
insn->vectorExtensionPrefix[1],
insn->vectorExtensionPrefix[2]));
}
+ } else if (isREX2(insn, byte)) {
+ uint8_t byte1;
+ if (peek(insn, byte1)) {
+ LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
+ return -1;
+ }
+ insn->rex2ExtensionPrefix[0] = byte;
+ consume(insn, insn->rex2ExtensionPrefix[1]);
+
+ // We simulate the REX prefix for simplicity's sake
+ insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |
+ (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |
+ (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |
+ (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);
+ LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
+ insn->rex2ExtensionPrefix[0],
+ insn->rex2ExtensionPrefix[1]));
} else if (isREX(insn, byte)) {
if (peek(insn, nextByte))
return -1;
@@ -529,7 +561,8 @@ static int readSIB(struct InternalInstruction *insn) {
if (consume(insn, insn->sib))
return -1;
- index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
+ index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
+ (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
if (index == 0x4) {
insn->sibIndex = SIB_INDEX_NONE;
@@ -539,7 +572,8 @@ static int readSIB(struct InternalInstruction *insn) {
insn->sibScale = 1 << scaleFromSIB(insn->sib);
- base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
+ base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
+ (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
switch (base) {
case 0x5:
@@ -601,7 +635,7 @@ static int readDisplacement(struct InternalInstruction *insn) {
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
static int readModRM(struct InternalInstruction *insn) {
- uint8_t mod, rm, reg, evexrm;
+ uint8_t mod, rm, reg;
LLVM_DEBUG(dbgs() << "readModRM()");
if (insn->consumedModRM)
@@ -633,14 +667,13 @@ static int readModRM(struct InternalInstruction *insn) {
break;
}
- reg |= rFromREX(insn->rexPrefix) << 3;
- rm |= bFromREX(insn->rexPrefix) << 3;
+ reg |= (rFromREX(insn->rexPrefix) << 3) |
+ (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
+ rm |= (bFromREX(insn->rexPrefix) << 3) |
+ (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
- evexrm = 0;
- if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
- evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
- }
insn->reg = (Reg)(insn->regBase + reg);
@@ -728,7 +761,7 @@ static int readModRM(struct InternalInstruction *insn) {
break;
case 0x3:
insn->eaDisplacement = EA_DISP_NONE;
- insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
+ insn->eaBase = (EABase)(insn->eaRegBase + rm);
break;
}
break;
@@ -738,7 +771,7 @@ static int readModRM(struct InternalInstruction *insn) {
return 0;
}
-#define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
+#define GENERIC_FIXUP_FUNC(name, base, prefix) \
static uint16_t name(struct InternalInstruction *insn, OperandType type, \
uint8_t index, uint8_t *valid) { \
*valid = 1; \
@@ -750,28 +783,15 @@ static int readModRM(struct InternalInstruction *insn) {
case TYPE_Rv: \
return base + index; \
case TYPE_R8: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
- if (insn->rexPrefix && index >= 4 && index <= 7) { \
+ if (insn->rexPrefix && index >= 4 && index <= 7) \
return prefix##_SPL + (index - 4); \
- } else { \
+ else \
return prefix##_AL + index; \
- } \
case TYPE_R16: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
return prefix##_AX + index; \
case TYPE_R32: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
return prefix##_EAX + index; \
case TYPE_R64: \
- index &= mask; \
- if (index > 0xf) \
- *valid = 0; \
return prefix##_RAX + index; \
case TYPE_ZMM: \
return prefix##_ZMM0 + index; \
@@ -821,8 +841,8 @@ static int readModRM(struct InternalInstruction *insn) {
// @param valid - The address of a uint8_t. The target is set to 1 if the
// field is valid for the register class; 0 if not.
// @return - The proper value.
-GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
-GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
+GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
// Consult an operand specifier to determine which of the fixup*Value functions
// to use in correcting readModRM()'s interpretation.
@@ -852,8 +872,31 @@ static int fixupReg(struct InternalInstruction *insn,
if (!valid)
return -1;
break;
- case ENCODING_SIB:
CASE_ENCODING_RM:
+ if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
+ modFromModRM(insn->modRM) == 3) {
+ // EVEX_X can extend the register id to 32 for a non-GPR register that is
+ // encoded in RM.
+ // mode : MODE_64BIT
+ // Only 8 vector registers are available in 32 bit mode
+ // mod : 3
+ // RM encodes a register
+ switch (op->type) {
+ case TYPE_Rv:
+ case TYPE_R8:
+ case TYPE_R16:
+ case TYPE_R32:
+ case TYPE_R64:
+ break;
+ default:
+ insn->eaBase =
+ (EABase)(insn->eaBase +
+ (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
+ break;
+ }
+ }
+ [[fallthrough]];
+ case ENCODING_SIB:
if (insn->eaBase >= insn->eaRegBase) {
insn->eaBase = (EABase)fixupRMValue(
insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
@@ -889,6 +932,9 @@ static bool readOpcode(struct InternalInstruction *insn) {
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consume(insn, insn->opcode);
+ case VEX_LOB_MAP4:
+ insn->opcodeType = MAP4;
+ return consume(insn, insn->opcode);
case VEX_LOB_MAP5:
insn->opcodeType = MAP5;
return consume(insn, insn->opcode);
@@ -918,6 +964,9 @@ static bool readOpcode(struct InternalInstruction *insn) {
case VEX_LOB_MAP6:
insn->opcodeType = MAP6;
return consume(insn, insn->opcode);
+ case VEX_LOB_MAP7:
+ insn->opcodeType = MAP7;
+ return consume(insn, insn->opcode);
}
} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
insn->opcodeType = TWOBYTE;
@@ -939,6 +988,10 @@ static bool readOpcode(struct InternalInstruction *insn) {
insn->opcodeType = XOPA_MAP;
return consume(insn, insn->opcode);
}
+ } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {
+ // m bit indicates opcode map 1
+ insn->opcodeType = TWOBYTE;
+ return consume(insn, insn->opcode);
}
if (consume(insn, current))
@@ -1053,12 +1106,18 @@ static int getInstructionIDWithAttrMask(uint16_t *instructionID,
case THREEDNOW_MAP:
decision = &THREEDNOW_MAP_SYM;
break;
+ case MAP4:
+ decision = &MAP4_SYM;
+ break;
case MAP5:
decision = &MAP5_SYM;
break;
case MAP6:
decision = &MAP6_SYM;
break;
+ case MAP7:
+ decision = &MAP7_SYM;
+ break;
}
if (decision->opcodeDecisions[insnCtx]
@@ -1207,6 +1266,11 @@ static int getInstructionID(struct InternalInstruction *insn,
attrMask &= ~ATTR_ADSIZE;
}
+ // Absolute jump and pushp/popp need special handling
+ if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
+ (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
+ attrMask |= ATTR_REX2;
+
if (insn->mode == MODE_16BIT) {
// JCXZ/JECXZ need special handling for 16-bit mode because the meaning
// of the AdSize prefix is inverted w.r.t. 32-bit mode.
@@ -1266,7 +1330,8 @@ static int getInstructionID(struct InternalInstruction *insn,
// any position.
if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
(insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
- (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
+ (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
+ (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
// Make sure we observed the prefixes in any position.
if (insn->hasAdSize)
attrMask |= ATTR_ADSIZE;
@@ -1379,10 +1444,16 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
if (size == 0)
size = insn->registerSize;
+ auto setOpcodeRegister = [&](unsigned base) {
+ insn->opcodeRegister =
+ (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |
+ (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |
+ (insn->opcode & 7)));
+ };
+
switch (size) {
case 1:
- insn->opcodeRegister = (Reg)(
- MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ setOpcodeRegister(MODRM_REG_AL);
if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
insn->opcodeRegister < MODRM_REG_AL + 0x8) {
insn->opcodeRegister =
@@ -1391,18 +1462,13 @@ static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
break;
case 2:
- insn->opcodeRegister = (Reg)(
- MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ setOpcodeRegister(MODRM_REG_AX);
break;
case 4:
- insn->opcodeRegister =
- (Reg)(MODRM_REG_EAX +
- ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ setOpcodeRegister(MODRM_REG_EAX);
break;
case 8:
- insn->opcodeRegister =
- (Reg)(MODRM_REG_RAX +
- ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
+ setOpcodeRegister(MODRM_REG_RAX);
break;
}