Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp  |  94
-rw-r--r--  lib/Target/X86/AsmParser/X86Operand.h      |  14
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h             |   1
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp        |   4
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp         | 273
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td           |  39
-rw-r--r--  lib/Target/X86/X86InstrInfo.td             |   6
-rw-r--r--  lib/Target/X86/X86InstrSSE.td              |   9
-rw-r--r--  lib/Target/X86/X86InstructionSelector.cpp  |  30
-rw-r--r--  lib/Target/X86/X86LegalizerInfo.cpp        | 102
-rw-r--r--  lib/Target/X86/X86LegalizerInfo.h          |   5
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp          |  80
-rw-r--r--  lib/Target/X86/X86OptimizeLEAs.cpp         |   8
-rw-r--r--  lib/Target/X86/X86RegisterBankInfo.cpp     |  23
-rw-r--r--  lib/Target/X86/X86RegisterBankInfo.h       |   7
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp            |  10
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp  |  32
17 files changed, 442 insertions, 295 deletions
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c1cfc82b4a812..32ab475f11867 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -776,11 +776,6 @@ private:
bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
- /// MS-compatibility:
- /// Obtain an appropriate size qualifier, when facing its absence,
- /// upon AVX512 vector/broadcast memory operand
- unsigned AdjustAVX512Mem(unsigned Size, X86Operand* UnsizedMemOpNext);
-
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
return getSTI().getFeatureBits()[X86::Mode64Bit];
@@ -1206,27 +1201,16 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
Identifier, Info.OpDecl);
}
+
// We either have a direct symbol reference, or an offset from a symbol. The
// parser always puts the symbol on the LHS, so look there for size
// calculation purposes.
+ unsigned FrontendSize = 0;
const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
bool IsSymRef =
isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
- if (IsSymRef) {
- if (!Size) {
- Size = Info.Type * 8; // Size is in terms of bits in this context.
- if (Size)
- InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
- /*Len=*/0, Size);
- if (AllowBetterSizeMatch)
- // Handle cases where size qualifier is absent, upon an indirect symbol
- // reference - e.g. "vaddps zmm1, zmm2, [var]"
- // set Size to zero to allow matching mechansim to try and find a better
- // size qualifier than our initial guess, based on available variants of
- // the given instruction
- Size = 0;
- }
- }
+ if (IsSymRef && !Size && Info.Type)
+ FrontendSize = Info.Type * 8; // Size is in terms of bits in this context.
// When parsing inline assembly we set the base register to a non-zero value
// if we don't know the actual value at this time. This is necessary to
@@ -1234,7 +1218,7 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
BaseReg = BaseReg ? BaseReg : 1;
return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
IndexReg, Scale, Start, End, Size, Identifier,
- Info.OpDecl);
+ Info.OpDecl, FrontendSize);
}
static void
@@ -2884,23 +2868,6 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
return true;
}
-unsigned X86AsmParser::AdjustAVX512Mem(unsigned Size,
- X86Operand* UnsizedMemOpNext) {
- // Check for the existence of an AVX512 platform
- if (!getSTI().getFeatureBits()[X86::FeatureAVX512])
- return 0;
- // Allow adjusting upon a (x|y|z)mm
- if (Size == 512 || Size == 256 || Size == 128)
- return Size;
- // This is an allegadly broadcasting mem op adjustment,
- // allow some more inquiring to validate it
- if (Size == 64 || Size == 32)
- return UnsizedMemOpNext && UnsizedMemOpNext->isToken() &&
- UnsizedMemOpNext->getToken().substr(0, 4).equals("{1to") ? Size : 0;
- // Do not allow any other type of adjustments
- return 0;
-}
-
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -2920,19 +2887,14 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// Find one unsized memory operand, if present.
X86Operand *UnsizedMemOp = nullptr;
- // If unsized memory operand was found - obtain following operand.
- // For use in AdjustAVX512Mem
- X86Operand *UnsizedMemOpNext = nullptr;
for (const auto &Op : Operands) {
X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
- if (UnsizedMemOp) {
- UnsizedMemOpNext = X86Op;
+ if (X86Op->isMemUnsized()) {
+ UnsizedMemOp = X86Op;
// Have we found an unqualified memory operand,
// break. IA allows only one memory operand.
break;
}
- if (X86Op->isMemUnsized())
- UnsizedMemOp = X86Op;
}
// Allow some instructions to have implicitly pointer-sized operands. This is
@@ -2978,7 +2940,6 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// If an unsized memory operand is present, try to match with each memory
// operand size. In Intel assembly, the size is not part of the instruction
// mnemonic.
- unsigned MatchedSize = 0;
if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
for (unsigned Size : MopSizes) {
@@ -2993,17 +2954,10 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfoIgnore;
- if (M == Match_Success)
- // MS-compatability:
- // Adjust AVX512 vector/broadcast memory operand,
- // when facing the absence of a size qualifier.
- // Match GCC behavior on respective cases.
- MatchedSize = AdjustAVX512Mem(Size, UnsizedMemOpNext);
}
// Restore the size of the unsized memory operand if we modified it.
- if (UnsizedMemOp)
- UnsizedMemOp->Mem.Size = 0;
+ UnsizedMemOp->Mem.Size = 0;
}
// If we haven't matched anything yet, this is not a basic integer or FPU
@@ -3027,20 +2981,30 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
Op.getLocRange(), MatchingInlineAsm);
}
+ unsigned NumSuccessfulMatches =
+ std::count(std::begin(Match), std::end(Match), Match_Success);
+
+ // If matching was ambiguous and we had size information from the frontend,
+ // try again with that. This handles cases like "movzx eax, m8/m16".
+ if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
+ UnsizedMemOp->getMemFrontendSize()) {
+ UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
+ unsigned M = MatchInstruction(
+ Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
+ if (M == Match_Success)
+ NumSuccessfulMatches = 1;
+
+ // Add a rewrite that encodes the size information we used from the
+ // frontend.
+ InstInfo->AsmRewrites->emplace_back(
+ AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
+ /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
+ }
+
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
- unsigned NumSuccessfulMatches =
- std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
- if (MatchedSize && isParsingInlineAsm() && isParsingIntelSyntax())
- // MS compatibility -
- // Fix the rewrite according to the matched memory size
- // MS inline assembly only
- for (AsmRewrite &AR : *InstInfo->AsmRewrites)
- if ((AR.Loc.getPointer() == UnsizedMemOp->StartLoc.getPointer()) &&
- (AR.Kind == AOK_SizeDirective))
- AR.Val = MatchedSize;
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the individual
// transformations can chain off each other.
@@ -3057,7 +3021,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
"multiple matches only possible with unsized memory operands");
return Error(UnsizedMemOp->getStartLoc(),
"ambiguous operand size for instruction '" + Mnemonic + "\'",
- UnsizedMemOp->getLocRange(), MatchingInlineAsm);
+ UnsizedMemOp->getLocRange());
}
// If one instruction matched with a missing feature, report this as a
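
For illustration, a minimal MS inline-assembly sketch of the ambiguity the FrontendSize fallback resolves (not part of this patch; assumes clang with -fms-extensions or MSVC on x86):

  unsigned char c;
  unsigned f() {
    // Intel syntax carries no size qualifier on 'c', so both the m8 and m16
    // forms of movzx match; the frontend's sizeof(c) * 8 == 8 breaks the tie.
    __asm movzx eax, c
  }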
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 9f1fa6c659070..33eff14b82157 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -62,6 +62,10 @@ struct X86Operand : public MCParsedAsmOperand {
unsigned Scale;
unsigned Size;
unsigned ModeSize;
+
+ /// If the memory operand is unsized and there are multiple instruction
+ /// matches, prefer the one with this size.
+ unsigned FrontendSize;
};
union {
@@ -136,6 +140,10 @@ struct X86Operand : public MCParsedAsmOperand {
assert(Kind == Memory && "Invalid access!");
return Mem.ModeSize;
}
+ unsigned getMemFrontendSize() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.FrontendSize;
+ }
bool isToken() const override {return Kind == Token; }
@@ -512,7 +520,7 @@ struct X86Operand : public MCParsedAsmOperand {
static std::unique_ptr<X86Operand>
CreateMem(unsigned ModeSize, const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0, StringRef SymName = StringRef(),
- void *OpDecl = nullptr) {
+ void *OpDecl = nullptr, unsigned FrontendSize = 0) {
auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -521,6 +529,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
Res->Mem.ModeSize = ModeSize;
+ Res->Mem.FrontendSize = FrontendSize;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
Res->AddressOf = false;
@@ -532,7 +541,7 @@ struct X86Operand : public MCParsedAsmOperand {
CreateMem(unsigned ModeSize, unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc,
SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(),
- void *OpDecl = nullptr) {
+ void *OpDecl = nullptr, unsigned FrontendSize = 0) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -548,6 +557,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
Res->Mem.ModeSize = ModeSize;
+ Res->Mem.FrontendSize = FrontendSize;
Res->SymName = SymName;
Res->OpDecl = OpDecl;
Res->AddressOf = false;
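
A hypothetical call-site sketch of the new defaulted parameter (Disp, Start, End, and OpDecl are placeholders standing in for the parser's state): the operand stays unsized (Size == 0) but remembers that the frontend believes the variable is 32 bits wide, to be consulted only if matching is otherwise ambiguous.

  auto Op = X86Operand::CreateMem(/*ModeSize=*/64, /*SegReg=*/0, Disp,
                                  /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
                                  Start, End, /*Size=*/0,
                                  /*SymName=*/"var", OpDecl,
                                  /*FrontendSize=*/32);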
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 44bc373b0394c..d7c3b74d3efb2 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -91,6 +91,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
X86MCInstLower &MCIL);
void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+ void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index a94045cd536d1..331e56976db7c 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -2990,6 +2990,10 @@ unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF)
void X86FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
+ // Mark the function as not having WinCFI. We will set it back to true in
+ // emitPrologue if it gets called and emits CFI.
+ MF.setHasWinCFI(false);
+
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
const Function *Fn = MF.getFunction();
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 83542aaa013bd..9ee2234595f9a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1224,10 +1224,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
- setOperationAction(ISD::VSELECT, MVT::v8i1, Expand);
- setOperationAction(ISD::VSELECT, MVT::v16i1, Expand);
+
if (Subtarget.hasDQI()) {
for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) {
setOperationAction(ISD::SINT_TO_FP, VT, Legal);
@@ -1243,8 +1240,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
if (Subtarget.hasVLX()) {
- setOperationAction(ISD::ABS, MVT::v4i64, Legal);
- setOperationAction(ISD::ABS, MVT::v2i64, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
@@ -1270,8 +1265,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
}
- setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
@@ -1304,33 +1297,34 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
- setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
- setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
-
setOperationAction(ISD::MUL, MVT::v8i64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
- setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
- setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
-
- setOperationAction(ISD::ADD, MVT::v8i1, Custom);
- setOperationAction(ISD::ADD, MVT::v16i1, Custom);
- setOperationAction(ISD::SUB, MVT::v8i1, Custom);
- setOperationAction(ISD::SUB, MVT::v16i1, Custom);
- setOperationAction(ISD::MUL, MVT::v8i1, Custom);
- setOperationAction(ISD::MUL, MVT::v16i1, Custom);
setOperationAction(ISD::MUL, MVT::v16i32, Legal);
+ // Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version.
+ setOperationAction(ISD::ABS, MVT::v4i64, Legal);
+ setOperationAction(ISD::ABS, MVT::v2i64, Legal);
+
+ for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
+ setOperationAction(ISD::ADD, VT, Custom);
+ setOperationAction(ISD::SUB, VT, Custom);
+ setOperationAction(ISD::MUL, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ }
+
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
@@ -1352,33 +1346,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
if (Subtarget.hasCDI()) {
- setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
- setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
-
- setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
- setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
- setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
- setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
-
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
-
- if (Subtarget.hasVLX()) {
- setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
- setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
- setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
- setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
- } else {
- setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
- setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
- setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
- setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
+ // Non-VLX subtargets extend 128/256-bit vectors to use the 512-bit version.
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
+ MVT::v4i64, MVT::v8i64}) {
+ setOperationAction(ISD::CTLZ, VT, Legal);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
-
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
} // Subtarget.hasCDI()
if (Subtarget.hasDQI()) {
@@ -6070,7 +6043,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- if (NumNonZero > 8)
+ if (NumNonZero > 8 && !Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
@@ -6158,7 +6131,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- if (NumNonZero > 4)
+ if (NumNonZero > 4 && !Subtarget.hasSSE41())
return SDValue();
SDLoc dl(Op);
@@ -6241,7 +6214,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
Elt = Op->getOperand(EltIdx);
// By construction, Elt is a EXTRACT_VECTOR_ELT with constant index.
- EltMaskIdx = cast<ConstantSDNode>(Elt.getOperand(1))->getZExtValue();
+ EltMaskIdx = Elt.getConstantOperandVal(1);
if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
break;
Mask[EltIdx] = EltIdx;
@@ -6272,8 +6245,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
SDValue SrcVector = Current->getOperand(0);
if (!V1.getNode())
V1 = SrcVector;
- CanFold = SrcVector == V1 &&
- cast<ConstantSDNode>(Current.getOperand(1))->getZExtValue() == i;
+ CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
}
if (!CanFold)
@@ -20944,54 +20916,62 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
+// Split a unary integer op into 2 half-sized ops.
+static SDValue LowerVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned SizeInBits = VT.getSizeInBits();
+
+ // Extract the Lo/Hi vectors
+ SDLoc dl(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue Lo = extractSubVector(Src, 0, DAG, dl, SizeInBits / 2);
+ SDValue Hi = extractSubVector(Src, NumElems / 2, DAG, dl, SizeInBits / 2);
+
+ MVT EltVT = VT.getVectorElementType();
+ MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, NewVT, Lo),
+ DAG.getNode(Op.getOpcode(), dl, NewVT, Hi));
+}
+
+// Decompose 256-bit ops into smaller 128-bit ops.
+static SDValue Lower256IntUnary(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return LowerVectorIntUnary(Op, DAG);
+}
+
+// Decompose 512-bit ops into smaller 256-bit ops.
+static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getSimpleValueType().is512BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 512-bit vector integer operation");
+ return LowerVectorIntUnary(Op, DAG);
+}
+
/// \brief Lower a vector CTLZ using native supported vector CTLZ instruction.
//
-// 1. i32/i64 128/256-bit vector (native support require VLX) are expended
-// to 512-bit vector.
-// 2. i8/i16 vector implemented using dword LZCNT vector instruction
-// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
-// split the vector, perform operation on it's Lo a Hi part and
-// concatenate the results.
-static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) {
+// i8/i16 vectors are implemented using the dword LZCNT vector instruction
+// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
+// split the vector, perform the operation on its Lo and Hi parts, and
+// concatenate the results.
+static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::CTLZ);
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
unsigned NumElems = VT.getVectorNumElements();
- if (EltVT == MVT::i64 || EltVT == MVT::i32) {
- // Extend to 512 bit vector.
- assert((VT.is256BitVector() || VT.is128BitVector()) &&
- "Unsupported value type for operation");
-
- MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits());
- SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
- DAG.getUNDEF(NewVT),
- Op.getOperand(0),
- DAG.getIntPtrConstant(0, dl));
- SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512);
-
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode,
- DAG.getIntPtrConstant(0, dl));
- }
-
assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
"Unsupported element type");
- if (16 < NumElems) {
- // Split vector, it's Lo and Hi parts will be handled in next iteration.
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
- MVT OutVT = MVT::getVectorVT(EltVT, NumElems/2);
-
- Lo = DAG.getNode(ISD::CTLZ, dl, OutVT, Lo);
- Hi = DAG.getNode(ISD::CTLZ, dl, OutVT, Hi);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
- }
+ // Split the vector; its Lo and Hi parts will be handled in the next iteration.
+ if (16 < NumElems)
+ return LowerVectorIntUnary(Op, DAG);
MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
-
assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
"Unsupported value type for operation");
@@ -21078,23 +21058,17 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
- SDValue Op0 = Op.getOperand(0);
- if (Subtarget.hasAVX512())
- return LowerVectorCTLZ_AVX512(Op, DAG);
+ if (Subtarget.hasCDI())
+ return LowerVectorCTLZ_AVX512CDI(Op, DAG);
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector() && !Subtarget.hasInt256()) {
- unsigned NumElems = VT.getVectorNumElements();
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return Lower256IntUnary(Op, DAG);
- // Extract each 128-bit vector, perform ctlz and concat the result.
- SDValue LHS = extract128BitVector(Op0, 0, DAG, DL);
- SDValue RHS = extract128BitVector(Op0, NumElems / 2, DAG, DL);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
- DAG.getNode(ISD::CTLZ, DL, LHS.getValueType(), LHS),
- DAG.getNode(ISD::CTLZ, DL, RHS.getValueType(), RHS));
- }
+ // Decompose 512-bit ops into smaller 256-bit ops.
+ if (VT.is512BitVector() && !Subtarget.hasBWI())
+ return Lower512IntUnary(Op, DAG);
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
@@ -21258,19 +21232,7 @@ static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
- MVT VT = Op.getSimpleValueType();
- unsigned NumElems = VT.getVectorNumElements();
-
- SDLoc dl(Op);
- SDValue Src = Op.getOperand(0);
- SDValue Lo = extract128BitVector(Src, 0, DAG, dl);
- SDValue Hi = extract128BitVector(Src, NumElems / 2, DAG, dl);
-
- MVT EltVT = VT.getVectorElementType();
- MVT NewVT = MVT::getVectorVT(EltVT, NumElems / 2);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(ISD::ABS, dl, NewVT, Lo),
- DAG.getNode(ISD::ABS, dl, NewVT, Hi));
+ return Lower256IntUnary(Op, DAG);
}
static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) {
@@ -23049,29 +23011,13 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
return LowerVectorCTPOPBitmath(Op0, DL, Subtarget, DAG);
}
- if (VT.is256BitVector() && !Subtarget.hasInt256()) {
- unsigned NumElems = VT.getVectorNumElements();
-
- // Extract each 128-bit vector, compute pop count and concat the result.
- SDValue LHS = extract128BitVector(Op0, 0, DAG, DL);
- SDValue RHS = extract128BitVector(Op0, NumElems / 2, DAG, DL);
-
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
- LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG),
- LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG));
- }
-
- if (VT.is512BitVector() && !Subtarget.hasBWI()) {
- unsigned NumElems = VT.getVectorNumElements();
-
- // Extract each 256-bit vector, compute pop count and concat the result.
- SDValue LHS = extract256BitVector(Op0, 0, DAG, DL);
- SDValue RHS = extract256BitVector(Op0, NumElems / 2, DAG, DL);
+ // Decompose 256-bit ops into smaller 128-bit ops.
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return Lower256IntUnary(Op, DAG);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
- LowerVectorCTPOPInRegLUT(LHS, DL, Subtarget, DAG),
- LowerVectorCTPOPInRegLUT(RHS, DL, Subtarget, DAG));
- }
+ // Decompose 512-bit ops into smaller 256-bit ops.
+ if (VT.is512BitVector() && !Subtarget.hasBWI())
+ return Lower512IntUnary(Op, DAG);
return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
@@ -23098,20 +23044,12 @@ static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
DAG.getIntPtrConstant(0, DL));
}
- MVT SVT = VT.getVectorElementType();
int NumElts = VT.getVectorNumElements();
int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.is256BitVector()) {
- SDValue Lo = extract128BitVector(In, 0, DAG, DL);
- SDValue Hi = extract128BitVector(In, NumElts / 2, DAG, DL);
-
- MVT HalfVT = MVT::getVectorVT(SVT, NumElts / 2);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
- DAG.getNode(ISD::BITREVERSE, DL, HalfVT, Lo),
- DAG.getNode(ISD::BITREVERSE, DL, HalfVT, Hi));
- }
+ if (VT.is256BitVector())
+ return Lower256IntUnary(Op, DAG);
assert(VT.is128BitVector() &&
"Only 128-bit vector bitreverse lowering supported.");
@@ -23152,14 +23090,8 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
"Only byte vector BITREVERSE supported");
// Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2.
- if (VT.is256BitVector() && !Subtarget.hasInt256()) {
- MVT HalfVT = MVT::getVectorVT(MVT::i8, NumElts / 2);
- SDValue Lo = extract128BitVector(In, 0, DAG, DL);
- SDValue Hi = extract128BitVector(In, NumElts / 2, DAG, DL);
- Lo = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, Lo);
- Hi = DAG.getNode(ISD::BITREVERSE, DL, HalfVT, Hi);
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
- }
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return Lower256IntUnary(Op, DAG);
// Perform BITREVERSE using PSHUFB lookups. Each byte is split into
// two nibbles and a PSHUFB lookup to find the bitreverse of each
@@ -26585,6 +26517,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
+
+ case TargetOpcode::PATCHABLE_EVENT_CALL:
+ // Do nothing here, handle in xray instrumentation pass.
+ return BB;
case X86::LCMPXCHG8B: {
const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -26667,7 +26603,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- Known.Zero.clearAllBits(); Known.One.clearAllBits();
+ Known.resetAll();
switch (Opc) {
default: break;
case X86ISD::ADD:
@@ -26697,7 +26633,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case X86ISD::VSRLI: {
if (auto *ShiftImm = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
if (ShiftImm->getAPIntValue().uge(VT.getScalarSizeInBits())) {
- Known.Zero.setAllBits();
+ Known.setAllZero();
break;
}
@@ -26729,8 +26665,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = KnownBits(InBitWidth);
APInt DemandedSrcElts = APInt::getLowBitsSet(InNumElts, NumElts);
DAG.computeKnownBits(N0, Known, DemandedSrcElts, Depth + 1);
- Known.One = Known.One.zext(BitWidth);
- Known.Zero = Known.Zero.zext(BitWidth);
+ Known = Known.zext(BitWidth);
Known.Zero.setBitsFrom(InBitWidth);
break;
}
@@ -31671,10 +31606,9 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
if (auto *AmtConst = AmtBV->getConstantSplatNode())
SraAmt = AmtConst->getZExtValue();
- } else if (Mask.getOpcode() == X86ISD::VSRAI) {
- SDValue SraC = Mask.getOperand(1);
- SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
- }
+ } else if (Mask.getOpcode() == X86ISD::VSRAI)
+ SraAmt = Mask.getConstantOperandVal(1);
+
if ((SraAmt + 1) != EltBits)
return SDValue();
@@ -31708,7 +31642,9 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
V = Y;
if (V) {
- assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
+ if (EltBits != 8 && EltBits != 16 && EltBits != 32)
+ return SDValue();
+
SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
SDValue SubOp2 = Mask;
@@ -34488,8 +34424,7 @@ static SDValue combineX86ADD(SDNode *N, SelectionDAG &DAG,
if (Carry.getOpcode() == ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC ||
Carry.getOpcode() == X86ISD::SETCC_CARRY) {
- auto *Cond = cast<ConstantSDNode>(Carry.getOperand(0));
- if (Cond->getZExtValue() == X86::COND_B)
+ if (Carry.getConstantOperandVal(0) == X86::COND_B)
return DCI.CombineTo(N, SDValue(N, 0), Carry.getOperand(1));
}
}
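
As a scalar illustration of the identity the i8/i16 CTLZ path relies on (plain C++ standing in for the DAG's sub(trunc(lzcnt(zext32(x)))); the 24 is 32 - 8 for the i8 case):

  #include <cstdint>
  // The zero-extension to 32 bits contributes exactly 24 leading zeros,
  // which the subtraction removes again; the zero input is special-cased.
  static unsigned ctlz8_via_lzcnt32(uint8_t X) {
    return X ? __builtin_clz(static_cast<uint32_t>(X)) - 24u : 8u;
  }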
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index c38c13bb97571..71d395244b4ad 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -8631,6 +8631,20 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
+// VPABS: Use the 512-bit version to implement 128/256-bit in the NoVLX case.
+let Predicates = [HasAVX512, NoVLX] in {
+ def : Pat<(v4i64 (abs VR256X:$src)),
+ (EXTRACT_SUBREG
+ (VPABSQZrr
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
+ sub_ymm)>;
+ def : Pat<(v2i64 (abs VR128X:$src)),
+ (EXTRACT_SUBREG
+ (VPABSQZrr
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
+ sub_xmm)>;
+}
+
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
@@ -8639,6 +8653,31 @@ multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
+// VPLZCNT: Use the 512-bit version to implement 128/256-bit in the NoVLX case.
+let Predicates = [HasCDI, NoVLX] in {
+ def : Pat<(v4i64 (ctlz VR256X:$src)),
+ (EXTRACT_SUBREG
+ (VPLZCNTQZrr
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
+ sub_ymm)>;
+ def : Pat<(v2i64 (ctlz VR128X:$src)),
+ (EXTRACT_SUBREG
+ (VPLZCNTQZrr
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
+ sub_xmm)>;
+
+ def : Pat<(v8i32 (ctlz VR256X:$src)),
+ (EXTRACT_SUBREG
+ (VPLZCNTDZrr
+ (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
+ sub_ymm)>;
+ def : Pat<(v4i32 (ctlz VR128X:$src)),
+ (EXTRACT_SUBREG
+ (VPLZCNTDZrr
+ (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
+ sub_xmm)>;
+}
+
//===---------------------------------------------------------------------===//
// Replicate Single FP - MOVSHDUP and MOVSLDUP
//===---------------------------------------------------------------------===//
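
In intrinsics form, a sketch of what the NoVLX vplzcnt patterns select (assumes AVX-512CD without AVX-512VL; the casts model the INSERT_SUBREG/EXTRACT_SUBREG pair, with the upper zmm lanes left undefined):

  #include <immintrin.h>
  static __m256i lzcnt_epi64_ymm_no_vlx(__m256i V) {
    __m512i Wide = _mm512_castsi256_si512(V); // INSERT_SUBREG into undef zmm
    __m512i Cnt = _mm512_lzcnt_epi64(Wide);   // VPLZCNTQZrr on the full zmm
    return _mm512_castsi512_si256(Cnt);       // EXTRACT_SUBREG of sub_ymm
  }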
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index cdf7ce19cdc8f..902b0c2c04e3e 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1995,11 +1995,11 @@ def REX64_PREFIX : I<0x48, RawFrm, (outs), (ins), "rex64", []>,
Requires<[In64BitMode]>;
// Data16 instruction prefix
-def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>,
+def DATA16_PREFIX : I<0x66, RawFrm, (outs), (ins), "data16", []>,
Requires<[Not16BitMode]>;
// Data instruction prefix
-def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", []>,
+def DATA32_PREFIX : I<0x66, RawFrm, (outs), (ins), "data32", []>,
Requires<[In16BitMode]>;
// Repeat string operation instruction prefixes
@@ -2518,7 +2518,7 @@ let SchedRW = [ WriteSystem ] in {
}
let Uses = [ ECX, EAX, EBX ] in {
- def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx",
+ def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx",
[(int_x86_mwaitx ECX, EAX, EBX)], IIC_SSE_MWAITX>,
TB, Requires<[ HasMWAITX ]>;
}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f22a50200c9a3..48da2fa607af0 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -6718,22 +6718,23 @@ let Constraints = "$src1 = $dst" in {
SSE_INTMUL_ITINS_P, 1>;
}
-let Predicates = [HasAVX, NoVLX] in {
+let Predicates = [HasAVX, NoVLX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
VEX_4V, VEX_WIG;
+let Predicates = [HasAVX] in
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_WIG;
-}
-let Predicates = [HasAVX2] in {
+
+let Predicates = [HasAVX2, NoVLX] in
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
VEX_4V, VEX_L, VEX_WIG;
+let Predicates = [HasAVX2] in
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L, VEX_WIG;
-}
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp
index 38f7bc0af5c72..d65eb1de8d094 100644
--- a/lib/Target/X86/X86InstructionSelector.cpp
+++ b/lib/Target/X86/X86InstructionSelector.cpp
@@ -65,8 +65,8 @@ private:
MachineFunction &MF) const;
bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
- bool selectFrameIndex(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF) const;
+ bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -235,7 +235,7 @@ bool X86InstructionSelector::select(MachineInstr &I) const {
return true;
if (selectLoadStoreOp(I, MRI, MF))
return true;
- if (selectFrameIndex(I, MRI, MF))
+ if (selectFrameIndexOrGep(I, MRI, MF))
return true;
if (selectConstant(I, MRI, MF))
return true;
@@ -427,27 +427,37 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
-bool X86InstructionSelector::selectFrameIndex(MachineInstr &I,
- MachineRegisterInfo &MRI,
- MachineFunction &MF) const {
- if (I.getOpcode() != TargetOpcode::G_FRAME_INDEX)
+bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ unsigned Opc = I.getOpcode();
+
+ if (Opc != TargetOpcode::G_FRAME_INDEX && Opc != TargetOpcode::G_GEP)
return false;
const unsigned DefReg = I.getOperand(0).getReg();
LLT Ty = MRI.getType(DefReg);
- // Use LEA to calculate frame index.
+ // Use LEA to calculate the frame index or GEP.
unsigned NewOpc;
if (Ty == LLT::pointer(0, 64))
NewOpc = X86::LEA64r;
else if (Ty == LLT::pointer(0, 32))
NewOpc = STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
else
- llvm_unreachable("Can't select G_FRAME_INDEX, unsupported type.");
+ llvm_unreachable("Can't select G_FRAME_INDEX/G_GEP, unsupported type.");
I.setDesc(TII.get(NewOpc));
MachineInstrBuilder MIB(MF, I);
- addOffset(MIB, 0);
+
+ if (Opc == TargetOpcode::G_FRAME_INDEX) {
+ addOffset(MIB, 0);
+ } else {
+ MachineOperand &InxOp = I.getOperand(2);
+ I.addOperand(InxOp); // set IndexReg
+ InxOp.ChangeToImmediate(1); // set Scale
+ MIB.addImm(0).addReg(0);
+ }
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
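
As a scalar C++ sketch (illustration only) of the address the selected LEA computes: a G_GEP becomes Base + Scale*Index + Disp, with the code above fixing Scale = 1, Disp = 0, and no segment register.

  static char *gep_as_lea(char *Base, long Index) {
    return Base + 1 * Index + 0; // LEA64r Base, 1, Index, 0, %noreg
  }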
diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp
index a437f6bf4714d..4f5e70414aa93 100644
--- a/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/lib/Target/X86/X86LegalizerInfo.cpp
@@ -34,6 +34,11 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
setLegalizerInfo64bit();
setLegalizerInfoSSE1();
setLegalizerInfoSSE2();
+ setLegalizerInfoSSE41();
+ setLegalizerInfoAVX2();
+ setLegalizerInfoAVX512();
+ setLegalizerInfoAVX512DQ();
+ setLegalizerInfoAVX512BW();
computeTables();
}
@@ -50,7 +55,7 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- for (unsigned BinOp : {G_ADD, G_SUB})
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL})
for (auto Ty : {s8, s16, s32})
setAction({BinOp, Ty}, Legal);
@@ -65,6 +70,12 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
// Pointer-handling
setAction({G_FRAME_INDEX, p0}, Legal);
+ setAction({G_GEP, p0}, Legal);
+ setAction({G_GEP, 1, s32}, Legal);
+
+ for (auto Ty : {s1, s8, s16})
+ setAction({G_GEP, 1, Ty}, WidenScalar);
+
// Constants
for (auto Ty : {s8, s16, s32, p0})
setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
@@ -94,7 +105,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
- for (unsigned BinOp : {G_ADD, G_SUB})
+ for (unsigned BinOp : {G_ADD, G_SUB, G_MUL})
for (auto Ty : {s8, s16, s32, s64})
setAction({BinOp, Ty}, Legal);
@@ -109,6 +120,13 @@ void X86LegalizerInfo::setLegalizerInfo64bit() {
// Pointer-handling
setAction({G_FRAME_INDEX, p0}, Legal);
+ setAction({G_GEP, p0}, Legal);
+ setAction({G_GEP, 1, s32}, Legal);
+ setAction({G_GEP, 1, s64}, Legal);
+
+ for (auto Ty : {s1, s8, s16})
+ setAction({G_GEP, 1, Ty}, WidenScalar);
+
// Constants
for (auto Ty : {s8, s16, s32, s64, p0})
setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
@@ -149,6 +167,7 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
return;
const LLT s64 = LLT::scalar(64);
+ const LLT v8s16 = LLT::vector(8, 16);
const LLT v4s32 = LLT::vector(4, 32);
const LLT v2s64 = LLT::vector(2, 64);
@@ -159,4 +178,83 @@ void X86LegalizerInfo::setLegalizerInfoSSE2() {
for (unsigned BinOp : {G_ADD, G_SUB})
for (auto Ty : {v4s32})
setAction({BinOp, Ty}, Legal);
+
+ setAction({G_MUL, v8s16}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoSSE41() {
+ if (!Subtarget.hasSSE41())
+ return;
+
+ const LLT v4s32 = LLT::vector(4, 32);
+
+ setAction({G_MUL, v4s32}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoAVX2() {
+ if (!Subtarget.hasAVX2())
+ return;
+
+ const LLT v16s16 = LLT::vector(16, 16);
+ const LLT v8s32 = LLT::vector(8, 32);
+
+ for (auto Ty : {v16s16, v8s32})
+ setAction({G_MUL, Ty}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoAVX512() {
+ if (!Subtarget.hasAVX512())
+ return;
+
+ const LLT v16s32 = LLT::vector(16, 32);
+
+ setAction({G_MUL, v16s32}, Legal);
+
+ /************ VLX *******************/
+ if (!Subtarget.hasVLX())
+ return;
+
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v8s32 = LLT::vector(8, 32);
+
+ for (auto Ty : {v4s32, v8s32})
+ setAction({G_MUL, Ty}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoAVX512DQ() {
+ if (!(Subtarget.hasAVX512() && Subtarget.hasDQI()))
+ return;
+
+ const LLT v8s64 = LLT::vector(8, 64);
+
+ setAction({G_MUL, v8s64}, Legal);
+
+ /************ VLX *******************/
+ if (!Subtarget.hasVLX())
+ return;
+
+ const LLT v2s64 = LLT::vector(2, 64);
+ const LLT v4s64 = LLT::vector(4, 64);
+
+ for (auto Ty : {v2s64, v4s64})
+ setAction({G_MUL, Ty}, Legal);
+}
+
+void X86LegalizerInfo::setLegalizerInfoAVX512BW() {
+ if (!(Subtarget.hasAVX512() && Subtarget.hasBWI()))
+ return;
+
+ const LLT v32s16 = LLT::vector(32, 16);
+
+ setAction({G_MUL, v32s16}, Legal);
+
+ /************ VLX *******************/
+ if (!Subtarget.hasVLX())
+ return;
+
+ const LLT v8s16 = LLT::vector(8, 16);
+ const LLT v16s16 = LLT::vector(16, 16);
+
+ for (auto Ty : {v8s16, v16s16})
+ setAction({G_MUL, Ty}, Legal);
}
diff --git a/lib/Target/X86/X86LegalizerInfo.h b/lib/Target/X86/X86LegalizerInfo.h
index 3f00898b42322..ab5405a704273 100644
--- a/lib/Target/X86/X86LegalizerInfo.h
+++ b/lib/Target/X86/X86LegalizerInfo.h
@@ -38,6 +38,11 @@ private:
void setLegalizerInfo64bit();
void setLegalizerInfoSSE1();
void setLegalizerInfoSSE2();
+ void setLegalizerInfoSSE41();
+ void setLegalizerInfoAVX2();
+ void setLegalizerInfoAVX512();
+ void setLegalizerInfoAVX512DQ();
+ void setLegalizerInfoAVX512BW();
};
} // namespace llvm
#endif
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 550e3543a71e8..598d88d8b9c3b 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -1040,6 +1040,83 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
getSubtargetInfo());
}
+void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
+ X86MCInstLower &MCIL) {
+ assert(Subtarget->is64Bit() && "XRay custom events only support X86-64");
+
+ // We want to emit the following pattern, which follows the x86 calling
+ // convention to prepare for the trampoline call to be patched in.
+ //
+ // <args placement according to the SysV64 calling convention>
+ // .p2align 1, ...
+ // .Lxray_event_sled_N:
+ // jmp +N // jump across the call instruction
+ // callq __xray_CustomEvent // force relocation to symbol
+ // <args cleanup, jump to here>
+ //
+ // The relative jump needs to jump forward 20 bytes (0xeb 0x14):
+ // 1 (push) + 6 (arg moves/nops) + 10 (movabs) + 2 (call) + 1 (pop)
+ //
+ // After patching, it would look something like:
+ //
+ // nopw (2-byte nop)
+ // callq __xray_CustomEvent // already lowered
+ //
+ // ---
+ // First we emit the label and the jump.
+ auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
+ OutStreamer->AddComment("# XRay Custom Event Log");
+ OutStreamer->EmitCodeAlignment(2);
+ OutStreamer->EmitLabel(CurSled);
+
+ // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
+ // an operand (computed as an offset from the jmp instruction).
+ // FIXME: Find another less hacky way to force the relative jump.
+ OutStreamer->EmitBytes("\xeb\x14");
+
+ // The default C calling convention will place two arguments into %rdi and
+ // %rsi -- so we only work with those.
+ unsigned UsedRegs[] = {X86::RDI, X86::RSI, X86::RAX};
+
+ // Because we will use %rax, we preserve that across the call.
+ EmitAndCountInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
+
+ // Then we put the operands in the %rdi and %rsi registers.
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+ if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
+ if (Op->isImm())
+ EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
+ .addReg(UsedRegs[I])
+ .addImm(Op->getImm()));
+ else if (Op->isReg()) {
+ if (Op->getReg() != UsedRegs[I])
+ EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(UsedRegs[I])
+ .addReg(Op->getReg()));
+ else
+ EmitNops(*OutStreamer, 3, Subtarget->is64Bit(), getSubtargetInfo());
+ }
+ }
+
+ // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
+ // name of the trampoline to be implemented by the XRay runtime. We put this
+ // explicitly in the %rax register.
+ auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
+ MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
+ EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
+ .addReg(X86::RAX)
+ .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
+
+ // Emit the call instruction.
+ EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(X86::RAX));
+
+ // Restore caller-saved and used registers.
+ OutStreamer->AddComment("xray custom event end.");
+ EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RAX));
+
+ recordSled(CurSled, MI, SledKind::CUSTOM_EVENT);
+}
+
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// We want to emit the following pattern:
@@ -1415,6 +1492,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHABLE_TAIL_CALL:
return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
+
+ case TargetOpcode::PATCHABLE_EVENT_CALL:
+ return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
case X86::MORESTACK_RET:
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
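
A hedged usage sketch of what reaches this lowering: clang's __xray_customevent builtin (assumes x86-64 and -fxray-instrument) becomes the PATCHABLE_EVENT_CALL pseudo and then the jmp/call sled above.

  #include <cstddef>
  void log_phase_change(char *Buf, std::size_t Len) {
    __xray_customevent(Buf, Len); // patched at runtime to call the trampoline
  }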
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index 7be0a7fd40678..aabbf67a16b62 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -223,8 +223,6 @@ public:
StringRef getPassName() const override { return "X86 LEA Optimize"; }
- bool doInitialization(Module &M) override;
-
/// \brief Loop over all of the basic blocks, replacing address
/// calculations in load and store instructions, if it's already
/// been calculated by LEA. Also, remove redundant LEAs.
@@ -280,7 +278,6 @@ private:
MachineRegisterInfo *MRI;
const X86InstrInfo *TII;
const X86RegisterInfo *TRI;
- Module *TheModule;
static char ID;
};
@@ -649,11 +646,6 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
return Changed;
}
-bool OptimizeLEAPass::doInitialization(Module &M) {
- TheModule = &M;
- return false;
-}
-
bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
diff --git a/lib/Target/X86/X86RegisterBankInfo.cpp b/lib/Target/X86/X86RegisterBankInfo.cpp
index 0f8a750a02352..efd3df26dd424 100644
--- a/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -139,8 +139,9 @@ bool X86RegisterBankInfo::getInstrValueMapping(
return true;
}
-RegisterBankInfo::InstructionMapping
-X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) {
+const RegisterBankInfo::InstructionMapping &
+X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI,
+ bool isFP) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -152,10 +153,10 @@ X86RegisterBankInfo::getSameOperandsMapping(const MachineInstr &MI, bool isFP) {
llvm_unreachable("Unsupported operand mapping yet.");
auto Mapping = getValueMapping(getPartialMappingIdx(Ty, isFP), 3);
- return InstructionMapping{DefaultMappingID, 1, Mapping, NumOperands};
+ return getInstructionMapping(DefaultMappingID, 1, Mapping, NumOperands);
}
-RegisterBankInfo::InstructionMapping
+const RegisterBankInfo::InstructionMapping &
X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -164,7 +165,7 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Try the default logic for non-generic instructions that are either copies
// or already have some operands assigned to banks.
if (!isPreISelGenericOpcode(Opc)) {
- InstructionMapping Mapping = getInstrMappingImpl(MI);
+ const InstructionMapping &Mapping = getInstrMappingImpl(MI);
if (Mapping.isValid())
return Mapping;
}
@@ -193,10 +194,10 @@ X86RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Finally construct the computed mapping.
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping))
- return InstructionMapping();
+ return getInvalidInstructionMapping();
- return InstructionMapping{DefaultMappingID, /* Cost */ 1,
- getOperandsMapping(OpdsMapping), NumOperands};
+ return getInstructionMapping(DefaultMappingID, /* Cost */ 1,
+ getOperandsMapping(OpdsMapping), NumOperands);
}
void X86RegisterBankInfo::applyMappingImpl(
@@ -231,10 +232,10 @@ X86RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
if (!getInstrValueMapping(MI, OpRegBankIdx, OpdsMapping))
break;
- RegisterBankInfo::InstructionMapping Mapping = InstructionMapping{
- /*ID*/ 1, /*Cost*/ 1, getOperandsMapping(OpdsMapping), NumOperands};
+ const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping(
+ /*ID*/ 1, /*Cost*/ 1, getOperandsMapping(OpdsMapping), NumOperands);
InstructionMappings AltMappings;
- AltMappings.emplace_back(std::move(Mapping));
+ AltMappings.push_back(&Mapping);
return AltMappings;
}
default:
diff --git a/lib/Target/X86/X86RegisterBankInfo.h b/lib/Target/X86/X86RegisterBankInfo.h
index a1e01a9ab9497..e227880427f3c 100644
--- a/lib/Target/X86/X86RegisterBankInfo.h
+++ b/lib/Target/X86/X86RegisterBankInfo.h
@@ -46,8 +46,8 @@ private:
/// Get an instruction mapping.
/// \return An InstructionMappings with a statically allocated
/// OperandsMapping.
- static InstructionMapping getSameOperandsMapping(const MachineInstr &MI,
- bool isFP);
+ const InstructionMapping &getSameOperandsMapping(const MachineInstr &MI,
+ bool isFP) const;
/// Track the bank of each instruction operand(register)
static void
@@ -74,7 +74,8 @@ public:
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
- InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+ const InstructionMapping &
+ getInstrMapping(const MachineInstr &MI) const override;
};
} // namespace llvm
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 9ab751e2b002a..d66d39dcee174 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -139,12 +139,18 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
return X86II::MO_NO_FLAG;
assert(!isTargetCOFF());
+ const Function *F = dyn_cast_or_null<Function>(GV);
- if (isTargetELF())
+ if (isTargetELF()) {
+ if (is64Bit() && F && (CallingConv::X86_RegCall == F->getCallingConv()))
+ // According to the psABI, the PLT stub clobbers XMM8-XMM15.
+ // In the RegCall calling convention those registers are used for passing
+ // parameters. Thus we need to prevent lazy binding in RegCall.
+ return X86II::MO_GOTPCREL;
return X86II::MO_PLT;
+ }
if (is64Bit()) {
- auto *F = dyn_cast_or_null<Function>(GV);
if (F && F->hasFnAttribute(Attribute::NonLazyBind))
// If the function is marked as non-lazy, generate an indirect call
// which loads from the GOT directly. This avoids runtime overhead
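
A small sketch of the case the new ELF branch handles (assumes clang's regcall attribute on x86-64 ELF): arguments to an external regcall callee may live in XMM8-XMM15, which a lazy-binding PLT stub is allowed to clobber, so such calls are emitted through the GOT instead.

  // Calls to the external regcall callee get MO_GOTPCREL instead of MO_PLT.
  __attribute__((regcall)) double scale_sum(double A, double B);
  double caller(double X, double Y) { return scale_sum(X, Y); }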
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index b742fb472372c..f3b619a2956a0 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1426,25 +1426,25 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
};
static const CostTblEntry AVX1CostTbl[] = {
- { ISD::BITREVERSE, MVT::v4i64, 10 },
- { ISD::BITREVERSE, MVT::v8i32, 10 },
- { ISD::BITREVERSE, MVT::v16i16, 10 },
- { ISD::BITREVERSE, MVT::v32i8, 10 },
+ { ISD::BITREVERSE, MVT::v4i64, 12 }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v8i32, 12 }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v16i16, 12 }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v32i8, 12 }, // 2 x 128-bit Op + extract/insert
{ ISD::BSWAP, MVT::v4i64, 4 },
{ ISD::BSWAP, MVT::v8i32, 4 },
{ ISD::BSWAP, MVT::v16i16, 4 },
- { ISD::CTLZ, MVT::v4i64, 46 },
- { ISD::CTLZ, MVT::v8i32, 36 },
- { ISD::CTLZ, MVT::v16i16, 28 },
- { ISD::CTLZ, MVT::v32i8, 18 },
- { ISD::CTPOP, MVT::v4i64, 14 },
- { ISD::CTPOP, MVT::v8i32, 22 },
- { ISD::CTPOP, MVT::v16i16, 18 },
- { ISD::CTPOP, MVT::v32i8, 12 },
- { ISD::CTTZ, MVT::v4i64, 20 },
- { ISD::CTTZ, MVT::v8i32, 28 },
- { ISD::CTTZ, MVT::v16i16, 24 },
- { ISD::CTTZ, MVT::v32i8, 18 },
+ { ISD::CTLZ, MVT::v4i64, 48 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTLZ, MVT::v8i32, 38 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTLZ, MVT::v16i16, 30 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTLZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTPOP, MVT::v4i64, 16 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTPOP, MVT::v8i32, 24 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTPOP, MVT::v16i16, 20 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTPOP, MVT::v32i8, 14 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v4i64, 22 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert
+ { ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert
{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
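
The updated AVX1 entries all follow one rule, sketched below under the assumption stated in the new comments: a 256-bit op without AVX2 costs two 128-bit ops plus one extract and one insert of the upper half.

  // E.g. BITREVERSE v4i64 went 10 -> 12 and CTLZ v4i64 went 46 -> 48: the
  // old 2 * Cost128 plus the newly accounted extract/insert pair.
  constexpr int Avx1SplitCost(int Cost128) { return 2 * Cost128 + 2; }
  static_assert(Avx1SplitCost(5) == 12, "v4i64 BITREVERSE from 128-bit cost 5");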