Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp          |  23
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp           |   1
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp    |   6
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp         |  32
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp  |   5
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.cpp               |  82
-rw-r--r--  lib/Target/X86/Utils/X86ShuffleDecode.h                 |   8
-rw-r--r--  lib/Target/X86/X86FastISel.cpp                          |  69
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp                     |   3
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp                     |  29
-rw-r--r--  lib/Target/X86/X86FrameLowering.h                       |   4
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp                      |  13
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp                      | 940
-rw-r--r--  lib/Target/X86/X86ISelLowering.h                        |  47
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td                        | 541
-rw-r--r--  lib/Target/X86/X86InstrControl.td                       |   6
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td                 |  83
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp                         | 106
-rw-r--r--  lib/Target/X86/X86InstrInfo.td                          |  14
-rw-r--r--  lib/Target/X86/X86InstrSSE.td                           |  65
-rw-r--r--  lib/Target/X86/X86IntrinsicsInfo.h                      | 157
-rw-r--r--  lib/Target/X86/X86MachineFunctionInfo.h                 |  83
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp                      |  21
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp                  |  12
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.h                    |   3
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp                         |  21
-rw-r--r--  lib/Target/X86/X86Subtarget.h                           |  22
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp               |  37
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.h                 |  15
-rw-r--r--  lib/Target/X86/X86WinEHState.cpp                        |  31
30 files changed, 1669 insertions, 810 deletions
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 3cad9fa1e2ae..91b144a44824 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -878,6 +878,29 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::EXTRQI:
+ if (MI->getOperand(2).isImm() &&
+ MI->getOperand(3).isImm())
+ DecodeEXTRQIMask(MI->getOperand(2).getImm(),
+ MI->getOperand(3).getImm(),
+ ShuffleMask);
+
+ DestName = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+
+ case X86::INSERTQI:
+ if (MI->getOperand(3).isImm() &&
+ MI->getOperand(4).isImm())
+ DecodeINSERTQIMask(MI->getOperand(3).getImm(),
+ MI->getOperand(4).getImm(),
+ ShuffleMask);
+
+ DestName = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+
case X86::PMOVZXBWrr:
case X86::PMOVZXBDrr:
case X86::PMOVZXBQrr:
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 3e0dc1424609..629802f5dc5e 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -220,7 +220,6 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
case X86::PUSH32i8: return X86::PUSHi32;
case X86::PUSH16i8: return X86::PUSHi16;
case X86::PUSH64i8: return X86::PUSH64i32;
- case X86::PUSH64i16: return X86::PUSH64i32;
}
}
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 89f394582631..ddb764facdbf 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -34,14 +34,16 @@ public:
report_fatal_error(EC.message());
StringRef SymName = *SymNameOrErr;
- uint64_t SymAddr; SymI->getAddress(SymAddr);
+ ErrorOr<uint64_t> SymAddr = SymI->getAddress();
+ if (std::error_code EC = SymAddr.getError())
+ report_fatal_error(EC.message());
uint64_t SymSize = SymI->getSize();
int64_t Addend = *ELFRelocationRef(Rel).getAddend();
MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName);
// FIXME: check that the value is actually the same.
if (!Sym->isVariable())
- Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(*SymAddr, Ctx));
const MCExpr *Expr = nullptr;
// If hasAddend is true, then we need to add Addend (r_addend) to Expr.
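For context, the hunk above adopts the ErrorOr-based error-handling pattern that SymbolRef::getAddress() now requires: check getError() before dereferencing. A minimal sketch of the same pattern, assuming LLVM's Support headers and using a hypothetical getAddressOrError() stand-in rather than the real libObject accessor:
```cpp
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include <cstdint>
#include <system_error>

using namespace llvm;

// Hypothetical stand-in for a query that can fail, shaped like the
// ErrorOr<uint64_t> returned by SymbolRef::getAddress() in this patch.
static ErrorOr<uint64_t> getAddressOrError(bool Valid) {
  if (!Valid)
    return std::errc::invalid_argument;
  return UINT64_C(0x401000);
}

static uint64_t getAddressOrDie(bool Valid) {
  ErrorOr<uint64_t> SymAddr = getAddressOrError(Valid);
  // Same shape as the relocation-info code: bail out on error, then
  // dereference the ErrorOr to reach the wrapped value.
  if (std::error_code EC = SymAddr.getError())
    report_fatal_error(EC.message());
  return *SymAddr;
}

int main() { return getAddressOrDie(true) == 0x401000 ? 0 : 1; }
```
The key point is that the value is only read through operator* after the error check, which is exactly how the patched relocation-info code uses *SymAddr.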
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 431010d4cbc2..83b4091d7665 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -88,9 +88,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
if (CPUName.empty())
CPUName = "generic";
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
- return X;
+ return createX86MCSubtargetInfoImpl(TT, CPUName, ArchFS);
}
static MCInstrInfo *createX86MCInstrInfo() {
@@ -99,17 +97,14 @@ static MCInstrInfo *createX86MCInstrInfo() {
return X;
}
-static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
- Triple TheTriple(TT);
- unsigned RA = (TheTriple.getArch() == Triple::x86_64)
- ? X86::RIP // Should have dwarf #16.
- : X86::EIP; // Should have dwarf #8.
+static MCRegisterInfo *createX86MCRegisterInfo(const Triple &TT) {
+ unsigned RA = (TT.getArch() == Triple::x86_64)
+ ? X86::RIP // Should have dwarf #16.
+ : X86::EIP; // Should have dwarf #8.
MCRegisterInfo *X = new MCRegisterInfo();
- InitX86MCRegisterInfo(X, RA,
- X86_MC::getDwarfRegFlavour(TheTriple, false),
- X86_MC::getDwarfRegFlavour(TheTriple, true),
- RA);
+ InitX86MCRegisterInfo(X, RA, X86_MC::getDwarfRegFlavour(TT, false),
+ X86_MC::getDwarfRegFlavour(TT, true), RA);
X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
}
@@ -156,24 +151,23 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
-static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
+static MCCodeGenInfo *createX86MCCodeGenInfo(const Triple &TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- Triple T(TT);
- bool is64Bit = T.getArch() == Triple::x86_64;
+ bool is64Bit = TT.getArch() == Triple::x86_64;
if (RM == Reloc::Default) {
// Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
// Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
// use static relocation model by default.
- if (T.isOSDarwin()) {
+ if (TT.isOSDarwin()) {
if (is64Bit)
RM = Reloc::PIC_;
else
RM = Reloc::DynamicNoPIC;
- } else if (T.isOSWindows() && is64Bit)
+ } else if (TT.isOSWindows() && is64Bit)
RM = Reloc::PIC_;
else
RM = Reloc::Static;
@@ -186,13 +180,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
if (RM == Reloc::DynamicNoPIC) {
if (is64Bit)
RM = Reloc::PIC_;
- else if (!T.isOSDarwin())
+ else if (!TT.isOSDarwin())
RM = Reloc::Static;
}
// If we are on Darwin, disallow static relocation model in X86-64 mode, since
// the Mach-O file format doesn't support it.
- if (RM == Reloc::Static && T.isOSDarwin() && is64Bit)
+ if (RM == Reloc::Static && TT.isOSDarwin() && is64Bit)
RM = Reloc::PIC_;
// For static codegen, if we're not already set, use Small codegen.
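The createX86MCCodeGenInfo hunks above only thread a Triple through; the relocation-model defaulting logic itself is unchanged. For reference, a standalone sketch of that decision logic, with local enum stand-ins rather than the real Reloc:: enumerators:
```cpp
#include <cstdio>

enum RelocModel { Default, Static, PIC, DynamicNoPIC };

static RelocModel pickRelocModel(RelocModel RM, bool IsDarwin, bool IsWindows,
                                 bool Is64Bit) {
  if (RM == Default) {
    // Darwin defaults to PIC in 64-bit mode and dynamic-no-pic in 32-bit
    // mode; Win64 needs rip-relative addressing, so force PIC; otherwise
    // fall back to the static relocation model.
    if (IsDarwin)
      RM = Is64Bit ? PIC : DynamicNoPIC;
    else if (IsWindows && Is64Bit)
      RM = PIC;
    else
      RM = Static;
  }
  // ELF and X86-64 don't have a distinct dynamic-no-pic model.
  if (RM == DynamicNoPIC) {
    if (Is64Bit)
      RM = PIC;
    else if (!IsDarwin)
      RM = Static;
  }
  // Mach-O can't represent static relocations in 64-bit mode.
  if (RM == Static && IsDarwin && Is64Bit)
    RM = PIC;
  return RM;
}

int main() {
  std::printf("%d\n", pickRelocModel(Default, /*Darwin*/ true, false, true));  // PIC
  std::printf("%d\n", pickRelocModel(Default, false, /*Windows*/ true, true)); // PIC
  std::printf("%d\n", pickRelocModel(Default, false, false, false));           // Static
}
```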
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index c9479b62f7b6..9bfe999424fa 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -34,7 +34,7 @@ public:
if (std::error_code EC = SymNameOrErr.getError())
report_fatal_error(EC.message());
StringRef SymName = *SymNameOrErr;
- uint64_t SymAddr; SymI->getAddress(SymAddr);
+ uint64_t SymAddr = SymI->getValue();
any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl());
bool isPCRel = Obj->getAnyRelocationPCRel(RE);
@@ -90,8 +90,7 @@ public:
const MCExpr *LHS = MCSymbolRefExpr::create(Sym, Ctx);
symbol_iterator RSymI = Rel.getSymbol();
- uint64_t RSymAddr;
- RSymI->getAddress(RSymAddr);
+ uint64_t RSymAddr = RSymI->getValue();
ErrorOr<StringRef> RSymName = RSymI->getName();
if (std::error_code EC = RSymName.getError())
report_fatal_error(EC.message());
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index ef3318ba7580..cae865a40819 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -255,15 +255,13 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
- if (Imm & 0x88)
- return; // Not a shuffle
-
unsigned HalfSize = VT.getVectorNumElements() / 2;
for (unsigned l = 0; l != 2; ++l) {
- unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize;
+ unsigned HalfMask = Imm >> (l * 4);
+ unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
- ShuffleMask.push_back(i);
+ ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i);
}
}
@@ -431,4 +429,78 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
for (unsigned i = 1; i < NumElts; i++)
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
+
+void DecodeEXTRQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Only the bottom 6 bits are valid for each immediate.
+ Len &= 0x3F;
+ Idx &= 0x3F;
+
+ // We can only decode this bit extraction instruction as a shuffle if both the
+ // length and index work with whole bytes.
+ if (0 != (Len % 8) || 0 != (Idx % 8))
+ return;
+
+ // A length of zero is equivalent to a bit length of 64.
+ if (Len == 0)
+ Len = 64;
+
+ // If the length + index exceeds the bottom 64 bits the result is undefined.
+ if ((Len + Idx) > 64) {
+ ShuffleMask.append(16, SM_SentinelUndef);
+ return;
+ }
+
+ // Convert length and index to work with bytes.
+ Len /= 8;
+ Idx /= 8;
+
+ // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
+ // of the lower 64-bits. The upper 64-bits are undefined.
+ for (int i = 0; i != Len; ++i)
+ ShuffleMask.push_back(i + Idx);
+ for (int i = Len; i != 8; ++i)
+ ShuffleMask.push_back(SM_SentinelZero);
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(SM_SentinelUndef);
+}
+
+void DecodeINSERTQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask) {
+ // Only the bottom 6 bits are valid for each immediate.
+ Len &= 0x3F;
+ Idx &= 0x3F;
+
+ // We can only decode this bit insertion instruction as a shuffle if both the
+ // length and index work with whole bytes.
+ if (0 != (Len % 8) || 0 != (Idx % 8))
+ return;
+
+ // A length of zero is equivalent to a bit length of 64.
+ if (Len == 0)
+ Len = 64;
+
+ // If the length + index exceeds the bottom 64 bits the result is undefined.
+ if ((Len + Idx) > 64) {
+ ShuffleMask.append(16, SM_SentinelUndef);
+ return;
+ }
+
+ // Convert length and index to work with bytes.
+ Len /= 8;
+ Idx /= 8;
+
+ // INSERTQ: Extract lowest Len bytes from lower half of second source and
+ // insert over first source starting at Idx byte. The upper 64-bits are
+ // undefined.
+ for (int i = 0; i != Idx; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 0; i != Len; ++i)
+ ShuffleMask.push_back(i + 16);
+ for (int i = Idx + Len; i != 8; ++i)
+ ShuffleMask.push_back(i);
+ for (int i = 8; i != 16; ++i)
+ ShuffleMask.push_back(SM_SentinelUndef);
+}
+
} // llvm namespace
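To make the new SSE4A decoders concrete, here is a standalone sketch (plain C++, no LLVM headers) that mirrors the DecodeEXTRQIMask logic above; kUndef and kZero stand in for SM_SentinelUndef and SM_SentinelZero:
```cpp
#include <cstdio>
#include <vector>

static const int kUndef = -1; // stand-in for SM_SentinelUndef
static const int kZero = -2;  // stand-in for SM_SentinelZero

static void decodeEXTRQI(int Len, int Idx, std::vector<int> &Mask) {
  Len &= 0x3F;
  Idx &= 0x3F;
  if ((Len % 8) != 0 || (Idx % 8) != 0)
    return;                  // not expressible as a byte shuffle
  if (Len == 0)
    Len = 64;                // a bit length of 0 means 64
  if (Len + Idx > 64) {
    Mask.assign(16, kUndef); // result is undefined
    return;
  }
  Len /= 8;
  Idx /= 8;
  for (int i = 0; i != Len; ++i) // extracted bytes
    Mask.push_back(i + Idx);
  for (int i = Len; i != 8; ++i) // rest of the low 64 bits is zeroed
    Mask.push_back(kZero);
  for (int i = 8; i != 16; ++i)  // upper 64 bits are undefined
    Mask.push_back(kUndef);
}

int main() {
  std::vector<int> Mask;
  decodeEXTRQI(/*Len=*/16, /*Idx=*/8, Mask);
  for (int M : Mask) {
    if (M == kZero)
      std::printf("Z ");
    else if (M == kUndef)
      std::printf("U ");
    else
      std::printf("%d ", M);
  }
  std::printf("\n");
}
```
For a 16-bit extraction at bit offset 8 this prints "1 2 Z Z Z Z Z Z U U U U U U U U": source bytes 1 and 2 land in the low lanes, the rest of the low 64 bits are zeroed, and the upper 64 bits are undefined.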
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 14b69434806e..3d10d18e860e 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -100,6 +100,14 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a scalar float move instruction as a shuffle mask.
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask.
+void DecodeEXTRQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
+void DecodeINSERTQIMask(int Len, int Idx,
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
#endif
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 02645460b6a2..b4319c8bb04f 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -317,7 +317,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
}
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
- EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
+ EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
@@ -608,7 +608,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
// Prepare for inserting code in the local-value area.
SavePoint SaveInsertPt = enterLocalValueArea();
- if (TLI.getPointerTy() == MVT::i64) {
+ if (TLI.getPointerTy(DL) == MVT::i64) {
Opc = X86::MOV64rm;
RC = &X86::GR64RegClass;
@@ -690,13 +690,14 @@ redo_gep:
case Instruction::IntToPtr:
// Look past no-op inttoptrs.
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return X86SelectAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints.
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return X86SelectAddress(U->getOperand(0), AM);
break;
@@ -866,14 +867,14 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
case Instruction::IntToPtr:
// Look past no-op inttoptrs if its operand is in the same BB.
if (InMBB &&
- TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ TLI.getValueType(DL, U->getOperand(0)->getType()) ==
+ TLI.getPointerTy(DL))
return X86SelectCallAddress(U->getOperand(0), AM);
break;
case Instruction::PtrToInt:
// Look past no-op ptrtoints if its operand is in the same BB.
- if (InMBB &&
- TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
return X86SelectCallAddress(U->getOperand(0), AM);
break;
}
@@ -1000,7 +1001,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ValLocs;
@@ -1031,7 +1032,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
return false;
unsigned SrcReg = Reg + VA.getValNo();
- EVT SrcVT = TLI.getValueType(RV->getType());
+ EVT SrcVT = TLI.getValueType(DL, RV->getType());
EVT DstVT = VA.getValVT();
// Special handling for extended integers.
if (SrcVT != DstVT) {
@@ -1300,7 +1301,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
}
bool X86FastISel::X86SelectZExt(const Instruction *I) {
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
if (!TLI.isTypeLegal(DstVT))
return false;
@@ -1309,7 +1310,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
return false;
// Handle zero-extension from i1 to i8, which is common.
- MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
+ MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
if (SrcVT.SimpleTy == MVT::i1) {
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
@@ -1362,7 +1363,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
X86::CondCode CC;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
- EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+ EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());
// Try to optimize or fold the cmp.
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
@@ -1802,7 +1803,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
@@ -2004,7 +2005,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
if (NeedSwap)
std::swap(CmpLHS, CmpRHS);
- EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
return false;
} else {
@@ -2166,8 +2167,8 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
}
bool X86FastISel::X86SelectTrunc(const Instruction *I) {
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
// This code only handles truncation to byte.
if (DstVT != MVT::i8 && DstVT != MVT::i1)
@@ -2416,7 +2417,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
case Intrinsic::stackprotector: {
// Emit code to store the stack guard onto the stack.
- EVT PtrTy = TLI.getPointerTy();
+ EVT PtrTy = TLI.getPointerTy(DL);
const Value *Op1 = II->getArgOperand(0); // The guard's value.
const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
@@ -2735,7 +2736,7 @@ bool X86FastISel::fastLowerArguments() {
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
return false;
- EVT ArgVT = TLI.getValueType(ArgTy);
+ EVT ArgVT = TLI.getValueType(DL, ArgTy);
if (!ArgVT.isSimple()) return false;
switch (ArgVT.getSimpleVT().SimpleTy) {
default: return false;
@@ -2772,7 +2773,7 @@ bool X86FastISel::fastLowerArguments() {
unsigned GPRIdx = 0;
unsigned FPRIdx = 0;
for (auto const &Arg : F->args()) {
- MVT VT = TLI.getSimpleValueType(Arg.getType());
+ MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned SrcReg;
switch (VT.SimpleTy) {
@@ -3108,7 +3109,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ !GV->isStrongDefinitionForLinker() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
@@ -3240,8 +3241,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
return X86SelectSIToFP(I);
case Instruction::IntToPtr: // Deliberate fall-through.
case Instruction::PtrToInt: {
- EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
- EVT DstVT = TLI.getValueType(I->getType());
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
if (DstVT.bitsGT(SrcVT))
return X86SelectZExt(I);
if (DstVT.bitsLT(SrcVT))
@@ -3384,7 +3385,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
addDirectMem(MIB, AddrReg);
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
- TM.getDataLayout()->getPointerSize(), Align);
+ DL.getPointerSize(), Align);
MIB->addMemOperand(*FuncInfo.MF, MMO);
return ResultReg;
}
@@ -3411,17 +3412,17 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
if (TM.getRelocationModel() == Reloc::Static &&
- TLI.getPointerTy() == MVT::i64) {
+ TLI.getPointerTy(DL) == MVT::i64) {
// The displacement code could be more than 32 bits away so we need to use
// an instruction with a 64 bit immediate
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
ResultReg)
.addGlobalAddress(GV);
} else {
- unsigned Opc = TLI.getPointerTy() == MVT::i32
- ? (Subtarget->isTarget64BitILP32()
- ? X86::LEA64_32r : X86::LEA32r)
- : X86::LEA64r;
+ unsigned Opc =
+ TLI.getPointerTy(DL) == MVT::i32
+ ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
}
@@ -3431,7 +3432,7 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
}
unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
- EVT CEVT = TLI.getValueType(C->getType(), true);
+ EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
if (!CEVT.isSimple())
@@ -3463,11 +3464,11 @@ unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
X86AddressMode AM;
if (!X86SelectAddress(C, AM))
return 0;
- unsigned Opc = TLI.getPointerTy() == MVT::i32
- ? (Subtarget->isTarget64BitILP32()
- ? X86::LEA64_32r : X86::LEA32r)
- : X86::LEA64r;
- const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned Opc =
+ TLI.getPointerTy(DL) == MVT::i32
+ ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
+ : X86::LEA64r;
+ const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
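Besides threading the DataLayout through getValueType/getPointerTy, the FastISel hunks also reformat the LEA opcode selection used when materializing global and alloca addresses. A tiny standalone sketch of that selection logic (opcode names are plain strings here, not the real instruction enums):
```cpp
#include <cstdio>
#include <string>

// Mirrors the ternary in X86MaterializeGV / fastMaterializeAlloca above:
// 32-bit pointers use LEA32r, or LEA64_32r on x32 (64-bit ILP32); 64-bit
// pointers use LEA64r.
static std::string pickLEA(bool PtrIs32Bit, bool Is64BitILP32) {
  if (PtrIs32Bit)
    return Is64BitILP32 ? "LEA64_32r" : "LEA32r";
  return "LEA64r";
}

int main() {
  std::printf("%s\n", pickLEA(true, false).c_str());  // 32-bit target: LEA32r
  std::printf("%s\n", pickLEA(true, true).c_str());   // x32 target:    LEA64_32r
  std::printf("%s\n", pickLEA(false, false).c_str()); // 64-bit target: LEA64r
}
```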
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 40b9c8a863a3..36a8cdbab55b 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -301,8 +301,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
bool FPIsUsed = false;
static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!");
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0; i <= 6; ++i)
- if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
+ if (!MRI.reg_nodbg_empty(X86::FP0 + i)) {
FPIsUsed = true;
break;
}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 85c5b6499131..2a35c4cf31f3 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -90,7 +90,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
TRI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
+ MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MMI.callsUnwindInit() || MMI.callsEHReturn() ||
MFI->hasStackMap() || MFI->hasPatchPoint());
@@ -967,13 +967,26 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
if (X86FI->getRestoreBasePointer()) {
- // Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain.
+ // Stash value of base pointer. Saving RSP instead of EBP shortens
+ // dependence chain. Used by SjLj EH.
unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
}
+
+ if (X86FI->getHasSEHFramePtrSave()) {
+ // Stash the value of the frame pointer relative to the base pointer for
+ // Win32 EH. This supports Win32 EH, which does the inverse of the above:
+ // it recovers the frame pointer from the base pointer rather than the
+ // other way around.
+ unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
+ addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true,
+ getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex()))
+ .addReg(FramePtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
@@ -1412,9 +1425,11 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-void
-X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1436,7 +1451,7 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Spill the BasePtr if it's used.
if (TRI->hasBasePointer(MF))
- MF.getRegInfo().setPhysRegUsed(TRI->getBaseRegister());
+ SavedRegs.set(TRI->getBaseRegister());
}
static bool
@@ -1667,8 +1682,6 @@ void X86FrameLowering::adjustForSegmentedStacks(
.addImm(StackSize);
BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
.addImm(X86FI->getArgumentStackSize());
- MF.getRegInfo().setPhysRegUsed(Reg10);
- MF.getRegInfo().setPhysRegUsed(Reg11);
} else {
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index c274c8820149..495cfcd1c3f7 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -68,8 +68,8 @@ public:
void adjustForHiPEPrologue(MachineFunction &MF,
MachineBasicBlock &PrologueMBB) const override;
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = nullptr) const override;
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS = nullptr) const override;
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
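The determineCalleeSaves hook replaces processFunctionBeforeCalleeSavedScan; as the .cpp hunk shows, the override delegates to the base implementation and then marks any extra registers (here the base pointer) in SavedRegs instead of calling setPhysRegUsed. A toy analogue of that contract, using std::bitset in place of LLVM's BitVector and made-up register numbers:
```cpp
#include <bitset>
#include <cstdio>

static const unsigned kNumRegs = 64;
static const unsigned kBasePointerReg = 5; // hypothetical base-pointer number

struct FrameLoweringBase {
  virtual ~FrameLoweringBase() = default;
  virtual void determineCalleeSaves(std::bitset<kNumRegs> &SavedRegs) const {
    // The real base implementation marks the callee-saved registers the
    // function actually clobbers; elided in this sketch.
  }
};

struct X86LikeFrameLowering : FrameLoweringBase {
  bool HasBasePointer = true;
  void determineCalleeSaves(std::bitset<kNumRegs> &SavedRegs) const override {
    FrameLoweringBase::determineCalleeSaves(SavedRegs); // delegate first
    if (HasBasePointer)
      SavedRegs.set(kBasePointerReg); // spill the base pointer if it's used
  }
};

int main() {
  std::bitset<kNumRegs> SavedRegs;
  X86LikeFrameLowering TFL;
  TFL.determineCalleeSaves(SavedRegs);
  std::printf("base pointer saved: %d\n", (int)SavedRegs.test(kBasePointerReg));
}
```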
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 6b23e62a2d35..d5351d25d6ed 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -246,8 +246,9 @@ namespace {
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
- ? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex,
- TLI->getPointerTy())
+ ? CurDAG->getTargetFrameIndex(
+ AM.Base_FrameIndex,
+ TLI->getPointerTy(CurDAG->getDataLayout()))
: AM.Base_Reg;
Scale = getI8Imm(AM.Scale, DL);
Index = AM.IndexReg;
@@ -581,11 +582,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
void X86DAGToDAGISel::EmitSpecialCodeForMain() {
if (Subtarget->isTargetCygMing()) {
TargetLowering::ArgListTy Args;
+ auto &DL = CurDAG->getDataLayout();
TargetLowering::CallLoweringInfo CLI(*CurDAG);
CLI.setChain(CurDAG->getRoot())
.setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
- CurDAG->getExternalSymbol("__main", TLI->getPointerTy()),
+ CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
std::move(Args), 0);
const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
@@ -1025,7 +1027,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
switch (N.getOpcode()) {
default: break;
- case ISD::FRAME_ALLOC_RECOVER: {
+ case ISD::LOCAL_RECOVER: {
if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
// Use the symbol and don't prefix it.
@@ -1638,7 +1640,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode();
+ auto &DL = MF->getDataLayout();
+ return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
}
/// Atomic opcode table
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b16bd18aefaa..6e22ab30057c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -76,7 +76,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
: TargetLowering(TM), Subtarget(&STI) {
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
- TD = getDataLayout();
+ TD = TM.getDataLayout();
// Set up the TargetLowering object.
static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
@@ -505,7 +505,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(*TD), Custom);
// GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
@@ -825,6 +825,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
+ setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
+
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
@@ -944,6 +949,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
}
+ setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
+ setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
+
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1018,6 +1032,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SHL, MVT::v2i64, Custom);
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v2i64, Custom);
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
}
@@ -1141,6 +1156,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
+ setOperationAction(ISD::SMAX, MVT::v32i8, Legal);
+ setOperationAction(ISD::SMAX, MVT::v16i16, Legal);
+ setOperationAction(ISD::SMAX, MVT::v8i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::v32i8, Legal);
+ setOperationAction(ISD::UMAX, MVT::v16i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v8i32, Legal);
+ setOperationAction(ISD::SMIN, MVT::v32i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v16i16, Legal);
+ setOperationAction(ISD::SMIN, MVT::v8i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::v32i8, Legal);
+ setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v8i32, Legal);
+
// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
@@ -1184,6 +1212,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SHL, MVT::v4i64, Custom);
setOperationAction(ISD::SHL, MVT::v8i32, Custom);
+ setOperationAction(ISD::SRA, MVT::v4i64, Custom);
setOperationAction(ISD::SRA, MVT::v8i32, Custom);
// Custom lower several nodes for 256-bit types.
@@ -1376,6 +1405,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
+ setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
+ setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
+ setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
+ setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
+ setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
+ setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
+ setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
+ setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
+
setOperationAction(ISD::ADD, MVT::v8i64, Legal);
setOperationAction(ISD::ADD, MVT::v16i32, Legal);
@@ -1473,6 +1511,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, MVT::v32i16, Legal);
setOperationAction(ISD::SUB, MVT::v64i8, Legal);
setOperationAction(ISD::MUL, MVT::v32i16, Legal);
+ setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
+ setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
@@ -1492,6 +1532,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
+ setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
+ setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
+ setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
+ setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
+ setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
+ setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
+ setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
+
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
@@ -1531,6 +1580,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::v4i32, Legal);
setOperationAction(ISD::SRA, MVT::v2i64, Custom);
setOperationAction(ISD::SRA, MVT::v4i64, Custom);
+
+ setOperationAction(ISD::SMAX, MVT::v2i64, Legal);
+ setOperationAction(ISD::SMAX, MVT::v4i64, Legal);
+ setOperationAction(ISD::UMAX, MVT::v2i64, Legal);
+ setOperationAction(ISD::UMAX, MVT::v4i64, Legal);
+ setOperationAction(ISD::SMIN, MVT::v2i64, Legal);
+ setOperationAction(ISD::SMIN, MVT::v4i64, Legal);
+ setOperationAction(ISD::UMIN, MVT::v2i64, Legal);
+ setOperationAction(ISD::UMIN, MVT::v4i64, Legal);
}
// We want to custom lower some of our intrinsics.
@@ -1611,6 +1669,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::BUILD_VECTOR);
@@ -1652,7 +1711,8 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
-EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
@@ -1724,10 +1784,11 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
-unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
+unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const {
if (Subtarget->is64Bit()) {
// Max of 8 and alignment of type.
- unsigned TyAlign = TD->getABITypeAlignment(Ty);
+ unsigned TyAlign = DL.getABITypeAlignment(Ty);
if (TyAlign > 8)
return TyAlign;
return 8;
@@ -1840,7 +1901,8 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
if (!Subtarget->is64Bit())
// This doesn't have SDLoc associated with it, but is not really the
// same as a Register.
- return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy());
+ return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
+ getPointerTy(DAG.getDataLayout()));
return Table;
}
@@ -2032,7 +2094,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// false, then an sret argument may be implicitly inserted in the SelDAG. In
// either case FuncInfo->setSRetReturnReg() will have been called.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
- SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy());
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg,
+ getPointerTy(MF.getDataLayout()));
unsigned RetValReg
= (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ?
@@ -2041,7 +2104,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
Flag = Chain.getValue(1);
// RAX/EAX now acts like a return value.
- RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy()));
+ RetOps.push_back(
+ DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
RetOps[0] = Chain; // Update chain.
@@ -2288,11 +2352,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
- return DAG.getFrameIndex(FI, getPointerTy());
+ return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), isImmutable);
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, false, 0);
@@ -2471,7 +2535,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (Ins[i].Flags.isSRet()) {
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
- MVT PtrTy = getPointerTy();
+ MVT PtrTy = getPointerTy(DAG.getDataLayout());
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
@@ -2499,7 +2563,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MachineModuleInfo &MMI = MF.getMMI();
const Function *WinEHParent = nullptr;
- if (IsWin64 && MMI.hasWinEHFuncInfo(Fn))
+ if (MMI.hasWinEHFuncInfo(Fn))
WinEHParent = MMI.getWinEHParent(Fn);
bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
@@ -2561,11 +2625,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- getPointerTy());
+ getPointerTy(DAG.getDataLayout()));
unsigned Offset = FuncInfo->getVarArgsGPOffset();
for (SDValue Val : LiveGPRs) {
- SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
- DAG.getIntPtrConstant(Offset, dl));
+ SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ RSFIN, DAG.getIntPtrConstant(Offset, dl));
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo::getFixedStack(
@@ -2592,7 +2656,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- } else if (IsWinEHOutlined) {
+ } else if (IsWin64 && IsWinEHOutlined) {
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -2605,8 +2669,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Store the second integer parameter (rdx) into rsp+16 relative to the
// stack pointer at the entry of the function.
- SDValue RSFIN =
- DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy());
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
+ getPointerTy(DAG.getDataLayout()));
unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
Chain = DAG.getStore(
@@ -2680,14 +2744,21 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setArgumentStackSize(StackSize);
if (IsWinEHParent) {
- int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
- SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
- MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
- SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
- Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
- MachinePointerInfo::getFixedStack(UnwindHelpFI),
- /*isVolatile=*/true,
- /*isNonTemporal=*/false, /*Alignment=*/0);
+ if (Is64Bit) {
+ int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
+ SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
+ MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
+ SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64);
+ Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
+ MachinePointerInfo::getFixedStack(UnwindHelpFI),
+ /*isVolatile=*/true,
+ /*isNonTemporal=*/false, /*Alignment=*/0);
+ } else {
+ // Functions using Win32 EH are considered to have opaque SP adjustments
+ // to force local variables to be addressed from the frame or base
+ // pointers.
+ MFI->setHasOpaqueSPAdjustment(true);
+ }
}
return Chain;
@@ -2701,7 +2772,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain,
ISD::ArgFlagsTy Flags) const {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
- PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, PtrOff);
if (Flags.isByVal())
return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
@@ -2718,7 +2790,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
bool IsTailCall, bool Is64Bit,
int FPDiff, SDLoc dl) const {
// Adjust the Return address stack slot.
- EVT VT = getPointerTy();
+ EVT VT = getPointerTy(DAG.getDataLayout());
OutRetAddr = getReturnAddressFrameIndex(DAG);
// Load the "old" Return address.
@@ -2942,7 +3014,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(VA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
- getPointerTy());
+ getPointerTy(DAG.getDataLayout()));
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
}
@@ -2955,8 +3027,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (!isTailCall) {
- RegsToPass.push_back(std::make_pair(unsigned(X86::EBX),
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy())));
+ RegsToPass.push_back(std::make_pair(
+ unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
+ getPointerTy(DAG.getDataLayout()))));
} else {
// If we are tail calling and generating PIC/GOT style code load the
// address of the callee into ECX. The value in ecx is used as target of
@@ -3036,16 +3109,16 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
- FIN = DAG.getFrameIndex(FI, getPointerTy());
+ FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, dl,
- RegInfo->getStackRegister(),
- getPointerTy());
- Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+ StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
+ getPointerTy(DAG.getDataLayout()));
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ StackPtr, Source);
MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
ArgChain,
@@ -3064,8 +3137,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
- getPointerTy(), RegInfo->getSlotSize(),
- FPDiff, dl);
+ getPointerTy(DAG.getDataLayout()),
+ RegInfo->getSlotSize(), FPDiff, dl);
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -3106,7 +3179,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- (GV->isDeclaration() || GV->isWeakForLinker()) &&
+ !GV->isStrongDefinitionForLinker() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
@@ -3123,17 +3196,18 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ExtraLoad = true;
}
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
- G->getOffset(), OpFlags);
+ Callee = DAG.getTargetGlobalAddress(
+ GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
// Add a wrapper if needed.
if (WrapperKind != ISD::DELETED_NODE)
- Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
+ getPointerTy(DAG.getDataLayout()), Callee);
// Add extra indirection if needed.
if (ExtraLoad)
- Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(),
- false, false, false, 0);
+ Callee = DAG.getLoad(
+ getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
@@ -3152,8 +3226,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
OpFlags = X86II::MO_DARWIN_STUB;
}
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
- OpFlags);
+ Callee = DAG.getTargetExternalSymbol(
+ S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
} else if (Subtarget->isTarget64BitILP32() &&
Callee->getValueType(0) == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
@@ -3184,9 +3258,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+ const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
+
+ // If this is an invoke in a 32-bit function using an MSVC personality, assume
+ // the function clobbers all registers. If an exception is thrown, the runtime
+ // will not restore CSRs.
+ // FIXME: Model this more precisely so that we can register allocate across
+ // the normal edge and spill and fill across the exceptional edge.
+ if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
+ const Function *CallerFn = MF.getFunction();
+ EHPersonality Pers =
+ CallerFn->hasPersonalityFn()
+ ? classifyEHPersonality(CallerFn->getPersonalityFn())
+ : EHPersonality::Unknown;
+ if (isMSVCEHPersonality(Pers))
+ Mask = RegInfo->getNoPreservedMask();
+ }
+
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
@@ -3650,7 +3739,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
FuncInfo->setRAIndex(ReturnAddrIndex);
}
- return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
@@ -3881,6 +3970,15 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget->hasLZCNT();
}
+/// isUndefInRange - Return true if every element in Mask, beginning
+/// from position Pos and ending in Pos+Size is undef.
+static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
+ if (0 <= Mask[i])
+ return false;
+ return true;
+}
+
/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range (L, H].
static bool isUndefOrInRange(int Val, int Low, int Hi) {
@@ -4322,6 +4420,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// IsUnary to true if only uses one source. Note that this will set IsUnary for
/// shuffles which use a single input multiple times, and in those cases it will
/// adjust the mask to only have indices within that single input.
+/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
@@ -4451,6 +4550,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
+ // Mask only contains negative index if an element is zero.
+ if (std::any_of(Mask.begin(), Mask.end(),
+ [](int M){ return M == SM_SentinelZero; }))
+ return false;
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
@@ -4764,7 +4867,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
MVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
- MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType());
+ MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
@@ -5082,7 +5185,8 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
assert(C && "Invalid constant type");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
+ SDValue CP =
+ DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
MachinePointerInfo::getConstantPool(),
@@ -6857,6 +6961,136 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
return SDValue();
}
+/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
+static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ assert(!Zeroable.all() && "Fully zeroable shuffle mask");
+
+ int Size = Mask.size();
+ int HalfSize = Size / 2;
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ // Upper half must be undefined.
+ if (!isUndefInRange(Mask, HalfSize, HalfSize))
+ return SDValue();
+
+ // EXTRQ: Extract Len elements from lower half of source, starting at Idx.
+ // Remainder of lower half result is zero and upper half is all undef.
+ auto LowerAsEXTRQ = [&]() {
+ // Determine the extraction length from the part of the
+ // lower half that isn't zeroable.
+ int Len = HalfSize;
+ for (; Len >= 0; --Len)
+ if (!Zeroable[Len - 1])
+ break;
+ assert(Len > 0 && "Zeroable shuffle mask");
+
+ // Attempt to match first Len sequential elements from the lower half.
+ SDValue Src;
+ int Idx = -1;
+ for (int i = 0; i != Len; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ SDValue &V = (M < Size ? V1 : V2);
+ M = M % Size;
+
+ // All mask elements must be in the lower half.
+ if (M > HalfSize)
+ return SDValue();
+
+ if (Idx < 0 || (Src == V && Idx == (M - i))) {
+ Src = V;
+ Idx = M - i;
+ continue;
+ }
+ return SDValue();
+ }
+
+ if (Idx < 0)
+ return SDValue();
+
+ assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
+ int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
+ int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
+ return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ };
+
+ if (SDValue ExtrQ = LowerAsEXTRQ())
+ return ExtrQ;
+
+ // INSERTQ: Extract lowest Len elements from lower half of second source and
+ // insert over first source, starting at Idx.
+ // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
+ auto LowerAsInsertQ = [&]() {
+ for (int Idx = 0; Idx != HalfSize; ++Idx) {
+ SDValue Base;
+
+ // Attempt to match first source from mask before insertion point.
+ if (isUndefInRange(Mask, 0, Idx)) {
+ /* EMPTY */
+ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
+ Base = V1;
+ } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
+ Base = V2;
+ } else {
+ continue;
+ }
+
+ // Extend the extraction length looking to match both the insertion of
+ // the second source and the remaining elements of the first.
+ for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
+ SDValue Insert;
+ int Len = Hi - Idx;
+
+ // Match insertion.
+ if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
+ Insert = V1;
+ } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
+ Insert = V2;
+ } else {
+ continue;
+ }
+
+ // Match the remaining elements of the lower half.
+ if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
+ /* EMPTY */
+ } else if ((!Base || (Base == V1)) &&
+ isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
+ Base = V1;
+ } else if ((!Base || (Base == V2)) &&
+ isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
+ Size + Hi)) {
+ Base = V2;
+ } else {
+ continue;
+ }
+
+ // We may not have a base (first source) - this can safely be undefined.
+ if (!Base)
+ Base = DAG.getUNDEF(VT);
+
+ int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
+ int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
+ return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ }
+ }
+
+ return SDValue();
+ };
+
+ if (SDValue InsertQ = LowerAsInsertQ())
+ return InsertQ;
+
+ return SDValue();
+}
+
/// \brief Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
@@ -6864,7 +7098,7 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1,
/// features of the subtarget.
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
assert(Scale > 1 && "Need a scale to extend.");
int NumElements = VT.getVectorNumElements();
int EltBits = VT.getScalarSizeInBits();
@@ -6901,6 +7135,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
}
+ // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
+ // to 64-bits.
+ if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) {
+ assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
+ assert(VT.getSizeInBits() == 128 && "Unexpected vector width!");
+
+ SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
+ DAG.getConstant(EltBits, DL, MVT::i8),
+ DAG.getConstant(0, DL, MVT::i8)));
+ if (isUndefInRange(Mask, NumElements/2, NumElements/2))
+ return DAG.getNode(ISD::BITCAST, DL, VT, Lo);
+
+ SDValue Hi =
+ DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
+ DAG.getConstant(EltBits, DL, MVT::i8),
+ DAG.getConstant(EltBits, DL, MVT::i8)));
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
+ }
+
// If this would require more than 2 unpack instructions to expand, use
// pshufb when available. We can only use more than 2 unpack instructions
// when zero extending i8 elements which also makes it easier to use pshufb.
@@ -6991,7 +7247,7 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
return SDValue();
return lowerVectorShuffleAsSpecificZeroOrAnyExtend(
- DL, VT, Scale, AnyExt, InputV, Subtarget, DAG);
+ DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG);
};
// The widest scale possible for extending is to a 64-bit integer.
@@ -7166,9 +7422,9 @@ static SDValue lowerVectorShuffleAsElementInsertion(
V2 = DAG.getBitcast(MVT::v2i64, V2);
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v2i64, V2,
- DAG.getConstant(
- V2Index * EltVT.getSizeInBits()/8, DL,
- DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64)));
+ DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL,
+ DAG.getTargetLoweringInfo().getScalarShiftAmountTy(
+ DAG.getDataLayout(), VT)));
V2 = DAG.getBitcast(VT, V2);
}
}
@@ -8518,6 +8774,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, DAG))
return Shift;
+ // See if we can use SSE4A Extraction / Insertion.
+ if (Subtarget->hasSSE4A())
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ return V;
+
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2,
@@ -8670,6 +8931,11 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
return ZExt;
+ // See if we can use SSE4A Extraction / Insertion.
+ if (Subtarget->hasSSE4A())
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ return V;
+
int NumV2Elements =
std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; });
@@ -10613,12 +10879,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MaskEltVT.getSizeInBits());
Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT,
- getZeroVector(MaskVT, Subtarget, DAG, dl),
- Idx, DAG.getConstant(0, dl, getPointerTy()));
+ getZeroVector(MaskVT, Subtarget, DAG, dl), Idx,
+ DAG.getConstant(0, dl, PtrVT));
SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(),
- Perm, DAG.getConstant(0, dl, getPointerTy()));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Perm,
+ DAG.getConstant(0, dl, PtrVT));
}
return SDValue();
}
@@ -11009,17 +11276,16 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
else if (Subtarget->isPICStyleStubPIC())
OpFlag = X86II::MO_PIC_BASE_OFFSET;
- SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
- CP->getAlignment(),
- CP->getOffset(), OpFlag);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetConstantPool(
+ CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag);
SDLoc DL(CP);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
- Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), getPointerTy()),
- Result);
+ Result =
+ DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
return Result;
@@ -11042,17 +11308,16 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
else if (Subtarget->isPICStyleStubPIC())
OpFlag = X86II::MO_PIC_BASE_OFFSET;
- SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
- OpFlag);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
SDLoc DL(JT);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag)
- Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), getPointerTy()),
- Result);
+ Result =
+ DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
return Result;
}
@@ -11080,24 +11345,24 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
OpFlag = X86II::MO_DARWIN_NONLAZY;
}
- SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag);
SDLoc DL(Op);
- Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->is64Bit()) {
- Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), getPointerTy()),
- Result);
+ Result =
+ DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result);
}
// For symbols that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlag))
- Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
@@ -11112,20 +11377,19 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
- SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(), Offset,
- OpFlags);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
- Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
else
- Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
- Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
- Result);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
return Result;
@@ -11139,40 +11403,40 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
unsigned char OpFlags =
Subtarget->ClassifyGlobalReference(GV, DAG.getTarget());
CodeModel::Model M = DAG.getTarget().getCodeModel();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result;
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
// A direct static reference to a global.
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+ Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
Offset = 0;
} else {
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
+ Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
}
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
- Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+ Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
else
- Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+ Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
- Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
- Result);
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
}
// For globals that require a load from a stub to get the address, emit the
// load.
if (isGlobalStubReference(OpFlags))
- Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
+ Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(), false, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
if (Offset != 0)
- Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
- DAG.getConstant(Offset, dl, getPointerTy()));
+ Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result,
+ DAG.getConstant(Offset, dl, PtrVT));
return Result;
}
@@ -11336,22 +11600,23 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if (Subtarget->isTargetELF()) {
TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
if (Subtarget->is64Bit())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
- return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT);
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT);
case TLSModel::LocalDynamic:
- return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
+ return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT,
Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
- return LowerToTLSExecModel(
- GA, DAG, getPointerTy(), model, Subtarget->is64Bit(),
- DAG.getTarget().getRelocationModel() == Reloc::PIC_);
+ return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget->is64Bit(),
+ DAG.getTarget().getRelocationModel() ==
+ Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -11374,13 +11639,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
GA->getValueType(0),
GA->getOffset(), OpFlag);
- SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+ SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result);
// With PIC32, the address is actually $g + Offset.
if (PIC32)
- Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), getPointerTy()),
+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
Offset);
// Lowering the machine isd will make sure everything is in the right
@@ -11397,8 +11661,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// And our return value (tls address) is in the standard call return value
// location.
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
- return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
- Chain.getValue(1));
+ return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1));
}
if (Subtarget->isTargetKnownWindowsMSVC() ||
@@ -11426,50 +11689,50 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
: Type::getInt32PtrTy(*DAG.getContext(),
257));
- SDValue TlsArray =
- Subtarget->is64Bit()
- ? DAG.getIntPtrConstant(0x58, dl)
- : (Subtarget->isTargetWindowsGNU()
- ? DAG.getIntPtrConstant(0x2C, dl)
- : DAG.getExternalSymbol("_tls_array", getPointerTy()));
+ SDValue TlsArray = Subtarget->is64Bit()
+ ? DAG.getIntPtrConstant(0x58, dl)
+ : (Subtarget->isTargetWindowsGNU()
+ ? DAG.getIntPtrConstant(0x2C, dl)
+ : DAG.getExternalSymbol("_tls_array", PtrVT));
SDValue ThreadPointer =
- DAG.getLoad(getPointerTy(), dl, Chain, TlsArray,
- MachinePointerInfo(Ptr), false, false, false, 0);
+ DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr), false,
+ false, false, 0);
SDValue res;
if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) {
res = ThreadPointer;
} else {
// Load the _tls_index variable
- SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
+ SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT);
if (Subtarget->is64Bit())
- IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain, IDX,
+ IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX,
MachinePointerInfo(), MVT::i32, false, false,
false, 0);
else
- IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
- false, false, false, 0);
+ IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo(), false,
+ false, false, 0);
- SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), dl,
- getPointerTy());
- IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
+ auto &DL = DAG.getDataLayout();
+ SDValue Scale =
+ DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, PtrVT);
+ IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale);
- res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
+ res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX);
}
- res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(),
- false, false, false, 0);
+ res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo(), false, false,
+ false, 0);
// Get the offset of start of .tls section
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), X86II::MO_SECREL);
- SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA);
+ SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
- return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset);
}
llvm_unreachable("TLS not implemented for this target.");
@@ -11564,8 +11827,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
@@ -11614,7 +11878,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Tys = DAG.getVTList(MVT::Other);
SDValue Ops[] = {
Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
@@ -11656,7 +11921,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
// Build some magic constants.
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
Constant *C0 = ConstantDataVector::get(*Context, CV0);
- SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16);
SmallVector<Constant*,2> CV1;
CV1.push_back(
@@ -11666,7 +11932,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
- SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
+ SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
// Load the 64-bit value into an XMM register.
SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
@@ -11882,6 +12148,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if (Op.getValueType().isVector())
return lowerUINT_TO_FP_vec(Op, DAG);
@@ -11904,9 +12171,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
if (SrcVT == MVT::i32) {
- SDValue WordOff = DAG.getConstant(4, dl, getPointerTy());
- SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
- getPointerTy(), StackSlot, WordOff);
+ SDValue WordOff = DAG.getConstant(4, dl, PtrVT);
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, WordOff);
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, MachinePointerInfo(),
false, false, 0);
@@ -11940,22 +12206,20 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
APInt FF(32, 0x5F800000ULL);
// Check whether the sign bit is set.
- SDValue SignSet = DAG.getSetCC(dl,
- getSetCCResultType(*DAG.getContext(), MVT::i64),
- Op.getOperand(0),
- DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
+ SDValue SignSet = DAG.getSetCC(
+ dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);
// Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
SDValue FudgePtr = DAG.getConstantPool(
- ConstantInt::get(*DAG.getContext(), FF.zext(64)),
- getPointerTy());
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT);
// Get a pointer to FF if the sign bit was set, or to 0 otherwise.
SDValue Zero = DAG.getIntPtrConstant(0, dl);
SDValue Four = DAG.getIntPtrConstant(4, dl);
SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
Zero, Four);
- FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
@@ -11974,6 +12238,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
EVT DstTy = Op.getValueType();
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
@@ -11998,7 +12263,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
unsigned Opc;
if (!IsSigned && isIntegerTypeFTOL(DstTy))
@@ -12032,7 +12297,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
- StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
}
MachineMemOperand *MMO =
@@ -12403,7 +12668,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
Constant *C = ConstantInt::get(*Context, MaskElt);
C = ConstantVector::getSplat(NumElts, C);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy());
+ SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
@@ -12462,7 +12727,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
CV[0] = ConstantFP::get(*Context,
APFloat(Sem, APInt::getHighBitsSet(SizeInBits, 1)));
Constant *C = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
+ auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
@@ -12483,7 +12749,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1)));
}
C = ConstantVector::get(CV);
- CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16);
+ CPIdx = DAG.getConstantPool(C, PtrVT, 16);
SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
false, false, false, 16);
@@ -13352,8 +13618,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (hasMinMax) {
switch (SetCCOpcode) {
default: break;
- case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break;
- case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break;
+ case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
+ case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
}
if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
@@ -14172,8 +14438,8 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget,
SmallVector<SDValue, 8> Chains;
SDValue Ptr = Ld->getBasePtr();
- SDValue Increment =
- DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
SDValue Res = DAG.getUNDEF(LoadUnitVecVT);
for (unsigned i = 0; i < NumLoads; ++i) {
@@ -14613,7 +14879,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
EVT VT = Op.getNode()->getValueType(0);
bool Is64Bit = Subtarget->is64Bit();
- EVT SPTy = getPointerTy();
+ MVT SPTy = getPointerTy(DAG.getDataLayout());
if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -14630,8 +14896,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
"have nested arguments.");
}
- const TargetRegisterClass *AddrRegClass =
- getRegClassFor(getPointerTy());
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
@@ -14666,6 +14931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ auto PtrVT = getPointerTy(MF.getDataLayout());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
@@ -14674,8 +14940,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
- SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
- getPointerTy());
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV), false, false, 0);
}
@@ -14695,8 +14960,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store fp_offset
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(4, DL));
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
Store = DAG.getStore(Op.getOperand(0), DL,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL,
MVT::i32),
@@ -14704,20 +14968,16 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(4, DL));
- SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
- getPointerTy());
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
+ SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN,
MachinePointerInfo(SV, 8),
false, false, 0);
MemOps.push_back(Store);
// Store ptr to reg_save_area.
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(),
- FIN, DAG.getIntPtrConstant(8, DL));
- SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
- getPointerTy());
+ FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL));
+ SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
MachinePointerInfo(SV, 16), false, false, 0);
MemOps.push_back(Store);
@@ -14739,7 +14999,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
EVT ArgVT = Op.getNode()->getValueType(0);
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+ uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
uint8_t ArgMode;
// Decide which area this value should be read from.
@@ -14768,7 +15028,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32),
DAG.getConstant(ArgMode, dl, MVT::i8),
DAG.getConstant(Align, dl, MVT::i32)};
- SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+ SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
VTs, InstOps, MVT::i64,
MachinePointerInfo(SV),
@@ -14995,6 +15255,20 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
}
+static int getSEHRegistrationNodeSize(const Function *Fn) {
+ if (!Fn->hasPersonalityFn())
+ report_fatal_error(
+ "querying registration node size for function without personality");
+ // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
+ // WinEHStatePass for the full struct definition.
+ switch (classifyEHPersonality(Fn->getPersonalityFn())) {
+ case EHPersonality::MSVC_X86SEH: return 24;
+ case EHPersonality::MSVC_CXX: return 16;
+ default: break;
+ }
+ report_fatal_error("can only recover FP for MSVC EH personality functions");
+}
+
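The 24- and 16-byte constants above come from the on-stack registration node layouts that WinEHStatePass creates. A paraphrase of those layouts as plain structs (field names are approximate and sizes assume 32-bit pointers; this is only a sketch for orientation):

#include <cstdint>

struct SEHRegistrationNode {   // 32-bit SEH personality: 6 x 4 = 24 bytes
  uint32_t SavedESP;
  uint32_t ExceptionPointers;
  uint32_t Next;               // EXCEPTION_REGISTRATION chain link
  uint32_t Handler;
  uint32_t EncodedScopeTable;
  uint32_t TryLevel;
};

struct CXXEHRegistrationNode { // MSVC C++ EH personality: 4 x 4 = 16 bytes
  uint32_t SavedESP;
  uint32_t Next;
  uint32_t Handler;
  uint32_t TryLevel;           // the current EH state number
};

static_assert(sizeof(SEHRegistrationNode) == 24, "matches MSVC_X86SEH above");
static_assert(sizeof(CXXEHRegistrationNode) == 16, "matches MSVC_CXX above");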
/// When the 32-bit MSVC runtime transfers control to us, either to an outlined
/// function or when returning to a parent frame after catching an exception, we
/// recover the parent frame pointer by doing arithmetic on the incoming EBP.
@@ -15009,7 +15283,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
SDLoc dl;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy();
+ MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// It's possible that the parent function no longer has a personality function
// if the exceptional code was optimized away, in which case we just return
@@ -15017,15 +15291,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
if (!Fn->hasPersonalityFn())
return EntryEBP;
- // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
- // WinEHStatePass for the full struct definition.
- int RegNodeSize;
- switch (classifyEHPersonality(Fn->getPersonalityFn())) {
- default:
- report_fatal_error("can only recover FP for MSVC EH personality functions");
- case EHPersonality::MSVC_X86SEH: RegNodeSize = 24; break;
- case EHPersonality::MSVC_CXX: RegNodeSize = 16; break;
- }
+ int RegNodeSize = getSEHRegistrationNodeSize(Fn);
// Get an MCSymbol that will ultimately resolve to the frame offset of the EH
// registration.
@@ -15034,7 +15300,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
GlobalValue::getRealLinkageName(Fn->getName()));
SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
SDValue RegNodeFrameOffset =
- DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSymVal);
+ DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);
// RegNodeBase = EntryEBP - RegNodeSize
// ParentFP = RegNodeBase - RegNodeFrameOffset
@@ -15059,6 +15325,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
+ case INTR_TYPE_4OP:
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Op.getOperand(4));
case INTR_TYPE_1OP_MASK_RM: {
SDValue Src = Op.getOperand(1);
SDValue PassThru = Op.getOperand(2);
@@ -15143,7 +15412,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
SDValue Rnd;
if (Op.getNumOperands() == 6)
Rnd = Op.getOperand(5);
- else
+ else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
Src1, Src2, Rnd),
@@ -15173,7 +15442,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
Src1, Src2, Src3),
Mask, PassThru, Subtarget, DAG);
}
- case VPERM_3OP_MASKZ:
+ case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:
case FMA_OP_MASK3:
case FMA_OP_MASKZ:
@@ -15499,6 +15768,19 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
"llvm.x86.seh.recoverfp must take a function as the first argument");
return recoverFramePointer(DAG, Fn, IncomingFPOp);
}
+
+ case Intrinsic::localaddress: {
+ // Returns one of the stack, base, or frame pointer registers, depending on
+ // which is used to reference local variables.
+ MachineFunction &MF = DAG.getMachineFunction();
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned Reg;
+ if (RegInfo->hasBasePointer(MF))
+ Reg = RegInfo->getBaseRegister();
+ else // This function handles the SP or FP case.
+ Reg = RegInfo->getPtrSizedFrameRegister(MF);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+ }
}
}
@@ -15712,34 +15994,60 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
+ const Function *Fn = MF.getFunction();
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
+ assert(Subtarget->getFrameLowering()->hasFP(MF) &&
+ "using llvm.x86.seh.restoreframe requires a frame pointer");
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT VT = TLI.getPointerTy();
+ MVT VT = TLI.getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FrameReg =
RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
unsigned SPReg = RegInfo->getStackRegister();
+ unsigned SlotSize = RegInfo->getSlotSize();
// Get incoming EBP.
SDValue IncomingEBP =
DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
- // Load [EBP-24] into SP.
- SDValue SPAddr =
- DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, DAG.getConstant(-24, dl, VT));
+ // SP is saved in the first field of every registration node, so load
+ // [EBP-RegNodeSize] into SP.
+ int RegNodeSize = getSEHRegistrationNodeSize(Fn);
+ SDValue SPAddr = DAG.getNode(ISD::ADD, dl, VT, IncomingEBP,
+ DAG.getConstant(-RegNodeSize, dl, VT));
SDValue NewSP =
DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false,
false, VT.getScalarSizeInBits() / 8);
Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP);
- // FIXME: Restore the base pointer in case of stack realignment!
+ if (!RegInfo->needsStackRealignment(MF)) {
+ // Adjust EBP to point back to the original frame position.
+ SDValue NewFP = recoverFramePointer(DAG, Fn, IncomingEBP);
+ Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
+ } else {
+ assert(RegInfo->hasBasePointer(MF) &&
+ "functions with Win32 EH must use frame or base pointer register");
+
+ // Reload the base pointer (ESI) with the adjusted incoming EBP.
+ SDValue NewBP = recoverFramePointer(DAG, Fn, IncomingEBP);
+ Chain = DAG.getCopyToReg(Chain, dl, RegInfo->getBaseRegister(), NewBP);
+
+ // Reload the spilled EBP value, now that the stack and base pointers are
+ // set up.
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ X86FI->setHasSEHFramePtrSave(true);
+ int FI = MF.getFrameInfo()->CreateSpillStackObject(SlotSize, SlotSize);
+ X86FI->setSEHFramePtrSaveIndex(FI);
+ SDValue NewFP = DAG.getLoad(VT, dl, Chain, DAG.getFrameIndex(FI, VT),
+ MachinePointerInfo(), false, false, false,
+ VT.getScalarSizeInBits() / 8);
+ Chain = DAG.getCopyToReg(NewFP, dl, FrameReg, NewFP);
+ }
- // Adjust EBP to point back to the original frame position.
- SDValue NewFP = recoverFramePointer(DAG, MF.getFunction(), IncomingEBP);
- Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP);
return Chain;
}
@@ -15910,7 +16218,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -15969,14 +16277,36 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
-unsigned X86TargetLowering::getRegisterByName(const char* RegName,
- EVT VT) const {
+unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const {
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
+ const MachineFunction &MF = DAG.getMachineFunction();
+
unsigned Reg = StringSwitch<unsigned>(RegName)
.Case("esp", X86::ESP)
.Case("rsp", X86::RSP)
+ .Case("ebp", X86::EBP)
+ .Case("rbp", X86::RBP)
.Default(0);
+
+ if (Reg == X86::EBP || Reg == X86::RBP) {
+ if (!TFI.hasFP(MF))
+ report_fatal_error("register " + StringRef(RegName) +
+ " is allocatable: function has no frame pointer");
+#ifndef NDEBUG
+ else {
+ const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned FrameReg =
+ RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
+ assert((FrameReg == X86::EBP || FrameReg == X86::RBP) &&
+ "Invalid Frame Register!");
+ }
+#endif
+ }
+
if (Reg)
return Reg;
+
report_fatal_error("Invalid register name global variable");
}
@@ -15992,7 +16322,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Handler = Op.getOperand(2);
SDLoc dl (Op);
- EVT PtrVT = getPointerTy();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
@@ -16211,7 +16541,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// Save FP Control Word to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
- SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
+ SDValue StackSlot =
+ DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
@@ -16572,7 +16903,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
}
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
- getPointerTy());
+ getPointerTy(DAG.getDataLayout()));
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
@@ -16642,9 +16973,9 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
// If we have a signed multiply but no PMULDQ fix up the high parts of a
// unsigned multiply.
if (IsSigned && !Subtarget->hasSSE41()) {
- SDValue ShAmt =
- DAG.getConstant(31, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(VT));
+ SDValue ShAmt = DAG.getConstant(
+ 31, dl,
+ DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout()));
SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
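For context, the T1/T2 terms in this hunk implement the usual correction that derives a signed high multiply from an unsigned one: mulhs(a, b) = mulhu(a, b) - (a < 0 ? b : 0) - (b < 0 ? a : 0), with (a >> 31) & b producing each correction term. A small scalar check of that identity (illustrative only; assumes the usual arithmetic behaviour of >> on signed values):

#include <cassert>
#include <cstdint>

int32_t mulhs_from_mulhu(int32_t a, int32_t b) {
  // Unsigned high 32 bits of the product.
  uint32_t hu = (uint32_t)(((uint64_t)(uint32_t)a * (uint32_t)b) >> 32);
  uint32_t t1 = (uint32_t)(a >> 31) & (uint32_t)b;  // b if a is negative, else 0
  uint32_t t2 = (uint32_t)(b >> 31) & (uint32_t)a;  // a if b is negative, else 0
  return (int32_t)(hu - t1 - t2);
}

int main() {
  assert(mulhs_from_mulhu(-5, 7) == (int32_t)(((int64_t)-5 * 7) >> 32));
  assert(mulhs_from_mulhu(123456789, -987654321) ==
         (int32_t)(((int64_t)123456789 * -987654321) >> 32));
  return 0;
}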
@@ -16717,6 +17048,38 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI :
(Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI;
+ auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
+ MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
+ SDValue Ex = DAG.getBitcast(ExVT, R);
+
+ if (ShiftAmt >= 32) {
+ // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32.
+ SDValue Upper =
+ getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG);
+ SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
+ ShiftAmt - 32, DAG);
+ if (VT == MVT::v2i64)
+ Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3});
+ if (VT == MVT::v4i64)
+ Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
+ {9, 1, 11, 3, 13, 5, 15, 7});
+ } else {
+ // SRA upper i32, SRL whole i64 and select lower i32.
+ SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
+ ShiftAmt, DAG);
+ SDValue Lower =
+ getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);
+ Lower = DAG.getBitcast(ExVT, Lower);
+ if (VT == MVT::v2i64)
+ Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3});
+ if (VT == MVT::v4i64)
+ Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
+ {8, 1, 10, 3, 12, 5, 14, 7});
+ }
+ return DAG.getBitcast(VT, Ex);
+ };
+
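SSE2/AVX2 provide no 64-bit arithmetic right shift, so the ArithmeticShiftRight64 lambda above synthesizes one from 32-bit pieces. The same decomposition on a scalar i64, as a self-contained sanity check (illustrative; the vector code uses a bitcast to v4i32 plus shuffles rather than the manual word splitting below, and the usual arithmetic behaviour of >> on signed values is assumed):

#include <cassert>
#include <cstdint>

int64_t sra64_via_i32(int64_t v, unsigned amt) {
  int32_t hi = (int32_t)((uint64_t)v >> 32);  // upper dword, as after the v4i32 bitcast
  uint32_t newLo, newHi;
  if (amt >= 32) {
    // Upper half becomes the sign splat; lower half is the old upper half
    // shifted arithmetically by amt - 32.
    newHi = (uint32_t)(hi >> 31);
    newLo = (uint32_t)(hi >> (amt - 32));
  } else {
    // SRA the upper half, SRL the whole i64 and keep its lower half.
    newHi = (uint32_t)(hi >> amt);
    newLo = (uint32_t)((uint64_t)v >> amt);
  }
  return (int64_t)(((uint64_t)newHi << 32) | newLo);
}

int main() {
  for (unsigned amt = 1; amt < 64; ++amt) {
    assert(sra64_via_i32(-123456789012345LL, amt) == (-123456789012345LL >> amt));
    assert(sra64_via_i32(0x7edcba9876543210LL, amt) == (0x7edcba9876543210LL >> amt));
  }
  return 0;
}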
// Optimize shl/srl/sra with constant shift amount.
if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
@@ -16725,6 +17088,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
+ // i64 SRA needs to be performed as partial shifts.
+ if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
+ Op.getOpcode() == ISD::SRA)
+ return ArithmeticShiftRight64(ShiftAmt);
+
if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) {
unsigned NumElts = VT.getVectorNumElements();
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
@@ -16808,7 +17176,12 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
if (ShAmt != ShiftAmt)
return SDValue();
}
- return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
+
+ if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+ return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
+
+ if (Op.getOpcode() == ISD::SRA)
+ return ArithmeticShiftRight64(ShiftAmt);
}
return SDValue();
@@ -16890,7 +17263,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
if (Vals[j] != Amt.getOperand(i + j))
return SDValue();
}
- return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
+
+ if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
+ return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
}
return SDValue();
}
@@ -17042,6 +17417,53 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
}
}
+ // v4i32 Non Uniform Shifts.
+ // If the shift amount is constant we can shift each lane using the SSE2
+ // immediate shifts, else we need to zero-extend each lane to the lower i64
+ // and shift using the SSE2 variable shifts.
+ // The separate results can then be blended together.
+ if (VT == MVT::v4i32) {
+ unsigned Opc = Op.getOpcode();
+ SDValue Amt0, Amt1, Amt2, Amt3;
+ if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
+ Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0});
+ Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1});
+ Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2});
+ Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3});
+ } else {
+ // ISD::SHL is handled above but we include it here for completeness.
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown target vector shift node");
+ case ISD::SHL:
+ Opc = X86ISD::VSHL;
+ break;
+ case ISD::SRL:
+ Opc = X86ISD::VSRL;
+ break;
+ case ISD::SRA:
+ Opc = X86ISD::VSRA;
+ break;
+ }
+ // The SSE2 shifts use the lower i64 as the same shift amount for
+ // all lanes and the upper i64 is ignored. These shuffle masks
+ // optimally zero-extend each lane on SSE2/SSE41/AVX targets.
+ SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
+ Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1});
+ Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1});
+ Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1});
+ Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1});
+ }
+
+ SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0);
+ SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1);
+ SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2);
+ SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3);
+ SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1});
+ SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7});
+ return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
+ }
+
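The comment block above describes the whole v4i32 trick: build one shift amount per lane, shift the full vector four times, and blend lane i of result i back together with the shuffle masks shown. A scalar rendering of those steps (illustrative only; assumes every amount is below 32, and the masks in the comments are the ones used in the code above):

#include <array>
#include <cassert>
#include <cstdint>

using V4 = std::array<uint32_t, 4>;

// One uniform shift of every lane, i.e. what a single X86ISD::VSRL with a
// splatted/zero-extended amount performs.
static V4 srlAll(V4 v, uint32_t amt) {
  return {v[0] >> amt, v[1] >> amt, v[2] >> amt, v[3] >> amt};
}

// Four uniform shifts followed by the two-step blend done with shuffles above.
V4 srlPerLane(V4 r, V4 amt) {
  V4 r0 = srlAll(r, amt[0]), r1 = srlAll(r, amt[1]);
  V4 r2 = srlAll(r, amt[2]), r3 = srlAll(r, amt[3]);
  V4 r02 = {r0[0], 0, r2[2], 0};            // shuffle {0, -1, 6, -1}
  V4 r13 = {0, r1[1], 0, r3[3]};            // shuffle {-1, 1, -1, 7}
  return {r02[0], r13[1], r02[2], r13[3]};  // shuffle {0, 5, 2, 7}
}

int main() {
  V4 out = srlPerLane({0x80000000u, 0xffffu, 0x12345678u, 8u}, {31, 4, 0, 3});
  assert(out[0] == 1 && out[1] == 0xfffu && out[2] == 0x12345678u && out[3] == 1u);
  return 0;
}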
if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
unsigned ShiftOpcode = Op->getOpcode();
@@ -17944,7 +18366,8 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
// the results are returned via SRet in memory.
const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret";
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy());
+ SDValue Callee =
+ DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));
Type *RetTy = isF64
? (Type*)StructType::get(ArgTy, ArgTy, nullptr)
@@ -18443,10 +18866,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
- case X86ISD::UMAX: return "X86ISD::UMAX";
- case X86ISD::UMIN: return "X86ISD::UMIN";
- case X86ISD::SMAX: return "X86ISD::SMAX";
- case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
@@ -18456,6 +18875,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FMINC: return "X86ISD::FMINC";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
+ case X86ISD::EXTRQI: return "X86ISD::EXTRQI";
+ case X86ISD::INSERTQI: return "X86ISD::INSERTQI";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
@@ -18478,6 +18899,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
+ case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
@@ -18594,16 +19016,19 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
+ case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
+ case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND";
+ case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND";
}
return nullptr;
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
-bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty,
+bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
@@ -19555,7 +19980,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *AddrRegClass =
- getRegClassFor(getPointerTy());
+ getRegClassFor(getPointerTy(MF->getDataLayout()));
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
@@ -19750,7 +20175,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MemOpndSlot = CurOp;
- MVT PVT = getPointerTy();
+ MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -19882,7 +20307,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
- MVT PVT = getPointerTy();
+ MVT PVT = getPointerTy(MF->getDataLayout());
assert((PVT == MVT::i64 || PVT == MVT::i32) &&
"Invalid Pointer Size!");
@@ -21377,7 +21802,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
// alignment is valid.
unsigned Align = LN0->getAlignment();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
EltVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT))
@@ -21513,14 +21938,15 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector);
- EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
+ auto &DL = DAG.getDataLayout();
+ EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(DL);
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(0, dl, VecIdxTy));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst,
DAG.getConstant(1, dl, VecIdxTy));
- SDValue ShAmt = DAG.getConstant(32, dl,
- DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64));
+ SDValue ShAmt = DAG.getConstant(
+ 32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64, DL));
Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt));
@@ -21539,10 +21965,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Replace each use (extract) with a load of the appropriate element.
for (unsigned i = 0; i < 4; ++i) {
uint64_t Offset = EltSize * i;
- SDValue OffsetVal = DAG.getConstant(Offset, dl, TLI.getPointerTy());
+ auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ SDValue OffsetVal = DAG.getConstant(Offset, dl, PtrVT);
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
- StackPtr, OffsetVal);
+ SDValue ScalarAddr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, OffsetVal);
// Load the scalar.
Vals[i] = DAG.getLoad(ElementType, dl, Ch,
@@ -21622,16 +22049,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- Opc = hasUnsigned ? X86ISD::UMIN : 0u; break;
+ Opc = hasUnsigned ? ISD::UMIN : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- Opc = hasUnsigned ? X86ISD::UMAX : 0u; break;
+ Opc = hasUnsigned ? ISD::UMAX : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- Opc = hasSigned ? X86ISD::SMIN : 0u; break;
+ Opc = hasSigned ? ISD::SMIN : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- Opc = hasSigned ? X86ISD::SMAX : 0u; break;
+ Opc = hasSigned ? ISD::SMAX : 0; break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
@@ -21640,16 +22067,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
- Opc = hasUnsigned ? X86ISD::UMAX : 0u; break;
+ Opc = hasUnsigned ? ISD::UMAX : 0; break;
case ISD::SETUGT:
case ISD::SETUGE:
- Opc = hasUnsigned ? X86ISD::UMIN : 0u; break;
+ Opc = hasUnsigned ? ISD::UMIN : 0; break;
case ISD::SETLT:
case ISD::SETLE:
- Opc = hasSigned ? X86ISD::SMAX : 0u; break;
+ Opc = hasSigned ? ISD::SMAX : 0; break;
case ISD::SETGT:
case ISD::SETGE:
- Opc = hasSigned ? X86ISD::SMIN : 0u; break;
+ Opc = hasSigned ? ISD::SMIN : 0; break;
}
}
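The two switches above are mirror images because the select arms can appear in either order: "x CC y ? x : y" picks a minimum or maximum directly, while "x CC y ? y : x" picks the opposite one. A quick scalar illustration of why the reversed-arms case flips UMIN/UMAX and SMIN/SMAX (illustrative only):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 7, y = 42;
  // Direct arms: "x <= y ? x : y" is an unsigned minimum (ISD::UMIN).
  assert(((x <= y) ? x : y) == std::min(x, y));
  // Reversed arms: "x <= y ? y : x" is an unsigned maximum (ISD::UMAX),
  // which is why the second switch maps the same condition codes to the
  // opposite opcodes.
  assert(((x <= y) ? y : x) == std::max(x, y));
  return 0;
}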
@@ -22106,7 +22533,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Check if the selector will be produced by CMPP*/PCMP*
Cond.getOpcode() == ISD::SETCC &&
// Check if SETCC has already been promoted
- TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT) {
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
+ CondVT) {
bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
@@ -22826,7 +23254,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
// We shift all of the values by one. In many cases we do not have
// hardware support for this operation. This is better expressed as an ADD
// of two values.
- if (N1SplatC->getZExtValue() == 1)
+ if (N1SplatC->getAPIntValue() == 1)
return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
@@ -23478,7 +23906,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDValue Ptr = Ld->getBasePtr();
- SDValue Increment = DAG.getConstant(16, dl, TLI.getPointerTy());
+ SDValue Increment =
+ DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout()));
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
NumElems/2);
@@ -23687,7 +24116,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl);
SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl);
- SDValue Stride = DAG.getConstant(16, dl, TLI.getPointerTy());
+ SDValue Stride =
+ DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout()));
SDValue Ptr0 = St->getBasePtr();
SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride);
@@ -23760,8 +24190,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff);
SmallVector<SDValue, 8> Chains;
- SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl,
- TLI.getPointerTy());
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, dl,
+ TLI.getPointerTy(DAG.getDataLayout()));
SDValue Ptr = St->getBasePtr();
// Perform one or more big stores into memory.
@@ -24659,6 +25089,31 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT InVT = Op0.getValueType();
+ EVT InSVT = InVT.getScalarType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
+ // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
+ if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
+ SDLoc dl(N);
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
+ InVT.getVectorNumElements());
+ SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
+
+ if (TLI.isOperationLegal(ISD::UINT_TO_FP, DstVT))
+ return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
+
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
+ }
+
+ return SDValue();
+}
+
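The combine above is sound because every u8/u16 value zero-extended to i32 is non-negative, so converting the widened value as signed produces exactly the same floating-point result as the original unsigned conversion. A quick standalone check of that equivalence (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t v = 0; v <= 0xffff; ++v) {
    int32_t widened = (int32_t)v;       // ZERO_EXTEND to i32: always non-negative
    float viaSigned = (float)widened;   // SINT_TO_FP on the widened value
    float viaUnsigned = (float)v;       // the original UINT_TO_FP semantics
    assert(viaSigned == viaUnsigned);
  }
  return 0;
}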
static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
// First try to optimize away the conversion entirely when it's
@@ -24913,6 +25368,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, Subtarget);
+ case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
@@ -25135,7 +25591,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
(matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
AsmPieces.clear();
- const std::string &ConstraintsStr = IA->getConstraintString();
+ StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
@@ -25149,7 +25605,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
AsmPieces.clear();
- const std::string &ConstraintsStr = IA->getConstraintString();
+ StringRef ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
array_pod_sort(AsmPieces.begin(), AsmPieces.end());
if (clobbersFlagRegisters(AsmPieces))
@@ -25176,7 +25632,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
X86TargetLowering::ConstraintType
-X86TargetLowering::getConstraintType(const std::string &Constraint) const {
+X86TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'R':
@@ -25508,7 +25964,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
+ StringRef Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to an LLVM
// register class.
@@ -25717,8 +26173,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return Res;
}
-int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty,
+int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
unsigned AS) const {
// Scaling factors are not free at all.
// An indexed folded instruction, i.e., inst (reg1, reg2, scale),
@@ -25738,7 +26194,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
// vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
- if (isLegalAddressingMode(AM, Ty, AS))
+ if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
return AM.Scale != 0;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 17660891635c..723d5304495c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -233,12 +233,6 @@ namespace llvm {
/// Floating point horizontal sub.
FHSUB,
- /// Unsigned integer max and min.
- UMAX, UMIN,
-
- /// Signed integer max and min.
- SMAX, SMIN,
-
// Integer absolute value
ABS,
@@ -298,8 +292,8 @@ namespace llvm {
// Vector FP round.
VFPROUND,
- // Vector signed integer to double.
- CVTDQ2PD,
+ // Vector signed/unsigned integer to double.
+ CVTDQ2PD, CVTUDQ2PD,
// 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,
@@ -400,10 +394,15 @@ namespace llvm {
VINSERT,
VEXTRACT,
+ /// SSE4A Extraction and Insertion.
+ EXTRQI, INSERTQI,
+
// Vector multiply packed unsigned doubleword integers
PMULUDQ,
// Vector multiply packed signed doubleword integers
PMULDQ,
+ // Vector Multiply Packed Signed Integers with Round and Scale
+ MULHRS,
// FMA nodes
FMADD,
@@ -429,6 +428,9 @@ namespace llvm {
//with rounding mode
SINT_TO_FP_RND,
UINT_TO_FP_RND,
+
+ // Vector float/double to signed/unsigned integer.
+ FP_TO_SINT_RND, FP_TO_UINT_RND,
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
@@ -599,7 +601,9 @@ namespace llvm {
unsigned getJumpTableEncoding() const override;
bool useSoftFloat() const override;
- MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
+ return MVT::i8;
+ }
const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
@@ -617,7 +621,8 @@ namespace llvm {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
- unsigned getByValTypeAlignment(Type *Ty) const override;
+ unsigned getByValTypeAlignment(Type *Ty,
+ const DataLayout &DL) const override;
/// Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
@@ -685,7 +690,8 @@ namespace llvm {
bool isCheapToSpeculateCtlz() const override;
/// Return the value type to use for ISD::SETCC.
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
/// Determine which of the bits specified in Mask are known to be either
/// zero or one and return them in the KnownZero/KnownOne bitsets.
@@ -707,8 +713,7 @@ namespace llvm {
bool ExpandInlineAsm(CallInst *CI) const override;
- ConstraintType
- getConstraintType(const std::string &Constraint) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
@@ -726,8 +731,8 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
+ unsigned
+ getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "i")
return InlineAsm::Constraint_i;
else if (ConstraintCode == "o")
@@ -745,13 +750,12 @@ namespace llvm {
/// error, this returns a register number of 0.
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
- const std::string &Constraint,
- MVT VT) const override;
+ StringRef Constraint, MVT VT) const override;
/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS) const override;
/// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
@@ -770,7 +774,7 @@ namespace llvm {
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
- int getScalingFactorCost(const AddrMode &AM, Type *Ty,
+ int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
bool isVectorShiftByScalarCheap(Type *Ty) const override;
@@ -872,7 +876,8 @@ namespace llvm {
return nullptr; // nothing to do, move along.
}
- unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT,
+ SelectionDAG &DAG) const override;
/// This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index b309b8210851..faa91500b181 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -3136,6 +3136,12 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
+defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P,
+ HasBWI, 1>;
+defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P,
+ HasBWI, 1>;
+defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P,
+ HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
SSE_INTALU_ITINS_P, HasBWI, 1>;
@@ -3230,32 +3236,32 @@ let Predicates = [HasBWI] in {
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;
}
-defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
+defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax,
+defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax,
+defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax,
+defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax,
+defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax,
+defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin,
+defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin,
+defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin,
+defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
-defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin,
+defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
-defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin,
+defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
-defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin,
+defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
//===----------------------------------------------------------------------===//
@@ -4035,7 +4041,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
}
@@ -4394,16 +4400,16 @@ def : Pat<(f64 (sint_to_fp GR32:$src)),
def : Pat<(f64 (sint_to_fp GR64:$src)),
(VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
-defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32,
+defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32,
v4f32x_info, i32mem, loadi32,
"cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
XS, VEX_W, EVEX_CD8<64, CD8VT1>;
-defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86SuintToFpRnd, GR32, v2f64x_info,
+defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info,
i32mem, loadi32, "cvtusi2sd{l}">,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
-defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64,
+defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
@@ -4604,117 +4610,389 @@ def : Pat<(extloadf32 addr:$src),
def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
Requires<[HasAVX512]>;
-multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
- RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
- Domain d> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
- def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
- !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
- [], d>, EVEX, EVEX_B, EVEX_RC;
- let mayLoad = 1 in
- def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
-} // hasSideEffects = 0
+//===----------------------------------------------------------------------===//
+// AVX-512 Vector convert from signed/unsigned integer to float/double
+// and from float/double to signed/unsigned integer
+//===----------------------------------------------------------------------===//
+
+multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNode,
+ string Broadcast = _.BroadcastStr,
+ string Alias = ""> {
+
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
+ (_.VT (OpNode (_Src.VT _Src.RC:$src)))>, EVEX;
+
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.MemOp:$src), OpcodeStr#Alias, "$src", "$src",
+ (_.VT (OpNode (_Src.VT
+ (bitconvert (_Src.LdFrag addr:$src)))))>, EVEX;
+
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _Src.MemOp:$src), OpcodeStr,
+ "${src}"##Broadcast, "${src}"##Broadcast,
+ (_.VT (OpNode (_Src.VT
+ (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
+ ))>, EVEX, EVEX_B;
+}
+// Conversion with SAE - suppress all exceptions
+multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+ defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src), OpcodeStr,
+ "{sae}, $src", "$src, {sae}",
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
+ (i32 FROUND_NO_EXC)))>,
+ EVEX, EVEX_B;
}
-multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC,
- RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
- Domain d> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX;
- let mayLoad = 1 in
- def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX;
-} // hasSideEffects = 0
+// Conversion with rounding control (RC)
+multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86VectorVTInfo _Src, SDNode OpNodeRnd> {
+ defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src", "$src, $rc",
+ (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
+ EVEX, EVEX_B, EVEX_RC;
}
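
For reference, the SAE and rounding-control forms produced by the two multiclasses above roughly correspond to what the *_round_* intrinsics request from the compiler; a small, self-contained sketch (assumes an AVX-512F-enabled compile, not code from this patch):

  #include <immintrin.h>

  __m512i cvt_round_demo(__m512 V) {
    // Explicit rounding mode plus suppressed exceptions (EVEX.b with RC bits).
    __m512i A = _mm512_cvt_roundps_epi32(V, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    // Truncating conversion with SAE only (EVEX.b, no rounding-control bits).
    __m512i B = _mm512_cvtt_roundps_epi32(V, _MM_FROUND_NO_EXC);
    return _mm512_add_epi32(A, B);
  }
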
-defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround,
- loadv8f64, f512mem, v8f32, v8f64,
- SSEPackedSingle>, EVEX_V512, VEX_W, PD,
- EVEX_CD8<64, CD8VF>;
+// Extend Float to Double
+multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fextend>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
+ X86vfpextRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
+ X86vfpext, "{1to2}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fextend>,
+ EVEX_V256;
+ }
+}
+
+// Truncate Double to Float
+multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fround>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
+ X86vfproundRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
+ X86vfpround, "{1to2}", "{x}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fround,
+ "{1to4}", "{y}">, EVEX_V256;
+ }
+}
+
+defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">,
+ VEX_W, PD, EVEX_CD8<64, CD8VF>;
+defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,
+ PS, EVEX_CD8<32, CD8VH>;
-defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend,
- loadv4f64, f256mem, v8f64, v8f32,
- SSEPackedDouble>, EVEX_V512, PS,
- EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
-def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
- (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
- (VCVTPD2PSZrr VR512:$src)>;
+let Predicates = [HasVLX] in {
+ def : Pat<(v4f64 (extloadv4f32 addr:$src)),
+ (VCVTPS2PDZ256rm addr:$src)>;
+}
-def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
- (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)),
- (VCVTPD2PSZrrb VR512:$src, imm:$rc)>;
+// Convert Signed/Unsigned Doubleword to Double
+multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNode128> {
+ // No rounding in this op
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>,
+ EVEX_V512;
-//===----------------------------------------------------------------------===//
-// AVX-512 Vector convert from sign integer to float/double
-//===----------------------------------------------------------------------===//
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
+ OpNode128, "{1to2}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
-defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp,
- loadv8i64, i512mem, v16f32, v16i32,
- SSEPackedSingle>, EVEX_V512, PS,
- EVEX_CD8<32, CD8VF>;
+// Convert Signed/Unsigned Doubleword to Float
+multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
+ OpNodeRnd>, EVEX_V512;
+
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Doubleword
+multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Double to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ // We need "x"/"y" suffixes in order to distinguish between 128 and 256
+ // memory forms of these instructions in the Asm Parser. They have the same
+ // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
+ // due to the same reason.
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
+ "{1to2}", "{x}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
+ "{1to4}", "{y}">, EVEX_V256;
+ }
+}
+
+// Convert Double to Signed/Unsigned Doubleword
+multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasVLX] in {
+ // We need "x"/"y" suffixes in order to distinguish between 128 and 256
+ // memory forms of these instructions in the Asm Parser. They have the same
+ // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
+ // due to the same reason.
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
+ "{1to2}", "{x}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
+ "{1to4}", "{y}">, EVEX_V256;
+ }
+}
+
+// Convert Double to Signed/Unsigned Quadword
+multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
+ EVEX_V256;
+ }
+}
-defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp,
- loadv4i64, i256mem, v8f64, v8i32,
- SSEPackedDouble>, EVEX_V512, XS,
+// Convert Double to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Signed/Unsigned Quadword to Double
+multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode>,
+ EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Quadword
+multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ // The broadcast string is specified explicitly, since we take only 2
+ // elements from the v4f32x_info source.
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
+ "{1to2}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Float to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ // The broadcast string is specified explicitly, since we take only 2
+ // elements from the v4f32x_info source.
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
+ "{1to2}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
+ EVEX_V256;
+ }
+}
+
+// Convert Signed/Unsigned Quadword to Float
+multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeRnd> {
+ let Predicates = [HasDQI] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
+ OpNodeRnd>, EVEX_V512;
+ }
+ let Predicates = [HasDQI, HasVLX] in {
+ // We need "x"/"y" suffixes in order to distinguish between 128 and 256
+ // memory forms of these instructions in the Asm Parser. They have the same
+ // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
+ // due to the same reason.
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode,
+ "{1to2}", "{x}">, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
+ "{1to4}", "{y}">, EVEX_V256;
+ }
+}
+
+defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>, XS,
EVEX_CD8<32, CD8VH>;
-defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint,
- loadv16f32, f512mem, v16i32, v16f32,
- SSEPackedSingle>, EVEX_V512, XS,
+defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
+ X86VSintToFpRnd>,
+ PS, EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
+ X86VFpToSintRnd>,
+ XS, EVEX_CD8<32, CD8VF>;
+
+defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint,
+ X86VFpToSintRnd>,
+ PD, VEX_W, EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
+ X86VFpToUintRnd>, PS,
EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
- loadv8f64, f512mem, v8i32, v8f64,
- SSEPackedDouble>, EVEX_V512, PD, VEX_W,
+defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
+ X86VFpToUintRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint,
- loadv16f32, f512mem, v16i32, v16f32,
- SSEPackedSingle>, EVEX_V512, PS,
+defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
+ XS, EVEX_CD8<32, CD8VH>;
+
+defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
+ X86VUintToFpRnd>, XD,
EVEX_CD8<32, CD8VF>;
-// cvttps2udq (src, 0, mask-all-ones, sae-current)
-def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src),
- (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)),
- (VCVTTPS2UDQZrr VR512:$src)>;
+defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtps2Int,
+ X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint,
- loadv8f64, f512mem, v8i32, v8f64,
- SSEPackedDouble>, EVEX_V512, PS, VEX_W,
+defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtpd2Int,
+ X86cvtpd2IntRnd>, XD, VEX_W,
EVEX_CD8<64, CD8VF>;
-// cvttpd2udq (src, 0, mask-all-ones, sae-current)
-def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
- (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
- (VCVTTPD2UDQZrr VR512:$src)>;
+defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtps2UInt,
+ X86cvtps2UIntRnd>,
+ PS, EVEX_CD8<32, CD8VF>;
+defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtpd2UInt,
+ X86cvtpd2UIntRnd>, VEX_W,
+ PS, EVEX_CD8<64, CD8VF>;
-defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp,
- loadv4i64, f256mem, v8f64, v8i32,
- SSEPackedDouble>, EVEX_V512, XS,
- EVEX_CD8<32, CD8VH>;
+defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtpd2Int,
+ X86cvtpd2IntRnd>, VEX_W,
+ PD, EVEX_CD8<64, CD8VF>;
-defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
- loadv16i32, f512mem, v16f32, v16i32,
- SSEPackedSingle>, EVEX_V512, XD,
- EVEX_CD8<32, CD8VF>;
+defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtps2Int,
+ X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VH>;
+
+defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtpd2UInt,
+ X86cvtpd2UIntRnd>, VEX_W,
+ PD, EVEX_CD8<64, CD8VF>;
+
+defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtps2UInt,
+ X86cvtps2UIntRnd>, PD, EVEX_CD8<32, CD8VH>;
+
+defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
+ X86VFpToSlongRnd>, VEX_W,
+ PD, EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint,
+ X86VFpToSlongRnd>, PD, EVEX_CD8<32, CD8VH>;
+
+defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
+ X86VFpToUlongRnd>, VEX_W,
+ PD, EVEX_CD8<64, CD8VF>;
+
+defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint,
+ X86VFpToUlongRnd>, PD, EVEX_CD8<32, CD8VH>;
+
+defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
+ X86VSlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
+defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
+ X86VUlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
+
+defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
+ X86VSlongToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>;
+
+defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
+ X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>;
+
+let Predicates = [NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
@@ -4734,67 +5012,8 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
(v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
- (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
-def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VCVTDQ2PDZrr VR256X:$src)>;
-def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src),
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
- (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>;
-def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src),
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VCVTUDQ2PDZrr VR256X:$src)>;
-
-multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC,
- RegisterClass DstRC, PatFrag mem_frag,
- X86MemOperand x86memop, Domain d> {
-let hasSideEffects = 0 in {
- def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [], d>, EVEX;
- def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc),
- !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
- [], d>, EVEX, EVEX_B, EVEX_RC;
- let mayLoad = 1 in
- def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [], d>, EVEX;
-} // hasSideEffects = 0
}
-defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512,
- loadv16f32, f512mem, SSEPackedSingle>, PD,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X,
- loadv8f64, f512mem, SSEPackedDouble>, XD, VEX_W,
- EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src),
- (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
- (VCVTPS2DQZrrb VR512:$src, imm:$rc)>;
-
-def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src),
- (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
- (VCVTPD2DQZrrb VR512:$src, imm:$rc)>;
-
-defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512,
- loadv16f32, f512mem, SSEPackedSingle>,
- PS, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X,
- loadv8f64, f512mem, SSEPackedDouble>, VEX_W,
- PS, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src),
- (v16i32 immAllZerosV), (i16 -1), imm:$rc)),
- (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>;
-
-def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src),
- (v8i32 immAllZerosV), (i8 -1), imm:$rc)),
- (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>;
-
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index 6ab961f04ecf..4cd5563ce727 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -105,14 +105,16 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in
// jecxz.
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jcxz\t$dst", [], IIC_JCXZ>, AdSize16;
+ "jcxz\t$dst", [], IIC_JCXZ>, AdSize16,
+ Requires<[Not64BitMode]>;
let Uses = [ECX] in
def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
"jecxz\t$dst", [], IIC_JCXZ>, AdSize32;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64;
+ "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64,
+ Requires<[In64BitMode]>;
}
// Indirect branches
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index fe245c3a7e38..1f61ffa84e9a 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -39,11 +39,6 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>,
SDTCisVec<1>]>;
-def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
-def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
-def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
-def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;
-
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
@@ -75,6 +70,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
SDTCisVT<1, v4i32>]>>;
+def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD",
+ SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
+ SDTCisVT<1, v4i32>]>>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -187,6 +185,7 @@ def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
+def X86mulhrs : SDNode<"X86ISD::MULHRS" , SDTIntBinOp>;
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
@@ -208,6 +207,14 @@ def X86pmuldq : SDNode<"X86ISD::PMULDQ",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<1,2>]>>;
+def X86extrqi : SDNode<"X86ISD::EXTRQI",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>;
+def X86insertqi : SDNode<"X86ISD::INSERTQI",
+ SDTypeProfile<1, 4, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisVT<3, i8>,
+ SDTCisVT<4, i8>]>>;
+
// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
// translated into one of the target nodes below during lowering.
// Note: this is a work in progress...
@@ -357,8 +364,70 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1,
def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>;
-def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
-def X86SuintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+def SDTDoubleToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
+def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>;
+
+def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>;
+def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>;
+
+def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCVecEltisVT<1, i32>,
+ SDTCisInt<2>]>;
+def SDTVlongToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCVecEltisVT<1, i64>,
+ SDTCisInt<2>]>;
+
+def SDTVFPToIntRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<1>, SDTCVecEltisVT<0, i32>,
+ SDTCisInt<2>]>;
+def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<1>, SDTCVecEltisVT<0, i64>,
+ SDTCisInt<2>]>;
+
+// Scalar
+def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
+def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+
+// Vector with rounding mode
+
+// Truncating (cvtt) fp-to-int nodes
+def X86VFpToSintRnd : SDNode<"ISD::FP_TO_SINT", SDTVFPToIntRound>;
+def X86VFpToUintRnd : SDNode<"ISD::FP_TO_UINT", SDTVFPToIntRound>;
+def X86VFpToSlongRnd : SDNode<"ISD::FP_TO_SINT", SDTVFPToLongRound>;
+def X86VFpToUlongRnd : SDNode<"ISD::FP_TO_UINT", SDTVFPToLongRound>;
+
+def X86VSintToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVintToFPRound>;
+def X86VUintToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVintToFPRound>;
+def X86VSlongToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVlongToFPRound>;
+def X86VUlongToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVlongToFPRound>;
+
+// cvt fp-to-int nodes (with rounding mode)
+def X86cvtps2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToIntRnd>;
+def X86cvtps2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToIntRnd>;
+def X86cvtpd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToIntRnd>;
+def X86cvtpd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToIntRnd>;
+
+// Vector without rounding mode
+def X86cvtps2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToInt>;
+def X86cvtps2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>;
+def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>;
+def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>;
+
+def X86vfpextRnd : SDNode<"X86ISD::VFPEXT",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>,
+ SDTCisOpSmallerThanOp<1, 0>,
+ SDTCisInt<2>]>>;
+def X86vfproundRnd: SDNode<"X86ISD::VFPROUND",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisFP<1>,
+ SDTCVecEltisVT<0, f32>,
+ SDTCVecEltisVT<1, f64>,
+ SDTCisInt<2>]>>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index b92ba99fb100..786150760b93 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -269,14 +269,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::XOR8rr, X86::XOR8mr, 0 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2Addr); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable2Addr[i].RegOp;
- unsigned MemOp = MemoryFoldTable2Addr[i].MemOp;
- unsigned Flags = MemoryFoldTable2Addr[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) {
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 0, folded load and store, no alignment requirement.
- Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
+ Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
@@ -424,12 +421,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable0); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable0[i].RegOp;
- unsigned MemOp = MemoryFoldTable0[i].MemOp;
- unsigned Flags = MemoryFoldTable0[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) {
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
- RegOp, MemOp, TB_INDEX_0 | Flags);
+ Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags);
}
static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
@@ -862,14 +856,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable1); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable1[i].RegOp;
- unsigned MemOp = MemoryFoldTable1[i].MemOp;
- unsigned Flags = MemoryFoldTable1[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) {
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 1, folded load
- Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
+ Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
@@ -1116,6 +1107,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
{ X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
{ X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
+ { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
+ { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
{ X86::SBB32rr, X86::SBB32rm, 0 },
{ X86::SBB64rr, X86::SBB64rm, 0 },
{ X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
@@ -1412,6 +1405,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
{ X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
{ X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VROUNDSDr, X86::VROUNDSDm, 0 },
+ { X86::VROUNDSSr, X86::VROUNDSSm, 0 },
{ X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
{ X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
{ X86::VSUBPDrr, X86::VSUBPDrm, 0 },
@@ -1733,14 +1728,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable2[i].RegOp;
- unsigned MemOp = MemoryFoldTable2[i].MemOp;
- unsigned Flags = MemoryFoldTable2[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) {
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 2, folded load
- Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
+ Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD);
}
static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
@@ -1949,14 +1941,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable3); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable3[i].RegOp;
- unsigned MemOp = MemoryFoldTable3[i].MemOp;
- unsigned Flags = MemoryFoldTable3[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 3, folded load
- Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
+ Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
@@ -2001,14 +1990,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }
};
- for (unsigned i = 0, e = array_lengthof(MemoryFoldTable4); i != e; ++i) {
- unsigned RegOp = MemoryFoldTable4[i].RegOp;
- unsigned MemOp = MemoryFoldTable4[i].MemOp;
- unsigned Flags = MemoryFoldTable4[i].Flags;
+ for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
- RegOp, MemOp,
+ Entry.RegOp, Entry.MemOp,
// Index 4, folded load
- Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
+ Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
}
}
@@ -3820,7 +3806,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
X86::MOVPQIto64rr);
if (X86::VR64RegClass.contains(SrcReg))
// Copy from a VR64 register to a GR64 register.
- return X86::MOVSDto64rr;
+ return X86::MMX_MOVD64from64rr;
} else if (X86::GR64RegClass.contains(SrcReg)) {
// Copy from a GR64 register to a VR128 register.
if (X86::VR128XRegClass.contains(DestReg))
@@ -3828,7 +3814,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
X86::MOV64toPQIrr);
// Copy from a GR64 register to a VR64 register.
if (X86::VR64RegClass.contains(DestReg))
- return X86::MOV64toSDrr;
+ return X86::MMX_MOVD64to64rr;
}
// SrcReg(FR32) -> DestReg(GR32)
@@ -6413,22 +6399,40 @@ static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) {
hasVirtualRegDefsInBasicBlock(*MI1, MBB) &&
MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()))
return true;
-
+
return false;
}
+// TODO: There are many more machine instruction opcodes to match:
+// 1. Other data types (integer, vectors)
+// 2. Other math / logic operations (and, or)
+static bool isAssociativeAndCommutative(unsigned Opcode) {
+ switch (Opcode) {
+ case X86::ADDSDrr:
+ case X86::ADDSSrr:
+ case X86::VADDSDrr:
+ case X86::VADDSSrr:
+ case X86::MULSDrr:
+ case X86::MULSSrr:
+ case X86::VMULSDrr:
+ case X86::VMULSSrr:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// Return true if the input instruction is part of a chain of dependent ops
/// that are suitable for reassociation, otherwise return false.
/// If the instruction's operands must be commuted to have a previous
/// instruction of the same type define the first source operand, Commuted will
/// be set to true.
-static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode,
- bool &Commuted) {
- // 1. The instruction must have the correct type.
+static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) {
+ // 1. The operation must be associative and commutative.
// 2. The instruction must have virtual register definitions for its
// operands in the same basic block.
- // 3. The instruction must have a reassociatable sibling.
- if (Inst.getOpcode() == AssocOpcode &&
+ // 3. The instruction must have a reassociable sibling.
+ if (isAssociativeAndCommutative(Inst.getOpcode()) &&
hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) &&
hasReassocSibling(Inst, Commuted))
return true;
@@ -6455,14 +6459,8 @@ bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
// B = A op X (Prev)
// C = B op Y (Root)
- // TODO: There are many more associative instruction types to match:
- // 1. Other forms of scalar FP add (non-AVX)
- // 2. Other data types (double, integer, vectors)
- // 3. Other math / logic operations (mul, and, or)
- unsigned AssocOpcode = X86::VADDSSrr;
-
- bool Commute = false;
- if (isReassocCandidate(Root, AssocOpcode, Commute)) {
+ bool Commute;
+ if (isReassocCandidate(Root, Commute)) {
// We found a sequence of instructions that may be suitable for a
// reassociation of operands to increase ILP. Specify each commutation
// possibility for the Prev instruction in the sequence and let the
@@ -6512,7 +6510,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
MachineOperand &OpC = Root.getOperand(0);
-
+
unsigned RegA = OpA.getReg();
unsigned RegB = OpB.getReg();
unsigned RegX = OpX.getReg();
@@ -6547,7 +6545,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
.addReg(RegX, getKillRegState(KillX))
.addReg(RegY, getKillRegState(KillY));
InsInstrs.push_back(MIB1);
-
+
MachineInstrBuilder MIB2 =
BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
.addReg(RegA, getKillRegState(KillA))
@@ -6579,7 +6577,7 @@ void X86InstrInfo::genAlternativeCodeSequence(
Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
}
assert(Prev && "Unknown pattern for machine combiner");
-
+
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
return;
}
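
A minimal worked example of the reassociation the combiner now applies to scalar FP adds and muls (an illustrative source-level view only; reassociating FP operations like this assumes fast-math-style semantics):

  // Before: B = A + X; C = B + Y;   -- C waits on B, which waits on A.
  // After:  T = X + Y; C = A + T;   -- T no longer depends on A, exposing ILP.
  float reassoc_demo(float A, float X, float Y) {
    float T = X + Y;   // can execute while A is still being produced
    return A + T;      // equals (A + X) + Y only under reassociation rules
  }
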
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 6f38cb8eaf33..52bab9c79b45 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -194,7 +194,7 @@ def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
-def X86RecoverFrameAlloc : SDNode<"ISD::FRAME_ALLOC_RECOVER",
+def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
@@ -1028,14 +1028,13 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
- "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16,
- Requires<[Not64BitMode]>;
+ "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
+def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
+
def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
"push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[Not64BitMode]>;
-def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16,
- Requires<[Not64BitMode]>;
def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
"push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[Not64BitMode]>;
@@ -1081,9 +1080,6 @@ let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,
SchedRW = [WriteStore] in {
def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>, Requires<[In64BitMode]>;
-def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16,
- Requires<[In64BitMode]>;
def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
"push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
Requires<[In64BitMode]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2a896dfe8aa8..a5ff9edf05a3 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4035,13 +4035,13 @@ defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
SSE_INTALU_ITINS_P, 0>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
SSE_INTALU_ITINS_P, 0>;
-defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
+defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
-defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
+defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
-defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
+defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
-defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
+defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
// Intrinsic forms
@@ -6834,29 +6834,28 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
let Predicates = [HasAVX, NoVLX] in {
- let isCommutable = 0 in
- defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
+ defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
+ defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
+ defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
+ defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
+ defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
+ defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
+ defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
+ defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32,
@@ -6865,29 +6864,28 @@ let Predicates = [HasAVX, NoVLX] in {
}
let Predicates = [HasAVX2, NoVLX] in {
- let isCommutable = 0 in
- defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
+ defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
+ defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
+ defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
+ defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
+ defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
+ defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
+ defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
+ defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
@@ -6896,22 +6894,21 @@ let Predicates = [HasAVX2, NoVLX] in {
}
let Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in
- defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
+ defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
+ defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
+ defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
+ defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
+ defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
+ defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
+ defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
+ defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32,
VR128, memopv2i64, i128mem,
@@ -7773,7 +7770,7 @@ let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
(ins VR128:$src, u8imm:$len, u8imm:$idx),
"extrq\t{$idx, $len, $src|$src, $len, $idx}",
- [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
+ [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
imm:$idx))]>, PD;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
@@ -7784,8 +7781,8 @@ def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
- [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
- VR128:$src2, imm:$len, imm:$idx))]>, XD;
+ [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
+ imm:$len, imm:$idx))]>, XD;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
"insertq\t{$mask, $src|$src, $mask}",
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 61a33484b8bf..2c8b95bcba22 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -19,7 +19,7 @@ namespace llvm {
enum IntrinsicType {
INTR_NO_TYPE,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
- INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM,
INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK,
@@ -213,18 +213,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
@@ -596,60 +596,69 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_128, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_256, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_512, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulh_w_128, INTR_TYPE_2OP_MASK, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulh_w_256, INTR_TYPE_2OP_MASK, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulh_w_512, INTR_TYPE_2OP_MASK, ISD::MULHS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_128, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_256, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_512, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
@@ -1008,10 +1017,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
@@ -1049,14 +1058,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0),
X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, X86ISD::SMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, X86ISD::UMAX, 0),
- X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, X86ISD::SMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, X86ISD::UMIN, 0),
- X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, X86ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, ISD::SMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, ISD::UMAX, 0),
+ X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(sse41_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
@@ -1070,6 +1079,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse41_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0),
+ X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT),
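For context, the table above is what the intrinsic-lowering code consults: an INTR_TYPE_2OP entry tells the lowering to build one node with the entry's opcode (now the generic ISD::SMAX/UMAX/SMIN/UMIN forms instead of X86-specific ones) and the intrinsic's two operands, and the new INTR_TYPE_4OP kind does the same with four operands for sse4a_insertqi. A minimal sketch of how such a sorted table is looked up, using simplified stand-in types rather than the real X86IntrinsicsInfo/X86ISelLowering helpers:

#include <algorithm>
#include <iostream>

// Simplified stand-ins; the real table keys on llvm::Intrinsic::ID and stores
// an IntrinsicType plus an ISD/X86ISD opcode per entry.
enum SketchType { TYPE_2OP, TYPE_3OP, TYPE_4OP };
struct SketchData { unsigned Id; SketchType Type; unsigned Opcode; };

// Like IntrinsicsWithoutChain, the table must stay sorted by ID so a binary
// search can find the entry for a given intrinsic.
static const SketchData Table[] = {
    {100, TYPE_2OP, 1}, // e.g. avx2_pmaxs_b -> ISD::SMAX
    {101, TYPE_2OP, 2}, // e.g. avx2_pmaxu_b -> ISD::UMAX
    {205, TYPE_4OP, 3}, // e.g. sse4a_insertqi -> X86ISD::INSERTQI
};

const SketchData *lookup(unsigned Id) {
  auto *It = std::lower_bound(
      std::begin(Table), std::end(Table), Id,
      [](const SketchData &D, unsigned V) { return D.Id < V; });
  return (It != std::end(Table) && It->Id == Id) ? It : nullptr;
}

int main() {
  if (const SketchData *D = lookup(205))
    // The lowering would emit a node with D->Opcode and, because the entry is
    // TYPE_4OP, forward all four intrinsic operands to it.
    std::cout << "opcode " << D->Opcode << ", 4-operand form\n";
}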
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index d598b55aae3e..e6db9708b677 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -30,59 +30,67 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// pointer for reasons other than it containing dynamic allocation or
/// that FP eliminatation is turned off. For example, Cygwin main function
/// contains stack pointer re-alignment code which requires FP.
- bool ForceFramePointer;
+ bool ForceFramePointer = false;
/// RestoreBasePointerOffset - Non-zero if the function has base pointer
/// and makes call to llvm.eh.sjlj.setjmp. When non-zero, the value is a
/// displacement from the frame pointer to a slot where the base pointer
/// is stashed.
- signed char RestoreBasePointerOffset;
+ signed char RestoreBasePointerOffset = 0;
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
/// stack frame in bytes.
- unsigned CalleeSavedFrameSize;
+ unsigned CalleeSavedFrameSize = 0;
/// BytesToPopOnReturn - Number of bytes function pops on return (in addition
/// to the space used by the return address).
/// Used on windows platform for stdcall & fastcall name decoration
- unsigned BytesToPopOnReturn;
+ unsigned BytesToPopOnReturn = 0;
/// ReturnAddrIndex - FrameIndex for return slot.
- int ReturnAddrIndex;
+ int ReturnAddrIndex = 0;
/// \brief FrameIndex for return slot.
- int FrameAddrIndex;
+ int FrameAddrIndex = 0;
/// TailCallReturnAddrDelta - The number of bytes by which return address
/// stack slot is moved as the result of tail call optimization.
- int TailCallReturnAddrDelta;
+ int TailCallReturnAddrDelta = 0;
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
+ unsigned SRetReturnReg = 0;
/// GlobalBaseReg - keeps track of the virtual register initialized for
/// use as the global base register. This is used for PIC in some PIC
/// relocation models.
- unsigned GlobalBaseReg;
+ unsigned GlobalBaseReg = 0;
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
- int VarArgsFrameIndex;
+ int VarArgsFrameIndex = 0;
/// RegSaveFrameIndex - X86-64 vararg func register save area.
- int RegSaveFrameIndex;
+ int RegSaveFrameIndex = 0;
/// VarArgsGPOffset - X86-64 vararg func int reg offset.
- unsigned VarArgsGPOffset;
+ unsigned VarArgsGPOffset = 0;
/// VarArgsFPOffset - X86-64 vararg func fp reg offset.
- unsigned VarArgsFPOffset;
+ unsigned VarArgsFPOffset = 0;
/// ArgumentStackSize - The number of bytes on stack consumed by the arguments
/// being passed on the stack.
- unsigned ArgumentStackSize;
+ unsigned ArgumentStackSize = 0;
/// NumLocalDynamics - Number of local-dynamic TLS accesses.
- unsigned NumLocalDynamics;
+ unsigned NumLocalDynamics = 0;
/// HasPushSequences - Keeps track of whether this function uses sequences
/// of pushes to pass function parameters.
- bool HasPushSequences;
+ bool HasPushSequences = false;
+
+ /// True if the function uses llvm.x86.seh.restoreframe, and it needed a spill
+ /// slot for the frame pointer.
+ bool HasSEHFramePtrSave = false;
+
+ /// The frame index of a stack object containing the original frame pointer
+ /// used to address arguments in a function using a base pointer.
+ int SEHFramePtrSaveIndex = 0;
private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
@@ -90,40 +98,9 @@ private:
SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
public:
- X86MachineFunctionInfo() : ForceFramePointer(false),
- RestoreBasePointerOffset(0),
- CalleeSavedFrameSize(0),
- BytesToPopOnReturn(0),
- ReturnAddrIndex(0),
- FrameAddrIndex(0),
- TailCallReturnAddrDelta(0),
- SRetReturnReg(0),
- GlobalBaseReg(0),
- VarArgsFrameIndex(0),
- RegSaveFrameIndex(0),
- VarArgsGPOffset(0),
- VarArgsFPOffset(0),
- ArgumentStackSize(0),
- NumLocalDynamics(0),
- HasPushSequences(false) {}
-
- explicit X86MachineFunctionInfo(MachineFunction &MF)
- : ForceFramePointer(false),
- RestoreBasePointerOffset(0),
- CalleeSavedFrameSize(0),
- BytesToPopOnReturn(0),
- ReturnAddrIndex(0),
- FrameAddrIndex(0),
- TailCallReturnAddrDelta(0),
- SRetReturnReg(0),
- GlobalBaseReg(0),
- VarArgsFrameIndex(0),
- RegSaveFrameIndex(0),
- VarArgsGPOffset(0),
- VarArgsFPOffset(0),
- ArgumentStackSize(0),
- NumLocalDynamics(0),
- HasPushSequences(false) {}
+ X86MachineFunctionInfo() = default;
+
+ explicit X86MachineFunctionInfo(MachineFunction &MF) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@@ -174,6 +151,12 @@ public:
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+ bool getHasSEHFramePtrSave() const { return HasSEHFramePtrSave; }
+ void setHasSEHFramePtrSave(bool V) { HasSEHFramePtrSave = V; }
+
+ int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; }
+ void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; }
+
SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
return ForwardedMustTailRegParms;
}
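The X86MachineFunctionInfo change above is purely mechanical: C++11 in-class default member initializers replace the two long constructor initializer lists, so the constructors can be defaulted and a newly added field (HasSEHFramePtrSave, SEHFramePtrSaveIndex) only needs its initial value at the point of declaration. A small before/after sketch of the pattern; the structs here are illustrative, with field names borrowed from above:

// Before: every field must be repeated in every constructor's init list.
struct InfoOld {
  unsigned CalleeSavedFrameSize;
  bool HasPushSequences;
  InfoOld() : CalleeSavedFrameSize(0), HasPushSequences(false) {}
};

// After: defaults live next to the declarations; the constructor is defaulted
// and adding a field no longer requires touching any constructor.
struct InfoNew {
  unsigned CalleeSavedFrameSize = 0;
  bool HasPushSequences = false;
  InfoNew() = default;
};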
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 0033b5058187..d8495e53e0e3 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -202,7 +202,7 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned
X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
switch (RC->getID()) {
@@ -343,7 +343,7 @@ X86RegisterInfo::getNoPreservedMask() const {
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
// Set the stack-pointer register and its aliases as reserved.
for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
@@ -452,7 +452,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// use both the SP and the FP, we need a separate base pointer register.
bool CantUseFP = needsStackRealignment(MF);
bool CantUseSP =
- MFI->hasVarSizedObjects() || MFI->hasInlineAsmWithSPAdjust();
+ MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment();
return CantUseFP && CantUseSP;
}
@@ -477,9 +477,9 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
const Function *F = MF.getFunction();
- unsigned StackAlign =
- MF.getSubtarget().getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttribute(Attribute::StackAlignment));
@@ -503,7 +503,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr;
@@ -519,18 +519,17 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else
BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr);
- // FRAME_ALLOC uses a single offset, with no register. It only works in the
+ // LOCAL_ESCAPE uses a single offset, with no register. It only works in the
// simple FP case, and doesn't work with stack realignment. On 32-bit, the
// offset is from the traditional base pointer location. On 64-bit, the
// offset is from the SP at the end of the prologue, not the FP location. This
// matches the behavior of llvm.frameaddress.
- if (Opc == TargetOpcode::FRAME_ALLOC) {
+ if (Opc == TargetOpcode::LOCAL_ESCAPE) {
MachineOperand &FI = MI.getOperand(FIOperandNum);
bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int Offset;
if (IsWinEH)
- Offset = static_cast<const X86FrameLowering *>(TFI)
- ->getFrameIndexOffsetFromSP(MF, FrameIndex);
+ Offset = TFI->getFrameIndexOffsetFromSP(MF, FrameIndex);
else
Offset = TFI->getFrameIndexOffset(MF, FrameIndex);
FI.ChangeToImmediate(Offset);
@@ -584,7 +583,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const X86FrameLowering *TFI = getFrameLowering(MF);
return TFI->hasFP(MF) ? FramePtr : StackPtr;
}
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 5ca40bc0091b..ce79fcf9ad81 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -24,11 +24,6 @@ using namespace llvm;
#define DEBUG_TYPE "x86-selectiondag-info"
-X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL)
- : TargetSelectionDAGInfo(&DL) {}
-
-X86SelectionDAGInfo::~X86SelectionDAGInfo() {}
-
bool X86SelectionDAGInfo::isBaseRegConflictPossible(
SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const {
// We cannot use TRI->hasBasePointer() until *after* we select all basic
@@ -37,7 +32,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
// dynamic stack adjustments (hopefully rare) and the base pointer would
// conflict if we had to use it.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- if (!MFI->hasVarSizedObjects() && !MFI->hasInlineAsmWithSPAdjust())
+ if (!MFI->hasVarSizedObjects() && !MFI->hasOpaqueSPAdjustment())
return false;
const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
@@ -81,8 +76,9 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
if (const char *bzeroEntry = V &&
V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
- EVT IntPtr = DAG.getTargetLoweringInfo().getPointerTy();
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ EVT IntPtr =
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index eb7e0ed9de6c..961bd8c8d5ef 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -29,8 +29,7 @@ class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
ArrayRef<unsigned> ClobberSet) const;
public:
- explicit X86SelectionDAGInfo(const DataLayout &DL);
- ~X86SelectionDAGInfo();
+ explicit X86SelectionDAGInfo() = default;
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 3b25d30dc221..dff3624b7efe 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -68,7 +68,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDLLImportStorageClass())
return X86II::MO_DLLIMPORT;
- bool isDecl = GV->isDeclarationForLinker();
+ bool isDef = GV->isStrongDefinitionForLinker();
// X86-64 in PIC mode.
if (isPICStyleRIPRel()) {
@@ -80,8 +80,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// If symbol visibility is hidden, the extra load is not needed if
// target is x86-64 or the symbol is definitely defined in the current
// translation unit.
- if (GV->hasDefaultVisibility() &&
- (isDecl || GV->isWeakForLinker()))
+ if (GV->hasDefaultVisibility() && !isDef)
return X86II::MO_GOTPCREL;
} else if (!isTargetWin64()) {
assert(isTargetELF() && "Unknown rip-relative target");
@@ -107,7 +106,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// If this is a strong reference to a definition, it is definitely not
// through a stub.
- if (!isDecl && !GV->isWeakForLinker())
+ if (isDef)
return X86II::MO_PIC_BASE_OFFSET;
// Unless we have a symbol with hidden visibility, we have to go through a
@@ -117,7 +116,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// If symbol visibility is hidden, we have a stub for common symbol
// references and external declarations.
- if (isDecl || GV->hasCommonLinkage()) {
+ if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) {
// Hidden $non_lazy_ptr reference.
return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE;
}
@@ -131,7 +130,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
// If this is a strong reference to a definition, it is definitely not
// through a stub.
- if (!isDecl && !GV->isWeakForLinker())
+ if (isDef)
return X86II::MO_NO_FLAG;
// Unless we have a symbol with hidden visibility, we have to go through a
@@ -193,12 +192,9 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+64bit,+sse2";
}
- // If feature string is not empty, parse features string.
+ // Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
- // Make sure the right MCSchedModel is used.
- InitCPUSchedModel(CPUName);
-
InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
@@ -298,9 +294,8 @@ X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU,
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() == Triple::CODE16),
- TSInfo(*TM.getDataLayout()),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- FrameLowering(*this, getStackAlignment()) {
+ TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) {
// Determine the PICStyle based on the target selected.
if (TM.getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
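The ClassifyGlobalReference hunks above rely on a simple relationship between the GlobalValue predicates: a value is a strong definition for the linker exactly when it is neither a declaration nor weak, so the old test `isDecl || GV->isWeakForLinker()` collapses to `!isDef`. A stand-alone sketch of that equivalence; the struct is a stand-in, not llvm::GlobalValue:

#include <cassert>

// Stand-in exposing just the two underlying properties.
struct FakeGV {
  bool Declaration; // declaration (no body) from the linker's point of view
  bool Weak;        // weak/linkonce definition, may be replaced at link time

  bool isDeclarationForLinker() const { return Declaration; }
  bool isWeakForLinker() const { return Weak; }
  // Assumed meaning of the new predicate: a definition that cannot be
  // replaced at link time.
  bool isStrongDefinitionForLinker() const {
    return !isDeclarationForLinker() && !isWeakForLinker();
  }
};

int main() {
  for (bool Decl : {false, true})
    for (bool Weak : {false, true}) {
      FakeGV GV{Decl, Weak};
      bool OldNeedsIndirection =
          GV.isDeclarationForLinker() || GV.isWeakForLinker();
      // The rewrite in the diff: "needs GOT/stub" is exactly "not a strong def".
      assert(OldNeedsIndirection == !GV.isStrongDefinitionForLinker());
    }
}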
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index d420abbe1433..f026d4295f71 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -447,8 +447,26 @@ public:
}
bool isCallingConvWin64(CallingConv::ID CC) const {
- return (isTargetWin64() && CC != CallingConv::X86_64_SysV) ||
- CC == CallingConv::X86_64_Win64;
+ switch (CC) {
+ // On Win64, all these conventions just use the default convention.
+ case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_ThisCall:
+ case CallingConv::X86_VectorCall:
+ case CallingConv::Intel_OCL_BI:
+ return isTargetWin64();
+ // This convention allows using the Win64 convention on other targets.
+ case CallingConv::X86_64_Win64:
+ return true;
+ // This convention allows using the SysV convention on Windows targets.
+ case CallingConv::X86_64_SysV:
+ return false;
+ // Otherwise, who knows what this is.
+ default:
+ return false;
+ }
}
/// ClassifyGlobalReference - Classify a global variable reference for the
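A hypothetical use of the predicate above, just to show what the classification is for: the Win64 convention provides four integer argument registers, versus six for the SysV convention. The register lists follow the ABIs; the helper itself is illustrative, not the real calling-convention lowering:

#include <cstddef>

static const char *const Win64IntArgRegs[] = {"rcx", "rdx", "r8", "r9"};
static const char *const SysVIntArgRegs[]  = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};

// Pick the integer-argument register list based on isCallingConvWin64(CC).
const char *const *pickIntArgRegs(bool IsWin64CC, size_t &Count) {
  Count = IsWin64CC ? 4 : 6;
  return IsWin64CC ? Win64IntArgRegs : SysVIntArgRegs;
}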
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 0c82a700952b..7df726091843 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -89,7 +89,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
@@ -117,6 +117,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
static const CostTblEntry<MVT::SimpleValueType>
AVX2UniformConstCostTable[] = {
+ { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
+
{ ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
{ ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
{ ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
@@ -211,6 +213,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
+ { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
{ ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
@@ -261,12 +264,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
{ ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
{ ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
{ ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
{ ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
- { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized.
+ { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
{ ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
// It is not a good idea to vectorize division. We have to scalarize it and
@@ -352,7 +355,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
unsigned Cost = 1;
if (LT.second.getSizeInBits() > 128)
Cost = 3; // Extract + insert + copy.
@@ -364,7 +367,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
if (Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
// The backend knows how to generate a single VEX.256 version of
// instruction VPBLENDW if the target supports AVX2.
@@ -464,8 +467,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
- std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
- std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
+ std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
+ std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
static const TypeConversionCostTblEntry<MVT::SimpleValueType>
SSE2ConvTbl[] = {
@@ -537,8 +540,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
if (Idx != -1)
return AVX512ConversionTbl[Idx].Cost;
}
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
+ EVT SrcTy = TLI->getValueType(DL, Src);
+ EVT DstTy = TLI->getValueType(DL, Dst);
// The function getSimpleVT only handles simple value types.
if (!SrcTy.isSimple() || !DstTy.isSimple())
@@ -667,7 +670,7 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@@ -740,7 +743,7 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index != -1U) {
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
// This type is legalized to a scalar type.
if (!LT.second.isVector())
@@ -803,7 +806,7 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
"Invalid Opcode");
@@ -850,9 +853,9 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
}
// Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
unsigned Cost = 0;
- if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() &&
+ if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
@@ -887,7 +890,7 @@ unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
bool IsPairwise) {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
MVT MTy = LT.second;
@@ -1117,11 +1120,11 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {
int DataWidth = DataTy->getPrimitiveSizeInBits();
-
+
// Todo: AVX512 allows gather/scatter, works with strided and random as well
if ((DataWidth < 32) || (Consecutive == 0))
return false;
- if (ST->hasAVX512() || ST->hasAVX2())
+ if (ST->hasAVX512() || ST->hasAVX2())
return true;
return false;
}
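The getTypeLegalizationCost calls above now take the DataLayout explicitly, but the costing scheme itself is unchanged: legalize the requested type, look the opcode/type pair up in the per-subtarget table, and scale by the number of legal vectors the original type splits into. A rough sketch of that flow with made-up numbers, not the real CostTblEntry or TLI API:

#include <iostream>
#include <utility>

struct SketchCostEntry { int ISD; int Ty; unsigned Cost; };

enum { SKETCH_SRA = 1 };
enum { SKETCH_v4i32 = 1, SKETCH_v8i32 = 2 };

// Mirrors the shape of the SSE2 table above: cost per legal 128-bit vector.
static const SketchCostEntry SSE2Sketch[] = {
    {SKETCH_SRA, SKETCH_v4i32, 16}, // "shift each lane + blend"
};

unsigned getArithCostSketch(int ISD, int Ty) {
  // Pretend legalization: v8i32 is not legal pre-AVX, so it splits into two
  // v4i32 halves; LT.first is that split factor.
  std::pair<unsigned, int> LT = (Ty == SKETCH_v8i32)
                                    ? std::make_pair(2u, (int)SKETCH_v4i32)
                                    : std::make_pair(1u, Ty);
  for (const SketchCostEntry &E : SSE2Sketch)
    if (E.ISD == ISD && E.Ty == LT.second)
      return LT.first * E.Cost; // table cost is per legal vector
  return LT.first * 10;         // fallback, standing in for scalarization
}

int main() { std::cout << getArithCostSketch(SKETCH_SRA, SKETCH_v8i32) << "\n"; } // prints 32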
diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h
index a83158440193..da3f36c2e27e 100644
--- a/lib/Target/X86/X86TargetTransformInfo.h
+++ b/lib/Target/X86/X86TargetTransformInfo.h
@@ -40,7 +40,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
public:
explicit X86TTIImpl(const X86TargetMachine *TM, Function &F)
- : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
// Provide value semantics. MSVC requires that we spell all of these out.
X86TTIImpl(const X86TTIImpl &Arg)
@@ -48,18 +49,6 @@ public:
X86TTIImpl(X86TTIImpl &&Arg)
: BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
TLI(std::move(Arg.TLI)) {}
- X86TTIImpl &operator=(const X86TTIImpl &RHS) {
- BaseT::operator=(static_cast<const BaseT &>(RHS));
- ST = RHS.ST;
- TLI = RHS.TLI;
- return *this;
- }
- X86TTIImpl &operator=(X86TTIImpl &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- ST = std::move(RHS.ST);
- TLI = std::move(RHS.TLI);
- return *this;
- }
/// \name Scalar TTI Implementations
/// @{
diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp
index 90357257b9ef..9190d0be9e4d 100644
--- a/lib/Target/X86/X86WinEHState.cpp
+++ b/lib/Target/X86/X86WinEHState.cpp
@@ -113,8 +113,8 @@ char WinEHStatePass::ID = 0;
bool WinEHStatePass::doInitialization(Module &M) {
TheModule = &M;
- FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::frameescape);
- FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::framerecover);
+ FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::localescape);
+ FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::localrecover);
FrameAddress = Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress);
return false;
}
@@ -133,7 +133,7 @@ bool WinEHStatePass::doFinalization(Module &M) {
void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const {
// This pass should only insert a stack allocation, memory accesses, and
- // framerecovers.
+ // localrecovers.
AU.setPreservesCFG();
}
@@ -336,9 +336,11 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
FunctionType *TargetFuncTy =
FunctionType::get(Int32Ty, makeArrayRef(&ArgTys[0], 5),
/*isVarArg=*/false);
- Function *Trampoline = Function::Create(
- TrampolineTy, GlobalValue::InternalLinkage,
- Twine("__ehhandler$") + ParentFunc->getName(), TheModule);
+ Function *Trampoline =
+ Function::Create(TrampolineTy, GlobalValue::InternalLinkage,
+ Twine("__ehhandler$") + GlobalValue::getRealLinkageName(
+ ParentFunc->getName()),
+ TheModule);
BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline);
IRBuilder<> Builder(EntryBB);
Value *LSDA = emitEHLSDA(Builder, ParentFunc);
@@ -419,14 +421,14 @@ void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) {
}
/// Escape RegNode so that we can access it from child handlers. Find the call
-/// to frameescape, if any, in the entry block and append RegNode to the list
+/// to localescape, if any, in the entry block and append RegNode to the list
/// of arguments.
int WinEHStatePass::escapeRegNode(Function &F) {
- // Find the call to frameescape and extract its arguments.
+ // Find the call to localescape and extract its arguments.
IntrinsicInst *EscapeCall = nullptr;
for (Instruction &I : F.getEntryBlock()) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
- if (II && II->getIntrinsicID() == Intrinsic::frameescape) {
+ if (II && II->getIntrinsicID() == Intrinsic::localescape) {
EscapeCall = II;
break;
}
@@ -440,8 +442,10 @@ int WinEHStatePass::escapeRegNode(Function &F) {
// Replace the call (if it exists) with new one. Otherwise, insert at the end
// of the entry block.
- IRBuilder<> Builder(&F.getEntryBlock(),
- EscapeCall ? EscapeCall : F.getEntryBlock().end());
+ Instruction *InsertPt = EscapeCall;
+ if (!EscapeCall)
+ InsertPt = F.getEntryBlock().getTerminator();
+ IRBuilder<> Builder(&F.getEntryBlock(), InsertPt);
Builder.CreateCall(FrameEscape, Args);
if (EscapeCall)
EscapeCall->eraseFromParent();
@@ -520,6 +524,11 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) {
for (auto &Handler : ActionList) {
if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) {
auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc());
+#ifndef NDEBUG
+ for (BasicBlock *Pred : predecessors(BA->getBasicBlock()))
+ assert(Pred->isLandingPad() &&
+ "WinEHPrepare failed to split block");
+#endif
ExceptBlocks.insert(BA->getBasicBlock());
}
}