summaryrefslogtreecommitdiff
path: root/lib/Target/X86/X86FastISel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r--lib/Target/X86/X86FastISel.cpp332
1 files changed, 253 insertions, 79 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f48b47934e038..dfe3c80be21d1 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -22,7 +22,6 @@
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -30,6 +29,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
@@ -82,7 +82,8 @@ public:
#include "X86GenFastISel.inc"
private:
- bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
+ bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
+ const DebugLoc &DL);
bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
unsigned &ResultReg, unsigned Alignment = 1);
@@ -347,6 +348,11 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
MachineMemOperand *MMO, unsigned &ResultReg,
unsigned Alignment) {
+ bool HasSSE41 = Subtarget->hasSSE41();
+ bool HasAVX = Subtarget->hasAVX();
+ bool HasAVX2 = Subtarget->hasAVX2();
+ bool IsNonTemporal = MMO && MMO->isNonTemporal();
+
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -372,7 +378,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
break;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
@@ -381,7 +387,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
@@ -392,29 +398,91 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
// No f80 support yet.
return false;
case MVT::v4f32:
- if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ if (IsNonTemporal && Alignment >= 16 && HasSSE41)
+ Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ else if (Alignment >= 16)
+ Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
RC = &X86::VR128RegClass;
break;
case MVT::v2f64:
- if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ if (IsNonTemporal && Alignment >= 16 && HasSSE41)
+ Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ else if (Alignment >= 16)
+ Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
RC = &X86::VR128RegClass;
break;
case MVT::v4i32:
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+ if (IsNonTemporal && Alignment >= 16)
+ Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ else if (Alignment >= 16)
+ Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
RC = &X86::VR128RegClass;
break;
+ case MVT::v8f32:
+ assert(HasAVX);
+ if (IsNonTemporal && Alignment >= 32 && HasAVX2)
+ Opc = X86::VMOVNTDQAYrm;
+ else
+ Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
+ RC = &X86::VR256RegClass;
+ break;
+ case MVT::v4f64:
+ assert(HasAVX);
+ if (IsNonTemporal && Alignment >= 32 && HasAVX2)
+ Opc = X86::VMOVNTDQAYrm;
+ else
+ Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
+ RC = &X86::VR256RegClass;
+ break;
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v16i16:
+ case MVT::v32i8:
+ assert(HasAVX);
+ if (IsNonTemporal && Alignment >= 32 && HasAVX2)
+ Opc = X86::VMOVNTDQAYrm;
+ else
+ Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
+ RC = &X86::VR256RegClass;
+ break;
+ case MVT::v16f32:
+ assert(Subtarget->hasAVX512());
+ if (IsNonTemporal && Alignment >= 64)
+ Opc = X86::VMOVNTDQAZrm;
+ else
+ Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
+ RC = &X86::VR512RegClass;
+ break;
+ case MVT::v8f64:
+ assert(Subtarget->hasAVX512());
+ if (IsNonTemporal && Alignment >= 64)
+ Opc = X86::VMOVNTDQAZrm;
+ else
+ Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
+ RC = &X86::VR512RegClass;
+ break;
+ case MVT::v8i64:
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8:
+ assert(Subtarget->hasAVX512());
+ // Note: There are a lot more choices based on type with AVX-512, but
+ // there's really no advantage when the load isn't masked.
+ if (IsNonTemporal && Alignment >= 64)
+ Opc = X86::VMOVNTDQAZrm;
+ else
+ Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
+ RC = &X86::VR512RegClass;
+ break;
}
ResultReg = createResultReg(RC);
@@ -507,12 +575,70 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
else
Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
} else
- Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ break;
+ case MVT::v8f32:
+ assert(HasAVX);
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
+ else
+ Opc = X86::VMOVUPSYmr;
+ break;
+ case MVT::v4f64:
+ assert(HasAVX);
+ if (Aligned) {
+ Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
+ } else
+ Opc = X86::VMOVUPDYmr;
+ break;
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v16i16:
+ case MVT::v32i8:
+ assert(HasAVX);
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
+ else
+ Opc = X86::VMOVDQUYmr;
+ break;
+ case MVT::v16f32:
+ assert(Subtarget->hasAVX512());
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
+ else
+ Opc = X86::VMOVUPSZmr;
+ break;
+ case MVT::v8f64:
+ assert(Subtarget->hasAVX512());
+ if (Aligned) {
+ Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
+ } else
+ Opc = X86::VMOVUPDZmr;
+ break;
+ case MVT::v8i64:
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8:
+ assert(Subtarget->hasAVX512());
+ // Note: There are a lot more choices based on type with AVX-512, but
+ // there's really no advantage when the store isn't masked.
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
+ else
+ Opc = X86::VMOVDQU64Zmr;
break;
}
+ const MCInstrDesc &Desc = TII.get(Opc);
+ // Some of the instructions in the previous switch use FR128 instead
+ // of FR32 for ValReg. Make sure the register we feed the instruction
+ // matches its register class constraints.
+ // Note: It is fine to copy from FR32 to FR128; these are the same
+ // registers behind the scenes, which is why this did not trigger
+ // any bugs before.
+ ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
if (MMO)
MIB->addMemOperand(*FuncInfo.MF, MMO);
@@ -598,7 +724,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
AM.GV = GV;
// Allow the subtarget to classify the global.
- unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+ unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
// If this reference is relative to the pic base, set it now.
if (isGlobalRelativeToPICBase(GVFlags)) {
@@ -831,9 +957,8 @@ redo_gep:
// our address and just match the value instead of completely failing.
AM = SavedAM;
- for (SmallVectorImpl<const Value *>::reverse_iterator
- I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
- if (handleConstantAddresses(*I, AM))
+ for (const Value *I : reverse(GEPs))
+ if (handleConstantAddresses(I, AM))
return true;
return false;
@@ -938,10 +1063,8 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
// base and index registers are unused.
assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
AM.Base.Reg = X86::RIP;
- } else if (Subtarget->isPICStyleStubPIC()) {
- AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
- } else if (Subtarget->isPICStyleGOT()) {
- AM.GVOpFlags = X86II::MO_GOTOFF;
+ } else {
+ AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
}
return true;
@@ -972,6 +1095,21 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (S->isAtomic())
return false;
+ const Value *PtrV = I->getOperand(1);
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return false;
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
+ if (Alloca->isSwiftError())
+ return false;
+ }
+ }
+
const Value *Val = S->getValueOperand();
const Value *Ptr = S->getPointerOperand();
@@ -1002,6 +1140,10 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSwiftError() &&
+ F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ return false;
+
if (TLI.supportSplitCSR(FuncInfo.MF))
return false;
@@ -1009,14 +1151,14 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
CC != CallingConv::X86_FastCall &&
- CC != CallingConv::X86_64_SysV)
+ CC != CallingConv::X86_StdCall &&
+ CC != CallingConv::X86_ThisCall &&
+ CC != CallingConv::X86_64_SysV &&
+ CC != CallingConv::X86_64_Win64)
return false;
- if (Subtarget->isCallingConvWin64(CC))
- return false;
-
- // Don't handle popping bytes on return for now.
- if (X86MFInfo->getBytesToPopOnReturn() != 0)
+ // Don't handle popping bytes if they don't fit the ret's immediate.
+ if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
return false;
// fastcc with -tailcallopt is intended to provide a guaranteed
@@ -1101,11 +1243,14 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
+ // The Swift calling convention does not require that we copy the sret
+ // argument into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
+
// All x86 ABIs require that for returning structs by value we copy
// the sret argument into %rax/%eax (depending on ABI) for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into %rax/%eax.
- if (F.hasStructRetAttr()) {
+ if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
unsigned Reg = X86MFInfo->getSRetReturnReg();
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -1116,9 +1261,15 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
}
// Now emit the RET.
- MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+ MachineInstrBuilder MIB;
+ if (X86MFInfo->getBytesToPopOnReturn()) {
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
+ .addImm(X86MFInfo->getBytesToPopOnReturn());
+ } else {
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+ }
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
@@ -1133,6 +1284,21 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
if (LI->isAtomic())
return false;
+ const Value *SV = I->getOperand(0);
+ if (TLI.supportSwiftError()) {
+ // Swifterror values can come from either a function parameter with
+ // swifterror attribute or an alloca with swifterror attribute.
+ if (const Argument *Arg = dyn_cast<Argument>(SV)) {
+ if (Arg->hasSwiftErrorAttr())
+ return false;
+ }
+
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
+ if (Alloca->isSwiftError())
+ return false;
+ }
+ }
+
MVT VT;
if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
return false;
@@ -1204,8 +1370,8 @@ static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
}
}
-bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
- EVT VT, DebugLoc CurDbgLoc) {
+bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
+ const DebugLoc &CurDbgLoc) {
unsigned Op0Reg = getRegForValue(Op0);
if (Op0Reg == 0) return false;
@@ -1244,6 +1410,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
return false;
+ if (I->getType()->isIntegerTy(1) && Subtarget->hasAVX512())
+ return false;
+
// Try to optimize or fold the cmp.
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
unsigned ResultReg = 0;
@@ -2294,8 +2463,10 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// register class VR128 by method 'constrainOperandRegClass' which is
// directly called by 'fastEmitInst_ri'.
// Instruction VCVTPS2PHrr takes an extra immediate operand which is
- // used to provide rounding control.
- InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0);
+ // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
+ // It's consistent with the other FP instructions, which are usually
+ // controlled by MXCSR.
+ InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
// Move the lower 32-bits of ResultReg to another register of class GR32.
ResultReg = createResultReg(&X86::GR32RegClass);
@@ -2477,7 +2648,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
// is not generated by FastISel yet.
// FIXME: Update this code once tablegen can handle it.
- static const unsigned SqrtOpc[2][2] = {
+ static const uint16_t SqrtOpc[2][2] = {
{X86::SQRTSSr, X86::VSQRTSSr},
{X86::SQRTSDr, X86::VSQRTSDr}
};
@@ -2577,7 +2748,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned ResultReg = 0;
// Check if we have an immediate version.
if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
- static const unsigned Opc[2][4] = {
+ static const uint16_t Opc[2][4] = {
{ X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
};
@@ -2607,9 +2778,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
// it manually.
if (BaseOpc == X86ISD::UMUL && !ResultReg) {
- static const unsigned MULOpc[] =
+ static const uint16_t MULOpc[] =
{ X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
- static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
+ static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
// First copy the first operand into RAX, which is an implicit input to
// the X86::MUL*r instruction.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -2618,7 +2789,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
} else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
- static const unsigned MULOpc[] =
+ static const uint16_t MULOpc[] =
{ X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
if (VT == MVT::i8) {
// Copy the first operand into AL, which is an implicit input to the
@@ -2671,7 +2842,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!isTypeLegal(RetTy, VT))
return false;
- static const unsigned CvtOpc[2][2][2] = {
+ static const uint16_t CvtOpc[2][2][2] = {
{ { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
{ X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
{ { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
@@ -2742,6 +2913,8 @@ bool X86FastISel::fastLowerArguments() {
if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
F->getAttributes().hasAttribute(Idx, Attribute::Nest))
return false;
@@ -2809,9 +2982,9 @@ bool X86FastISel::fastLowerArguments() {
return true;
}
-static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
- CallingConv::ID CC,
- ImmutableCallSite *CS) {
+static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
+ CallingConv::ID CC,
+ ImmutableCallSite *CS) {
if (Subtarget->is64Bit())
return 0;
if (Subtarget->getTargetTriple().isOSMSVCRT())
@@ -2849,7 +3022,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::WebKit_JS:
+ case CallingConv::Swift:
case CallingConv::X86_FastCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_ThisCall:
case CallingConv::X86_64_Win64:
case CallingConv::X86_64_SysV:
break;
@@ -2873,10 +3049,9 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (CLI.CS && CLI.CS->hasInAllocaArgument())
return false;
- // Fast-isel doesn't know about callee-pop yet.
- if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
- TM.Options.GuaranteedTailCallOpt))
- return false;
+ for (auto Flag : CLI.OutFlags)
+ if (Flag.isSwiftError())
+ return false;
SmallVector<MVT, 16> OutVTs;
SmallVector<unsigned, 16> ArgRegs;
@@ -2964,6 +3139,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
case CCValAssign::SExt: {
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
"Unexpected extend");
+
+ if (ArgVT.SimpleTy == MVT::i1)
+ return false;
+
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
ArgVT, ArgReg);
assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
@@ -2973,6 +3152,17 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
case CCValAssign::ZExt: {
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
"Unexpected extend");
+
+ // Handle zero-extension from i1 to i8, which is common.
+ if (ArgVT.SimpleTy == MVT::i1) {
+ // Set the high bits to zero.
+ ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
+ ArgVT = MVT::i8;
+
+ if (ArgReg == 0)
+ return false;
+ }
+
bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
ArgVT, ArgReg);
assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
@@ -3113,25 +3303,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
// See if we need any target-specific flags on the GV operand.
- unsigned char OpFlags = 0;
-
- // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
- // external symbols most go through the PLT in PIC mode. If the symbol
- // has hidden or protected visibility, or if it is static or local, then
- // we don't need to use the PLT - we can directly call it.
- if (Subtarget->isTargetELF() &&
- TM.getRelocationModel() == Reloc::PIC_ &&
- GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
- OpFlags = X86II::MO_PLT;
- } else if (Subtarget->isPICStyleStubAny() &&
- !GV->isStrongDefinitionForLinker() &&
- (!Subtarget->getTargetTriple().isMacOSX() ||
- Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
- // PC-relative references to external symbols should go through $stub,
- // unless we're building with the leopard linker or later, which
- // automatically synthesizes these stubs.
- OpFlags = X86II::MO_DARWIN_STUB;
- }
+ unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
+ // Ignore the NonLazyBind attribute in FastISel.
+ if (OpFlags == X86II::MO_GOTPCREL)
+ OpFlags = 0;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
if (Symbol)
@@ -3157,7 +3332,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Issue CALLSEQ_END
unsigned NumBytesForCalleeToPop =
- computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
+ X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
+ TM.Options.GuaranteedTailCallOpt)
+ ? NumBytes // Callee pops everything.
+ : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesForCalleeToPop);
@@ -3398,17 +3576,13 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
- unsigned char OpFlag = 0;
- if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
- OpFlag = X86II::MO_PIC_BASE_OFFSET;
+ unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
+ if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- } else if (Subtarget->isPICStyleGOT()) {
- OpFlag = X86II::MO_GOTOFF;
+ else if (OpFlag == X86II::MO_GOTOFF)
PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- } else if (Subtarget->isPICStyleRIPRel() &&
- TM.getCodeModel() == CodeModel::Small) {
+ else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
PICBase = X86::RIP;
- }
// Create the load from the constant pool.
unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
@@ -3572,7 +3746,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
AM.getFullAddress(AddrOps);
MachineInstr *Result = XII.foldMemoryOperandImpl(
- *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+ *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
/*AllowCommute=*/true);
if (!Result)
return false;