Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 332
1 file changed, 253 insertions(+), 79 deletions(-)
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f48b47934e038..dfe3c80be21d1 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -22,7 +22,6 @@
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -30,6 +29,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
@@ -82,7 +82,8 @@ public:
 #include "X86GenFastISel.inc"
 
 private:
-  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
+  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
+                          const DebugLoc &DL);
 
   bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                        unsigned &ResultReg, unsigned Alignment = 1);
@@ -347,6 +348,11 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
 bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
                                   MachineMemOperand *MMO, unsigned &ResultReg,
                                   unsigned Alignment) {
+  bool HasSSE41 = Subtarget->hasSSE41();
+  bool HasAVX = Subtarget->hasAVX();
+  bool HasAVX2 = Subtarget->hasAVX2();
+  bool IsNonTemporal = MMO && MMO->isNonTemporal();
+
   // Get opcode and regclass of the output for the given load instruction.
   unsigned Opc = 0;
   const TargetRegisterClass *RC = nullptr;
@@ -372,7 +378,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     break;
   case MVT::f32:
     if (X86ScalarSSEf32) {
-      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+      Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
       RC  = &X86::FR32RegClass;
     } else {
       Opc = X86::LD_Fp32m;
@@ -381,7 +387,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     break;
   case MVT::f64:
     if (X86ScalarSSEf64) {
-      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+      Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
       RC  = &X86::FR64RegClass;
     } else {
       Opc = X86::LD_Fp64m;
@@ -392,29 +398,91 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
     // No f80 support yet.
     return false;
   case MVT::v4f32:
-    if (Alignment >= 16)
-      Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
+      Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+    else if (Alignment >= 16)
+      Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
     else
-      Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+      Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
     RC  = &X86::VR128RegClass;
     break;
   case MVT::v2f64:
-    if (Alignment >= 16)
-      Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+    if (IsNonTemporal && Alignment >= 16 && HasSSE41)
+      Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+    else if (Alignment >= 16)
+      Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
     else
-      Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+      Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
     RC  = &X86::VR128RegClass;
     break;
   case MVT::v4i32:
   case MVT::v2i64:
   case MVT::v8i16:
   case MVT::v16i8:
-    if (Alignment >= 16)
-      Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+    if (IsNonTemporal && Alignment >= 16)
+      Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+    else if (Alignment >= 16)
+      Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
     else
-      Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+      Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
     RC  = &X86::VR128RegClass;
     break;
+  case MVT::v8f32:
+    assert(HasAVX);
+    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
+      Opc = X86::VMOVNTDQAYrm;
+    else
+      Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
+    RC  = &X86::VR256RegClass;
+    break;
+  case MVT::v4f64:
+    assert(HasAVX);
+    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
+      Opc = X86::VMOVNTDQAYrm;
+    else
+      Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
+    RC  = &X86::VR256RegClass;
+    break;
+  case MVT::v8i32:
+  case MVT::v4i64:
+  case MVT::v16i16:
+  case MVT::v32i8:
+    assert(HasAVX);
+    if (IsNonTemporal && Alignment >= 32 && HasAVX2)
+      Opc = X86::VMOVNTDQAYrm;
+    else
+      Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
+    RC  = &X86::VR256RegClass;
+    break;
+  case MVT::v16f32:
+    assert(Subtarget->hasAVX512());
+    if (IsNonTemporal && Alignment >= 64)
+      Opc = X86::VMOVNTDQAZrm;
+    else
+      Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
+    RC  = &X86::VR512RegClass;
+    break;
+  case MVT::v8f64:
+    assert(Subtarget->hasAVX512());
+    if (IsNonTemporal && Alignment >= 64)
+      Opc = X86::VMOVNTDQAZrm;
+    else
+      Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
+    RC  = &X86::VR512RegClass;
+    break;
+  case MVT::v8i64:
+  case MVT::v16i32:
+  case MVT::v32i16:
+  case MVT::v64i8:
+    assert(Subtarget->hasAVX512());
+    // Note: There are a lot more choices based on type with AVX-512, but
+    // there's really no advantage when the load isn't masked.
+    if (IsNonTemporal && Alignment >= 64)
+      Opc = X86::VMOVNTDQAZrm;
+    else
+      Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
+    RC  = &X86::VR512RegClass;
+    break;
   }
 
   ResultReg = createResultReg(RC);
@@ -507,12 +575,70 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
       else
         Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
     } else
-      Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
+      Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
+    break;
+  case MVT::v8f32:
+    assert(HasAVX);
+    if (Aligned)
+      Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
+    else
+      Opc = X86::VMOVUPSYmr;
+    break;
+  case MVT::v4f64:
+    assert(HasAVX);
+    if (Aligned) {
+      Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
+    } else
+      Opc = X86::VMOVUPDYmr;
+    break;
+  case MVT::v8i32:
+  case MVT::v4i64:
+  case MVT::v16i16:
+  case MVT::v32i8:
+    assert(HasAVX);
+    if (Aligned)
+      Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
+    else
+      Opc = X86::VMOVDQUYmr;
+    break;
+  case MVT::v16f32:
+    assert(Subtarget->hasAVX512());
+    if (Aligned)
+      Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
+    else
+      Opc = X86::VMOVUPSZmr;
+    break;
+  case MVT::v8f64:
+    assert(Subtarget->hasAVX512());
+    if (Aligned) {
+      Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
+    } else
+      Opc = X86::VMOVUPDZmr;
+    break;
+  case MVT::v8i64:
+  case MVT::v16i32:
+  case MVT::v32i16:
+  case MVT::v64i8:
+    assert(Subtarget->hasAVX512());
+    // Note: There are a lot more choices based on type with AVX-512, but
+    // there's really no advantage when the store isn't masked.
+    if (Aligned)
+      Opc = IsNonTemporal ? X86::VMOVNTDQZmr : X86::VMOVDQA64Zmr;
+    else
+      Opc = X86::VMOVDQU64Zmr;
     break;
   }
 
+  const MCInstrDesc &Desc = TII.get(Opc);
+  // Some of the instructions in the previous switch use FR128 instead
+  // of FR32 for ValReg. Make sure the register we feed the instruction
+  // matches its register class constraints.
+  // Note: This is fine to do a copy from FR32 to FR128, this is the
+  // same registers behind the scene and actually why it did not trigger
+  // any bugs before.
+  ValReg = constrainOperandRegClass(Desc, ValReg, Desc.getNumOperands() - 1);
   MachineInstrBuilder MIB =
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
   addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
   if (MMO)
     MIB->addMemOperand(*FuncInfo.MF, MMO);
@@ -598,7 +724,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
     AM.GV = GV;
 
     // Allow the subtarget to classify the global.
-    unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+    unsigned char GVFlags = Subtarget->classifyGlobalReference(GV);
 
     // If this reference is relative to the pic base, set it now.
     if (isGlobalRelativeToPICBase(GVFlags)) {
@@ -831,9 +957,8 @@ redo_gep:
   // our address and just match the value instead of completely failing.
   AM = SavedAM;
 
-  for (SmallVectorImpl<const Value *>::reverse_iterator
-         I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
-    if (handleConstantAddresses(*I, AM))
+  for (const Value *I : reverse(GEPs))
+    if (handleConstantAddresses(I, AM))
       return true;
 
   return false;
@@ -938,10 +1063,8 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
       // base and index registers are unused.
       assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
       AM.Base.Reg = X86::RIP;
-    } else if (Subtarget->isPICStyleStubPIC()) {
-      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
-    } else if (Subtarget->isPICStyleGOT()) {
-      AM.GVOpFlags = X86II::MO_GOTOFF;
+    } else {
+      AM.GVOpFlags = Subtarget->classifyLocalReference(nullptr);
     }
 
     return true;
@@ -972,6 +1095,21 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
   if (S->isAtomic())
     return false;
 
+  const Value *PtrV = I->getOperand(1);
+  if (TLI.supportSwiftError()) {
+    // Swifterror values can come from either a function parameter with
+    // swifterror attribute or an alloca with swifterror attribute.
+    if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
+      if (Arg->hasSwiftErrorAttr())
+        return false;
+    }
+
+    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
+      if (Alloca->isSwiftError())
+        return false;
+    }
+  }
+
   const Value *Val = S->getValueOperand();
   const Value *Ptr = S->getPointerOperand();
@@ -1002,6 +1140,10 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
   if (!FuncInfo.CanLowerReturn)
     return false;
 
+  if (TLI.supportSwiftError() &&
+      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+    return false;
+
   if (TLI.supportSplitCSR(FuncInfo.MF))
     return false;
@@ -1009,14 +1151,14 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
   if (CC != CallingConv::C &&
       CC != CallingConv::Fast &&
       CC != CallingConv::X86_FastCall &&
-      CC != CallingConv::X86_64_SysV)
+      CC != CallingConv::X86_StdCall &&
+      CC != CallingConv::X86_ThisCall &&
+      CC != CallingConv::X86_64_SysV &&
+      CC != CallingConv::X86_64_Win64)
     return false;
 
-  if (Subtarget->isCallingConvWin64(CC))
-    return false;
-
-  // Don't handle popping bytes on return for now.
-  if (X86MFInfo->getBytesToPopOnReturn() != 0)
+  // Don't handle popping bytes if they don't fit the ret's immediate.
+  if (!isUInt<16>(X86MFInfo->getBytesToPopOnReturn()))
     return false;
 
   // fastcc with -tailcallopt is intended to provide a guaranteed
@@ -1101,11 +1243,14 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
     RetRegs.push_back(VA.getLocReg());
   }
 
+  // Swift calling convention does not require we copy the sret argument
+  // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
+
   // All x86 ABIs require that for returning structs by value we copy
   // the sret argument into %rax/%eax (depending on ABI) for the return.
   // We saved the argument into a virtual register in the entry block,
   // so now we copy the value out and into %rax/%eax.
-  if (F.hasStructRetAttr()) {
+  if (F.hasStructRetAttr() && CC != CallingConv::Swift) {
     unsigned Reg = X86MFInfo->getSRetReturnReg();
     assert(Reg &&
            "SRetReturnReg should have been set in LowerFormalArguments()!");
@@ -1116,9 +1261,15 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
   }
 
   // Now emit the RET.
-  MachineInstrBuilder MIB =
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-              TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+  MachineInstrBuilder MIB;
+  if (X86MFInfo->getBytesToPopOnReturn()) {
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                  TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
+              .addImm(X86MFInfo->getBytesToPopOnReturn());
+  } else {
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                  TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+  }
   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
     MIB.addReg(RetRegs[i], RegState::Implicit);
   return true;
@@ -1133,6 +1284,21 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
   if (LI->isAtomic())
     return false;
 
+  const Value *SV = I->getOperand(0);
+  if (TLI.supportSwiftError()) {
+    // Swifterror values can come from either a function parameter with
+    // swifterror attribute or an alloca with swifterror attribute.
+    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
+      if (Arg->hasSwiftErrorAttr())
+        return false;
+    }
+
+    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
+      if (Alloca->isSwiftError())
+        return false;
+    }
+  }
+
   MVT VT;
   if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
     return false;
@@ -1204,8 +1370,8 @@ static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
   }
 }
 
-bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
-                                     EVT VT, DebugLoc CurDbgLoc) {
+bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, EVT VT,
+                                     const DebugLoc &CurDbgLoc) {
   unsigned Op0Reg = getRegForValue(Op0);
   if (Op0Reg == 0) return false;
@@ -1244,6 +1410,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
   if (!isTypeLegal(I->getOperand(0)->getType(), VT))
     return false;
 
+  if (I->getType()->isIntegerTy(1) && Subtarget->hasAVX512())
+    return false;
+
   // Try to optimize or fold the cmp.
   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   unsigned ResultReg = 0;
@@ -2294,8 +2463,10 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     // register class VR128 by method 'constrainOperandRegClass' which is
     // directly called by 'fastEmitInst_ri'.
    // Instruction VCVTPS2PHrr takes an extra immediate operand which is
-    // used to provide rounding control.
-    InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0);
+    // used to provide rounding control: use MXCSR.RC, encoded as 0b100.
+    // It's consistent with the other FP instructions, which are usually
+    // controlled by MXCSR.
+    InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 4);
 
     // Move the lower 32-bits of ResultReg to another register of class GR32.
     ResultReg = createResultReg(&X86::GR32RegClass);
@@ -2477,7 +2648,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
     // is not generated by FastISel yet.
     // FIXME: Update this code once tablegen can handle it.
-    static const unsigned SqrtOpc[2][2] = {
+    static const uint16_t SqrtOpc[2][2] = {
       {X86::SQRTSSr, X86::VSQRTSSr},
       {X86::SQRTSDr, X86::VSQRTSDr}
     };
@@ -2577,7 +2748,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     unsigned ResultReg = 0;
     // Check if we have an immediate version.
     if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
-      static const unsigned Opc[2][4] = {
+      static const uint16_t Opc[2][4] = {
        { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
        { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
       };
@@ -2607,9 +2778,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
   // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
   // it manually.
   if (BaseOpc == X86ISD::UMUL && !ResultReg) {
-    static const unsigned MULOpc[] =
+    static const uint16_t MULOpc[] =
       { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
-    static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
+    static const MCPhysReg Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
     // First copy the first operand into RAX, which is an implicit input to
     // the X86::MUL*r instruction.
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -2618,7 +2789,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
                                TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
   } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
-    static const unsigned MULOpc[] =
+    static const uint16_t MULOpc[] =
       { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
     if (VT == MVT::i8) {
       // Copy the first operand into AL, which is an implicit input to the
@@ -2671,7 +2842,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
   if (!isTypeLegal(RetTy, VT))
     return false;
 
-  static const unsigned CvtOpc[2][2][2] = {
+  static const uint16_t CvtOpc[2][2][2] = {
     { { X86::CVTTSS2SIrr,   X86::VCVTTSS2SIrr   },
       { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr }  },
     { { X86::CVTTSD2SIrr,   X86::VCVTTSD2SIrr   },
@@ -2742,6 +2913,8 @@ bool X86FastISel::fastLowerArguments() {
     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+        F->getAttributes().hasAttribute(Idx, Attribute::SwiftSelf) ||
+        F->getAttributes().hasAttribute(Idx, Attribute::SwiftError) ||
         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
       return false;
@@ -2809,9 +2982,9 @@ bool X86FastISel::fastLowerArguments() {
   return true;
 }
 
-static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
-                                           CallingConv::ID CC,
-                                           ImmutableCallSite *CS) {
+static unsigned computeBytesPoppedByCalleeForSRet(const X86Subtarget *Subtarget,
+                                                  CallingConv::ID CC,
+                                                  ImmutableCallSite *CS) {
   if (Subtarget->is64Bit())
     return 0;
   if (Subtarget->getTargetTriple().isOSMSVCRT())
@@ -2849,7 +3022,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   case CallingConv::C:
   case CallingConv::Fast:
   case CallingConv::WebKit_JS:
+  case CallingConv::Swift:
   case CallingConv::X86_FastCall:
+  case CallingConv::X86_StdCall:
+  case CallingConv::X86_ThisCall:
   case CallingConv::X86_64_Win64:
   case CallingConv::X86_64_SysV:
     break;
@@ -2873,10 +3049,9 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   if (CLI.CS && CLI.CS->hasInAllocaArgument())
     return false;
 
-  // Fast-isel doesn't know about callee-pop yet.
-  if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
-                       TM.Options.GuaranteedTailCallOpt))
-    return false;
+  for (auto Flag : CLI.OutFlags)
+    if (Flag.isSwiftError())
+      return false;
 
   SmallVector<MVT, 16> OutVTs;
   SmallVector<unsigned, 16> ArgRegs;
@@ -2964,6 +3139,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
     case CCValAssign::SExt: {
       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
              "Unexpected extend");
+
+      if (ArgVT.SimpleTy == MVT::i1)
+        return false;
+
       bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
                                        ArgVT, ArgReg);
       assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
@@ -2973,6 +3152,17 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
     case CCValAssign::ZExt: {
       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
              "Unexpected extend");
+
+      // Handle zero-extension from i1 to i8, which is common.
+      if (ArgVT.SimpleTy == MVT::i1) {
+        // Set the high bits to zero.
+        ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
+        ArgVT = MVT::i8;
+
+        if (ArgReg == 0)
+          return false;
+      }
+
       bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
                                        ArgVT, ArgReg);
       assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
@@ -3113,25 +3303,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
     unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
 
     // See if we need any target-specific flags on the GV operand.
-    unsigned char OpFlags = 0;
-
-    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
-    // external symbols most go through the PLT in PIC mode.  If the symbol
-    // has hidden or protected visibility, or if it is static or local, then
-    // we don't need to use the PLT - we can directly call it.
-    if (Subtarget->isTargetELF() &&
-        TM.getRelocationModel() == Reloc::PIC_ &&
-        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
-      OpFlags = X86II::MO_PLT;
-    } else if (Subtarget->isPICStyleStubAny() &&
-               !GV->isStrongDefinitionForLinker() &&
-               (!Subtarget->getTargetTriple().isMacOSX() ||
-                Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
-      // PC-relative references to external symbols should go through $stub,
-      // unless we're building with the leopard linker or later, which
-      // automatically synthesizes these stubs.
-      OpFlags = X86II::MO_DARWIN_STUB;
-    }
+    unsigned char OpFlags = Subtarget->classifyGlobalFunctionReference(GV);
+    // Ignore NonLazyBind attribute in FastISel
+    if (OpFlags == X86II::MO_GOTPCREL)
+      OpFlags = 0;
 
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
     if (Symbol)
@@ -3157,7 +3332,10 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
   // Issue CALLSEQ_END
   unsigned NumBytesForCalleeToPop =
-      computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
+      X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
+                       TM.Options.GuaranteedTailCallOpt)
+          ? NumBytes // Callee pops everything.
+          : computeBytesPoppedByCalleeForSRet(Subtarget, CC, CLI.CS);
   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
     .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
@@ -3398,17 +3576,13 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
 
   // x86-32 PIC requires a PIC base register for constant pools.
   unsigned PICBase = 0;
-  unsigned char OpFlag = 0;
-  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
-    OpFlag = X86II::MO_PIC_BASE_OFFSET;
+  unsigned char OpFlag = Subtarget->classifyLocalReference(nullptr);
+  if (OpFlag == X86II::MO_PIC_BASE_OFFSET)
     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
-  } else if (Subtarget->isPICStyleGOT()) {
-    OpFlag = X86II::MO_GOTOFF;
+  else if (OpFlag == X86II::MO_GOTOFF)
     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
-  } else if (Subtarget->isPICStyleRIPRel() &&
-             TM.getCodeModel() == CodeModel::Small) {
+  else if (Subtarget->is64Bit() && TM.getCodeModel() == CodeModel::Small)
     PICBase = X86::RIP;
-  }
 
   // Create the load from the constant pool.
   unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
@@ -3572,7 +3746,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   AM.getFullAddress(AddrOps);
 
   MachineInstr *Result = XII.foldMemoryOperandImpl(
-      *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
+      *FuncInfo.MF, *MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
       /*AllowCommute=*/true);
   if (!Result)
     return false;
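The non-temporal load handling added to X86FastEmitLoad can be read as a small decision rule: the hint only selects (V)MOVNTDQA when the load is aligned to the vector width and the required feature (SSE4.1 for the 128-bit float cases) is present; otherwise the usual aligned/unaligned move is used. A minimal standalone C++ sketch of the v4f32 case follows; the helper and the Features struct are illustrative and are not LLVM APIs.

// Sketch of the v4f32 load-opcode rule from the diff above (illustrative only).
#include <cstdio>

struct Features { bool SSE41; bool AVX; };

const char *pickV4F32Load(const Features &F, unsigned Alignment, bool NonTemporal) {
  // Non-temporal loads need 16-byte alignment and SSE4.1 to use MOVNTDQA.
  if (NonTemporal && Alignment >= 16 && F.SSE41)
    return F.AVX ? "VMOVNTDQA" : "MOVNTDQA";
  if (Alignment >= 16)
    return F.AVX ? "VMOVAPS" : "MOVAPS";
  return F.AVX ? "VMOVUPS" : "MOVUPS";
}

int main() {
  Features F{true, false};
  std::printf("%s\n", pickV4F32Load(F, 16, true)); // MOVNTDQA
  std::printf("%s\n", pickV4F32Load(F, 8, true));  // MOVUPS: misaligned, NT hint dropped
}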
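The X86SelectRet change boils down to two rules: a non-zero callee-pop amount is emitted with the RETIQ/RETIL pseudos (the names used in the diff) and must fit the ret instruction's 16-bit immediate, which is what the isUInt<16> guard checks. A small self-contained sketch, with illustrative helper names:

// Sketch of the return-selection rule from the diff above (illustrative only).
#include <cstdint>
#include <cstdio>

// Mirrors the isUInt<16>() guard: ret imm16 can only pop up to 65535 bytes.
bool fitsRetImmediate(uint64_t BytesToPop) { return BytesToPop <= 0xFFFFu; }

const char *pickRet(bool Is64Bit, uint64_t BytesToPop) {
  if (BytesToPop)
    return Is64Bit ? "RETIQ <imm>" : "RETIL <imm>";
  return Is64Bit ? "RETQ" : "RETL";
}

int main() {
  std::printf("%d %s\n", fitsRetImmediate(12), pickRet(false, 12)); // 1 RETIL <imm>
  std::printf("%d %s\n", fitsRetImmediate(0), pickRet(true, 0));    // 1 RETQ
}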
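The VCVTPS2PHrr immediate changes from 0 to 4 so that, per the new comment, rounding follows MXCSR.RC like the other FP instructions. A tiny standalone decoder of that imm8, assuming the usual x86 rounding-control encoding (bit 2 set means defer to MXCSR, bits 1:0 otherwise pick a static mode); this is an illustration, not LLVM code:

// Sketch of how the (V)CVTPS2PH imm8 is read, per the comment in the diff.
#include <cstdio>

const char *describeCvtps2phImm(unsigned Imm8) {
  if (Imm8 & 0x4)                   // bit 2 set: use MXCSR.RC
    return "use MXCSR rounding control";
  switch (Imm8 & 0x3) {             // otherwise bits 1:0 select a static mode
  case 0:  return "round to nearest even";
  case 1:  return "round down";
  case 2:  return "round up";
  default: return "round toward zero";
  }
}

int main() {
  std::printf("imm=0: %s\n", describeCvtps2phImm(0)); // old value in the diff
  std::printf("imm=4: %s\n", describeCvtps2phImm(4)); // new value in the diff
}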