Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 136
1 file changed, 94 insertions, 42 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 5dae485f4c9f..de8b40f28a86 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -68,7 +68,7 @@ public:
 
   bool fastSelectInstruction(const Instruction *I) override;
 
-  /// \brief The specified machine instr operand is a vreg, and that
+  /// The specified machine instr operand is a vreg, and that
   /// vreg is being provided by the specified load instruction.  If possible,
   /// try to fold the load as an operand to the instruction, returning true if
   /// possible.
@@ -134,6 +134,8 @@ private:
   bool X86SelectFPExt(const Instruction *I);
   bool X86SelectFPTrunc(const Instruction *I);
   bool X86SelectSIToFP(const Instruction *I);
+  bool X86SelectUIToFP(const Instruction *I);
+  bool X86SelectIntToFP(const Instruction *I, bool IsSigned);
 
   const X86InstrInfo *getInstrInfo() const {
     return Subtarget->getInstrInfo();
@@ -217,7 +219,7 @@ getX86SSEConditionCode(CmpInst::Predicate Predicate) {
   return std::make_pair(CC, NeedSwap);
 }
 
-/// \brief Adds a complex addressing mode to the given machine instr builder.
+/// Adds a complex addressing mode to the given machine instr builder.
 /// Note, this will constrain the index register.  If its not possible to
 /// constrain the given index register, then a new one will be created.  The
 /// IndexReg field of the addressing mode will be updated to match in this case.
@@ -231,7 +233,7 @@ X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
   return ::addFullAddress(MIB, AM);
 }
 
-/// \brief Check if it is possible to fold the condition from the XALU intrinsic
+/// Check if it is possible to fold the condition from the XALU intrinsic
 /// into the user. The condition code will only be updated on success.
 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                        const Value *Cond) {
@@ -1789,9 +1791,16 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
 bool X86FastISel::X86SelectShift(const Instruction *I) {
   unsigned CReg = 0, OpReg = 0;
   const TargetRegisterClass *RC = nullptr;
-  assert(!I->getType()->isIntegerTy(8) &&
-         "i8 shifts should be handled by autogenerated table");
-  if (I->getType()->isIntegerTy(16)) {
+  if (I->getType()->isIntegerTy(8)) {
+    CReg = X86::CL;
+    RC = &X86::GR8RegClass;
+    switch (I->getOpcode()) {
+    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
+    default: return false;
+    }
+  } else if (I->getType()->isIntegerTy(16)) {
     CReg = X86::CX;
     RC = &X86::GR16RegClass;
     switch (I->getOpcode()) {
@@ -1836,10 +1845,10 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
 
   // The shift instruction uses X86::CL. If we defined a super-register
   // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
-  assert(CReg != X86::CL && "CReg should be a super register of CL");
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-          TII.get(TargetOpcode::KILL), X86::CL)
-    .addReg(CReg, RegState::Kill);
+  if (CReg != X86::CL)
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::KILL), X86::CL)
+        .addReg(CReg, RegState::Kill);
 
   unsigned ResultReg = createResultReg(RC);
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
@@ -2012,7 +2021,7 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
   return true;
 }
 
-/// \brief Emit a conditional move instruction (if the are supported) to lower
+/// Emit a conditional move instruction (if the are supported) to lower
 /// the select.
 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
   // Check if the subtarget supports these instructions.
@@ -2141,7 +2150,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
   return true;
 }
 
-/// \brief Emit SSE or AVX instructions to lower the select.
+/// Emit SSE or AVX instructions to lower the select.
 ///
 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
@@ -2403,15 +2412,19 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
   return false;
 }
 
-bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+// Common code for X86SelectSIToFP and X86SelectUIToFP.
+bool X86FastISel::X86SelectIntToFP(const Instruction *I, bool IsSigned) {
   // The target-independent selection algorithm in FastISel already knows how
   // to select a SINT_TO_FP if the target is SSE but not AVX.
   // Early exit if the subtarget doesn't have AVX.
-  if (!Subtarget->hasAVX())
+  // Unsigned conversion requires avx512.
+  bool HasAVX512 = Subtarget->hasAVX512();
+  if (!Subtarget->hasAVX() || (!IsSigned && !HasAVX512))
     return false;
 
-  Type *InTy = I->getOperand(0)->getType();
-  if (!InTy->isIntegerTy(32) && !InTy->isIntegerTy(64))
+  // TODO: We could sign extend narrower types.
+  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
+  if (SrcVT != MVT::i32 && SrcVT != MVT::i64)
     return false;
 
   // Select integer to float/double conversion.
@@ -2419,20 +2432,31 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
   if (OpReg == 0)
     return false;
 
-  const TargetRegisterClass *RC = nullptr;
   unsigned Opcode;
 
+  static const uint16_t SCvtOpc[2][2][2] = {
+    { { X86::VCVTSI2SSrr,  X86::VCVTSI642SSrr },
+      { X86::VCVTSI2SDrr,  X86::VCVTSI642SDrr } },
+    { { X86::VCVTSI2SSZrr, X86::VCVTSI642SSZrr },
+      { X86::VCVTSI2SDZrr, X86::VCVTSI642SDZrr } },
+  };
+  static const uint16_t UCvtOpc[2][2] = {
+    { X86::VCVTUSI2SSZrr, X86::VCVTUSI642SSZrr },
+    { X86::VCVTUSI2SDZrr, X86::VCVTUSI642SDZrr },
+  };
+  bool Is64Bit = SrcVT == MVT::i64;
+
   if (I->getType()->isDoubleTy()) {
-    // sitofp int -> double
-    Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI642SDrr : X86::VCVTSI2SDrr;
-    RC = &X86::FR64RegClass;
+    // s/uitofp int -> double
+    Opcode = IsSigned ? SCvtOpc[HasAVX512][1][Is64Bit] : UCvtOpc[1][Is64Bit];
   } else if (I->getType()->isFloatTy()) {
-    // sitofp int -> float
-    Opcode = InTy->isIntegerTy(64) ? X86::VCVTSI642SSrr : X86::VCVTSI2SSrr;
-    RC = &X86::FR32RegClass;
+    // s/uitofp int -> float
+    Opcode = IsSigned ? SCvtOpc[HasAVX512][0][Is64Bit] : UCvtOpc[0][Is64Bit];
   } else
     return false;
 
+  MVT DstVT = TLI.getValueType(DL, I->getType()).getSimpleVT();
+  const TargetRegisterClass *RC = TLI.getRegClassFor(DstVT);
   unsigned ImplicitDefReg = createResultReg(RC);
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
           TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
@@ -2442,6 +2466,14 @@ bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
   return true;
 }
 
+bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
+  return X86SelectIntToFP(I, /*IsSigned*/true);
+}
+
+bool X86FastISel::X86SelectUIToFP(const Instruction *I) {
+  return X86SelectIntToFP(I, /*IsSigned*/false);
+}
+
 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
                                           unsigned TargetOpc,
@@ -2675,7 +2707,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
             (FrameReg == X86::EBP && VT == MVT::i32)) &&
            "Invalid Frame Register!");
 
-    // Always make a copy of the frame register to to a vreg first, so that we
+    // Always make a copy of the frame register to a vreg first, so that we
    // never directly reference the frame register (the TwoAddressInstruction-
    // Pass doesn't like that).
     unsigned SrcReg = createResultReg(RC);
@@ -2726,7 +2758,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
       return false;
 
-    return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
+    return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
   }
   case Intrinsic::memset: {
     const MemSetInst *MSI = cast<MemSetInst>(II);
@@ -2741,7 +2773,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     if (MSI->getDestAddressSpace() > 255)
       return false;
 
-    return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
+    return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
   }
   case Intrinsic::stackprotector: {
     // Emit code to store the stack guard onto the stack.
@@ -2792,17 +2824,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
     // is not generated by FastISel yet.
     // FIXME: Update this code once tablegen can handle it.
-    static const uint16_t SqrtOpc[2][2] = {
-      {X86::SQRTSSr, X86::VSQRTSSr},
-      {X86::SQRTSDr, X86::VSQRTSDr}
+    static const uint16_t SqrtOpc[3][2] = {
+      { X86::SQRTSSr,   X86::SQRTSDr },
+      { X86::VSQRTSSr,  X86::VSQRTSDr },
+      { X86::VSQRTSSZr, X86::VSQRTSDZr },
     };
-    bool HasAVX = Subtarget->hasAVX();
+    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
+                        Subtarget->hasAVX()    ? 1 :
+                                                 0;
     unsigned Opc;
-    const TargetRegisterClass *RC;
     switch (VT.SimpleTy) {
     default: return false;
-    case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
-    case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+    case MVT::f32: Opc = SqrtOpc[AVXLevel][0]; break;
+    case MVT::f64: Opc = SqrtOpc[AVXLevel][1]; break;
     }
 
     const Value *SrcVal = II->getArgOperand(0);
@@ -2811,8 +2845,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     if (SrcReg == 0)
       return false;
 
+    const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
     unsigned ImplicitDefReg = 0;
-    if (HasAVX) {
+    if (AVXLevel > 0) {
       ImplicitDefReg = createResultReg(RC);
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
               TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
@@ -2989,18 +3024,22 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     if (!isTypeLegal(RetTy, VT))
       return false;
 
-    static const uint16_t CvtOpc[2][2][2] = {
-      { { X86::CVTTSS2SIrr,   X86::VCVTTSS2SIrr   },
-        { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
-      { { X86::CVTTSD2SIrr,   X86::VCVTTSD2SIrr   },
-        { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
+    static const uint16_t CvtOpc[3][2][2] = {
+      { { X86::CVTTSS2SIrr,   X86::CVTTSS2SI64rr },
+        { X86::CVTTSD2SIrr,   X86::CVTTSD2SI64rr } },
+      { { X86::VCVTTSS2SIrr,  X86::VCVTTSS2SI64rr },
+        { X86::VCVTTSD2SIrr,  X86::VCVTTSD2SI64rr } },
+      { { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SI64Zrr },
+        { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SI64Zrr } },
     };
-    bool HasAVX = Subtarget->hasAVX();
+    unsigned AVXLevel = Subtarget->hasAVX512() ? 2 :
+                        Subtarget->hasAVX()    ? 1 :
+                                                 0;
     unsigned Opc;
     switch (VT.SimpleTy) {
     default: llvm_unreachable("Unexpected result type.");
-    case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
-    case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
+    case MVT::i32: Opc = CvtOpc[AVXLevel][IsInputDouble][0]; break;
+    case MVT::i64: Opc = CvtOpc[AVXLevel][IsInputDouble][1]; break;
     }
 
     // Check if we can fold insertelement instructions into the convert.
@@ -3167,11 +3206,22 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
       CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
   const Function *CalledFn = CI ? CI->getCalledFunction() : nullptr;
 
+  // Call / invoke instructions with NoCfCheck attribute require special
+  // handling.
+  const auto *II =
+      CLI.CS ? dyn_cast<InvokeInst>(CLI.CS->getInstruction()) : nullptr;
+  if ((CI && CI->doesNoCfCheck()) || (II && II->doesNoCfCheck()))
+    return false;
+
   // Functions with no_caller_saved_registers that need special handling.
   if ((CI && CI->hasFnAttr("no_caller_saved_registers")) ||
       (CalledFn && CalledFn->hasFnAttribute("no_caller_saved_registers")))
     return false;
 
+  // Functions using retpoline should use SDISel for calls.
+  if (Subtarget->useRetpoline())
+    return false;
+
   // Handle only C, fastcc, and webkit_js calling conventions for now.
   switch (CC) {
   default: return false;
@@ -3598,6 +3648,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
     return X86SelectFPTrunc(I);
   case Instruction::SIToFP:
     return X86SelectSIToFP(I);
+  case Instruction::UIToFP:
+    return X86SelectUIToFP(I);
   case Instruction::IntToPtr: // Deliberate fall-through.
   case Instruction::PtrToInt: {
     EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
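
A minimal standalone C++ sketch of the opcode-table indexing these hunks introduce (not part of the commit): the new tables are selected by an SSE/AVX/AVX-512 "level" row and an element-type column, replacing the old [type][HasAVX] layout. The FakeOpc enum, avxLevel helper, and numeric values below are placeholders standing in for the real X86:: opcode enums and Subtarget feature checks.

#include <cstdint>
#include <cstdio>

// Placeholder opcode values; in LLVM these are the X86::SQRTSSr,
// X86::VSQRTSDZr, ... enum constants referenced in the diff above.
enum FakeOpc : std::uint16_t {
  SQRTSSr, SQRTSDr,        // SSE scalar sqrt (f32, f64)
  VSQRTSSr, VSQRTSDr,      // AVX scalar sqrt
  VSQRTSSZr, VSQRTSDZr     // AVX-512 scalar sqrt
};

// New table layout from the diff: row = feature level, column = element type.
static const std::uint16_t SqrtOpc[3][2] = {
  { SQRTSSr,   SQRTSDr },    // level 0: plain SSE
  { VSQRTSSr,  VSQRTSDr },   // level 1: AVX
  { VSQRTSSZr, VSQRTSDZr },  // level 2: AVX-512
};

// Mirrors the AVXLevel ternary in the diff: AVX-512 beats AVX beats SSE.
unsigned avxLevel(bool hasAVX, bool hasAVX512) {
  return hasAVX512 ? 2 : hasAVX ? 1 : 0;
}

int main() {
  bool isF64 = true;  // index 0 selects the f32 column, 1 the f64 column
  unsigned opc = SqrtOpc[avxLevel(/*hasAVX=*/true, /*hasAVX512=*/false)][isF64];
  std::printf("selected opcode: %u (VSQRTSDr on an AVX-only target)\n", opc);
  return 0;
}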